(* CS153/hw6/bin/backend.ml *)
(* ll ir compilation -------------------------------------------------------- *)
open Ll
open Llutil
open X86
module Platform = Util.Platform
(* Backend "Layout" compilation strategy ------------------------------------ *)
(* allocated llvmlite function bodies --------------------------------------- *)
(* Allocated LLVMlite function bodies: LL code in which uids and labels
   have been resolved to concrete x86 storage locations. *)
module Alloc = struct

  (* X86 locations a uid may be assigned to. *)
  type loc =
    | LVoid                (* no storage *)
    | LReg of X86.reg      (* x86 register *)
    | LStk of int          (* a stack slot offset from %rbp (not a byte offset!) *)
    | LLbl of X86.lbl      (* an assembler label *)

  type operand =
    | Null
    | Const of int64
    | Gid of X86.lbl
    | Loc of loc

  (* LL instructions with locations substituted for uids.  [ILbl] marks a
     basic-block label; [PMov] is a parallel move used for incoming
     function arguments. *)
  type insn =
    | ILbl of loc
    | PMov of (loc * ty * operand) list
    | Binop of loc * bop * ty * operand * operand
    | Alloca of loc * ty
    | Load of loc * ty * operand
    | Store of ty * operand * operand
    | Icmp of loc * Ll.cnd * ty * operand * operand
    | Call of loc * ty * operand * (ty * operand) list
    | Bitcast of loc * ty * operand * ty
    | Gep of loc * ty * operand * operand list
    | Ret of ty * operand option
    | Br of loc
    | Cbr of operand * loc * loc

  (* Debug rendering of a location. *)
  let str_loc = function
    | LVoid -> "LVoid"
    | LReg r -> X86.string_of_reg r
    | LStk n -> Printf.sprintf "LStk %d" n
    | LLbl l -> l

  (* Debug rendering of an operand.
     Fixed: the [Const] case previously bound the value but printed the
     uninformative "Const _"; now the constant itself is shown. *)
  let str_operand = function
    | Null -> "null"
    | Const x -> Printf.sprintf "Const %Ld" x
    | Gid l -> l
    | Loc l -> str_loc l

  module LocSet = Set.Make (struct type t = loc let compare = compare end)
  module UidSet = Datastructures.UidS

  (* A function body: each instruction paired with the set of locations
     that are live at that program point. *)
  type fbody = (insn * LocSet.t) list

  (* [map_operand f g] translates an LL operand, resolving uids with [f]
     and mangling global identifiers with [g]. *)
  let map_operand f g : Ll.operand -> operand = function
    | Null -> Null
    | Const i -> Const i
    | Gid x -> Gid (g x)
    | Id u -> Loc (f u)

  (* Translate one LL instruction; [f] resolves the defined uid (ignored
     for stores, which define nothing). *)
  let map_insn f g : uid * Ll.insn -> insn =
    let mo = map_operand f g in function
    | x, Binop (b,t,o,o') -> Binop (f x, b, t, mo o, mo o')
    | x, Alloca t -> Alloca (f x, t)
    | x, Load (t,o) -> Load (f x, t, mo o)
    | _, Store (t,o,o') -> Store (t, mo o, mo o')
    | x, Icmp (c,t,o,o') -> Icmp (f x, c, t, mo o, mo o')
    | x, Call (t,o,args) -> Call (f x, t, mo o, List.map (fun (t,o) -> t, mo o) args)
    | x, Bitcast (t,o,t') -> Bitcast (f x, t, mo o, t')
    | x, Gep (t,o,is) -> Gep (f x, t, mo o, List.map mo is)

  (* Translate a block terminator; branch targets are resolved with [f]. *)
  let map_terminator f g : uid * Ll.terminator -> insn =
    let mo = map_operand f g in function
    | _, Ret (t,None) -> Ret (t, None)
    | _, Ret (t,Some o) -> Ret (t, Some (mo o))
    | _, Br l -> Br (f l)
    | _, Cbr (o,l,l') -> Cbr (mo o, f l, f l')

  (* Translate a set of uids into the corresponding set of locations. *)
  let map_lset f (s:UidSet.t) : LocSet.t =
    UidSet.fold (fun x t -> LocSet.add (f x) t) s LocSet.empty

  (* Translate one block: each instruction, then the terminator, each
     paired with its live-in location set. *)
  let of_block
      (f:Ll.uid -> loc)
      (g:Ll.gid -> X86.lbl)
      (live_in:uid -> UidSet.t)
      (b:Ll.block) : fbody =
    List.map (fun (u,i) ->
        (* Uncomment this to enable verbose debugging output...
           Platform.verb @@ Printf.sprintf
             " * of_block: %s live_in = %s\n" u (UidSet.to_string (live_in u)); *)
        map_insn f g (u,i), map_lset f @@ live_in u) b.insns
    @ let x,t = b.term in
      [map_terminator f g (x,t), map_lset f @@ live_in x]

  (* A labeled block is its label marker followed by the block body. *)
  let of_lbl_block f g live_in (l,b:Ll.lbl * Ll.block) : fbody =
    (ILbl (f l), map_lset f @@ live_in l) :: of_block f g live_in b

  (* Translate a whole control-flow graph: entry block first, then the
     labeled blocks in their listed order. *)
  let of_cfg
      (f : Ll.uid -> loc)
      (g : Ll.gid -> X86.lbl)
      (live_in : uid -> UidSet.t)
      (e, bs : Ll.cfg) : fbody =
    List.(flatten @@ of_block f g live_in e :: map (of_lbl_block f g live_in) bs)
end
module LocSet = Alloc.LocSet
module UidSet = Alloc.UidSet

(* Render a set of locations as a space-separated string (debug output). *)
let str_locset (lo:LocSet.t) : string =
  LocSet.elements lo
  |> List.map Alloc.str_loc
  |> String.concat " "
(* streams of x86 instructions ---------------------------------------------- *)

(* An element of an instruction stream: a machine instruction, or a label
   paired with a flag indicating whether it is a global symbol. *)
type x86elt =
  | I of X86.ins
  | L of (X86.lbl * bool)

(* Streams are accumulated in reverse: the head is the most recently
   emitted element. *)
type x86stream = x86elt list

(* Lift a list of instructions into a (reversed) stream. *)
let lift (is : X86.ins list) : x86stream =
  List.rev_map (fun i -> I i) is

(* Snoc-style combinators: [s >@ s'] emits stream [s'] after [s];
   [s >:: e] emits element [e] after [s]. *)
let ( >@ ) x y = y @ x
let ( >:: ) x y = y :: x

(* Rebuild an x86 program from a finished stream, grouping each run of
   instructions under the label that precedes it in emission order.
   Raises Failure if instructions appear before any label. *)
let prog_of_x86stream : x86stream -> X86.prog =
  let rec go prog pending = function
    | [] ->
      (match pending with
       | [] -> prog
       | _ -> failwith "stream has no initial label")
    | I i :: rest -> go prog (i :: pending) rest
    | L (lbl, global) :: rest ->
      go ({ lbl; global; asm = Text pending } :: prog) [] rest
  in
  fun s -> go [] [] s
(* locals and layout -------------------------------------------------------- *)
(* The layout for this version of the backend is slightly more complex
than we saw earlier. It consists of
- uid_loc a function that maps LL uids to their target x86 locations
- the number of bytes to be allocated on the stack due to spills
*)
type layout =
{ uid_loc : uid -> Alloc.loc
; spill_bytes : int
}
(* The liveness analysis will return a record, with fields live_in and live_out,
which are functions from uid to the set of variables that are live in (or
live out) at a given program point denoted by the uid.
NOTE(review): both layout strategies below compute spill_bytes as
8 * (number of spill slots); each slot holds one quad. *)
type liveness = Liveness.liveness
(* The set of all caller-save registers available for register allocation.
   Rbp, Rsp, and Rip are excluded since they have special meanings. *)
let caller_save : LocSet.t =
  LocSet.of_list
    (List.map (fun r -> Alloc.LReg r)
       [ Rdi; Rsi; Rdx; Rcx; R09; R08; Rax; R10; R11 ])

(* Callee-save registers.  The current backend does not use them except
   in the special case of through registers: R15 serves as a scratch
   function pointer, and is explicitly saved/restored around such uses. *)
let callee_save : LocSet.t =
  LocSet.of_list
    (List.map (fun r -> Alloc.LReg r)
       [ Rbx; R12; R13; R14; R15 ])
(* System V AMD64 convention: the first six arguments travel in registers,
   the rest on the stack.
   Fixed: the catch-all case previously bound an unused variable [n]. *)
let arg_reg : int -> X86.reg option = function
  | 0 -> Some Rdi
  | 1 -> Some Rsi
  | 2 -> Some Rdx
  | 3 -> Some Rcx
  | 4 -> Some R08
  | 5 -> Some R09
  | _ -> None

(* Location of the [n]th incoming argument as seen by the callee.
   Stack-passed arguments begin at [%rbp + 16] (past the saved %rbp and
   the return address); since LStk slots are 8-byte indices from %rbp,
   argument n (n >= 6) lives in slot n - 4 (e.g. n = 6 -> slot 2). *)
let arg_loc (n:int) : Alloc.loc =
  match arg_reg n with
  | Some r -> Alloc.LReg r
  | None -> Alloc.LStk (n-4)
(* Allocate a function declaration: prepend a parallel move that shuffles
   the incoming arguments (per arg_loc) into their layout-chosen
   destinations, then translate the CFG body. *)
let alloc_fdecl (layout:layout) (liveness:liveness) (f:Ll.fdecl) : Alloc.fbody =
  let dsts = List.map layout.uid_loc f.f_param in
  let movs =
    List.combine (fst f.f_ty) dsts
    |> List.mapi (fun i (t, d) -> d, t, Alloc.Loc (arg_loc i))
  in
  (Alloc.PMov movs, LocSet.of_list dsts)
  :: Alloc.of_cfg layout.uid_loc Platform.mangle liveness.live_in f.f_cfg
(* compiling operands ------------------------------------------------------ *)

(* Translate an allocated operand into a concrete x86 operand.  Stack
   slots become byte offsets from %rbp; a uid that was allocated no
   storage must never be compiled as an operand. *)
let compile_operand : Alloc.operand -> X86.operand =
  let open Alloc in
  function
  | Null -> Asm.(Imm (Lit 0L))
  | Const i -> Asm.(Imm (Lit i))
  | Gid l -> Asm.(~$$l)
  | Loc l ->
    begin match l with
    | LVoid -> failwith "compiling uid without location"
    | LReg r -> Asm.(~%r)
    | LStk i -> Asm.(Ind3 (Lit (Int64.of_int (i * 8)), Rbp))
    | LLbl lbl -> Asm.(Ind1 (Lbl lbl))
    end
(* [emit_mov src dst] emits a 64-bit move from [src] to [dst], avoiding
   illegal memory-to-memory moves and materializing label immediates via
   RIP-relative Leaq.  NOTE(review): the label-to-memory case clobbers
   %rax, so callers must not hold a live value there. *)
let emit_mov (src:X86.operand) (dst:X86.operand) : x86stream =
let open X86 in match src, dst with
(* A label immediate must be loaded with Leaq (RIP-relative). *)
| Imm (Lbl l), Reg _ -> lift Asm.[ Leaq, [Ind3 (Lbl l, Rip); dst ] ]
(* Label to memory: stage the address through Rax. *)
| Imm (Lbl l), _ -> lift Asm.[ Leaq, [Ind3 (Lbl l, Rip); ~%Rax ]
; Movq, [~%Rax; dst ] ]
(* Moving a register onto itself is a no-op. *)
| Reg r, Reg r' when r = r' -> []
| Reg _, _ -> lift Asm.[ Movq, [src; dst] ]
| _, Reg _ -> lift Asm.[ Movq, [src; dst] ]
(* x86 forbids two memory operands in one Movq: use push/pop instead. *)
| _, _ -> lift Asm.[ Pushq, [src]; Popq, [dst] ]
(* compiling parallel moves ------------------------------------------------- *)
(* Compiles a parallel move instruction into a sequence of moves, pushing and
popping values to the stack when there are not enough registers to directly
shuffle the sources to the targets. It uses liveness information to simply
not move dead operands.
The PMov instruction is used at the beginning of a function declaration to
move the incoming arguments to their destination uids/registers.
compile_pmov is directly used when compiling a function call to move
the arguments.
Inputs:
live - the liveness information
ol - a list of triples of the form (dest, type, src)
Note: the destinations are assumed to be distinct, but might also be sources
Outputs:
an x86 instruction stream that (efficiently) moves each src to its
destination
The algorithm works like this:
1. Filter out the triples in which srcs are dead or already in the right
place. (none of those need to be moved)
Then do a recursive algorithm that processes the remaining list of triples:
2. See if there are triples of the form (dest, type, src) where dest
is not also source in some other triple. For each such triple we can
directly move the src to its dest (which won't "clobber" some other
source). These are the "ready" moves.
3. If there are no "ready" moves to make (i.e. every destination is also
a source of some other triple), we pick the first triple, push its
src to the stack, recursively process the remaining list, and then
pop the stack into the destination.
ol ol' 2 2 3 2
x <- y x <- y w <- x MOV x, w MOV x, w MOV x, w
z <- z ==> ==> ------ ==> -------- ==> PUSH y ==> PUSH y
w <- x w <- x x <- y x <- y y <- z MOV z, y
y <- z y <- z y <- z y <- z POP x POP x
*)
(* Compile a parallel move into a sequence of moves/pushes/pops.
   [live] is the set of destination locations still live after the move;
   moves to dead destinations are dropped.  See the algorithm
   description in the comment above. *)
let compile_pmov live (ol:(Alloc.loc * Ll.ty * Alloc.operand) list) : x86stream =
let open Alloc in
let module OpSet = Set.Make (struct type t = operand let compare = compare end) in
(* Filter the moves to keep the needed ones:
The operands that actually need to be moved are those that are
- not in the right location already, and
- still live *)
let ol' = List.filter (fun (x, _, o) -> Loc x <> o && LocSet.mem x live) ol in
let rec loop outstream ol =
(* Find the _set_ of all sources that still need to be moved. *)
let srcs = List.fold_left (fun s (_, _, o) -> OpSet.add o s) OpSet.empty ol in
(* Partition into moves whose destination is still needed as a source
   (unsafe to overwrite yet) and "ready" moves (safe to emit now). *)
match List.partition (fun (x, _, o) -> OpSet.mem (Loc x) srcs) ol with
| [], [] -> outstream
(* when no moves are ready to be emitted, push onto stack;
   the pop into the destination happens after the recursive call, once
   the cycle has been broken *)
| (x,_,o)::ol', [] ->
let os = loop (outstream >:: I Asm.( Pushq, [compile_operand o]))
ol' in
os >:: I Asm.( Popq, [compile_operand (Loc x)] )
(* when some destination of a move is not also a source *)
| ol', ready ->
loop (List.fold_left (fun os (x,_,o) ->
os >@
emit_mov (compile_operand o) (compile_operand (Loc x))) outstream ready)
ol'
in
loop [] ol'
(* compiling call ---------------------------------------------------------- *)
(* Emit a call: the first six arguments are moved into the convention's
   registers via compile_pmov; the rest are pushed on the stack and the
   pushed space is reclaimed after the call.  [fo] is the (already
   compiled-to-operand) callee. *)
let compile_call live (fo:Alloc.operand) (os:(ty * Alloc.operand) list) : x86stream =
(* Split the arguments: register moves accumulate in [oreg], stack
   arguments in [ostk].  [ostk] ends with the LAST argument at its head,
   so mapping Pushq over it pushes later arguments first, leaving the
   7th argument nearest %rsp as the convention requires. *)
let oreg, ostk, _ =
List.fold_left (fun (oreg, ostk, i) (t, o) ->
match arg_reg i with
| Some r -> (Alloc.LReg r, t, o)::oreg, ostk, i+1
| None -> oreg, o::ostk, i+1
) ([], [], 0) os in
let nstack = List.length ostk in
(* Mark the argument registers as live so compile_pmov does not filter
   out any of these moves as dead. *)
let live' = LocSet.of_list @@ List.map (fun (r,_,_) -> r) oreg in
lift (List.map (fun o -> Pushq, [compile_operand o]) ostk)
>@ compile_pmov (LocSet.union live live') oreg
>:: I Asm.( Callq, [compile_operand fo] )
(* Deallocate the stack-passed arguments after the call returns. *)
>@ lift (if nstack <= 0 then []
else Asm.[ Addq, [~$(nstack * 8); ~%Rsp] ])
(* compiling getelementptr (gep) ------------------------------------------- *)

(* Size in bytes of a value of LL type [t].  This backend targets a
   64-bit-only subset of x86, so every scalar occupies 8 bytes; Void,
   I8, and function types carry no runtime storage.  Named types are
   resolved through [tdecls]. *)
let rec size_ty tdecls t : int =
  match t with
  | Void | I8 | Fun _ -> 0
  | I1 | I64 | Ptr _ -> 8
  | Struct ts -> List.fold_left (fun acc u -> acc + size_ty tdecls u) 0 ts
  | Array (n, u) -> n * size_ty tdecls u
  | Namedt id -> size_ty tdecls (List.assoc id tdecls)
(* Compute the byte offset of the [n]th element of a region of memory
   whose element types are given by [ts]; also return that element's
   type.  Raises Failure if [n] is out of range. *)
let index_into tdecls (ts:ty list) (n:int) : int * ty =
  let rec go acc ts n =
    match ts, n with
    | u :: _, 0 -> acc, u
    | u :: us, k -> go (acc + size_ty tdecls u) us (k - 1)
    | _ -> failwith "index_into encountered bogus index"
  in
  go 0 ts n

(* Wrap an OCaml int as an x86 literal immediate operand. *)
let imm_of_int (n:int) = Imm (Lit (Int64.of_int n))
(* Compile a getelementptr computation, leaving the resulting address in
   %rax.  %rcx is used as scratch for dynamically-indexed array steps.
   The outer pointer is treated as a one-element array so the first path
   index scales by the pointee's size. *)
let compile_getelementptr tdecls (t:Ll.ty) (o:Alloc.operand)
(path: Alloc.operand list) : x86stream =
let rec loop ty path (code : x86stream) =
match (ty, path) with
| (_, []) -> code
(* Struct field: the static offset is the sum of the sizes of the
   preceding fields, computed by index_into. *)
| (Struct ts, Alloc.Const n::rest) ->
let (offset, u) = index_into tdecls ts (Int64.to_int n) in
loop u rest @@ (
code >:: I Asm.(Addq, [~$offset; ~%Rax])
)
| (Array(_, u), Alloc.Const n::rest) ->
(* Statically calculate the offset *)
let offset = (size_ty tdecls u) * (Int64.to_int n) in
loop u rest @@ (
code >:: I Asm.(Addq, [~$offset; ~%Rax])
)
(* Dynamic array index: stash the base in %rcx, scale the index in
   %rax by the element size, then add the base back. *)
| (Array(_, u), offset_op::rest) ->
loop u rest @@ (
code >@
([I Asm.(Movq, [~%Rax; ~%Rcx])] >@
(emit_mov (compile_operand offset_op) (Reg Rax)) >@
[I Asm.(Imulq, [imm_of_int @@ size_ty tdecls u; ~%Rax])] >@
[I Asm.(Addq, [~%Rcx; ~%Rax])]
)
)
(* Unfold named types before indexing into them. *)
| (Namedt t, p) -> loop (List.assoc t tdecls) p code
| _ -> failwith "compile_gep encountered unsupported getelementptr data" in
match t with
| Ptr t -> loop (Array(0, t)) path (emit_mov (compile_operand o) (Reg Rax))
| _ -> failwith "compile_gep got incorrect parameters"
(* compiling instructions within function bodies ---------------------------- *)
(* Translate an allocated function body into an x86 instruction stream.
   %rax and %rcx are used freely as scratch registers throughout, which
   is why the register allocators must never hand them out. *)
let compile_fbody tdecls (af:Alloc.fbody) : x86stream =
let rec loop (af:Alloc.fbody) (outstream:x86stream) : x86stream =
(* LL binary operator -> x86 opcode. *)
let cb = function
| Ll.Add -> Addq | Ll.Sub -> Subq | Ll.Mul -> Imulq
| Ll.Shl -> Shlq | Ll.Lshr -> Shrq | Ll.Ashr -> Sarq
| Ll.And -> Andq | Ll.Or -> Orq | Ll.Xor -> Xorq in
(* LL comparison condition -> x86 set-on-condition instruction. *)
let cc = function
| Ll.Eq -> Set Eq | Ll.Ne -> Set Neq | Ll.Slt -> Set Lt
| Ll.Sle -> Set Le | Ll.Sgt -> Set Gt | Ll.Sge -> Set Ge in
let co = compile_operand in
let open Alloc in
match af with
| [] -> outstream
(* Basic-block label: emitted as a local (non-global) label. *)
| (ILbl (LLbl l), _)::rest ->
loop rest @@
(outstream
>:: L (l, false) )
(* Parallel move of incoming arguments to their destinations. *)
| (PMov ol, live)::rest ->
loop rest @@
( outstream
>@ compile_pmov live ol )
(* Instructions whose result was allocated no storage are dead: skip. *)
| (Icmp (LVoid, _,_,_,_), _)::rest -> loop rest outstream
| (Binop (LVoid, _,_,_,_), _)::rest -> loop rest outstream
| (Alloca (LVoid, _), _)::rest -> loop rest outstream
| (Bitcast (LVoid, _,_,_), _)::rest -> loop rest outstream
| (Load (LVoid, _,_), _)::rest -> loop rest outstream
| (Gep (LVoid, _,_,_), _)::rest -> loop rest outstream
(* Icmp with its first operand already in a register: compare directly,
   set the destination's low byte, then mask it down to 0/1. *)
| (Icmp (x, c,_,Loc (LReg o),o'), _)::rest ->
loop rest @@
( outstream
>@ lift Asm.[ Cmpq, [co o'; ~%o]
; cc c, [co (Loc x)]
; Andq, [~$1; co (Loc x)] ] )
(* General Icmp: stage the first operand through %rax. *)
| (Icmp (x, c,_,o,o'), _)::rest ->
loop rest @@
( outstream
>@ emit_mov (co o) (Reg Rax)
>@ lift Asm.[ Cmpq, [co o'; ~%Rax]
; cc c, [co (Loc x)]
; Andq, [~$1; co (Loc x)] ] )
(* Shift instructions must use Rcx or Immediate as second arg *)
| (Binop (x, bop,_,o,o'), _)::rest
when (bop = Shl || bop = Lshr || bop = Ashr)
->
loop rest @@
( outstream
>@ emit_mov (co o) (Reg Rax)
>@ emit_mov (co o') (Reg Rcx)
>@ lift Asm.[ cb bop, [~%Rcx; ~%Rax]
; Movq, [~%Rax; co (Loc x)] ] )
(* Commutative binop whose destination register is also the second
   operand: one in-place instruction suffices (r := o op r). *)
| (Binop (LReg r, bop,_,o,o'), _)::rest
when Loc (LReg r) = o' &&
(bop = Add || bop = Mul || bop = And || bop = Or || bop = Xor) ->
loop rest @@
( outstream
>:: I Asm.( cb bop, [co o; ~%r] ) )
(* Destination register distinct from the second operand: compute in
   place without a scratch register. *)
| (Binop (LReg r, b,_,o,o'), _)::rest when Loc (LReg r) <> o' ->
loop rest @@
( outstream
>@ emit_mov (co o) (Reg r)
>:: I Asm.( cb b, [co o'; ~%r] ) )
(* General binop: compute in %rax, then store to the destination. *)
| (Binop (x, b,_,o,o'), _)::rest ->
loop rest @@
( outstream
>@ emit_mov (co o) (Reg Rax)
>@ lift Asm.[ cb b, [co o'; ~%Rax]
; Movq, [~%Rax; co (Loc x)] ] )
(* Alloca: grow the stack by the type's size; the result is the new
   %rsp value. *)
| (Alloca (x, at), _)::rest ->
loop rest @@
( outstream
>@ lift Asm.[ Subq, [~$(size_ty tdecls at); ~%Rsp]
; Movq, [~%Rsp; co (Loc x)] ] )
(* Bitcast is a plain move at this level of abstraction. *)
| (Bitcast (x, _,o,_), _)::rest ->
loop rest @@
( outstream
>@ emit_mov (co o) (Reg Rax)
>:: I Asm.( Movq, [~%Rax; co (Loc x)] ) )
(* Load with both the address and destination in registers: a single
   memory access. *)
| (Load (LReg x, _, Loc (LReg src)), _)::rest ->
loop rest @@
( outstream
>:: I Asm.( Movq, [Ind2 src; ~%x] ) )
(* General load: stage the address in %rax, dereference, then store. *)
| (Load (x, _, src), _)::rest ->
loop rest @@
( outstream
>@ emit_mov (co src) (Reg Rax)
>@ lift Asm.[ Movq, [Ind2 Rax; ~%Rax]
; Movq, [~%Rax; co (Loc x)] ] )
(* Store with value and address both in registers. *)
| (Store (_,Loc (LReg src),Loc (LReg dst)), _)::rest ->
loop rest @@
( outstream
>:: I Asm.( Movq, [~%src; Ind2 dst] ) )
(* General store: value through %rax, address through %rcx. *)
| (Store (_,src,dst), _)::rest ->
loop rest @@
( outstream
>@ emit_mov (co src) (Reg Rax)
>@ emit_mov (co dst) (Reg Rcx)
>:: I Asm.( Movq, [~%Rax; Ind2 Rcx] ) )
(* Gep leaves the computed address in %rax; store it to the result. *)
| (Gep (x, at,o,os), _)::rest ->
loop rest @@
( outstream
>@ compile_getelementptr tdecls at o os
>:: I Asm.( Movq, [~%Rax; co (Loc x)] ) )
| (Call (x, t,fo,os), live)::rest ->
(* Corner: fo is Loc (LReg r) and r is used in the calling conventions.
Then we use R15 to hold the function pointer, saving and restoring it,
since it is a callee-save register. *)
let fptr_op, init_fp, restore_fp =
begin match fo with
| Loc (LReg (Rdi | Rsi | Rdx | Rcx | R08 | R09)) ->
Loc (LReg R15),
[I Asm.(Pushq, [~%R15])] >@ (emit_mov (co fo) (Reg R15)),
[I Asm.(Popq, [~%R15])]
| _ -> fo, [], []
end
in
let () = Platform.verb @@ Printf.sprintf "call: %s live = %s\n"
(str_operand fo) (str_locset live)
in
(* Caller-save registers live across the call must be preserved;
   the destination itself is excluded since it is about to be
   overwritten by the result. *)
let save = LocSet.(elements @@ inter (remove x live) caller_save) in
loop rest @@
( outstream
>@ init_fp
>@ lift (List.rev_map (fun x -> Pushq, [co (Loc x)]) save)
>@ compile_call live fptr_op os
>@ lift (List.map (fun x -> Popq, [co (Loc x)]) save)
>@ restore_fp
>@ (if t = Ll.Void || x = LVoid then []
else lift Asm.[ Movq, [~%Rax; co (Loc x)] ]) )
(* Returns: tear down the frame; a result (if any) travels in %rax. *)
| (Ret (_,None), _)::rest ->
loop rest @@
( outstream
>@ lift Asm.[ Movq, [~%Rbp; ~%Rsp]
; Popq, [~%Rbp]
; Retq, [] ] )
| (Ret (_,Some o), _)::rest ->
loop rest @@
( outstream
>@ emit_mov (co o) (Reg Rax)
>@ lift Asm.[ Movq, [~%Rbp; ~%Rsp]
; Popq, [~%Rbp]
; Retq, [] ] )
| (Br (LLbl l), _)::rest ->
loop rest @@
( outstream
>:: I Asm.( Jmp, [~$$l] ) )
(* Conditional branch on a compile-time constant folds to a jump. *)
| (Cbr (Const i,(LLbl l1),(LLbl l2)), _)::rest ->
loop rest @@
( outstream
>:: (if i <> 0L
then I Asm.( Jmp, [~$$l1] )
else I Asm.( Jmp, [~$$l2] ) ) )
| (Cbr (o,(LLbl l1),(LLbl l2)), _)::rest ->
loop rest @@
( outstream
>@ lift Asm.[ Cmpq, [~$0; co o]
; J Neq, [~$$l1]
; Jmp, [~$$l2] ] )
(* Any other shape (e.g. a branch target that is not an LLbl) indicates
   a backend bug. *)
| _ -> failwith "codegen failed to find instruction"
in
loop af []
(* compile_fdecl ------------------------------------------------------------ *)
(* Processes a function declaration by processing each of the subcomponents
in turn:
- first fold over the function parameters
- then fold over the entry block
- then fold over the subsequent blocks in their listed order
To fold over a block:
- fold over the label
- then the instructions (in block order)
- then the terminator
See the examples no_reg_layout and greedy_layout for how to use this function.
*)
(* Fold over a function declaration in program order: parameters first,
   then the entry block, then the labeled blocks as listed.  Within a
   block: label (for labeled blocks), instructions, then terminator. *)
let fold_fdecl (f_param : 'a -> uid * Ll.ty -> 'a)
    (f_lbl : 'a -> lbl -> 'a)
    (f_insn : 'a -> uid * Ll.insn -> 'a)
    (f_term : 'a -> uid * Ll.terminator -> 'a)
    (init:'a) (f:Ll.fdecl) : 'a =
  let entry, bs = f.f_cfg in
  (* Fold over one block: its instructions, then its terminator. *)
  let over_block a {insns; term} =
    f_term (List.fold_left f_insn a insns) term
  in
  (* A labeled block folds its label before its body. *)
  let over_labeled a (l, blk) = over_block (f_lbl a l) blk in
  let after_params =
    List.fold_left f_param init (List.combine f.f_param (fst f.f_ty))
  in
  List.fold_left over_labeled (over_block after_params entry) bs
(* no layout ---------------------------------------------------------------- *)
(* This register allocation strategy puts all uids into stack
   slots. It does not use liveness information. *)

(* Does this instruction bind a value to its uid?  Void calls and stores
   produce no result and therefore need no storage. *)
let insn_assigns : Ll.insn -> bool = function
  | Ll.Call (Ll.Void, _, _) -> false
  | Ll.Store _ -> false
  | _ -> true
(* Spill-everything layout: every defined uid gets its own stack slot
   below %rbp; labels are mangled; non-assigning instructions get LVoid. *)
let no_reg_layout (f:Ll.fdecl) (_:liveness) : layout =
  let assoc, n_stk =
    fold_fdecl
      (fun (lo, n) (x, _) -> (x, Alloc.LStk (-(n + 1))) :: lo, n + 1)
      (fun (lo, n) l -> (l, Alloc.LLbl (Platform.mangle l)) :: lo, n)
      (fun (lo, n) (x, i) ->
         if insn_assigns i
         then (x, Alloc.LStk (-(n + 1))) :: lo, n + 1
         else (x, Alloc.LVoid) :: lo, n)
      (fun acc _ -> acc)
      ([], 0) f
  in
  { uid_loc = (fun x -> List.assoc x assoc)
  ; spill_bytes = 8 * n_stk
  }
(* greedy layout ------------------------------------------------------------ *)
(* This example register allocation strategy puts the first few uids in
available registers and spills the rest. It uses liveness information to
recycle available registers when their current value becomes dead.
There is a corner case where we might have to try to allocate a location
but there is a live variable who's location is unknown! (This can happen
in a loop... see gcd_euclidean.ll for an example.) In that case, we
should just spill to avoid conflicts.
*)
(* Greedy allocator: give each defined uid some palette register not
   occupied by a simultaneously-live uid, spilling when none is free. *)
let greedy_layout (f:Ll.fdecl) (live:liveness) : layout =
let n_arg = ref 0 in
let n_spill = ref 0 in
(* Allocate a fresh stack slot below the frame pointer. *)
let spill () = (incr n_spill; Alloc.LStk (- !n_spill)) in
(* Allocates a destination location for an incoming function parameter.
Corner case: argument 3, in Rcx occupies a register used for other
purposes by the compiler. We therefore always spill it.
*)
let alloc_arg () =
let res =
match arg_loc !n_arg with
| Alloc.LReg Rcx -> spill ()
| x -> x
in
incr n_arg; res
in
(* The available palette of registers. Excludes Rax and Rcx *)
let pal = LocSet.(caller_save
|> remove (Alloc.LReg Rax)
|> remove (Alloc.LReg Rcx)
)
in
(* Allocates a uid greedily based on liveness information.
   Not_found here can come from two places: List.assoc (a live uid whose
   location is not yet decided, e.g. in a loop) or LocSet.choose (no
   free register).  Either way we conservatively spill. *)
let allocate lo uid =
let loc =
try
let used_locs =
UidSet.fold (fun y -> LocSet.add (List.assoc y lo)) (live.live_in uid) LocSet.empty
in
let available_locs = LocSet.diff pal used_locs in
LocSet.choose available_locs
with
| Not_found -> spill ()
in
Platform.verb @@ Printf.sprintf "allocated: %s <- %s\n" (Alloc.str_loc loc) uid; loc
in
(* Walk the declaration in program order, building the uid->loc table. *)
let lo =
fold_fdecl
(fun lo (x, _) -> (x, alloc_arg())::lo)
(fun lo l -> (l, Alloc.LLbl (Platform.mangle l))::lo)
(fun lo (x, i) ->
if insn_assigns i
then (x, allocate lo x)::lo
else (x, Alloc.LVoid)::lo)
(fun lo _ -> lo)
[] f in
{ uid_loc = (fun x -> List.assoc x lo)
; spill_bytes = 8 * !n_spill
}
(* better register allocation ----------------------------------------------- *)
(* TASK: Implement a (correct) register allocation strategy that
outperforms the greedy layout strategy given above, assuming that
the liveness information is calculated using the dataflow analysis
from liveness.ml.
Your implementation does _not_ necessarily have to do full-blown
coalescing graph coloring as described in lecture. You may choose
a simpler strategy. In particular, a non-coalescing graph coloring
algorithm that uses some simple preference heuristics should be
able to beat the greedy algorithm.
To measure the effectiveness of your strategy, our testing infrastructure
uses a simple heuristic to compare it with the 'greedy' strategy given above.
QUALITY HEURISTIC:
The 'quality score' of a register assignment for an x86 program is based
on two things:
- the total number of memory accesses, which is the sum of:
- the number of Ind2 and Ind3 operands
- the number of Push and Pop instructions
- size(p) the total number of instructions in the x86 program
Your goal for register allocation should be to minimize the number of
memory operations and, secondarily, the overall size of the program.
registers.ml provides some helper functions that you can use to
get the size and total number of memory operations in a program. It
also provides a function that computes a histogram of the register usage,
which can be helpful when testing your register allocator.
To see whether your register assignment is better than the greedy one,
we check:
if #mem_ops(yours) < #mem_ops(greedy) then yours is better
otherwise if size(yours) < size(greedy) then yours is better
otherwise greedy wins.
Hints:
- The Datastructures file provides a UidMap that can be used to
create your interference graph.
- It may be useful to understand how this version of the compiler
deals with function calls (see compile_pmov) and what the
greedy allocator does.
- The compiler uses Rax and Rcx in its code generation, so they
are _not_ generally available for your allocator to use.
. other caller_save registers are freely available
. if you want to use callee_save registers you might have to
adjust the code generated by compile_fdecl to save/restore them.
*)
(* TASK stub (see the assignment comment above).  NOTE(review): this is
   intentionally unimplemented; selecting it with set_regalloc "better"
   will raise Failure when the first function is compiled. *)
let better_layout (f:Ll.fdecl) (live:liveness) : layout =
failwith "Backend.better_layout not implemented"
(* register allocation options ---------------------------------------------- *)

(* A trivial liveness analysis that conservatively reports every defined
   uid (parameters and assigning instructions) as live at every program
   point. *)
let trivial_liveness (f:Ll.fdecl) : liveness =
  let all =
    fold_fdecl
      (fun s (x, _) -> UidSet.add x s)
      (fun s _ -> s)
      (fun s (x, i) -> if insn_assigns i then UidSet.add x s else s)
      (fun s _ -> s)
      UidSet.empty f
  in
  { live_in = (fun _ -> all); live_out = (fun _ -> all) }
(* Mutable hooks selecting the liveness analysis and layout strategy used
   by compile_fdecl; reassigned from the command line via
   set_liveness / set_regalloc. *)
let liveness_fn : (Ll.fdecl -> liveness) ref =
ref trivial_liveness
let layout_fn : (Ll.fdecl -> liveness -> layout) ref =
ref no_reg_layout
(* Consistency check for layout, i.e., make sure that a layout does not use the
same location for variables that are live at the same time.
Raises Failure on the first clash found; returns unit otherwise. *)
let check_layout (lay:layout) (live:liveness) (f:Ll.fdecl) =
(* Check that uid is not allocated to the same location as any uid in s.
   LVoid carries no storage, so "collisions" on it are harmless. *)
let check_disjoint uid s =
let loc = lay.uid_loc uid in
if loc <> LVoid then
UidSet.iter
(fun v -> if v <> uid && loc = (lay.uid_loc v) then
failwith @@
Printf.sprintf
"Invalid layout %s and %s both map to %s"
uid v (Alloc.str_loc loc))
s
in
(* For every defined uid, check its live-in set pairwise for clashes.
   Uids with no liveness information are treated as having none. *)
UidSet.iter
(fun x ->
let live_in = try (live.live_in x) with Not_found -> UidSet.empty in
UidSet.iter (fun y -> check_disjoint y live_in) live_in)
(fold_fdecl
(fun s (x, _) -> UidSet.add x s)
(fun s _ -> s)
(fun s (x, i) -> if insn_assigns i then UidSet.add x s else s)
(fun s _ -> s)
UidSet.empty f)
(* Select the liveness analysis by name (driver option). *)
let set_liveness name =
  let fn =
    match name with
    | "trivial" -> trivial_liveness
    | "dataflow" -> Liveness.get_liveness
    | _ -> failwith "impossible arg"
  in
  liveness_fn := fn

(* Select the register allocation strategy by name (driver option). *)
let set_regalloc name =
  let fn =
    match name with
    | "none" -> no_reg_layout
    | "greedy" -> greedy_layout
    | "better" -> better_layout
    | _ -> failwith "impossible arg"
  in
  layout_fn := fn
(* Compile a function declaration using the chosen liveness analysis
   and register allocation strategy.  Emits the global label, the
   standard prologue, spill-space reservation, and then the body. *)
let compile_fdecl tdecls (g:gid) (f:Ll.fdecl) : x86stream =
  let liveness = !liveness_fn f in
  let layout = !layout_fn f liveness in
  (* Help out students by checking that the layout is correct with
     respect to liveness.
     Fixed: [let () = ...] asserts the unit type instead of the
     value-discarding [let _ = ...]. *)
  let () = check_layout layout liveness f in
  let afdecl = alloc_fdecl layout liveness f in
  [L (Platform.mangle g, true)]
  >@ lift Asm.[ Pushq, [~%Rbp]
              ; Movq, [~%Rsp; ~%Rbp] ]
  (* Reserve stack space for spilled uids, if any. *)
  >@ (if layout.spill_bytes <= 0 then []
      else lift Asm.[ Subq, [~$(layout.spill_bytes); ~%Rsp] ])
  >@ (compile_fbody tdecls afdecl)
(* compile_gdecl ------------------------------------------------------------ *)

(* Compile a global initializer into a list of static data directives.
   Fixed: the GBitcast case previously bound two unused type variables;
   they are now wildcards. *)
let rec compile_ginit = function
  | GNull -> [Quad (Lit 0L)]
  | GGid gid -> [Quad (Lbl (Platform.mangle gid))]
  | GInt c -> [Quad (Lit c)]
  | GString s -> [Asciz s]
  | GArray gs
  | GStruct gs -> List.(flatten @@ map compile_gdecl gs)
  (* A bitcast does not change the underlying data. *)
  | GBitcast (_, g, _) -> compile_ginit g

(* A global declaration's type is irrelevant to its emitted data. *)
and compile_gdecl (_, g) = compile_ginit g
(* compile_prog ------------------------------------------------------------- *)

(* Compile an LL program: global data declarations first, followed by the
   compiled function bodies. *)
let compile_prog {tdecls; gdecls; fdecls} : X86.prog =
  let compile_global (lbl, gdecl) =
    Asm.data (Platform.mangle lbl) (compile_gdecl gdecl)
  in
  let compile_fn (name, fdecl) =
    prog_of_x86stream (compile_fdecl tdecls name fdecl)
  in
  List.map compile_global gdecls
  @ List.concat (List.map compile_fn fdecls)