850 lines
29 KiB
OCaml
850 lines
29 KiB
OCaml
(* ll ir compilation -------------------------------------------------------- *)
|
|
open Ll
|
|
open Llutil
|
|
open X86
|
|
module Platform = Util.Platform
|
|
|
|
(* Backend "Layout" compilation strategy ------------------------------------ *)
|
|
|
|
|
|
(* allocated llvmlite function bodies --------------------------------------- *)
|
|
|
|
module Alloc = struct
|
|
|
|
(* X86 locations *)
|
|
type loc =
|
|
| LVoid (* no storage *)
|
|
| LReg of X86.reg (* x86 register *)
|
|
| LStk of int (* a stack slot offset from %rbp (not a byte offset!)*)
|
|
| LLbl of X86.lbl (* an assembler label *)
|
|
|
|
type operand =
|
|
| Null
|
|
| Const of int64
|
|
| Gid of X86.lbl
|
|
| Loc of loc
|
|
|
|
type insn =
|
|
| ILbl of loc
|
|
| PMov of (loc * ty * operand) list
|
|
| Binop of loc * bop * ty * operand * operand
|
|
| Alloca of loc * ty
|
|
| Load of loc * ty * operand
|
|
| Store of ty * operand * operand
|
|
| Icmp of loc * Ll.cnd * ty * operand * operand
|
|
| Call of loc * ty * operand * (ty * operand) list
|
|
| Bitcast of loc * ty * operand * ty
|
|
| Gep of loc * ty * operand * operand list
|
|
| Ret of ty * operand option
|
|
| Br of loc
|
|
| Cbr of operand * loc * loc
|
|
|
|
let str_loc = function
|
|
| LVoid -> "LVoid"
|
|
| LReg r -> X86.string_of_reg r
|
|
| LStk n -> Printf.sprintf "LStk %d" n
|
|
| LLbl l -> l
|
|
|
|
let str_operand = function
|
|
| Null -> "null"
|
|
| Const x -> "Const _"
|
|
| Gid l -> l
|
|
| Loc l -> str_loc l
|
|
|
|
|
|
module LocSet = Set.Make (struct type t = loc let compare = compare end)
|
|
module UidSet = Datastructures.UidS
|
|
|
|
type fbody = (insn * LocSet.t) list
|
|
|
|
let map_operand f g : Ll.operand -> operand = function
|
|
| Null -> Null
|
|
| Const i -> Const i
|
|
| Gid x -> Gid (g x)
|
|
| Id u -> Loc (f u)
|
|
|
|
let map_insn f g : uid * Ll.insn -> insn =
|
|
let mo = map_operand f g in function
|
|
| x, Binop (b,t,o,o') -> Binop (f x, b,t,mo o,mo o')
|
|
| x, Alloca t -> Alloca (f x, t)
|
|
| x, Load (t,o) -> Load (f x, t, mo o)
|
|
| _, Store (t,o,o') -> Store (t, mo o, mo o')
|
|
| x, Icmp (c,t,o,o') -> Icmp (f x, c, t, mo o, mo o')
|
|
| x, Call (t,o,args) -> Call (f x, t, mo o, List.map (fun (t,o) -> t, mo o) args)
|
|
| x, Bitcast (t,o,t') -> Bitcast (f x, t, mo o, t')
|
|
| x, Gep (t,o,is) -> Gep (f x, t, mo o, List.map mo is)
|
|
|
|
let map_terminator f g : uid * Ll.terminator -> insn =
|
|
let mo = map_operand f g in function
|
|
| _, Ret (t,None) -> Ret (t, None)
|
|
| _, Ret (t,Some o) -> Ret (t, Some (mo o))
|
|
| _, Br l -> Br (f l)
|
|
| _, Cbr (o,l,l') -> Cbr (mo o,f l,f l')
|
|
|
|
let map_lset f (s:UidSet.t) : LocSet.t =
|
|
UidSet.fold (fun x t -> LocSet.add (f x) t) s LocSet.empty
|
|
|
|
let of_block
|
|
(f:Ll.uid -> loc)
|
|
(g:Ll.gid -> X86.lbl)
|
|
(live_in:uid -> UidSet.t)
|
|
(b:Ll.block) : fbody =
|
|
List.map (fun (u,i) ->
|
|
(* Uncomment this to enable verbose debugging output... *)
|
|
(* Platform.verb @@ Printf.sprintf
|
|
" * of_block: %s live_in = %s\n" u (UidSet.to_string (live_in u)); *)
|
|
map_insn f g (u,i), map_lset f @@ live_in u) b.insns
|
|
@ let x,t = b.term in
|
|
[map_terminator f g (x,t), map_lset f @@ live_in x]
|
|
|
|
let of_lbl_block f g live_in (l,b:Ll.lbl * Ll.block) : fbody =
|
|
(ILbl (f l), map_lset f @@ live_in l)::of_block f g live_in b
|
|
|
|
let of_cfg
|
|
(f : Ll.uid -> loc)
|
|
(g : Ll.gid -> X86.lbl)
|
|
(live_in : uid -> UidSet.t)
|
|
(e, bs : Ll.cfg) : fbody =
|
|
List.(flatten @@ of_block f g live_in e :: map (of_lbl_block f g live_in) bs)
|
|
|
|
end
|
|
|
|
module LocSet = Alloc.LocSet
|
|
module UidSet = Alloc.UidSet
|
|
|
|
let str_locset (lo:LocSet.t) : string =
|
|
String.concat " " (List.map Alloc.str_loc (LocSet.elements lo))
|
|
|
|
|
|
(* streams of x86 instructions ---------------------------------------------- *)
|
|
|
|
type x86elt =
|
|
| I of X86.ins
|
|
| L of (X86.lbl * bool)
|
|
|
|
type x86stream = x86elt list
|
|
|
|
let lift : X86.ins list -> x86stream =
|
|
List.rev_map (fun i -> I i)
|
|
|
|
let ( >@ ) x y = y @ x
|
|
let ( >:: ) x y = y :: x
|
|
|
|
let prog_of_x86stream : x86stream -> X86.prog =
|
|
let rec loop p iis = function
|
|
| [] -> (match iis with [] -> p | _ -> failwith "stream has no initial label")
|
|
| (I i)::s' -> loop p (i::iis) s'
|
|
| (L (l,global))::s' -> loop ({ lbl=l; global; asm=Text iis }::p) [] s'
|
|
in loop [] []
|
|
|
|
|
|
(* locals and layout -------------------------------------------------------- *)
|
|
|
|
(* The layout for this version of the backend is slightly more complex
|
|
than we saw earlier. It consists of
|
|
- uid_loc a function that maps LL uids to their target x86 locations
|
|
- the number of bytes to be allocated on the stack due to spills
|
|
*)
|
|
|
|
type layout =
|
|
{ uid_loc : uid -> Alloc.loc
|
|
; spill_bytes : int
|
|
}
|
|
|
|
(* The liveness analysis will return a record, with fields live_in and live_out,
|
|
which are functions from uid to the set of variables that are live in (or
|
|
live out) at a given program point denoted by the uid *)
|
|
type liveness = Liveness.liveness
|
|
|
|
(* The set of all caller-save registers available for register allocation *)
|
|
let caller_save : LocSet.t =
|
|
[ Rdi; Rsi; Rdx; Rcx; R09; R08; Rax; R10; R11 ]
|
|
|> List.map (fun r -> Alloc.LReg r) |> LocSet.of_list
|
|
|
|
(* excludes Rbp, Rsp, and Rip, since they have special meanings
|
|
The current backend does not use callee-save registers except in
|
|
the special case of through registers. It uses R15 as a function
|
|
pointer, but ensures that it is saved/restored.
|
|
*)
|
|
let callee_save : LocSet.t =
|
|
[ Rbx; R12; R13; R14; R15 ]
|
|
|> List.map (fun r -> Alloc.LReg r) |> LocSet.of_list
|
|
|
|
let arg_reg : int -> X86.reg option = function
|
|
| 0 -> Some Rdi
|
|
| 1 -> Some Rsi
|
|
| 2 -> Some Rdx
|
|
| 3 -> Some Rcx
|
|
| 4 -> Some R08
|
|
| 5 -> Some R09
|
|
| n -> None
|
|
|
|
let arg_loc (n:int) : Alloc.loc =
|
|
match arg_reg n with
|
|
| Some r -> Alloc.LReg r
|
|
| None -> Alloc.LStk (n-4)
|
|
|
|
let alloc_fdecl (layout:layout) (liveness:liveness) (f:Ll.fdecl) : Alloc.fbody =
|
|
let dst = List.map layout.uid_loc f.f_param in
|
|
let tdst = List.combine (fst f.f_ty) dst in
|
|
let movs = List.mapi (fun i (t,x) -> x, t, Alloc.Loc (arg_loc i)) tdst in
|
|
(Alloc.PMov movs, LocSet.of_list dst)
|
|
:: Alloc.of_cfg layout.uid_loc Platform.mangle liveness.live_in f.f_cfg
|
|
|
|
(* compiling operands ------------------------------------------------------ *)
|
|
|
|
let compile_operand : Alloc.operand -> X86.operand =
|
|
let open Alloc in function
|
|
| Null -> Asm.(~$0)
|
|
| Const i -> Asm.(Imm (Lit i))
|
|
| Gid l -> Asm.(~$$l)
|
|
| Loc LVoid -> failwith "compiling uid without location"
|
|
| Loc (LStk i) -> Asm.(Ind3 (Lit (Int64.of_int @@ i * 8), Rbp))
|
|
| Loc (LReg r) -> Asm.(~%r)
|
|
| Loc (LLbl l) -> Asm.(Ind1 (Lbl l))
|
|
|
|
let emit_mov (src:X86.operand) (dst:X86.operand) : x86stream =
|
|
let open X86 in match src, dst with
|
|
| Imm (Lbl l), Reg _ -> lift Asm.[ Leaq, [Ind3 (Lbl l, Rip); dst ] ]
|
|
| Imm (Lbl l), _ -> lift Asm.[ Leaq, [Ind3 (Lbl l, Rip); ~%Rax ]
|
|
; Movq, [~%Rax; dst ] ]
|
|
| Reg r, Reg r' when r = r' -> []
|
|
| Reg _, _ -> lift Asm.[ Movq, [src; dst] ]
|
|
| _, Reg _ -> lift Asm.[ Movq, [src; dst] ]
|
|
| _, _ -> lift Asm.[ Pushq, [src]; Popq, [dst] ]
|
|
|
|
|
|
(* compiling parallel moves ------------------------------------------------- *)
|
|
|
|
(* Compiles a parallel move instruction into a sequence of moves, pushing and
|
|
popping values to the stack when there are not enough registers to directly
|
|
shuffle the sources to the targets. It uses liveness information to simply
|
|
not move dead operands.
|
|
|
|
The PMov instruction is used at the beginning of a function declaration to
|
|
move the incoming arguments to their destination uids/registers.
|
|
compile_pmov is directly used when compiling a function call to move
|
|
the arguments.
|
|
|
|
Inputs:
|
|
live - the liveness information
|
|
ol - a list of triples of the form (dest, type, src)
|
|
|
|
Note: the destinations are assumed to be distinct, but might also be sources
|
|
|
|
Outputs:
|
|
an x86 instruction stream that (efficiently) moves each src to its
|
|
destination
|
|
|
|
The algorithm works like this:
|
|
1. Filter out the triples in which srcs are dead or already in the right
|
|
place. (none of those need to be moved)
|
|
|
|
Then do a recursive algorithm that processes the remaining list of triples:
|
|
2. See if there are triples of the form (dest, type, src) where dest
|
|
is not also source in some other triple. For each such triple we can
|
|
directly move the src to its dest (which won't "clobber" some other
|
|
source). These are the "ready" moves.
|
|
|
|
3. If there are no "ready" moves to make (i.e. every destination is also
|
|
a source of some other triple), we pick the first triple, push its
|
|
src to the stack, recursively process the remaining list, and then
|
|
pop the stack into the destination.
|
|
|
|
ol ol' 2 2 3 2
|
|
x <- y x <- y w <- x MOV x, w MOV x, w MOV x, w
|
|
z <- z ==> ==> ------ ==> -------- ==> PUSH y ==> PUSH y
|
|
w <- x w <- x x <- y x <- y y <- z MOV z, y
|
|
y <- z y <- z y <- z y <- z POP x POP x
|
|
|
|
*)
|
|
|
|
let compile_pmov live (ol:(Alloc.loc * Ll.ty * Alloc.operand) list) : x86stream =
|
|
let open Alloc in
|
|
let module OpSet = Set.Make (struct type t = operand let compare = compare end) in
|
|
|
|
(* Filter the moves to keep the needed ones:
|
|
The operands that actually need to be moved are those that are
|
|
- not in the right location already, and
|
|
- still live *)
|
|
let ol' = List.filter (fun (x, _, o) -> Loc x <> o && LocSet.mem x live) ol in
|
|
|
|
let rec loop outstream ol =
|
|
(* Find the _set_ of all sources that still need to be moved. *)
|
|
let srcs = List.fold_left (fun s (_, _, o) -> OpSet.add o s) OpSet.empty ol in
|
|
match List.partition (fun (x, _, o) -> OpSet.mem (Loc x) srcs) ol with
|
|
| [], [] -> outstream
|
|
|
|
(* when no moves are ready to be emitted, push onto stack *)
|
|
| (x,_,o)::ol', [] ->
|
|
let os = loop (outstream >:: I Asm.( Pushq, [compile_operand o]))
|
|
ol' in
|
|
os >:: I Asm.( Popq, [compile_operand (Loc x)] )
|
|
|
|
(* when some destination of a move is not also a source *)
|
|
| ol', ready ->
|
|
loop (List.fold_left (fun os (x,_,o) ->
|
|
os >@
|
|
emit_mov (compile_operand o) (compile_operand (Loc x))) outstream ready)
|
|
ol'
|
|
in
|
|
loop [] ol'
|
|
|
|
|
|
(* compiling call ---------------------------------------------------------- *)
|
|
|
|
let compile_call live (fo:Alloc.operand) (os:(ty * Alloc.operand) list) : x86stream =
|
|
let oreg, ostk, _ =
|
|
List.fold_left (fun (oreg, ostk, i) (t, o) ->
|
|
match arg_reg i with
|
|
| Some r -> (Alloc.LReg r, t, o)::oreg, ostk, i+1
|
|
| None -> oreg, o::ostk, i+1
|
|
) ([], [], 0) os in
|
|
let nstack = List.length ostk in
|
|
let live' = LocSet.of_list @@ List.map (fun (r,_,_) -> r) oreg in
|
|
lift (List.map (fun o -> Pushq, [compile_operand o]) ostk)
|
|
>@ compile_pmov (LocSet.union live live') oreg
|
|
>:: I Asm.( Callq, [compile_operand fo] )
|
|
>@ lift (if nstack <= 0 then []
|
|
else Asm.[ Addq, [~$(nstack * 8); ~%Rsp] ])
|
|
|
|
|
|
(* compiling getelementptr (gep) ------------------------------------------- *)
|
|
|
|
let rec size_ty tdecls t : int =
|
|
begin match t with
|
|
| Void | I8 | Fun _ -> 0
|
|
| I1 | I64 | Ptr _ -> 8 (* Target 64-bit only subset of X86 *)
|
|
| Struct ts -> List.fold_left (fun acc t -> acc + (size_ty tdecls t)) 0 ts
|
|
| Array (n, t) -> n * (size_ty tdecls t)
|
|
| Namedt id -> size_ty tdecls (List.assoc id tdecls)
|
|
end
|
|
|
|
(* Compute the size of the offset (in bytes) of the nth element of a region
|
|
of memory whose types are given by the list. Also returns the nth type. *)
|
|
let index_into tdecls (ts:ty list) (n:int) : int * ty =
|
|
let rec loop ts n acc =
|
|
begin match (ts, n) with
|
|
| (u::_, 0) -> (acc, u)
|
|
| (u::us, n) -> loop us (n-1) (acc + (size_ty tdecls u))
|
|
| _ -> failwith "index_into encountered bogus index"
|
|
end
|
|
in loop ts n 0
|
|
|
|
let imm_of_int (n:int) = Imm (Lit (Int64.of_int n))
|
|
|
|
let compile_getelementptr tdecls (t:Ll.ty) (o:Alloc.operand)
|
|
(path: Alloc.operand list) : x86stream =
|
|
|
|
let rec loop ty path (code : x86stream) =
|
|
match (ty, path) with
|
|
| (_, []) -> code
|
|
|
|
| (Struct ts, Alloc.Const n::rest) ->
|
|
let (offset, u) = index_into tdecls ts (Int64.to_int n) in
|
|
loop u rest @@ (
|
|
code >:: I Asm.(Addq, [~$offset; ~%Rax])
|
|
)
|
|
|
|
| (Array(_, u), Alloc.Const n::rest) ->
|
|
(* Statically calculate the offset *)
|
|
let offset = (size_ty tdecls u) * (Int64.to_int n) in
|
|
loop u rest @@ (
|
|
code >:: I Asm.(Addq, [~$offset; ~%Rax])
|
|
)
|
|
|
|
| (Array(_, u), offset_op::rest) ->
|
|
loop u rest @@ (
|
|
code >@
|
|
([I Asm.(Movq, [~%Rax; ~%Rcx])] >@
|
|
(emit_mov (compile_operand offset_op) (Reg Rax)) >@
|
|
[I Asm.(Imulq, [imm_of_int @@ size_ty tdecls u; ~%Rax])] >@
|
|
[I Asm.(Addq, [~%Rcx; ~%Rax])]
|
|
)
|
|
)
|
|
|
|
| (Namedt t, p) -> loop (List.assoc t tdecls) p code
|
|
|
|
| _ -> failwith "compile_gep encountered unsupported getelementptr data" in
|
|
|
|
match t with
|
|
| Ptr t -> loop (Array(0, t)) path (emit_mov (compile_operand o) (Reg Rax))
|
|
| _ -> failwith "compile_gep got incorrect parameters"
|
|
|
|
(* compiling instructions within function bodies ---------------------------- *)
|
|
|
|
|
|
let compile_fbody tdecls (af:Alloc.fbody) : x86stream =
|
|
let rec loop (af:Alloc.fbody) (outstream:x86stream) : x86stream =
|
|
let cb = function
|
|
| Ll.Add -> Addq | Ll.Sub -> Subq | Ll.Mul -> Imulq
|
|
| Ll.Shl -> Shlq | Ll.Lshr -> Shrq | Ll.Ashr -> Sarq
|
|
| Ll.And -> Andq | Ll.Or -> Orq | Ll.Xor -> Xorq in
|
|
let cc = function
|
|
| Ll.Eq -> Set Eq | Ll.Ne -> Set Neq | Ll.Slt -> Set Lt
|
|
| Ll.Sle -> Set Le | Ll.Sgt -> Set Gt | Ll.Sge -> Set Ge in
|
|
let co = compile_operand in
|
|
|
|
let open Alloc in
|
|
match af with
|
|
| [] -> outstream
|
|
|
|
| (ILbl (LLbl l), _)::rest ->
|
|
loop rest @@
|
|
(outstream
|
|
>:: L (l, false) )
|
|
|
|
| (PMov ol, live)::rest ->
|
|
loop rest @@
|
|
( outstream
|
|
>@ compile_pmov live ol )
|
|
|
|
| (Icmp (LVoid, _,_,_,_), _)::rest -> loop rest outstream
|
|
| (Binop (LVoid, _,_,_,_), _)::rest -> loop rest outstream
|
|
| (Alloca (LVoid, _), _)::rest -> loop rest outstream
|
|
| (Bitcast (LVoid, _,_,_), _)::rest -> loop rest outstream
|
|
| (Load (LVoid, _,_), _)::rest -> loop rest outstream
|
|
| (Gep (LVoid, _,_,_), _)::rest -> loop rest outstream
|
|
|
|
| (Icmp (x, c,_,Loc (LReg o),o'), _)::rest ->
|
|
loop rest @@
|
|
( outstream
|
|
>@ lift Asm.[ Cmpq, [co o'; ~%o]
|
|
; cc c, [co (Loc x)]
|
|
; Andq, [~$1; co (Loc x)] ] )
|
|
|
|
|
|
| (Icmp (x, c,_,o,o'), _)::rest ->
|
|
loop rest @@
|
|
( outstream
|
|
>@ emit_mov (co o) (Reg Rax)
|
|
>@ lift Asm.[ Cmpq, [co o'; ~%Rax]
|
|
; cc c, [co (Loc x)]
|
|
; Andq, [~$1; co (Loc x)] ] )
|
|
|
|
(* Shift instructions must use Rcx or Immediate as second arg *)
|
|
| (Binop (x, bop,_,o,o'), _)::rest
|
|
when (bop = Shl || bop = Lshr || bop = Ashr)
|
|
->
|
|
loop rest @@
|
|
( outstream
|
|
>@ emit_mov (co o) (Reg Rax)
|
|
>@ emit_mov (co o') (Reg Rcx)
|
|
>@ lift Asm.[ cb bop, [~%Rcx; ~%Rax]
|
|
; Movq, [~%Rax; co (Loc x)] ] )
|
|
|
|
| (Binop (LReg r, bop,_,o,o'), _)::rest
|
|
when Loc (LReg r) = o' &&
|
|
(bop = Add || bop = Mul || bop = And || bop = Or || bop = Xor) ->
|
|
loop rest @@
|
|
( outstream
|
|
>:: I Asm.( cb bop, [co o; ~%r] ) )
|
|
|
|
|
|
| (Binop (LReg r, b,_,o,o'), _)::rest when Loc (LReg r) <> o' ->
|
|
loop rest @@
|
|
( outstream
|
|
>@ emit_mov (co o) (Reg r)
|
|
>:: I Asm.( cb b, [co o'; ~%r] ) )
|
|
|
|
| (Binop (x, b,_,o,o'), _)::rest ->
|
|
loop rest @@
|
|
( outstream
|
|
>@ emit_mov (co o) (Reg Rax)
|
|
>@ lift Asm.[ cb b, [co o'; ~%Rax]
|
|
; Movq, [~%Rax; co (Loc x)] ] )
|
|
|
|
| (Alloca (x, at), _)::rest ->
|
|
loop rest @@
|
|
( outstream
|
|
>@ lift Asm.[ Subq, [~$(size_ty tdecls at); ~%Rsp]
|
|
; Movq, [~%Rsp; co (Loc x)] ] )
|
|
|
|
|
|
| (Bitcast (x, _,o,_), _)::rest ->
|
|
loop rest @@
|
|
( outstream
|
|
>@ emit_mov (co o) (Reg Rax)
|
|
>:: I Asm.( Movq, [~%Rax; co (Loc x)] ) )
|
|
|
|
|
|
| (Load (LReg x, _, Loc (LReg src)), _)::rest ->
|
|
loop rest @@
|
|
( outstream
|
|
>:: I Asm.( Movq, [Ind2 src; ~%x] ) )
|
|
|
|
| (Load (x, _, src), _)::rest ->
|
|
loop rest @@
|
|
( outstream
|
|
>@ emit_mov (co src) (Reg Rax)
|
|
>@ lift Asm.[ Movq, [Ind2 Rax; ~%Rax]
|
|
; Movq, [~%Rax; co (Loc x)] ] )
|
|
|
|
| (Store (_,Loc (LReg src),Loc (LReg dst)), _)::rest ->
|
|
loop rest @@
|
|
( outstream
|
|
>:: I Asm.( Movq, [~%src; Ind2 dst] ) )
|
|
|
|
| (Store (_,src,dst), _)::rest ->
|
|
loop rest @@
|
|
( outstream
|
|
>@ emit_mov (co src) (Reg Rax)
|
|
>@ emit_mov (co dst) (Reg Rcx)
|
|
>:: I Asm.( Movq, [~%Rax; Ind2 Rcx] ) )
|
|
|
|
| (Gep (x, at,o,os), _)::rest ->
|
|
loop rest @@
|
|
( outstream
|
|
>@ compile_getelementptr tdecls at o os
|
|
>:: I Asm.( Movq, [~%Rax; co (Loc x)] ) )
|
|
|
|
| (Call (x, t,fo,os), live)::rest ->
|
|
(* Corner: fo is Loc (LReg r) and r is used in the calling conventions.
|
|
Then we use R15 to hold the function pointer, saving and restoring it,
|
|
since it is a callee-save register. *)
|
|
let fptr_op, init_fp, restore_fp =
|
|
begin match fo with
|
|
| Loc (LReg (Rdi | Rsi | Rdx | Rcx | R08 | R09)) ->
|
|
Loc (LReg R15),
|
|
[I Asm.(Pushq, [~%R15])] >@ (emit_mov (co fo) (Reg R15)),
|
|
[I Asm.(Popq, [~%R15])]
|
|
| _ -> fo, [], []
|
|
end
|
|
in
|
|
let () = Platform.verb @@ Printf.sprintf "call: %s live = %s\n"
|
|
(str_operand fo) (str_locset live)
|
|
in
|
|
let save = LocSet.(elements @@ inter (remove x live) caller_save) in
|
|
loop rest @@
|
|
( outstream
|
|
>@ init_fp
|
|
>@ lift (List.rev_map (fun x -> Pushq, [co (Loc x)]) save)
|
|
>@ compile_call live fptr_op os
|
|
>@ lift (List.map (fun x -> Popq, [co (Loc x)]) save)
|
|
>@ restore_fp
|
|
>@ (if t = Ll.Void || x = LVoid then []
|
|
else lift Asm.[ Movq, [~%Rax; co (Loc x)] ]) )
|
|
|
|
| (Ret (_,None), _)::rest ->
|
|
loop rest @@
|
|
( outstream
|
|
>@ lift Asm.[ Movq, [~%Rbp; ~%Rsp]
|
|
; Popq, [~%Rbp]
|
|
; Retq, [] ] )
|
|
|
|
| (Ret (_,Some o), _)::rest ->
|
|
loop rest @@
|
|
( outstream
|
|
>@ emit_mov (co o) (Reg Rax)
|
|
>@ lift Asm.[ Movq, [~%Rbp; ~%Rsp]
|
|
; Popq, [~%Rbp]
|
|
; Retq, [] ] )
|
|
|
|
| (Br (LLbl l), _)::rest ->
|
|
loop rest @@
|
|
( outstream
|
|
>:: I Asm.( Jmp, [~$$l] ) )
|
|
|
|
| (Cbr (Const i,(LLbl l1),(LLbl l2)), _)::rest ->
|
|
loop rest @@
|
|
( outstream
|
|
>:: (if i <> 0L
|
|
then I Asm.( Jmp, [~$$l1] )
|
|
else I Asm.( Jmp, [~$$l2] ) ) )
|
|
|
|
| (Cbr (o,(LLbl l1),(LLbl l2)), _)::rest ->
|
|
loop rest @@
|
|
( outstream
|
|
>@ lift Asm.[ Cmpq, [~$0; co o]
|
|
; J Neq, [~$$l1]
|
|
; Jmp, [~$$l2] ] )
|
|
|
|
| _ -> failwith "codegen failed to find instruction"
|
|
in
|
|
loop af []
|
|
|
|
|
|
(* compile_fdecl ------------------------------------------------------------ *)
|
|
|
|
(* Processes a function declaration by processing each of the subcomponents
|
|
in turn:
|
|
- first fold over the function parameters
|
|
- then fold over the entry block
|
|
- then fold over the subsequent blocks in their listed order
|
|
To fold over a block:
|
|
- fold over the label
|
|
- then the instructions (in block order)
|
|
- then the terminator
|
|
|
|
See the examples no_reg_layout and greedy_layout for how to use this function.
|
|
*)
|
|
let fold_fdecl (f_param : 'a -> uid * Ll.ty -> 'a)
|
|
(f_lbl : 'a -> lbl -> 'a)
|
|
(f_insn : 'a -> uid * Ll.insn -> 'a)
|
|
(f_term : 'a -> uid * Ll.terminator -> 'a)
|
|
(init:'a) (f:Ll.fdecl) : 'a =
|
|
let fold_params ps a =
|
|
List.fold_left f_param a ps in
|
|
let fold_block {insns; term} a =
|
|
f_term (List.fold_left f_insn a insns) term in
|
|
let fold_lbl_block (l,blk) a =
|
|
fold_block blk (f_lbl a l) in
|
|
let fold_lbl_blocks bs a =
|
|
List.fold_left (fun a b -> fold_lbl_block b a) a bs in
|
|
let entry,bs = f.f_cfg in
|
|
init
|
|
|> fold_params (List.combine f.f_param (fst f.f_ty))
|
|
|> fold_block entry
|
|
|> fold_lbl_blocks bs
|
|
|
|
|
|
(* no layout ---------------------------------------------------------------- *)
|
|
(* This register allocation strategy puts all uids into stack
|
|
slots. It does not use liveness information.
|
|
*)
|
|
let insn_assigns : Ll.insn -> bool = function
|
|
| Ll.Call (Ll.Void, _, _) | Ll.Store _ -> false
|
|
| _ -> true
|
|
|
|
let no_reg_layout (f:Ll.fdecl) (_:liveness) : layout =
|
|
let lo, n_stk =
|
|
fold_fdecl
|
|
(fun (lo, n) (x, _) -> (x, Alloc.LStk (- (n + 1)))::lo, n + 1)
|
|
(fun (lo, n) l -> (l, Alloc.LLbl (Platform.mangle l))::lo, n)
|
|
(fun (lo, n) (x, i) ->
|
|
if insn_assigns i
|
|
then (x, Alloc.LStk (- (n + 1)))::lo, n + 1
|
|
else (x, Alloc.LVoid)::lo, n)
|
|
(fun a _ -> a)
|
|
([], 0) f in
|
|
{ uid_loc = (fun x -> List.assoc x lo)
|
|
; spill_bytes = 8 * n_stk
|
|
}
|
|
|
|
(* greedy layout ------------------------------------------------------------ *)
|
|
(* This example register allocation strategy puts the first few uids in
|
|
available registers and spills the rest. It uses liveness information to
|
|
recycle available registers when their current value becomes dead.
|
|
|
|
There is a corner case where we might have to try to allocate a location
|
|
but there is a live variable who's location is unknown! (This can happen
|
|
in a loop... see gcd_euclidean.ll for an example.) In that case, we
|
|
should just spill to avoid conflicts.
|
|
*)
|
|
|
|
let greedy_layout (f:Ll.fdecl) (live:liveness) : layout =
|
|
let n_arg = ref 0 in
|
|
let n_spill = ref 0 in
|
|
|
|
let spill () = (incr n_spill; Alloc.LStk (- !n_spill)) in
|
|
|
|
(* Allocates a destination location for an incoming function parameter.
|
|
Corner case: argument 3, in Rcx occupies a register used for other
|
|
purposes by the compiler. We therefore always spill it.
|
|
*)
|
|
let alloc_arg () =
|
|
let res =
|
|
match arg_loc !n_arg with
|
|
| Alloc.LReg Rcx -> spill ()
|
|
| x -> x
|
|
in
|
|
incr n_arg; res
|
|
in
|
|
(* The available palette of registers. Excludes Rax and Rcx *)
|
|
let pal = LocSet.(caller_save
|
|
|> remove (Alloc.LReg Rax)
|
|
|> remove (Alloc.LReg Rcx)
|
|
)
|
|
in
|
|
|
|
(* Allocates a uid greedily based on liveness information *)
|
|
let allocate lo uid =
|
|
let loc =
|
|
try
|
|
let used_locs =
|
|
UidSet.fold (fun y -> LocSet.add (List.assoc y lo)) (live.live_in uid) LocSet.empty
|
|
in
|
|
let available_locs = LocSet.diff pal used_locs in
|
|
LocSet.choose available_locs
|
|
with
|
|
| Not_found -> spill ()
|
|
in
|
|
Platform.verb @@ Printf.sprintf "allocated: %s <- %s\n" (Alloc.str_loc loc) uid; loc
|
|
in
|
|
|
|
let lo =
|
|
fold_fdecl
|
|
(fun lo (x, _) -> (x, alloc_arg())::lo)
|
|
(fun lo l -> (l, Alloc.LLbl (Platform.mangle l))::lo)
|
|
(fun lo (x, i) ->
|
|
if insn_assigns i
|
|
then (x, allocate lo x)::lo
|
|
else (x, Alloc.LVoid)::lo)
|
|
(fun lo _ -> lo)
|
|
[] f in
|
|
{ uid_loc = (fun x -> List.assoc x lo)
|
|
; spill_bytes = 8 * !n_spill
|
|
}
|
|
|
|
|
|
(* better register allocation ----------------------------------------------- *)
|
|
(* TASK: Implement a (correct) register allocation strategy that
|
|
outperforms the greedy layout strategy given above, assuming that
|
|
the liveness information is calculated using the dataflow analysis
|
|
from liveness.ml.
|
|
|
|
Your implementation does _not_ necessarily have to do full-blown
|
|
coalescing graph coloring as described in lecture. You may choose
|
|
a simpler strategy. In particular, a non-coalescing graph coloring
|
|
algorithm that uses some simple preference heuristics should be
|
|
able to beat the greedy algorithm.
|
|
|
|
To measure the effectiveness of your strategy, our testing infrastructure
|
|
uses a simple heuristic to compare it with the 'greedy' strategy given above.
|
|
|
|
QUALITY HEURISTIC:
|
|
The 'quality score' of a register assignment for an x86 program is based
|
|
on two things:
|
|
- the total number of memory accesses, which is the sum of:
|
|
- the number of Ind2 and Ind3 operands
|
|
- the number of Push and Pop instructions
|
|
|
|
- size(p) the total number of instructions in the x86 program
|
|
|
|
Your goal for register allocation should be to minimize the number of
|
|
memory operations and, secondarily, the overall size of the program.
|
|
|
|
registers.ml provides some helper functions that you can use to
|
|
get the size and total number of memory operations in a program. It
|
|
also provides a function that computes a histogram of the register usage,
|
|
which can be helpful when testing your register allocator.
|
|
|
|
To see whether your register assignment is better than the greedy one,
|
|
we check:
|
|
if #mem_ops(yours) < #mem_ops(greedy) then yours is better
|
|
otherwise if size(yours) < size(greedy) then yours is better
|
|
otherwise greedy wins.
|
|
|
|
Hints:
|
|
- The Datastructures file provides a UidMap that can be used to
|
|
create your interference graph.
|
|
|
|
- It may be useful to understand how this version of the compiler
|
|
deals with function calls (see compile_pmov) and what the
|
|
greedy allocator does.
|
|
|
|
- The compiler uses Rax and Rcx in its code generation, so they
|
|
are _not_ generally available for your allocator to use.
|
|
|
|
. other caller_save registers are freely available
|
|
|
|
. if you want to use callee_save registers you might have to
|
|
adjust the code generated by compile_fdecl to save/restore them.
|
|
*)
|
|
|
|
let better_layout (f:Ll.fdecl) (live:liveness) : layout =
|
|
failwith "Backend.better_layout not implemented"
|
|
|
|
|
|
|
|
(* register allocation options ---------------------------------------------- *)
|
|
(* A trivial liveness analysis that conservatively says that every defined
|
|
uid is live across every edge. *)
|
|
let trivial_liveness (f:Ll.fdecl) : liveness =
|
|
let s =
|
|
fold_fdecl
|
|
(fun s (x, _) -> UidSet.add x s)
|
|
(fun s _ -> s)
|
|
(fun s (x, i) -> if insn_assigns i then UidSet.add x s else s)
|
|
(fun s _ -> s)
|
|
UidSet.empty f in
|
|
{live_in = (fun _ -> s); live_out = (fun _ -> s)}
|
|
|
|
let liveness_fn : (Ll.fdecl -> liveness) ref =
|
|
ref trivial_liveness
|
|
|
|
let layout_fn : (Ll.fdecl -> liveness -> layout) ref =
|
|
ref no_reg_layout
|
|
|
|
(* Consistency check for layout, i.e., make sure that a layout does not use the
|
|
same location for variables that are live at the same time *)
|
|
let check_layout (lay:layout) (live:liveness) (f:Ll.fdecl) =
|
|
(* Check that uid is not allocated to the same location as any uid in s *)
|
|
let check_disjoint uid s =
|
|
let loc = lay.uid_loc uid in
|
|
if loc <> LVoid then
|
|
UidSet.iter
|
|
(fun v -> if v <> uid && loc = (lay.uid_loc v) then
|
|
failwith @@
|
|
Printf.sprintf
|
|
"Invalid layout %s and %s both map to %s"
|
|
uid v (Alloc.str_loc loc))
|
|
s
|
|
in
|
|
UidSet.iter
|
|
(fun x ->
|
|
let live_in = try (live.live_in x) with Not_found -> UidSet.empty in
|
|
UidSet.iter (fun y -> check_disjoint y live_in) live_in)
|
|
(fold_fdecl
|
|
(fun s (x, _) -> UidSet.add x s)
|
|
(fun s _ -> s)
|
|
(fun s (x, i) -> if insn_assigns i then UidSet.add x s else s)
|
|
(fun s _ -> s)
|
|
UidSet.empty f)
|
|
|
|
let set_liveness name =
|
|
liveness_fn := match name with
|
|
| "trivial" -> trivial_liveness
|
|
| "dataflow" -> Liveness.get_liveness
|
|
| _ -> failwith "impossible arg"
|
|
|
|
let set_regalloc name =
|
|
layout_fn := match name with
|
|
| "none" -> no_reg_layout
|
|
| "greedy" -> greedy_layout
|
|
| "better" -> better_layout
|
|
| _ -> failwith "impossible arg"
|
|
|
|
(* Compile a function declaration using the chosen liveness analysis
|
|
and register allocation strategy. *)
|
|
let compile_fdecl tdecls (g:gid) (f:Ll.fdecl) : x86stream =
|
|
let liveness = !liveness_fn f in
|
|
let layout = !layout_fn f liveness in
|
|
(*
|
|
Help out students by checking that the layout is correct with
|
|
respect to liveness.
|
|
*)
|
|
let _ = check_layout layout liveness f in
|
|
let afdecl = alloc_fdecl layout liveness f in
|
|
[L (Platform.mangle g, true)]
|
|
>@ lift Asm.[ Pushq, [~%Rbp]
|
|
; Movq, [~%Rsp; ~%Rbp] ]
|
|
>@ (if layout.spill_bytes <= 0 then [] else
|
|
lift Asm.[ Subq, [~$(layout.spill_bytes); ~%Rsp] ])
|
|
>@ (compile_fbody tdecls afdecl)
|
|
|
|
(* compile_gdecl ------------------------------------------------------------ *)
|
|
|
|
let rec compile_ginit = function
|
|
| GNull -> [Quad (Lit 0L)]
|
|
| GGid gid -> [Quad (Lbl (Platform.mangle gid))]
|
|
| GInt c -> [Quad (Lit c)]
|
|
| GString s -> [Asciz s]
|
|
| GArray gs
|
|
| GStruct gs -> List.(flatten @@ map compile_gdecl gs)
|
|
| GBitcast (t1,g,t2) -> compile_ginit g
|
|
|
|
and compile_gdecl (_, g) = compile_ginit g
|
|
|
|
(* compile_prog ------------------------------------------------------------- *)
|
|
|
|
let compile_prog {tdecls; gdecls; fdecls} : X86.prog =
|
|
let g = fun (lbl, gdecl) ->
|
|
Asm.data (Platform.mangle lbl) (compile_gdecl gdecl)
|
|
in
|
|
|
|
let f = fun (name, fdecl) ->
|
|
prog_of_x86stream @@ compile_fdecl tdecls name fdecl
|
|
in
|
|
(List.map g gdecls)
|
|
@ List.(flatten @@ map f fdecls)
|