(* ll ir compilation -------------------------------------------------------- *)

open Ll
open X86

module Platform = Util.Platform

(* Overview ----------------------------------------------------------------- *)

(* We suggest that you spend some time understanding this entire file and
   how it fits with the compiler pipeline before making changes. The suggested
   plan for implementing the compiler is provided on the project web page.
*)


(* helpers ------------------------------------------------------------------ *)

(* Map LL comparison operations to X86 condition codes *)
let compile_cnd = function
  | Ll.Eq  -> X86.Eq
  | Ll.Ne  -> X86.Neq
  | Ll.Slt -> X86.Lt
  | Ll.Sle -> X86.Le
  | Ll.Sgt -> X86.Gt
  | Ll.Sge -> X86.Ge

let imm_of_int (n:int) = Imm (Lit (Int64.of_int n))

(* Compute an indirect address that is a fixed byte offset from %rbp *)
let rbp_offset (offset:int) : X86.operand =
  let amt = Int64.of_int offset in
  Ind3 (Lit amt, Rbp)

(* locals and layout -------------------------------------------------------- *)

(* One key problem in compiling the LLVM IR is how to map its local
   identifiers to X86 abstractions. For the best performance, one
   would want to use an X86 register for each LLVM %uid. However,
   since there are an unlimited number of %uids and only 16 registers,
   doing so effectively is quite difficult. We will see later in the
   course how _register allocation_ algorithms can do a good job at
   this.

   A simpler, but less performant, implementation is to map each %uid
   in the LLVM source to a _stack slot_ (i.e. a region of memory in
   the stack). Since LLVMlite, unlike real LLVM, permits %uid locals
   to store only 64-bit data, each stack slot is an 8-byte value.

   [ NOTE: For compiling LLVMlite, even i1 data values should be
     represented as an 8-byte quad. This greatly simplifies code
     generation. ]

   We call the data structure that maps each %uid to its stack slot a
   'stack layout'. A stack layout maps a uid to an X86 operand for
   accessing its contents. For this compilation strategy, the operand
   is always an offset from %rbp (in bytes) that represents a storage slot in
   the stack.
*)

type layout = (uid * X86.operand) list

(* A context contains the global type declarations (needed for getelementptr
   calculations) and a stack layout. *)
type ctxt = { tdecls : (tid * ty) list
            ; layout : layout
            }

(* useful for looking up items in tdecls or layouts *)
let lookup m x = List.assoc x m

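(* Illustrative (not prescriptive) example of a layout entry: a %uid such as
   %x stored at -8(%rbp) would appear as ("x", Ind3 (Lit (-8L), Rbp)), i.e.
   the uid paired with the operand used to access its stack slot. The actual
   offsets are chosen by [stack_layout] further below. *)
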
(* compiling operands ------------------------------------------------------ *)

(* LLVM IR instructions support several kinds of operands.

   LL local %uids live in stack slots, whereas global ids live at
   global addresses that must be computed from a label. Constants are
   immediately available, and the operand Null is the 64-bit 0 value.

   NOTE: two important facts about global identifiers:

   (1) You should use (Platform.mangle gid) to obtain a string
   suitable for naming a global label on your platform (OS X expects
   "_main" while Linux expects "main").

   (2) 64-bit assembly labels are not allowed as immediate operands.
   That is, the X86 code: movq _gid, %rax which looks like it should
   put the address denoted by _gid into %rax is not allowed.
   Instead, you need to compute an %rip-relative address using the
   leaq instruction: leaq _gid(%rip), %rax.

   One strategy for compiling instruction operands is to use a
   designated register (or registers) for holding the values being
   manipulated by the LLVM IR instruction. You might find it useful to
   implement the following helper function, whose job is to generate
   the X86 instruction that moves an LLVM operand into a designated
   destination (usually a register).
*)
let compile_operand ctxt dest : Ll.operand -> ins =
  function
  | Null    -> Asm.(Movq, [~$0; dest])
  | Const i -> Asm.(Movq, [Imm (Lit i); dest])
  | Gid id  -> Asm.(Leaq, [Ind3 (Lbl (Platform.mangle id), Rip); dest])
  | Id id   -> Asm.(Movq, [lookup ctxt.layout id; dest])

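(* Illustrative examples of what [compile_operand] emits, with %rax as the
   destination and hypothetical names:
     Null        ==>  movq $0, %rax
     Const 42L   ==>  movq $42, %rax
     Gid "foo"   ==>  leaq foo(%rip), %rax    (label after Platform.mangle)
     Id "x"      ==>  movq -8(%rbp), %rax     (read from %x's stack slot)   *)
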
(* compiling call ---------------------------------------------------------- *)

(* You will probably find it helpful to implement a helper function that
   generates code for the LLVM IR call instruction.

   The code you generate should follow the x64 System V AMD64 ABI
   calling conventions, which places the first six 64-bit (or smaller)
   values in registers and pushes the rest onto the stack. Note that,
   since all LLVM IR operands are 64-bit values, the first six
   operands will always be placed in registers. (See the notes about
   compiling fdecl below.)

   [ NOTE: It is the caller's responsibility to clean up arguments
     pushed onto the stack, so you must free the stack space after the
     call returns. ]

   [ NOTE: Don't forget to preserve caller-save registers (only if
     needed). ]

   [ NOTE: Remember, call can use labels as immediates! You shouldn't
     need to perform any RIP-relative addressing for this one. ]
*)


let arg_reg : int -> (X86.operand) option = function
  | 0 -> Some (Reg Rdi)
  | 1 -> Some (Reg Rsi)
  | 2 -> Some (Reg Rdx)
  | 3 -> Some (Reg Rcx)
  | 4 -> Some (Reg R08)
  | 5 -> Some (Reg R09)
  | _ -> None

let compile_call ctxt fop args =
  let op_to_rax = compile_operand ctxt (Reg Rax) in
  let call_code, op = match fop with
    | Gid g -> [], Imm (Lbl (Platform.mangle g))
    | Id _  -> [op_to_rax fop], (Reg Rax)
    | _ -> failwith "call function operand was not a local or global id"
  in

  let arg_code =
    let (_, register_arg_code, stack_arg_code) =
      List.fold_left (fun (i, r, s) (_,op) ->
          let r, s = match arg_reg i with
            | Some reg -> r @ (op_to_rax op :: Asm.([Movq, [~%Rax; reg]])), s
            | None -> r, (op_to_rax op :: Asm.([Pushq, [~%Rax]])) @ s
          in (i+1, r, s)
        ) (0, [], []) args
    in register_arg_code @ stack_arg_code
  in

  arg_code @ call_code @
  (X86.Callq, [op]) ::
  (if (List.length args) > 6 then
     Asm.([Addq, [imm_of_int (8 * ((List.length args) - 6)); ~%Rsp]])
   else [])

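(* A rough, illustrative trace (hypothetical names): for
   [%r = call i64 @foo(i64 %a, i64 %b)] the code above produces
     movq <slot of %a>, %rax
     movq %rax, %rdi
     movq <slot of %b>, %rax
     movq %rax, %rsi
     callq foo              (label after Platform.mangle)
   With more than six arguments, the extras are pushed so that the seventh
   ends up on top of the stack, and the final addq reclaims 8 * (n - 6)
   bytes after the call returns. *)
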
(* compiling getelementptr (gep) ------------------------------------------- *)

(* The getelementptr instruction computes an address by indexing into
   a data structure, following a path of offsets. It computes the
   address based on the size of the data, which is dictated by the
   data's type.

   To compile getelementptr, you must generate x86 code that performs
   the appropriate arithmetic calculations.
*)

(* [size_ty] maps an LLVMlite type to a size in bytes.
   (needed for getelementptr)

   - the size of a struct is the sum of the sizes of each component
   - the size of an array of t's with n elements is n * the size of t
   - all pointers, I1, and I64 are 8 bytes
   - the size of a named type is the size of its definition

   - Void, i8, and functions have undefined sizes according to LLVMlite.
     Your function should simply return 0 in those cases
*)
let rec size_ty (tdecls:(tid * ty) list) (t:Ll.ty) : int =
  begin match t with
    | Void | I8 | Fun _ -> 0
    | I1 | I64 | Ptr _ -> 8   (* we target only the 64-bit subset of X86 *)
    | Struct ts -> List.fold_left (fun acc t -> acc + (size_ty tdecls t)) 0 ts
    | Array (n, t) -> n * (size_ty tdecls t)
    | Namedt id -> size_ty tdecls (lookup tdecls id)
  end

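(* Worked example (illustrative): for [%T = type { i64, [3 x i64], i64* }]
   the rules above give  size(%T) = 8 + 3*8 + 8 = 40 bytes. *)
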
(* Compute the offset (in bytes) of the nth element of a region of memory
   whose types are given by the list. Also returns the nth type. *)
let index_into tdecls (ts:ty list) (n:int) : int * ty =
  let rec loop ts n acc =
    begin match (ts, n) with
      | (u::_, 0) -> (acc, u)
      | (u::us, n) -> loop us (n-1) (acc + (size_ty tdecls u))
      | _ -> failwith "index_into encountered bogus index"
    end
  in loop ts n 0


(* Generates code that computes a pointer value.

   1. op must be of pointer type: t*

   2. the value of op is the base address of the calculation

   3. the first index in the path is treated as the index into an array
      of elements of type t located at the base address

   4. subsequent indices are interpreted according to the type t:

      - if t is a struct, the index must be a constant n and it
        picks out the n'th element of the struct. [ NOTE: the offset
        within the struct of the n'th element is determined by the
        sizes of the types of the previous elements ]

      - if t is an array, the index can be any operand, and its
        value determines the offset within the array.

      - if t is any other type, the path is invalid

   5. if the index is valid, the remainder of the path is computed as
      in (4), but relative to the type of the sub-element picked out
      by the path so far
*)
let compile_gep (ctxt:ctxt) (op : Ll.ty * Ll.operand) (path: Ll.operand list) : ins list =
  let op_to_rax = compile_operand ctxt (Reg Rax) in
  let rec effective_type = function
    | Namedt id -> effective_type @@ lookup ctxt.tdecls id
    | t -> t
  in
  let rec loop ty path code =
    match (ty, path) with
    | (_, []) -> List.rev code

    | (Struct ts, Const n::rest) ->
      let (offset, u) = index_into ctxt.tdecls ts (Int64.to_int n) in
      loop u rest @@ Asm.(Addq, [~$offset; ~%Rax])::code

    | (Array(_, u), Const n::rest) ->
      (* Statically calculate the offset *)
      let offset = (size_ty ctxt.tdecls u) * (Int64.to_int n) in
      loop u rest @@ Asm.(Addq, [~$offset; ~%Rax])::code

    | (Array(_, u), offset_op::rest) ->
      loop u rest @@
      Asm.([ Addq, [~%Rcx; ~%Rax]
           ; Imulq, [imm_of_int @@ size_ty ctxt.tdecls u; ~%Rax] ])
      @ (op_to_rax offset_op) ::
      Asm.(Movq, [~%Rax; ~%Rcx])
      :: code

    | (Namedt t, p) -> loop (lookup ctxt.tdecls t) p code

    | _ -> failwith "compile_gep encountered unsupported getelementptr data" in

  match (effective_type @@ fst op, snd op) with
  | (Ptr t, op) -> loop (Array(0, t)) path [op_to_rax op]
  | _ -> failwith "compile_gep got incorrect parameters"

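(* Worked example (illustrative): starting from a pointer %p of type
   {i64, [3 x i64]}*  (size 32 bytes) with index path 0, 1, 2, the loop
   above computes
     %rax := value of %p     (base address)
     %rax += 0 * 32          (first index: array-of-t step)
     %rax += 8               (field 1 of the struct: skip the leading i64)
     %rax += 2 * 8           (element 2 of the inner [3 x i64])
   i.e. the base address plus 24 bytes. *)
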
(* compiling instructions -------------------------------------------------- *)

(* The result of compiling a single LLVM instruction might be many x86
   instructions. We have not determined the structure of this code
   for you. Some of the instructions require only a couple of assembly
   instructions, while others require more. We have suggested that
   you need at least compile_operand, compile_call, and compile_gep
   helpers; you may introduce more as you see fit.

   Here are a few notes:

   - Icmp: the Setb instruction may be of use. Depending on how you
     compile Cbr, you may want to ensure that the value produced by
     Icmp is exactly 0 or 1.

   - Load & Store: these need to dereference the pointers. Const and
     Null operands aren't valid pointers. Don't forget to
     Platform.mangle the global identifier.

   - Alloca: needs to return a pointer into the stack

   - Bitcast: does nothing interesting at the assembly level
*)
let compile_insn (ctxt:ctxt) ((uid:uid), (i:Ll.insn)) : X86.ins list =
  let op_to = compile_operand ctxt in
  let op_to_rax = op_to (Reg Rax) in   (* Move the value of op into rax *)
  let op_to_rcx = op_to (Reg Rcx) in   (* Move the value of op into rcx *)
  let dst = lookup ctxt.layout uid in
  match i with
  | Binop (bop, _t, op1, op2) ->
    let bin op =
      (op_to_rax op1) ::
      (op_to_rcx op2) ::
      Asm.([ op, [~%Rcx; ~%Rax]
           ; Movq, [~%Rax; dst] ])
    in
    begin match bop with
      | Ll.Add  -> bin Addq
      | Ll.Sub  -> bin Subq
      | Ll.Mul  -> bin Imulq
      | Ll.Shl  -> bin Shlq
      | Ll.Lshr -> bin Shrq
      | Ll.Ashr -> bin Sarq
      | Ll.And  -> bin Andq
      | Ll.Or   -> bin Orq
      | Ll.Xor  -> bin Xorq
    end

  (* Alloca instructions allocate a fresh stack slot and
     move the address of the newly allocated storage into the
     destination uid. *)
  | Alloca (_t) -> Asm.([ Pushq, [~$0]
                        ; Movq, [~%Rsp; dst] ])

  (* Load dereferences the pointer value: a local's pointer is first read
     from its stack slot, whereas a global's address is computed directly
     (RIP-relative), so no extra indirection is needed to obtain it. *)
  | Load (_t, op) -> (op_to_rax op) :: Asm.([ Movq, [Ind2 Rax; ~%Rcx]
                                            ; Movq, [~%Rcx; dst] ])

  (* Store writes through the destination pointer: a local destination's
     pointer value is loaded from its slot and then dereferenced, while a
     global destination is addressed directly, RIP-relative. *)
  | Store (_, src, (Id _ as dest)) ->
    (op_to_rcx src) ::
    (op_to_rax dest) :: Asm.([Movq, [~%Rcx; Ind2 Rax]])
  | Store (_, src, Gid gid) ->
    (op_to_rax src) :: Asm.([Movq, [~%Rax; Ind3 (Lbl (Platform.mangle gid), Rip)]])
  | Store (_, _, _) -> failwith "store destination was not a local or global id"

  (* Treat LL i1 values as quadwords, so zero-out the rest of the bits *)
  | Icmp (cnd, _, op1, op2) -> (op_to_rax op1) ::
                               (op_to_rcx op2) ::
                               Asm.([ Cmpq, [~%Rcx; ~%Rax]
                                    ; (Set (compile_cnd cnd)), [dst]
                                    ; Andq, [imm_of_int 1; dst] ])

  | Call(ret_ty, fop, args) ->
    let code = compile_call ctxt fop args in
    code @
    (match ret_ty with
     | Void -> []
     | _ -> Asm.([Movq, [~%Rax; dst]]))

  (* Bitcast is effectively just a Mov at the assembly level *)
  | Bitcast (_, op, _) -> (op_to_rax op) :: Asm.([Movq, [~%Rax; dst]])

  (* Defer to the helper function to compute the pointer value *)
  | Gep (t, op, path) ->
    let code = compile_gep ctxt (t, op) path in
    code @ Asm.([ Movq, [~%Rax; dst] ])

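(* Illustrative example (names hypothetical): with %a at -8(%rbp), %b at
   -16(%rbp) and %x at -24(%rbp), the instruction [%x = add i64 %a, %b]
   compiles, per the Binop case above, to
     movq -8(%rbp), %rax
     movq -16(%rbp), %rcx
     addq %rcx, %rax
     movq %rax, -24(%rbp) *)
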
(* compiling terminators --------------------------------------------------- *)

(* prefix the function name [fn] to a label to ensure that the X86 labels are
   globally unique. *)
let mk_lbl (fn:string) (l:string) = fn ^ "." ^ l

(* Compiling block terminators is not too difficult:

   - Ret should properly exit the function: freeing stack space,
     restoring the value of %rbp, and putting the return value (if
     any) in %rax.

   - Br should jump

   - Cbr branch should treat its operand as a boolean conditional

   [fn] - the name of the function containing this terminator
*)
let compile_terminator (fn:string) (ctxt:ctxt) (t:Ll.terminator) : ins list =
  let epilogue = Asm.([ Movq, [~%Rbp; ~%Rsp]
                      ; Popq, [~%Rbp]
                      ; Retq, []])
  in match t with
  | Ll.Ret (_, None) -> epilogue
  | Ll.Ret (_, Some o) -> (compile_operand ctxt (Reg Rax) o) :: epilogue
  | Ll.Br l -> Asm.([ Jmp, [~$$(mk_lbl fn l)] ])
  | Ll.Cbr (o, l1, l2) -> (compile_operand ctxt (Reg Rax) o)
                          :: Asm.([ Cmpq, [~$0; ~%Rax]
                                  ; J (X86.Neq), [~$$(mk_lbl fn l1)]
                                  ; Jmp, [~$$(mk_lbl fn l2)]
                                  ])

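(* Illustrative example: inside a function named "foo", the terminator
   [br i1 %c, label %then, label %else] becomes roughly
     movq <slot of %c>, %rax
     cmpq $0, %rax
     jne foo.then
     jmp foo.else
   while [ret i64 %v] moves %v into %rax and then runs the epilogue
   (movq %rbp, %rsp; popq %rbp; retq). *)
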
(* compiling blocks --------------------------------------------------------- *)

let compile_block (fn:string) (ctxt:ctxt) (blk:Ll.block) : ins list =
  let insns = List.map (compile_insn ctxt) blk.insns |> List.flatten in
  let term = compile_terminator fn ctxt (snd blk.term) in
  insns @ term

let compile_lbl_block fn lbl ctxt blk : elem =
  Asm.text (mk_lbl fn lbl) (compile_block fn ctxt blk)


(* compile_fdecl ------------------------------------------------------------ *)
let rbp_offset n = Ind3 (Lit (Int64.of_int @@ 8 * n), Rbp)
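(* Note: unlike the byte-offset [rbp_offset] defined near the top of this
   file (which this definition shadows), here [n] counts 8-byte slots, so
   [rbp_offset 2] denotes 16(%rbp) and [rbp_offset (-1)] denotes -8(%rbp). *)
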
(* This helper function computes the location of the nth incoming
   function argument: either in a register or relative to %rbp,
   according to the calling conventions. You might find it useful for
   compile_fdecl.

   [ NOTE: the first six arguments are numbered 0 .. 5 ]
*)
let arg_loc (n : int) : operand =
  begin match arg_reg n with
    | Some op -> op
    | None -> rbp_offset (n-4)
  end

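(* Illustrative mapping: arguments 0..5 land in %rdi, %rsi, %rdx, %rcx,
   %r08, %r09; argument 6 is at 16(%rbp) (just above the saved %rbp at
   0(%rbp) and the return address at 8(%rbp)), argument 7 at 24(%rbp),
   and so on, matching [rbp_offset (n-4)] above. *)
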
(* We suggest that you create a helper function that computes the
   stack layout for a given function declaration.

   - each function argument should be copied into a stack slot
   - in this (inefficient) compilation strategy, each local id
     is also stored as a stack slot.
   - see the discussion about locals

*)
let stack_layout (args : uid list) ((block, lbled_blocks):cfg) : layout =
  let lbled_block_insns = List.map (fun (_, blk) -> blk.insns) lbled_blocks in
  let cfg_uids = List.map fst (block.insns @ (List.flatten lbled_block_insns)) in
  List.mapi (fun i uid -> (uid, rbp_offset (-i - 1))) (args @ cfg_uids)

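(* Illustrative result (hypothetical names): for a function with parameters
   [ "x"; "y" ] and body uids [ "t1"; "t2" ], [stack_layout] assigns
     %x  -> -8(%rbp),   %y  -> -16(%rbp),
     %t1 -> -24(%rbp),  %t2 -> -32(%rbp)
   i.e. the i-th entry gets slot [rbp_offset (-i - 1)]. *)
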
(* The code for the entry-point of a function must do several things:

   - since our simple compiler maps local %uids to stack slots,
     compiling the control-flow-graph body of an fdecl requires us to
     compute the layout (see the discussion of locals and layout)

   - the function code should also comply with the calling
     conventions, typically by moving arguments out of the parameter
     registers (or stack slots) into local storage space. For our
     simple compilation strategy, that local storage space should be
     in the stack. (So the function parameters can also be accounted
     for in the layout.)

   - the function entry code should allocate the stack storage needed
     to hold all of the local stack slots.
*)
let compile_fdecl (tdecls:(tid * ty) list) (name:string) ({ f_ty; f_param; f_cfg }:fdecl) : prog =
  let entry_name = (Platform.mangle name) in
  let layout = stack_layout f_param f_cfg in
  let init_arg_code =
    (List.mapi (fun i uid -> Asm.([ Movq, [arg_loc i; ~%Rax]
                                  ; Movq, [~%Rax; lookup layout uid] ]) )
       f_param) |> List.flatten
  in
  let ctxt = { tdecls; layout } in
  let tmpsize = 8 * (List.length layout) in

  let prologue = Asm.([ Pushq, [~%Rbp]
                      ; Movq, [~%Rsp; ~%Rbp]
                      ; Subq, [~$tmpsize; ~%Rsp] ])
                 @ init_arg_code
  in
  let (entry, body) = f_cfg in
  let entry_insns = compile_block name ctxt entry in
  (Asm.gtext entry_name @@ prologue @ entry_insns) ::
  (List.map (fun (lbl, blk) -> compile_lbl_block name lbl ctxt blk) body)

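(* Illustrative entry code: for a two-parameter function whose layout has
   five slots, the prologue above amounts to
     pushq %rbp
     movq %rsp, %rbp
     subq $40, %rsp          (8 * number of layout slots)
     movq %rdi, %rax
     movq %rax, -8(%rbp)     (first parameter into its slot)
     movq %rsi, %rax
     movq %rax, -16(%rbp)    (second parameter into its slot) *)
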
(* compile_gdecl ------------------------------------------------------------ *)
(* Compile a global value into an X86 global data declaration and map
   a global uid to its associated X86 label.
*)
let rec compile_ginit : ginit -> X86.data list = function
  | GNull     -> [Quad (Lit 0L)]
  | GGid gid  -> [Quad (Lbl (Platform.mangle gid))]
  | GInt c    -> [Quad (Lit c)]
  | GString s -> [Asciz s]
  | GArray gs | GStruct gs -> List.map compile_gdecl gs |> List.flatten
  | GBitcast (_t1,g,_t2) -> compile_ginit g

and compile_gdecl (_, g) = compile_ginit g

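(* Illustrative example: a global struct initializer such as
   { i64 7, i64* null } flattens, via compile_ginit, to the data list
   [Quad (Lit 7L); Quad (Lit 0L)], emitted under the global's mangled label. *)
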
(* compile_prog ------------------------------------------------------------- *)
let compile_prog {tdecls; gdecls; fdecls} : X86.prog =
  let g = fun (lbl, gdecl) -> Asm.data (Platform.mangle lbl) (compile_gdecl gdecl) in
  let f = fun (name, fdecl) -> compile_fdecl tdecls name fdecl in
  (List.map g gdecls) @ (List.map f fdecls |> List.flatten)