Update hw4 to a newer version.
Signed-off-by: jmug <u.g.a.mariano@gmail.com>
This commit is contained in:
parent
07d34c0cd8
commit
b24a264f7e
221 changed files with 846 additions and 746 deletions
445
hw4/frontend.ml
445
hw4/frontend.ml
|
|
@ -1,445 +0,0 @@
|
|||
open Ll
|
||||
open Llutil
|
||||
open Ast
|
||||
|
||||
(* instruction streams ------------------------------------------------------ *)
|
||||
|
||||
(* As in the last project, we'll be working with a flattened representation
|
||||
of LLVMlite programs to make emitting code easier. This version
|
||||
additionally makes it possible to emit elements that will be gathered up and
|
||||
"hoisted" to specific parts of the constructed CFG
|
||||
- G of gid * Ll.gdecl: allows you to output global definitions in the middle
|
||||
of the instruction stream. You will find this useful for compiling string
|
||||
literals
|
||||
- E of uid * insn: allows you to emit an instruction that will be moved up
|
||||
to the entry block of the current function. This will be useful for
|
||||
compiling local variable declarations
|
||||
*)
|
||||
|
||||
(* One element of a flattened LLVMlite instruction stream. *)
type elt =
  | L of Ll.lbl
  (** a label marking the start of a block *)
  | I of uid * Ll.insn
  (** an ordinary instruction together with the uid it defines *)
  | T of Ll.terminator
  (** the terminator that ends a block *)
  | G of gid * Ll.gdecl
  (** a global definition to be hoisted out of the function
      (usually a string literal) *)
  | E of uid * Ll.insn
  (** an instruction to be hoisted into the function's entry block *)
|
||||
|
||||
(* A stream is a plain list of elements.  The helper operators below add new
   elements at the head, so the most recently emitted element comes first. *)
type stream = elt list
|
||||
(* [earlier >@ later] is the list [later] followed by [earlier], i.e. list
   append with its arguments flipped. *)
let ( >@ ) earlier later = List.append later earlier
|
||||
(* [rest >:: item] puts [item] at the head of [rest], i.e. cons with its
   arguments flipped. *)
let ( >:: ) rest item = List.cons item rest
|
||||
(* Wrap each (uid, instruction) pair in [I] and reverse the order, so that the
   last pair of the input ends up at the head of the resulting stream. *)
let lift (pairs : (uid * insn) list) : stream =
  List.fold_left (fun acc (uid, insn) -> I (uid, insn) :: acc) [] pairs
|
||||
|
||||
(* Build a CFG and collection of global variable definitions from a stream *)
|
||||
(* Build a CFG and the list of hoisted global definitions from a stream.

   The stream is walked from its head.  Each element updates an accumulator
   (globals, entry-block instructions, instructions of the block under
   construction, its terminator if one has been seen, finished blocks):

   - [T t] starts a new block under construction with terminator [t]; any
     instructions gathered since the previous label are discarded.
   - [I] conses its instruction onto the block under construction.
   - [L l] closes the block under construction and records it under label [l].
     A label seen with neither a terminator nor instructions is dropped.
   - [G] and [E] are collected separately; [E] instructions are placed in
     front of the entry block's own instructions.

   Whatever is still under construction when the stream ends becomes the
   entry block.

   Raises [Failure] if a labeled block has instructions but no terminator, or
   if the entry block has no terminator. *)
let cfg_of_stream (code:stream) : Ll.cfg * (Ll.gid * Ll.gdecl) list =
  let step (gs, einsns, insns, term_opt, blks) e =
    match e with
    | L l ->
      begin match term_opt, insns with
        | Some term, _ ->
          (gs, einsns, [], None, (l, {insns; term})::blks)
        | None, [] ->
          (gs, einsns, [], None, blks)
        | None, _ :: _ ->
          (* Written on one line: a backslash-newline inside a string literal
             skips the following line's leading blanks, which used to glue
             "has" and "no" together. *)
          failwith @@ Printf.sprintf
            "build_cfg: block labeled %s has no terminator" l
      end
    | T t -> (gs, einsns, [], Some (Llutil.Parsing.gensym "tmn", t), blks)
    | I (uid,insn) -> (gs, einsns, (uid,insn)::insns, term_opt, blks)
    | G (gid,gdecl) -> ((gid,gdecl)::gs, einsns, insns, term_opt, blks)
    | E (uid,i) -> (gs, (uid, i)::einsns, insns, term_opt, blks)
  in
  let gs, einsns, insns, term_opt, blks =
    List.fold_left step ([], [], [], None, []) code
  in
  match term_opt with
  | None -> failwith "build_cfg: entry block has no terminator"
  | Some term ->
    (* hoisted entry instructions go ahead of the entry block's own code *)
    let insns = einsns @ insns in
    ({insns; term}, blks), gs
|
||||
|
||||
|
||||
(* compilation contexts ----------------------------------------------------- *)
|
||||
|
||||
(* To compile OAT variables, we maintain a mapping of source identifiers to the
|
||||
corresponding LLVMlite operands. Bindings are added for global OAT variables
|
||||
and local variables that are in scope. *)
|
||||
|
||||
(* Compilation contexts: an association list from source identifiers to the
   LLVMlite type and operand they are bound to.  New bindings are added at the
   head, so a later binding for the same identifier shadows an earlier one. *)
module Ctxt = struct

  type t = (Ast.id * (Ll.ty * Ll.operand)) list
  let empty = []

  (* Add a binding to the context *)
  let add (c:t) (id:id) (bnd:Ll.ty * Ll.operand) : t = (id,bnd)::c

  (* Lookup a binding in the context.
     Raises [Not_found] if [id] is unbound. *)
  let lookup (id:Ast.id) (c:t) : Ll.ty * Ll.operand =
    List.assoc id c

  (* Lookup a function, fail otherwise.
     Raises [Not_found] if [id] is unbound, and [Failure] if it is bound to
     something whose type is not a pointer to a function. *)
  let lookup_function (id:Ast.id) (c:t) : Ll.ty * Ll.operand =
    match List.assoc id c with
    | (Ptr (Fun _), _) as bnd -> bnd
    | _ -> failwith @@ id ^ " not bound to a function"

  (* Like [lookup_function], but returns [None] instead of raising.  Only the
     two exceptions [lookup_function] itself raises are turned into [None];
     anything else propagates to the caller. *)
  let lookup_function_option (id:Ast.id) (c:t) : (Ll.ty * Ll.operand) option =
    try Some (lookup_function id c) with
    | Not_found | Failure _ -> None

end
|
||||
|
||||
(* compiling OAT types ------------------------------------------------------ *)
|
||||
|
||||
(* The mapping of source types onto LLVMlite is straightforward. Booleans and ints
|
||||
are represented as the corresponding integer types. OAT strings are
|
||||
pointers to bytes (I8). Arrays are the most interesting type: they are
|
||||
represented as pointers to structs where the first component is the number
|
||||
of elements in the following array.
|
||||
|
||||
The trickiest part of this project will be satisfying LLVM's rudimentary type
|
||||
system. Recall that global arrays in LLVMlite need to be declared with their
|
||||
length in the type to statically allocate the right amount of memory. The
|
||||
global strings and arrays you emit will therefore have a more specific type
|
||||
annotation than the output of cmp_rty. You will have to carefully bitcast
|
||||
gids to satisfy the LLVM type checker.
|
||||
*)
|
||||
|
||||
(* Translate an OAT type to the LLVMlite type of its values.  Reference types
   become pointers to the translation of what they refer to. *)
let rec cmp_ty (t : Ast.ty) : Ll.ty =
  match t with
  | Ast.TBool -> I1
  | Ast.TInt -> I64
  | Ast.TRef r -> Ptr (cmp_rty r)

(* Translate the target of an OAT reference type: strings are bytes, arrays
   are a length word followed by a zero-length array of elements, functions
   are LLVMlite function types. *)
and cmp_rty (r : Ast.rty) : Ll.ty =
  match r with
  | Ast.RString -> I8
  | Ast.RArray elem -> Struct [I64; Array (0, cmp_ty elem)]
  | Ast.RFun (params, ret) ->
    let ll_params, ll_ret = cmp_fty (params, ret) in
    Fun (ll_params, ll_ret)

(* Translate an OAT return type. *)
and cmp_ret_ty (r : Ast.ret_ty) : Ll.ty =
  match r with
  | Ast.RetVoid -> Void
  | Ast.RetVal t -> cmp_ty t

(* Translate an OAT function signature (parameter types, return type). *)
and cmp_fty (ts, r) : Ll.fty =
  let ll_params = List.map cmp_ty ts in
  let ll_ret = cmp_ret_ty r in
  ll_params, ll_ret
|
||||
|
||||
|
||||
(* OAT types of a binary operator, as (left operand, right operand, result). *)
let typ_of_binop (op : Ast.binop) : Ast.ty * Ast.ty * Ast.ty =
  match op with
  | And | Or ->
    (TBool, TBool, TBool)
  | Eq | Neq | Lt | Lte | Gt | Gte ->
    (TInt, TInt, TBool)
  | Add | Sub | Mul | Shl | Shr | Sar | IAnd | IOr ->
    (TInt, TInt, TInt)
|
||||
|
||||
(* OAT types of a unary operator, as (operand, result). *)
let typ_of_unop (op : Ast.unop) : Ast.ty * Ast.ty =
  match op with
  | Lognot -> (TBool, TBool)
  | Neg | Bitnot -> (TInt, TInt)
|
||||
|
||||
(* Compiler Invariants
|
||||
|
||||
The LLVM IR type of a variable (whether global or local) that stores an Oat
|
||||
array value (or any other reference type, like "string") will always be a
|
||||
double pointer. In general, any Oat variable of Oat-type t will be
|
||||
represented by an LLVM IR value of type Ptr (cmp_ty t). So the Oat variable
|
||||
x : int will be represented by an LLVM IR value of type i64*, y : string will
|
||||
be represented by a value of type i8**, and arr : int[] will be represented
|
||||
by a value of type {i64, [0 x i64]}**. Whether the LLVM IR type is a
|
||||
"single" or "double" pointer depends on whether t is a reference type.
|
||||
|
||||
We can think of the compiler as paying careful attention to whether a piece
|
||||
of Oat syntax denotes the "value" of an expression or a pointer to the
|
||||
"storage space associated with it". This is the distinction between an
|
||||
"expression" and the "left-hand-side" of an assignment statement. Compiling
|
||||
an Oat variable identifier as an expression ("value") does the load, so
|
||||
cmp_exp called on an Oat variable of type t returns (code that) generates a
|
||||
LLVM IR value of type cmp_ty t. Compiling an identifier as a left-hand-side
|
||||
does not do the load, so cmp_lhs called on an Oat variable of type t returns
|
||||
an operand of type (cmp_ty t)*. Extending these invariants to account for
|
||||
array accesses: the assignment e1[e2] = e3; treats e1[e2] as a
|
||||
left-hand-side, so we compile it as follows: compile e1 as an expression to
|
||||
obtain an array value (which is a pointer of type {i64, [0 x s]}* ).
|
||||
compile e2 as an expression to obtain an operand of type i64, generate code
|
||||
that uses getelementptr to compute the offset from the array value, which is
|
||||
a pointer to the "storage space associated with e1[e2]".
|
||||
|
||||
On the other hand, compiling e1[e2] as an expression (to obtain the value of
|
||||
the array), we can simply compile e1[e2] as a left-hand-side and then do the
|
||||
load. So cmp_exp and cmp_lhs are mutually recursive. [[Actually, as I am
|
||||
writing this, I think it could make sense to factor the Oat grammar in this
|
||||
way, which would make things clearer, I may do that for next time around.]]
|
||||
|
||||
|
||||
Consider globals7.oat
|
||||
|
||||
/--------------- globals7.oat ------------------
|
||||
global arr = int[] null;
|
||||
|
||||
int foo() {
|
||||
var x = new int[3];
|
||||
arr = x;
|
||||
x[2] = 3;
|
||||
return arr[2];
|
||||
}
|
||||
/------------------------------------------------
|
||||
|
||||
The translation (given by cmp_ty) of the type int[] is {i64, [0 x i64]}* so
|
||||
the corresponding LLVM IR declaration will look like:
|
||||
|
||||
@arr = global { i64, [0 x i64] }* null
|
||||
|
||||
This means that the type of the LLVM IR identifier @arr is {i64, [0 x i64]}**
|
||||
which is consistent with the type of a locally-declared array variable.
|
||||
|
||||
The local variable x would be allocated and initialized by (something like)
|
||||
the following code snippet. Here %_x7 is the LLVM IR uid containing the
|
||||
pointer to the "storage space" for the Oat variable x.
|
||||
|
||||
%_x7 = alloca { i64, [0 x i64] }* ;; (1)
|
||||
%_raw_array5 = call i64* @oat_alloc_array(i64 3) ;; (2)
|
||||
%_array6 = bitcast i64* %_raw_array5 to { i64, [0 x i64] }* ;; (3)
|
||||
store { i64, [0 x i64]}* %_array6, { i64, [0 x i64] }** %_x7 ;; (4)
|
||||
|
||||
(1) note that alloca uses cmp_ty (int[]) to find the type, so %_x7 has
|
||||
the same type as @arr
|
||||
|
||||
(2) @oat_alloc_array allocates len+1 i64's
|
||||
|
||||
(3) we have to bitcast the result of @oat_alloc_array so we can store it
|
||||
in %_x7
|
||||
|
||||
(4) stores the resulting array value (itself a pointer) into %_x7
|
||||
|
||||
The assignment arr = x; gets compiled to (something like):
|
||||
|
||||
%_x8 = load { i64, [0 x i64] }*, { i64, [0 x i64] }** %_x7 ;; (5)
|
||||
store {i64, [0 x i64] }* %_x8, { i64, [0 x i64] }** @arr ;; (6)
|
||||
|
||||
(5) load the array value (a pointer) that is stored in the address pointed
|
||||
to by %_x7
|
||||
|
||||
(6) store the array value (a pointer) into @arr
|
||||
|
||||
The assignment x[2] = 3; gets compiled to (something like):
|
||||
|
||||
%_x9 = load { i64, [0 x i64] }*, { i64, [0 x i64] }** %_x7 ;; (7)
|
||||
%_index_ptr11 = getelementptr { i64, [0 x i64] },
|
||||
{ i64, [0 x i64] }* %_x9, i32 0, i32 1, i32 2 ;; (8)
|
||||
store i64 3, i64* %_index_ptr11 ;; (9)
|
||||
|
||||
(7) as above, load the array value that is stored in %_x7
|
||||
|
||||
(8) calculate the offset from the array using GEP
|
||||
|
||||
(9) store 3 into the array
|
||||
|
||||
Finally, return arr[2]; gets compiled to (something like) the following.
|
||||
Note that the way arr is treated is identical to x. (Once we set up the
|
||||
translation, there is no difference between Oat globals and locals, except
|
||||
how their storage space is initially allocated.)
|
||||
|
||||
%_arr12 = load { i64, [0 x i64] }*, { i64, [0 x i64] }** @arr ;; (10)
|
||||
%_index_ptr14 = getelementptr { i64, [0 x i64] },
|
||||
{ i64, [0 x i64] }* %_arr12, i32 0, i32 1, i32 2 ;; (11)
|
||||
%_index15 = load i64, i64* %_index_ptr14 ;; (12)
|
||||
ret i64 %_index15
|
||||
|
||||
(10) just like for %_x9, load the array value that is stored in @arr
|
||||
|
||||
(11) calculate the array index offset
|
||||
|
||||
(12) load the array value at the index
|
||||
|
||||
*)
|
||||
|
||||
(* Global initialized arrays:
|
||||
|
||||
There is another wrinkle: To compile global initialized arrays like in the
|
||||
globals4.oat, it is helpful to do a bitcast once at the global scope to
|
||||
convert the "precise type" required by the LLVM initializer to the actual
|
||||
translation type (which sets the array length to 0). So for globals4.oat,
|
||||
the arr global would compile to (something like):
|
||||
|
||||
@arr = global { i64, [0 x i64] }* bitcast
|
||||
({ i64, [4 x i64] }* @_global_arr5 to { i64, [0 x i64] }* )
|
||||
@_global_arr5 = global { i64, [4 x i64] }
|
||||
{ i64 4, [4 x i64] [ i64 1, i64 2, i64 3, i64 4 ] }
|
||||
|
||||
*)
|
||||
|
||||
|
||||
|
||||
(* Some useful helper functions *)
|
||||
|
||||
(* Generate a fresh temporary identifier. Since OAT identifiers cannot begin
|
||||
with an underscore, these should not clash with any source variables *)
|
||||
(* [gensym s] returns "_" followed by [s] followed by the value of a private
   counter, which is incremented on every call (the first call yields 1). *)
let gensym : string -> string =
  let counter = ref 0 in
  fun prefix ->
    counter := !counter + 1;
    "_" ^ prefix ^ string_of_int !counter
|
||||
|
||||
(* Amount of space an Oat type takes when stored in the stack, in bytes.
|
||||
Note that since structured values are manipulated by reference, all
|
||||
Oat values take 8 bytes on the stack.
|
||||
*)
|
||||
(* The type argument is ignored: the result is the constant 8 for every type. *)
let size_oat_ty (_t : Ast.ty) : int64 = 8L
|
||||
|
||||
(* Generate code to allocate a zero-initialized array of source type TRef (RArray t) of the
|
||||
given size. Note "size" is an operand whose value can be computed at
|
||||
runtime *)
|
||||
(* Emit a call to the runtime function "oat_alloc_array" with [size], then
   bitcast the returned i64 pointer to the translated type of an OAT array
   with element type [t].

   Returns the translated array type, the operand naming the bitcast result,
   and the two-instruction stream (call, then bitcast). *)
let oat_alloc_array (t:Ast.ty) (size:Ll.operand) : Ll.ty * operand * stream =
  (* Draw the fresh names one after the other: the evaluation order of the
     components of a tuple is unspecified, so binding both in a single tuple
     would leave their numbering up to the compiler.  The raw pointer is named
     first so that it gets the lower number. *)
  let arr_id = gensym "raw_array" in
  let ans_id = gensym "array" in
  let ans_ty = cmp_ty @@ TRef (RArray t) in
  let arr_ty = Ptr I64 in
  ans_ty, Id ans_id, lift
    [ arr_id, Call(arr_ty, Gid "oat_alloc_array", [I64, size])
    ; ans_id, Bitcast(arr_ty, Id arr_id, ans_ty) ]
|
||||
|
||||
(* Compiles an expression exp in context c, outputting the Ll operand that will
|
||||
receive the value of the expression, and the stream of instructions
|
||||
implementing the expression.
|
||||
|
||||
Tips:
|
||||
- use the provided cmp_ty function!
|
||||
|
||||
- string literals (CStr s) should be hoisted. You'll need to make sure
|
||||
either that the resulting gid has type (Ptr I8), or, if the gid has type
|
||||
[n x i8] (where n is the length of the string), convert the gid to a
|
||||
(Ptr I8), e.g., by using getelementptr.
|
||||
|
||||
- use the provided "oat_alloc_array" function to implement literal arrays
|
||||
(CArr) and the (NewArr) expressions
|
||||
|
||||
*)
|
||||
|
||||
(* Placeholder: not implemented yet.  Every call raises [Failure]. *)
let rec cmp_exp (c:Ctxt.t) (exp:Ast.exp node) : Ll.ty * Ll.operand * stream =
  raise (Failure "cmp_exp not implemented")
|
||||
|
||||
(* Compile a statement in context c with return typ rt. Return a new context,
|
||||
possibly extended with new local bindings, and the instruction stream
|
||||
implementing the statement.
|
||||
|
||||
Left-hand-sides of assignment statements must either be OAT identifiers,
|
||||
or an index into some arbitrary expression of array type. Otherwise, the
|
||||
program is not well-formed and your compiler may throw an error.
|
||||
|
||||
Tips:
|
||||
- for local variable declarations, you will need to emit Allocas in the
|
||||
entry block of the current function using the E() constructor.
|
||||
|
||||
- don't forget to add bindings to the context for local variable
|
||||
declarations
|
||||
|
||||
- you can avoid some work by translating For loops to the corresponding
|
||||
While loop, building the AST and recursively calling cmp_stmt
|
||||
|
||||
- you might find it helpful to reuse the code you wrote for the Call
|
||||
expression to implement the SCall statement
|
||||
|
||||
- compiling the left-hand-side of an assignment is almost exactly like
|
||||
compiling the Id or Index expression. Instead of loading the resulting
|
||||
pointer, you just need to store to it!
|
||||
|
||||
*)
|
||||
|
||||
(* Placeholder: not implemented yet.  Every call raises [Failure]. *)
let rec cmp_stmt (c:Ctxt.t) (rt:Ll.ty) (stmt:Ast.stmt node) : Ctxt.t * stream =
  raise (Failure "cmp_stmt not implemented")

(* Compile the statements of a block one after the other.  The context
   returned by each statement is the one used for the next, and each
   statement's code is combined with the code gathered so far using [>@]. *)
and cmp_block (c:Ctxt.t) (rt:Ll.ty) (stmts:Ast.block) : Ctxt.t * stream =
  let compile_next (ctxt, code_so_far) stmt =
    let ctxt', stmt_code = cmp_stmt ctxt rt stmt in
    (ctxt', code_so_far >@ stmt_code)
  in
  List.fold_left compile_next (c, []) stmts
|
||||
|
||||
|
||||
|
||||
(* Adds each function identifier to the context at an
|
||||
appropriately translated type.
|
||||
|
||||
NOTE: The Gid of a function is just its source name
|
||||
*)
|
||||
(* Extend [c] with one binding per function declaration in [p]: the function's
   source name is bound to its translated function-pointer type and to the
   global identifier of the same name.  Other declarations leave the context
   unchanged. *)
let cmp_function_ctxt (c:Ctxt.t) (p:Ast.prog) : Ctxt.t =
  let bind_fdecl ctxt decl =
    match decl with
    | Ast.Gfdecl { elt = { frtyp; fname; args; _ }; _ } ->
      let arg_tys = List.map fst args in
      let fun_ty = cmp_ty (TRef (RFun (arg_tys, frtyp))) in
      Ctxt.add ctxt fname (fun_ty, Gid fname)
    | _ -> ctxt
  in
  List.fold_left bind_fdecl c p
|
||||
|
||||
(* Populate a context with bindings for global variables
|
||||
mapping OAT identifiers to LLVMlite gids and their types.
|
||||
|
||||
Only a small subset of OAT expressions can be used as global initializers
|
||||
in well-formed programs. (The constructors starting with C).
|
||||
*)
|
||||
(* Placeholder: not implemented yet.  Every call raises [Failure]. *)
let cmp_global_ctxt (c:Ctxt.t) (p:Ast.prog) : Ctxt.t =
  raise (Failure "cmp_global_ctxt not implemented")
|
||||
|
||||
(* Compile a function declaration in global context c. Return the LLVMlite cfg
|
||||
and a list of global declarations containing the string literals appearing
|
||||
in the function.
|
||||
|
||||
You will need to
|
||||
1. Allocate stack space for the function parameters using Alloca
|
||||
2. Store the function arguments in their corresponding alloca'd stack slot
|
||||
3. Extend the context with bindings for function variables
|
||||
4. Compile the body of the function using cmp_block
|
||||
5. Use cfg_of_stream to produce a LLVMlite cfg from the resulting stream
|
||||
*)
|
||||
|
||||
(* Placeholder: not implemented yet.  Every call raises [Failure]. *)
let cmp_fdecl (c:Ctxt.t) (f:Ast.fdecl node) : Ll.fdecl * (Ll.gid * Ll.gdecl) list =
  raise (Failure "cmp_fdecl not implemented")
|
||||
|
||||
(* Compile a global initializer, returning the resulting LLVMlite global
|
||||
declaration, and a list of additional global declarations.
|
||||
|
||||
Tips:
|
||||
- Only CNull, CBool, CInt, CStr, and CArr can appear as global initializers
|
||||
in well-formed OAT programs. Your compiler may throw an error for the other
|
||||
cases
|
||||
|
||||
- OAT arrays are always handled via pointers. A global array of arrays will
|
||||
be an array of pointers to arrays emitted as additional global declarations.
|
||||
*)
|
||||
|
||||
(* Placeholder: not implemented yet.  Every call raises [Failure]. *)
let rec cmp_gexp c (e:Ast.exp node) : Ll.gdecl * (Ll.gid * Ll.gdecl) list =
  raise (Failure "cmp_gexp not implemented")
|
||||
|
||||
(* Oat internals function context ------------------------------------------- *)
|
||||
(* Name and LLVMlite function type of the runtime allocator: it takes one i64
   and returns a pointer to i64. *)
let internals =
  [ ("oat_alloc_array", Ll.Fun ([I64], Ptr I64)) ]
|
||||
|
||||
(* Oat builtin function context --------------------------------------------- *)
|
||||
(* Name and translated function type of every OAT built-in function. *)
let builtins =
  let str = TRef RString in
  let int_arr = TRef (RArray TInt) in
  (* translated function type from OAT parameter types and a return type *)
  let fn params ret = cmp_rty (RFun (params, ret)) in
  [ "array_of_string",  fn [str] (RetVal int_arr)
  ; "string_of_array",  fn [int_arr] (RetVal str)
  ; "length_of_string", fn [str] (RetVal TInt)
  ; "string_of_int",    fn [TInt] (RetVal str)
  ; "string_cat",       fn [str; str] (RetVal str)
  ; "print_string",     fn [str] RetVoid
  ; "print_int",        fn [TInt] RetVoid
  ; "print_bool",       fn [TBool] RetVoid
  ]
|
||||
|
||||
(* Compile a OAT program to LLVMlite *)
|
||||
(* Compile a whole OAT program.

   The context is built in three layers: the built-in functions, then the
   program's own functions, then its global variables.  Every top-level
   declaration is then compiled in that context; declarations keep their
   source order in the output, and the extra globals produced while compiling
   a declaration are placed next to it in the global list.  The external
   declarations are the internals followed by the built-ins. *)
let cmp_prog (p:Ast.prog) : Ll.prog =
  (* layer 1: built-in functions, each bound to a global of the same name *)
  let builtin_ctxt =
    List.fold_left
      (fun ctxt (name, ty) -> Ctxt.add ctxt name (Ll.Ptr ty, Gid name))
      Ctxt.empty builtins
  in
  (* layer 2: user functions; layer 3: global variables *)
  let fun_ctxt = cmp_function_ctxt builtin_ctxt p in
  let c = cmp_global_ctxt fun_ctxt p in

  (* compile one declaration and put its output in front of what has already
     been produced for the declarations that follow it *)
  let compile_decl decl (fs, gs) =
    match decl with
    | Ast.Gvdecl { elt = gd; _ } ->
      let ll_gd, extra = cmp_gexp c gd.init in
      (fs, ((gd.name, ll_gd) :: extra) @ gs)
    | Ast.Gfdecl fd ->
      let fdecl, extra = cmp_fdecl c fd in
      ((fd.elt.fname, fdecl) :: fs, extra @ gs)
  in
  let fdecls, gdecls = List.fold_right compile_decl p ([], []) in

  { tdecls = []; gdecls; fdecls; edecls = internals @ builtins }
|
||||
Loading…
Add table
Add a link
Reference in a new issue