Add all the assignment code.

Signed-off-by: jmug <u.g.a.mariano@gmail.com>
This commit is contained in:
Mariano Uvalle 2025-01-24 18:59:28 -08:00
parent 58c6b1f81c
commit cfe502c598
1277 changed files with 48709 additions and 1 deletions

81
hw6/ll/ll-original.ml Normal file
View file

@ -0,0 +1,81 @@
(* LLVMlite: A simplified subset of the LLVM IR *)
type uid = string (* Local identifiers *)
type gid = string (* Global identifiers *)
type tid = string (* Named types *)
type lbl = string (* Labels *)
(* LLVM IR types *)
type ty =
| Void (* mix of unit/bottom from C *)
| I1 | I8 | I64 (* integer types *)
| Ptr of ty (* t* *)
| Struct of ty list (* { t1, t2, ... , tn } *)
| Array of int * ty (* [ NNN x t ] *)
| Fun of fty (* t1, ..., tn -> tr *)
| Namedt of tid (* named type aliases *)
(* Function type: argument types and return type *)
and fty = ty list * ty
(* Syntactic Values *)
type operand =
| Null (* null pointer *)
| Const of int64 (* integer constant *)
| Gid of gid (* A global identifier *)
| Id of uid (* A local identifier *)
(* Type-annotated operands *)
(* Binary operations *)
type bop = Add | Sub | Mul | Shl | Lshr | Ashr | And | Or | Xor
(* Comparison Operators *)
type cnd = Eq | Ne | Slt | Sle | Sgt | Sge
(* Instructions *)
type insn =
| Binop of bop * ty * operand * operand (* bop ty %o1, %o2 *)
| Alloca of ty (* alloca ty *)
| Load of ty * operand (* load ty %u *)
| Store of ty * operand * operand (* store ty %t, ty* %u *)
| Icmp of cnd * ty * operand * operand (* icmp %s ty %s, %s *)
| Call of ty * operand * (ty * operand) list (* fn(%1, %2, ...) *)
| Bitcast of ty * operand * ty (* bitcast ty1 %u to ty2 *)
| Gep of ty * operand * operand list (* getelementptr ty* %u, i64 %vi, ... *)
(* Block terminators *)
type terminator =
| Ret of ty * operand option (* ret i64 %s *)
| Br of lbl (* br label %lbl *)
| Cbr of operand * lbl * lbl (* br i1 %s, label %l1, label %l2 *)
(* Basic blocks *)
type block = { insns: (uid * insn) list; terminator: uid * terminator }
(* Control Flow Graph: a pair of an entry block and a set labeled blocks *)
type cfg = block * (lbl * block) list
(* Function declarations *)
type fdecl = { fty: fty; param: uid list; cfg: cfg }
(* Initializers for global data *)
type ginit =
| GNull (* null literal *)
| GGid of gid (* reference another global *)
| GInt of int64 (* global integer value *)
| GString of string (* constant global string *)
| GArray of gdecl list (* global array *)
| GStruct of gdecl list (* global struct *)
(* Global declaration *)
and gdecl = ty * ginit
(* LLVMlite programs *)
type prog =
{ tdecls: (tid * ty) list (* named types *)
; gdecls: (gid * gdecl) list (* global data *)
; fdecls: (gid * fdecl) list (* code *)
; edecls: (gid * ty) list (* external declarations *)
}

98
hw6/ll/ll.ml Normal file
View file

@ -0,0 +1,98 @@
(* LLVMlite: A simplified subset of LLVM IR *)
(* Local identifiers *)
type uid = string
(* Global identifiers *)
type gid = string
(* Named types *)
type tid = string
(* Labels *)
type lbl = string
(* LLVM types *)
type ty =
| Void
| I1
| I8
| I64
| Ptr of ty
| Struct of ty list
| Array of int * ty
| Fun of ty list * ty
| Namedt of tid
(* Function type: argument types and return type *)
type fty = ty list * ty
(* Syntactic Values *)
type operand =
| Null
| Const of int64
| Gid of gid
| Id of uid
(* Binary i64 Operations *)
type bop =
| Add
| Sub
| Mul
| Shl
| Lshr
| Ashr
| And
| Or
| Xor
(* Comparison Operators *)
type cnd =
| Eq
| Ne
| Slt
| Sle
| Sgt
| Sge
(* Instructions *)
type insn =
| Binop of bop * ty * operand * operand
| Alloca of ty
| Load of ty * operand
| Store of ty * operand * operand
| Icmp of cnd * ty * operand * operand
| Call of ty * operand * (ty * operand) list
| Bitcast of ty * operand * ty
| Gep of ty * operand * operand list
type terminator =
| Ret of ty * operand option
| Br of lbl
| Cbr of operand * lbl * lbl
(* Basic Blocks *)
type block = { insns : (uid * insn) list; term : (uid * terminator) }
(* Control Flow Graphs: entry and labeled blocks *)
type cfg = block * (lbl * block) list
(* Function Declarations *)
type fdecl = { f_ty : fty; f_param : uid list; f_cfg : cfg }
(* Global Data Initializers *)
type ginit =
| GNull
| GGid of gid
| GInt of int64
| GString of string
| GArray of (ty * ginit) list
| GStruct of (ty * ginit) list
| GBitcast of ty * ginit * ty
(* Global Declarations *)
type gdecl = ty * ginit
(* LLVMlite Programs *)
type prog = { tdecls : (tid * ty) list; gdecls : (gid * gdecl) list;
fdecls : (gid * fdecl) list; edecls : (gid * ty) list }

470
hw6/ll/llinterp.ml Normal file
View file

@ -0,0 +1,470 @@
open Ll
open Llutil
(* LLVMlite interpreter *)
type mid = int (* memory block id *)
type fid = int (* stack frame id *)
type idx = int (* index *)
(* Memory block identifier *)
type bid = GlobId of gid
| HeapId of mid
| StckId of fid
| NullId
(* Pointers are tagged with a description of the block they reference
offsets are represented as paths into memory values *)
type ptr = ty * bid * idx list
(* "Simple" or stack values *)
type sval =
| VUndef
| VInt of int64
| VPtr of ptr
(* Memory values *)
type mtree = MWord of sval
| MStr of string
| MNode of mval
and mval = mtree list
(* Locals *)
type locals = uid -> sval
(* The memory state *)
type config =
{ globals : (gid * mval) list
; heap : (mid * mval) list
; stack : (fid * mval) list
}
(* Create memory value for global declaration *)
let mval_of_gdecl (gd:gdecl) : mval =
let rec mtree_of_gdecl : gdecl -> mtree = function
| ty, GNull -> MWord (VPtr (ty, NullId, [0]))
| ty, GGid g -> MWord (VPtr (ty, GlobId g, [0]))
| _, GBitcast (t1, v,t2) -> mtree_of_gdecl (t1, v)
| _, GInt i -> MWord (VInt i)
| _, GString s -> MStr s
| _, GArray gs
| _, GStruct gs -> MNode (List.map mtree_of_gdecl gs)
in [mtree_of_gdecl gd]
(* Create fully undefined memory value for a type *)
let mval_of_ty (nt:tid -> ty) (t:ty) : mval =
let rec mtree_of_ty : ty -> mtree = function
| I1 | I8 | I64 | Ptr _ -> MWord VUndef
| Array (n, I8) -> MStr (String.make n '\x00')
| Array (n, t) -> MNode Array.(make n (MWord VUndef) |> to_list)
| Struct ts -> MNode (List.map mtree_of_ty ts)
| Fun _ | Void -> failwith "mval_of_ty: mval for bad type"
| Namedt id -> mtree_of_ty (nt id)
in [mtree_of_ty t]
(* Printing machine states *)
let mapcat s f l = String.concat s @@ List.map f l
let prefix p f a = p ^ f a
let ( ^. ) s t = if s = "" || t = "" then "" else s ^ t
let pp = Printf.sprintf
let string_of_bid = function
| GlobId gid -> "@" ^ gid
| HeapId mid -> "M" ^ (string_of_int mid)
| StckId fid -> "S" ^ (string_of_int fid)
| NullId -> "null"
let string_of_ptr (t, b, i) =
pp "%s %s %s" (string_of_ty t) (string_of_bid b)
(mapcat ", " string_of_int i)
let string_of_sval (sv:sval) : string =
match sv with
| VUndef -> "undef"
| VInt x -> Int64.to_string x
| VPtr p -> string_of_ptr p
let rec string_of_mval (mv:mval) : string =
"[" ^ (mapcat " " string_of_mtree mv) ^ "]"
and string_of_mtree = function
| MWord sv -> string_of_sval sv
| MStr s -> "\"" ^ s ^ "\""
| MNode m -> string_of_mval m
(* Varieties of undefined behavior. Can be raised by load/store *)
exception OOBIndexDeref (* mem access not in bounds of an array *)
exception NullPtrDeref (* deref Null *)
exception UndefPtrDeref (* deref Undef pointer (from bad GEP) *)
exception IncompatTagDeref (* deref pointer at wrong type (bad bitcast) *)
exception UndefMemDeref (* read uninitialized memory *)
exception UninitMemLoad (* uninitialized memory load *)
exception UseAfterFree (* deref freed stack/heap memory *)
(* Arithmetic operations are all signed 64bit 2s compliment (mod In64.max_int) *)
let interp_bop (b:bop) (v1:sval) (v2:sval) : sval =
let i, j = match v1, v2 with
| VInt i, VInt j -> i, j
| _ -> invalid_arg "interp_bop"
in
let open Int64 in
let f = match b with
| Add -> add | Sub -> sub | Mul -> mul
| And -> logand | Or -> logor | Xor -> logxor
| Shl -> fun i j -> shift_left i @@ to_int j
| Lshr -> fun i j -> shift_right_logical i @@ to_int j
| Ashr -> fun i j -> shift_right i @@ to_int j
in VInt (f i j)
let interp_cnd (c:cnd) (v1:sval) (v2:sval) =
let f = match c with
| Eq -> (=) | Ne -> (<>) | Slt -> (<)
| Sle -> (<=) | Sgt -> (>) | Sge -> (>=)
in
match v1, v2, c with
| VPtr (_,b1,i1), VPtr (_,b2,i2), Eq
| VPtr (_,b1,i1), VPtr (_,b2,i2), Ne ->
VInt (if f (b1,i1) (b2,i2) then 1L else 0L)
| VInt i, VInt j, _ ->
VInt (if f i j then 1L else 0L)
| _ -> invalid_arg "interp_cnd"
let interp_i1 : sval -> bool = function
| VInt 0L -> false
| VInt 1L -> true
| _ -> invalid_arg "interp_i1"
let rec interp_operand (nt:tid -> ty) (locs:locals) (ty:ty) (o:operand) : sval =
match ty, o with
| I64, Const i -> VInt i
| Ptr ty, Null -> VPtr (ty, NullId, [0])
| Ptr ty, Gid g -> VPtr (ty, GlobId g, [0])
| _, Id u -> locs u
| Namedt id, o -> interp_operand nt locs (nt id) o
| _ -> failwith @@ "interp_operand: malformed operand " ^ soo o ^ ":" ^ sot ty
(* Some utility functions *)
let update f k v = fun k' -> if k = k' then v else f k'
let rec is_prefix (l:'a list) (m:'a list) : bool =
match l, m with
| [], _ -> true
| _, [] -> false
| a::l, b::m -> a = b && is_prefix l m
let replace_assoc (l:('a * 'b) list) (a:'a) (b:'b) : ('a * 'b) list =
let rec loop acc = function
| [] -> List.rev @@ (a,b)::acc
| (a',_)::l' when a = a' -> List.rev_append acc @@ (a,b):: l'
| e::l' -> loop (e::acc) l'
in
loop [] l
let replace_nth (l:'a list) (n:int) (a:'a) : 'a list =
let rec loop acc n = function
| [] -> raise Not_found
| a'::l' -> if n = 0 then List.rev_append acc (a::l')
else loop (a'::acc) (pred n) l'
in
loop [] n l
(* Memory access *)
let rec load_idxs (m:mval) (idxs:idx list) : mtree =
match idxs with
| [] -> MNode m
| i::idxs' ->
let len = List.length m in
if len <= i || i < 0 then raise OOBIndexDeref else
match idxs', List.nth m i with
| [], mt -> mt
| [0], MStr s -> MStr s (* [n x i8]* %p and gep [n x i8]* %p, 0, 0 alias *)
| _, MWord v -> failwith "load_idxs: attempted to index into word"
| _, MStr _ -> failwith "load_idxs: attempted to index into string"
| _, MNode m' -> load_idxs m' idxs'
let rec store_idxs (m:mval) (idxs:idx list) (mt:mtree) : mval =
match idxs with
| [] -> [mt]
| i::idxs' ->
let len = List.length m in
if len <= i || i < 0 then raise OOBIndexDeref else
match idxs', List.nth m i with
| [], _ -> replace_nth m i mt
| _, MWord v -> failwith "store_idxs: attempted to index into word"
| _, MStr _ -> failwith "store_idxs: attempted to index into string"
| _, MNode m' -> replace_nth m i @@ MNode (store_idxs m' idxs' mt)
let load_bid (c:config) (bid:bid) : mval =
match bid with
| NullId -> raise NullPtrDeref
| HeapId mid ->
(try List.assoc mid c.heap
with Not_found -> raise UseAfterFree)
| GlobId gid ->
(try List.assoc gid c.globals
with Not_found -> failwith "Load: bogus gid")
| StckId fid ->
(try List.assoc fid c.stack
with Not_found -> raise UseAfterFree)
let load_ptr (c:config) (_, bid, idxs:ptr) : mtree =
load_idxs (load_bid c bid) idxs
let store_ptr (c:config) (_, bid, idxs:ptr) (mt:mtree) : config =
let mval = load_bid c bid in
match bid with
| NullId -> raise NullPtrDeref
| HeapId mid ->
let mval' = store_idxs mval idxs mt in
let heap = replace_assoc c.heap mid mval' in
{c with heap}
| GlobId gid ->
let mval' = store_idxs mval idxs mt in
let globals = replace_assoc c.globals gid mval' in
{c with globals}
| StckId fid ->
let frame' = store_idxs mval idxs mt in
let stack = replace_assoc c.stack fid frame' in
{c with stack}
(* Tag and GEP implementation *)
let effective_tag (nt:tid -> ty) (tag, _, idxs :ptr) : ty =
let rec loop tag idxs =
match tag, idxs with
| t, [] -> t
| Struct ts, i::idxs' -> if List.length ts <= i
then failwith "effective_tag: index oob of struct"
else loop (List.nth ts i) idxs'
| Array (n, t), i::idxs' -> loop t idxs' (* Don't check if OOB! *)
| Namedt id, _ -> loop (nt id) idxs
| _, _::_ -> failwith "effective_tag: index into non-aggregate"
in
loop tag @@ try List.tl idxs
with Failure _ -> failwith "effective_tag: invalid pointer missing first index"
let rec gep_idxs (p:idx list) (idxs:idx list) : idx list =
match p, idxs with
| [], _ | _, [] -> failwith "gep_idxs: invalid indices"
| [i], j::idxs' -> i+j::idxs'
| i::p', _ -> i::gep_idxs p' idxs
let legal_gep (nt:tid -> ty) (sty:ty) (tag:ty) : bool =
let rec ptrtoi8 : ty -> ty = function
| Ptr _ -> Ptr I8
| Struct ts -> Struct (List.map ptrtoi8 ts)
| Array (n, t) -> Array (n, ptrtoi8 t)
| Namedt id -> ptrtoi8 (nt id)
| t -> t
in
let rec flatten_ty : ty -> ty list = function
| Struct ts -> List.(concat @@ map flatten_ty ts)
| t -> [t]
in
is_prefix (flatten_ty @@ ptrtoi8 sty) (flatten_ty @@ ptrtoi8 tag)
let gep_ptr (nt:tid -> ty) (ot:ty) (p:ptr) (idxs':idx list) : sval =
if not (legal_gep nt ot @@ effective_tag nt p) then VUndef else
match p with
| t, NullId, idxs -> VUndef
| t, bid, idxs ->
VPtr (t, bid, gep_idxs idxs idxs')
(* LLVMlite reference interpreter *)
let interp_prog {tdecls; gdecls; fdecls} (args:string list) : sval =
let globals = List.map (fun (g,gd) -> g,mval_of_gdecl gd) gdecls in
let nt (id:tid) : ty =
try List.assoc id tdecls
with Not_found -> failwith @@ "interp_prog: undefined named type " ^ id
in
let interp_op = interp_operand nt in
let next_id : unit -> fid =
let c = ref 0 in
fun () -> c := succ !c; !c
in
(* Global identifiers that will be interpreted as external functions *)
let runtime_fns = [ "ll_puts"; "ll_strcat"; "ll_ltoa" ]
in
(* External function implementation *)
let runtime_call (t:ty) (fn:gid) (args:sval list) (c:config) : config * sval =
let load_strptr p = match load_ptr c p with
| MStr s -> s
| _ -> failwith "runtime_call: non string-ptr arg"
in
match t, fn, args with
| Void, "ll_puts", [VPtr p] ->
let s = load_strptr p in
print_endline s;
c, VUndef
| Ptr t, "ll_strcat", [VPtr ps1; VPtr ps2] ->
let s1 = load_strptr ps1 in
let s2 = load_strptr ps2 in
let mid = next_id () in
let mv = [MStr (s1 ^ s2)] in
let heap = (mid, mv)::c.heap in
{c with heap}, VPtr (t, HeapId mid, [0])
| I64, "ll_ltoa", [VInt i; VPtr dst] ->
let mid = next_id () in
let mv = [MStr (Int64.to_string i)] in
let heap = (mid, mv)::c.heap in
{c with heap}, VPtr (t, HeapId mid, [0])
| _ -> failwith @@ "runtime_call: invalid call to " ^ fn
in
(* Interprety the body of a function *)
let rec interp_cfg (k, blocks:cfg) (locs:locals) (c:config) : config * sval =
match k.insns, k.term with
| (u, Binop (b, t, o1, o2))::insns, _ ->
let v1 = interp_op locs t o1 in
let v2 = interp_op locs t o2 in
let vr = interp_bop b v1 v2 in
let locs' = update locs u vr in
interp_cfg ({k with insns}, blocks) locs' c
| (u, Icmp (cnd, t, o1, o2))::insns, _ ->
let v1 = interp_op locs t o1 in
let v2 = interp_op locs t o2 in
let vr = interp_cnd cnd v1 v2 in
let locs' = update locs u vr in
interp_cfg ({k with insns}, blocks) locs' c
| (u, Alloca ty)::insns, _ ->
begin match c.stack with
| [] -> failwith "stack empty"
| (fid,frame)::stack' ->
let ptr = VPtr (ty, StckId fid, [List.length frame]) in
let stack = (fid, frame @ [MWord VUndef])::stack' in
let locs' = update locs u ptr in
interp_cfg ({k with insns}, blocks) locs' {c with stack}
end
| (u, Load (Ptr t, o))::insns, _ ->
let mt = match interp_op locs (Ptr t) o with
| VPtr p ->
if effective_tag nt p <> t
then raise IncompatTagDeref
else load_ptr c p
| VUndef -> raise UndefPtrDeref
| VInt _ -> failwith "non-ptr arg for load"
in
let v = match mt with
| MWord VUndef -> raise UninitMemLoad
| MWord v -> v
| _ -> failwith "load: returned aggregate"
in
let locs' = update locs u v in
interp_cfg ({k with insns}, blocks) locs' c
| (_, Store (t, os, od))::insns, _ ->
let vs = interp_op locs t os in
let vd = interp_op locs (Ptr t) od in
let c' = match vd with
| VPtr p ->
if effective_tag nt p <> t
then raise IncompatTagDeref
else store_ptr c p (MWord vs)
| VUndef -> raise UndefPtrDeref
| VInt _ -> failwith "non-vptr arg for load"
in
interp_cfg ({k with insns}, blocks) locs c'
| (u, Call (t, ofn, ato))::insns, _ ->
let ats, aos = List.split ato in
let ft = Ptr (Fun (ats, t)) in
let g = match interp_op locs ft ofn with
| VPtr (_, GlobId g, [0]) -> g
| _ -> failwith "bad call arg"
in
let args = List.map2 (interp_op locs) ats aos in
let c', r = interp_call t g args c in
let locs' = update locs u r in
interp_cfg ({k with insns}, blocks) locs' c'
| (u, Bitcast (t1, o, _))::insns, _ ->
let v = interp_op locs t1 o in
let locs' = update locs u v in
interp_cfg ({k with insns}, blocks) locs' c
| (u, Gep (Ptr t, o, os))::insns, _ ->
let idx_of_sval : sval -> idx = function
| VInt i -> Int64.to_int i
| _ -> failwith "idx_of_sval: non-integer value"
in
let vs = List.map (interp_op locs I64) os in
let idxs' = List.map idx_of_sval vs in
let v' = match interp_op locs (Ptr t) o with
| VPtr p -> gep_ptr nt t p idxs'
| VUndef -> VUndef
| VInt _ -> failwith "non-ptr arg for gep"
in
let locs' = update locs u v' in
interp_cfg ({k with insns}, blocks) locs' c
| [], (_, Ret (_, None)) ->
{c with stack = List.tl c.stack}, VUndef
| [], (_, Ret (t, Some o)) ->
{c with stack = List.tl c.stack}, interp_op locs t o
| [], (_, Br l) ->
let k' = List.assoc l blocks in
interp_cfg (k', blocks) locs c
| [], (_, Cbr (o, l1, l2)) ->
let v = interp_op locs I1 o in
let l' = if interp_i1 v then l1 else l2 in
let k' = List.assoc l' blocks in
interp_cfg (k', blocks) locs c
| (u,i)::_, _ -> failwith @@ "interp_cfg: invalid instruction \""
^ string_of_insn i ^ "\" at %" ^ u
and interp_call (ty:ty) (fn:gid) (args:sval list) (c:config) : config * sval =
if List.mem fn runtime_fns
then runtime_call ty fn args c
else
let {f_param; f_cfg} = try List.assoc fn fdecls
with Not_found -> failwith @@ "interp_call: undefined function " ^ fn
in
if List.(length f_param <> length args) then
failwith @@ "interp_call: wrong no. arguments for " ^ fn;
let init_locs l = failwith @@ "interp_call: undefined local " ^ l in
let locs = List.fold_left2 update init_locs f_param args in
let stack = (next_id(), [])::c.stack in
interp_cfg f_cfg locs {c with stack}
in
let mkarg a (p,h) =
let id = next_id () in
VPtr (I8, HeapId id, [0])::p, (id, [MStr a])::h
in
let ptrs, heap = List.fold_right mkarg ("LLINTERP"::args) ([],[]) in
let narg = List.length args + 1 in
let argc = VInt (Int64.of_int @@ narg) in
let aid = next_id () in
let argv = VPtr (Array (narg, Ptr I8), HeapId aid, [0; 0]) in
let amval = List.map (fun p -> MWord p) ptrs in
let heap = (aid, [MNode amval])::heap in
let _, r = interp_call I64 "main" [argc; argv] {globals; heap; stack=[]} in
r

83
hw6/ll/lllexer.mll Normal file
View file

@ -0,0 +1,83 @@
{ open Lexing
open Llparser
exception SyntaxError of string
}
let newline = '\n' | ('\r' '\n') | '\r'
let whitespace = ['\t' ' ']
let lowercase = ['a'-'z']
let uppercase = ['A'-'Z']
let character = lowercase | uppercase
let digit = '-'? ['0'-'9']
let identifier = (character | digit | '_' ) (character | digit | '_' | '.')*
rule token = parse
| eof { EOF }
| whitespace+ { token lexbuf }
| newline+ { token lexbuf }
| "c\"" { read_string (Buffer.create 17) lexbuf }
| '*' { STAR }
| ',' { COMMA }
| ':' { COLON }
| '=' { EQUALS }
| '(' { LPAREN }
| ')' { RPAREN }
| '{' { LBRACE }
| '}' { RBRACE }
| '[' { LBRACKET }
| ']' { RBRACKET }
| "i1" { I1 }
| "i8" { I8 }
| "i32" { I32 }
| "i64" { I64 }
| "to" { TO }
| "br" { BR }
| "eq" { EQ }
| "ne" { NE }
| "or" { OR }
| "and" { AND }
| "add" { ADD }
| "sub" { SUB }
| "mul" { MUL }
| "xor" { XOR }
| "slt" { SLT }
| "sle" { SLE }
| "sgt" { SGT }
| "sge" { SGE }
| "shl" { SHL }
| "ret" { RET }
| "getelementptr" { GEP }
| "type" { TYPE }
| "null" { NULL }
| "lshr" { LSHR }
| "ashr" { ASHR }
| "call" { CALL }
| "icmp" { ICMP }
| "void" { VOID }
| "load" { LOAD }
| "entry" { ENTRY }
| "store" { STORE }
| "label" { LABEL }
| "global" { GLOBAL }
| "define" { DEFINE }
| "declare" { DECLARE }
| "external" { EXTERNAL }
| "alloca" { ALLOCA }
| "bitcast" { BITCAST }
| '%' ('.' ?) (identifier as i) { UID i }
| '@' ('.' ?) (identifier as i) { GID i }
| "x" { CROSS } (* for Array types *)
| digit+ as d { INT (int_of_string d) }
| identifier as i { LBL i }
| ";" ([^ '\n' '\r'])* newline { token lexbuf } (* comments *)
| "declare" ([^ '\n' '\r'])* newline { token lexbuf } (* declare acts as a comment for our IR *)
| _ as c { raise @@ SyntaxError ("Unexpected character: " ^ Char.escaped c) }
and read_string buf = parse
| '\\' "00" '"' { STRING (Buffer.contents buf) }
| '\\' { Buffer.add_char buf '\\'; read_string buf lexbuf }
| [^ '"' '\\']+ { Buffer.add_string buf (Lexing.lexeme lexbuf)
; read_string buf lexbuf }
| _ { raise (SyntaxError ("Illegal string character: " ^ Lexing.lexeme lexbuf)) }
| eof { raise (SyntaxError ("String is not terminated")) }

298
hw6/ll/llparser.mly Normal file
View file

@ -0,0 +1,298 @@
%{ open Ll
open Llutil.Parsing
%}
(* Symbols *)
%token STAR (* * *)
%token COMMA (* , *)
%token COLON (* : *)
%token EQUALS (* = *)
%token LPAREN (* ( *)
%token RPAREN (* ) *)
%token LBRACE (* { *)
%token RBRACE (* } *)
%token LBRACKET (* [ *)
%token RBRACKET (* ] *)
%token EOF
(* Reserved Words *)
%token CROSS (* x *)
%token I1 (* i1 *)
%token I8 (* i8 *)
%token I32 (* i32 *)
%token I64 (* i64 *)
%token TO (* to *)
%token BR (* br *)
%token EQ (* eq *)
%token NE (* ne *)
%token OR (* or *)
%token AND (* and *)
%token ADD (* add *)
%token SUB (* sub *)
%token MUL (* mul *)
%token XOR (* xor *)
%token SLT (* slt *)
%token SLE (* sle *)
%token SGT (* sgt *)
%token SGE (* sge *)
%token SHL (* shl *)
%token RET (* ret *)
%token TYPE (* type *)
%token NULL (* null *)
%token LSHR (* lshr *)
%token ASHR (* ashr *)
%token CALL (* call *)
%token ICMP (* icmp *)
%token VOID (* void *)
%token LOAD (* load *)
%token STORE (* store *)
%token LABEL (* label *)
%token ENTRY (* entry *)
%token GLOBAL (* global *)
%token DEFINE (* define *)
%token DECLARE (* define *)
%token EXTERNAL (* external *)
%token ALLOCA (* alloca *)
%token BITCAST (* bitcast *)
%token GEP (* getelementptr *)
%token <int> INT (* int64 values *)
%token <string> LBL (* labels *)
%token <string> GID (* global identifier *)
%token <string> UID (* local identifier *)
%token <string> STRING (* string literals *)
%start <Ll.prog> prog
%%
prog:
| ds=decls EOF
{ ds }
decls:
| ds = decls_rev
{ { tdecls = List.rev ds.tdecls
; gdecls = List.rev ds.gdecls
; fdecls = List.rev ds.fdecls
; edecls = List.rev ds.edecls
} }
decls_rev:
| (* empty *)
{ { tdecls = [] ; gdecls = [] ; fdecls = [] ; edecls = [] } }
| ds=decls_rev f=fdecl
{ { ds with fdecls = f :: ds.fdecls } }
| ds=decls_rev g=gdecl
{ { ds with gdecls = g :: ds.gdecls } }
| ds=decls_rev t=tdecl
{ { ds with tdecls = t :: ds.tdecls } }
| ds=decls_rev e=edecl
{ { ds with edecls = e :: ds.edecls } }
fdecl:
| DEFINE t=ty l=GID LPAREN a=arg_list RPAREN
LBRACE eb=entry_block bs=block_list RBRACE
{ (l, { f_ty = (List.map fst a, t)
; f_param = List.map snd a
; f_cfg = (eb, bs)
}
) }
gdecl:
| g=GID EQUALS GLOBAL t=ty gi=ginit
{ (g, (t,gi)) }
tdecl:
| tid=UID EQUALS TYPE t=ty
{ (tid, t) }
edecl:
| DECLARE rt=ty g=GID LPAREN ts=separated_list(COMMA, ty) RPAREN
{ (g, Fun (ts,rt)) }
| g=GID EQUALS EXTERNAL GLOBAL t=ty
{ (g, t) }
nonptr_ty:
| VOID { Void }
| I1 { I1 }
| I8 { I8 }
| I64 { I64 }
| LBRACE ts=ty_list RBRACE
{ Struct ts }
| LBRACKET i=INT CROSS t=ty RBRACKET
{ Array (i,t) }
| rt=ty LPAREN ts=ty_list RPAREN
{ Fun (ts, rt) }
| t=UID
{ Namedt t }
ty:
| t=ty STAR
{ Ptr t }
| t=nonptr_ty
{ t }
ty_list_rev:
| t=ty
{ [t] }
| ts=ty_list_rev COMMA t=ty
{ t::ts }
ty_list:
| ts=ty_list_rev
{ List.rev ts }
arg:
| t=ty u=UID { (t,u) }
arg_list_rev:
| (* empty *)
{ [] }
| a=arg
{ [a] }
| args=arg_list_rev COMMA a=arg
{ a::args }
arg_list:
| a=arg_list_rev
{ List.rev a }
operand:
| NULL
{ Null }
| i=INT
{ Const (Int64.of_int i) }
| g=GID
{ Gid g }
| u=UID
{ Id u }
ty_operand_list_rev:
| (* empty *)
{ [] }
| t=ty o=operand
{ [(t,o)] }
| tos=ty_operand_list_rev COMMA t=ty o=operand
{ (t,o)::tos }
ty_operand_list:
| tos=ty_operand_list_rev
{ List.rev tos }
i_operand_list_rev:
| (* empty *)
{ [] }
| I64 o=operand
{ [o] }
| I32 o=operand
{ [o] }
| os=i_operand_list_rev COMMA I64 o=operand
{ o::os }
| os=i_operand_list_rev COMMA I32 o=operand
{ o::os }
i_operand_list:
| os=i_operand_list_rev
{ List.rev os }
terminator:
| RET t=ty o=operand
{ Ret (t, Some o) }
| RET t=ty
{ Ret (t, None) }
| BR LABEL l=UID
{ Br l }
| BR I1 o=operand COMMA LABEL l1=UID COMMA LABEL l2=UID
{ Cbr (o,l1,l2) }
block:
| is=insn_list t=terminator
{ { insns = is; term=(gensym "tmn", t) } }
block_list_rev:
| (* empty *)
{ [] }
| bs=block_list_rev l=LBL COLON b=block
{ (l,b) :: bs }
block_list:
| bs=block_list_rev
{ List.rev bs }
entry_block:
| ENTRY COLON b=block
{ b }
| b=block
{ b }
bop:
| OR { Or }
| ADD { Add }
| SUB { Sub }
| MUL { Mul }
| SHL { Shl }
| XOR { Xor }
| AND { And }
| LSHR { Lshr }
| ASHR { Ashr }
cnd:
| EQ { Eq }
| NE { Ne }
| SLT { Slt }
| SLE { Sle }
| SGT { Sgt }
| SGE { Sge }
insn:
| u=UID EQUALS b=bop t=ty o1=operand COMMA o2=operand
{ (u, Binop (b,t,o1,o2)) }
| u=UID EQUALS ALLOCA t=ty
{ (u, Alloca t) }
| u=UID EQUALS LOAD ty COMMA t=ty o=operand
{ (u, Load (t,o)) }
| STORE t1=ty o1=operand COMMA t2=ty o2=operand
{ (gensym "store", Store (t1,o1,o2)) }
| u=UID EQUALS ICMP c=cnd t=ty o1=operand COMMA o2=operand
{ (u, Icmp (c,t,o1,o2)) }
| CALL t=ty o=operand LPAREN args=ty_operand_list RPAREN
{ (gensym "call", Call (t, o, args)) }
| u=UID EQUALS CALL t=ty o=operand LPAREN args=ty_operand_list RPAREN
{ (u, Call (t, o, args)) }
| u=UID EQUALS BITCAST t1=ty o=operand TO t2=ty
{ (u, Bitcast (t1,o,t2)) }
| u=UID EQUALS GEP ty COMMA t=ty o=operand COMMA os=i_operand_list
{ (u, Gep (t,o,os)) }
insn_list:
| is=list(insn)
{ is }
gdecl_list_rev:
| (* empty *)
{ [] }
| t=ty g=ginit
{ [(t,g)] }
| gs=gdecl_list_rev COMMA t=ty g=ginit
{ (t,g)::gs }
gdecl_list:
| gs=gdecl_list_rev
{ List.rev gs }
ginit:
| NULL
{ GNull }
| g=GID
{ GGid g }
| i=INT
{ GInt (Int64.of_int i) }
| s=STRING
{ GString s }
| LBRACKET gs=gdecl_list RBRACKET
{ GArray gs }
| LBRACE gs=gdecl_list RBRACE
{ GStruct gs }
| BITCAST LPAREN t1=ty g=ginit TO t2=ty RPAREN
{ GBitcast (t1, g, t2) }

38
hw6/ll/llruntime.c Normal file
View file

@ -0,0 +1,38 @@
#include <inttypes.h>
#include <stdlib.h>
#include <string.h>
#include <stdio.h>
/* TODO: if we enforce that all char literals are null-terminated,
and all allocated memory is zero-initialized, are these safe
when llvmlite program does not exhibit UB? */
void *ll_malloc(int64_t n, int64_t size) {
return calloc(n, size);
}
int64_t ll_strlen(int8_t *s) {
return 0;
}
int8_t *ll_strncopy(int8_t *dst, int8_t *src, int64_t i) {
int64_t src_size = ll_strlen(src);
int64_t dst_size = ll_strlen(dst);
if (i >= dst_size)
return dst;
else
return (int8_t*)strncpy((char *)dst + i, (char *)src, dst_size - i);
}
void ll_puts(int8_t *s) {
puts((char *)s);
}
int64_t ll_atol(int8_t *s) {
return atol((char *)s);
}
int64_t ll_ltoa(int64_t i, int8_t *dst) {
int64_t size = ll_strlen(dst);
return snprintf((char *)dst, size, "%ld", (long)i);
}

170
hw6/ll/llutil.ml Normal file
View file

@ -0,0 +1,170 @@
;; open Ll
(* serializing --------------------------------------------------------------- *)
let mapcat s f l = String.concat s @@ List.map f l
let prefix p f a = p ^ f a
let ( ^. ) s t = if s = "" || t = "" then "" else s ^ t
let pp = Printf.sprintf
let rec string_of_ty : ty -> string = function
| Void -> "void"
| I1 -> "i1"
| I8 -> "i8"
| I64 -> "i64"
| Ptr ty -> pp "%s*" (string_of_ty ty)
| Struct ts -> pp "{ %s }" (mapcat ", " string_of_ty ts)
| Array (n, t) -> pp "[%s x %s]" (string_of_int n) (string_of_ty t)
| Fun (ts,t) -> pp "%s (%s)" (string_of_ty t) (mapcat ", " string_of_ty ts)
| Namedt s -> pp "%%%s" s
let sot = string_of_ty
let dptr = function
| Ptr t -> t
| _ -> failwith "PP: expected pointer type"
let string_of_operand : operand -> string = function
| Null -> "null"
| Const i -> Int64.to_string i
| Gid g -> "@" ^ g
| Id u -> "%" ^ u
let soo = string_of_operand
let soop (t,v:ty * operand) : string =
pp "%s %s" (sot t) (soo v)
let string_of_bop : bop -> string = function
| Add -> "add" | Sub -> "sub" | Mul -> "mul"
| Shl -> "shl" | Lshr -> "lshr" | Ashr -> "ashr"
| And -> "and" | Or -> "or" | Xor -> "xor"
let string_of_cnd : cnd -> string = function
| Eq -> "eq" | Ne -> "ne" | Slt -> "slt"
| Sle -> "sle" | Sgt -> "sgt" | Sge -> "sge"
let string_of_gep_index : operand -> string = function
| Const i -> "i32 " ^ Int64.to_string i
| o -> "i64 " ^ soo o
let string_of_insn : insn -> string = function
| Binop (b, t, o1, o2) -> pp "%s %s %s, %s"
(string_of_bop b) (sot t) (soo o1) (soo o2)
| Alloca t -> pp "alloca %s" (sot t)
| Load (t, o) -> pp "load %s, %s %s" (sot (dptr t)) (sot t) (soo o)
| Store (t, os, od) -> pp "store %s %s, %s %s"
(sot t) (soo os) (sot (Ptr t)) (soo od)
| Icmp (c, t, o1, o2) -> pp "icmp %s %s %s, %s"
(string_of_cnd c) (sot t) (soo o1) (soo o2)
| Call (t, o, oa) -> pp "call %s %s(%s)"
(sot t) (soo o) (mapcat ", " soop oa)
| Bitcast (t1, o, t2) -> pp "bitcast %s %s to %s" (sot t1) (soo o) (sot t2)
| Gep (t, o, oi) -> pp "getelementptr %s, %s %s, %s" (sot (dptr t)) (sot t) (soo o)
(mapcat ", " string_of_gep_index oi)
let string_of_named_insn (u,i:uid * insn) : string =
match i with
| Store _ | Call (Void, _, _) -> string_of_insn i
| _ -> pp "%%%s = %s" u (string_of_insn i)
let string_of_terminator : terminator -> string = function
| Ret (_, None) -> "ret void"
| Ret (t, Some o) -> pp "ret %s %s" (sot t) (soo o)
| Br l -> pp "br label %%%s" l
| Cbr (o, l, m) -> pp "br i1 %s, label %%%s, label %%%s" (soo o) l m
let string_of_block (b:block) : string =
(mapcat "\n" (prefix " " string_of_named_insn) b.insns ^. "\n")
^ (prefix " " string_of_terminator) (snd b.term)
let string_of_cfg (e,bs:cfg) : string =
let string_of_named_block (l,b) = l ^ ":\n" ^ string_of_block b in
string_of_block e ^ "\n" ^. mapcat "\n" string_of_named_block bs
let string_of_named_fdecl (g,f:gid * fdecl) : string =
let string_of_arg (t,u) = pp "%s %%%s" (sot t) u in
let ts, t = f.f_ty in
pp "define %s @%s(%s) {\n%s\n}\n" (sot t) g
(mapcat ", " string_of_arg List.(combine ts f.f_param))
(string_of_cfg f.f_cfg)
let rec string_of_ginit : ginit -> string = function
| GNull -> "null"
| GGid g -> pp "@%s" g
| GInt i -> Int64.to_string i
| GString s -> pp "c\"%s\\00\"" s
| GArray gis -> pp "[ %s ]" (mapcat ", " string_of_gdecl gis)
| GStruct gis -> pp "{ %s }" (mapcat ", " string_of_gdecl gis)
| GBitcast (t1,g,t2) -> pp "bitcast (%s %s to %s)" (sot t1) (string_of_ginit g) (sot t2)
and string_of_gdecl (t,gi:gdecl) : string =
pp "%s %s" (sot t) (string_of_ginit gi)
let string_of_named_gdecl (g,gd:gid * gdecl) : string =
pp "@%s = global %s" g (string_of_gdecl gd)
let string_of_named_tdecl (n,t:tid * ty) : string =
pp "%%%s = type %s" n (sot t)
let string_of_named_edecl (g,t:gid * ty) : string =
match t with
| Fun (ts, rt) -> pp "declare %s @%s(%s)" (string_of_ty rt) g
(mapcat ", " string_of_ty ts)
| _ -> pp "@%s = external global %s" g (string_of_ty t)
let string_of_prog (p:prog) : string =
(mapcat "\n" string_of_named_tdecl p.tdecls ^. "\n\n")
^ (mapcat "\n" string_of_named_gdecl p.gdecls ^. "\n\n")
^ (mapcat "\n" string_of_named_fdecl p.fdecls ^. "\n\n")
^ (mapcat "\n" string_of_named_edecl p.edecls)
(* comparison for testing ----------------------------------------------------- *)
(* delete dummy uids before comparison *)
let compare_block (b:block) (c:block) : int =
let del_dummy (u,i) =
match i with
| Store (_, _, _) -> "", i
| Call (Void, _, _) -> "", i
| _ -> u, i
in
let del_term (u,t) = ("", t)
in
Pervasives.compare
{insns=List.map del_dummy b.insns; term=del_term b.term}
{insns=List.map del_dummy c.insns; term=del_term c.term}
(* helper module for AST ------------------------------------------------------ *)
module IR = struct
let define t gid args f_cfg =
let ats, f_param = List.split args in
gid, { f_ty=ats,t; f_param; f_cfg}
(* ignore first label *)
let cfg (lbs:(lbl * block) list) : cfg =
match lbs with
| [] -> failwith "cfg: no blocks!"
| (_,b)::lbs -> b, lbs
let entry insns term : (lbl * block) = "", { insns; term }
let label lbl insns term = lbl, { insns; term }
(* terminators *)
let ret_void = Ret (Void, None)
let ret t op = Ret (t, Some op)
let br l = Br l
let cbr op l1 l2 = Cbr (op, l1, l2)
end
module Parsing = struct
let gensym, reset =
let c = ref 0 in
( fun (s:string) -> incr c; Printf.sprintf "_%s__%d" s (!c) )
, ( fun () -> c := 0 )
end