Modified hw3 to newer version

Signed-off-by: jmug <u.g.a.mariano@gmail.com>
This commit is contained in:
Mariano Uvalle 2025-01-24 20:53:24 -08:00
parent 8437a82fbf
commit 07d34c0cd8
40 changed files with 856 additions and 271 deletions

344
hw3/bin/backend.ml Normal file
View file

@ -0,0 +1,344 @@
(* ll ir compilation -------------------------------------------------------- *)
open Ll
open X86
module Platform = Util.Platform
(* Overview ----------------------------------------------------------------- *)
(* We suggest that you spend some time understanding this entire file and
how it fits with the compiler pipeline before making changes. The suggested
plan for implementing the compiler is provided on the project web page.
*)
(* helpers ------------------------------------------------------------------ *)
(* Map LL comparison operations to X86 condition codes *)
let compile_cnd = function
| Ll.Eq -> X86.Eq
| Ll.Ne -> X86.Neq
| Ll.Slt -> X86.Lt
| Ll.Sle -> X86.Le
| Ll.Sgt -> X86.Gt
| Ll.Sge -> X86.Ge
(* locals and layout -------------------------------------------------------- *)
(* One key problem in compiling the LLVM IR is how to map its local
identifiers to X86 abstractions. For the best performance, one
would want to use an X86 register for each LLVM %uid. However,
since there are an unlimited number of %uids and only 16 registers,
doing so effectively is quite difficult. We will see later in the
course how _register allocation_ algorithms can do a good job at
this.
A simpler, but less performant, implementation is to map each %uid
in the LLVM source to a _stack slot_ (i.e. a region of memory in
the stack). Since LLVMlite, unlike real LLVM, permits %uid locals
to store only 64-bit data, each stack slot is an 8-byte value.
[ NOTE: For compiling LLVMlite, even i1 data values should be
represented as a 8-byte quad. This greatly simplifies code
generation. ]
We call the datastructure that maps each %uid to its stack slot a
'stack layout'. A stack layout maps a uid to an X86 operand for
accessing its contents. For this compilation strategy, the operand
is always an offset from %rbp (in bytes) that represents a storage slot in
the stack.
*)
type layout = (uid * X86.operand) list
(* A context contains the global type declarations (needed for getelementptr
calculations) and a stack layout. *)
type ctxt = { tdecls : (tid * ty) list
; layout : layout
}
(* useful for looking up items in tdecls or layouts *)
let lookup m x = List.assoc x m
(* compiling operands ------------------------------------------------------ *)
(* LLVM IR instructions support several kinds of operands.
LL local %uids live in stack slots, whereas global ids live at
global addresses that must be computed from a label. Constants are
immediately available, and the operand Null is the 64-bit 0 value.
NOTE: two important facts about global identifiers:
(1) You should use (Platform.mangle gid) to obtain a string
suitable for naming a global label on your platform (OS X expects
"_main" while linux expects "main").
(2) 64-bit assembly labels are not allowed as immediate operands.
That is, the X86 code: movq _gid %rax which looks like it should
put the address denoted by _gid into %rax is not allowed.
Instead, you need to compute an %rip-relative address using the
leaq instruction: leaq _gid(%rip) %rax.
One strategy for compiling instruction operands is to use a
designated register (or registers) for holding the values being
manipulated by the LLVM IR instruction. You might find it useful to
implement the following helper function, whose job is to generate
the X86 instruction that moves an LLVM operand into a designated
destination (usually a register).
*)
let compile_operand (ctxt:ctxt) (dest:X86.operand) : Ll.operand -> ins =
function _ -> failwith "compile_operand unimplemented"
(* compiling call ---------------------------------------------------------- *)
(* You will probably find it helpful to implement a helper function that
generates code for the LLVM IR call instruction.
The code you generate should follow the x64 System V AMD64 ABI
calling conventions, which places the first six 64-bit (or smaller)
values in registers and pushes the rest onto the stack. Note that,
since all LLVM IR operands are 64-bit values, the first six
operands will always be placed in registers. (See the notes about
compiling fdecl below.)
[ NOTE: Don't forget to preserve caller-save registers (only if needed). ]
[ NOTE: Remember, call can use labels as immediates! You shouldn't need to
perform any RIP-relative addressing for this one. ]
[ NOTE: It is the caller's responsibility to clean up arguments pushed onto
the stack, so you must free the stack space after the call returns. (But
see below about alignment.) ]
[ NOTE: One important detail about the ABI besides the conventions is that,
at the time the [callq] instruction is invoked, %rsp *must* be 16-byte aligned.
However, because LLVM IR provides the Alloca instruction, which can dynamically
allocate space on the stack, it is hard to know statically whether %rsp meets
this alignment requirement. Moroever: since, according to the calling
conventions, stack arguments numbered > 6 are pushed to the stack, we must take
that into account when enforcing the alignment property.
We suggest that, for a first pass, you *ignore* %rsp alignment -- only a few of
the test cases rely on this behavior. Once you have everything else working,
you can enforce proper stack alignment at the call instructions by doing
these steps:
1. *before* pushing any arguments of the call to the stack, ensure that the
%rsp is 16-byte aligned. You can achieve that with the x86 instruction:
`andq $-16, %rsp` (which zeros out the lower 4 bits of %rsp, possibly
"allocating" unused padding space on the stack)
2. if there are an *odd* number of arguments that will be pushed to the stack
(which would break the 16-byte alignment because stack slots are 8 bytes),
allocate an extra 8 bytes of padding on the stack.
3. follow the usual calling conventions - any stack arguments will still leave
%rsp 16-byte aligned
4. after the call returns, in addition to freeing up the stack slots used by
arguments, if there were an odd number of slots, also free the extra padding.
]
*)
(* compiling getelementptr (gep) ------------------------------------------- *)
(* The getelementptr instruction computes an address by indexing into
a datastructure, following a path of offsets. It computes the
address based on the size of the data, which is dictated by the
data's type.
To compile getelementptr, you must generate x86 code that performs
the appropriate arithmetic calculations.
*)
(* [size_ty] maps an LLVMlite type to a size in bytes.
(needed for getelementptr)
- the size of a struct is the sum of the sizes of each component
- the size of an array of t's with n elements is n * the size of t
- all pointers, I1, and I64 are 8 bytes
- the size of a named type is the size of its definition
- Void, i8, and functions have undefined sizes according to LLVMlite.
Your function should simply return 0 in those cases
*)
let rec size_ty (tdecls:(tid * ty) list) (t:Ll.ty) : int =
failwith "size_ty not implemented"
(* Generates code that computes a pointer value.
1. op must be of pointer type: t*
2. the value of op is the base address of the calculation
3. the first index in the path is treated as the index into an array
of elements of type t located at the base address
4. subsequent indices are interpreted according to the type t:
- if t is a struct, the index must be a constant n and it
picks out the n'th element of the struct. [ NOTE: the offset
within the struct of the n'th element is determined by the
sizes of the types of the previous elements ]
- if t is an array, the index can be any operand, and its
value determines the offset within the array.
- if t is any other type, the path is invalid
5. if the index is valid, the remainder of the path is computed as
in (4), but relative to the type f the sub-element picked out
by the path so far
*)
let compile_gep (ctxt:ctxt) (op : Ll.ty * Ll.operand) (path: Ll.operand list) : ins list =
failwith "compile_gep not implemented"
(* compiling instructions -------------------------------------------------- *)
(* The result of compiling a single LLVM instruction might be many x86
instructions. We have not determined the structure of this code
for you. Some of the instructions require only a couple of assembly
instructions, while others require more. We have suggested that
you need at least compile_operand, compile_call, and compile_gep
helpers; you may introduce more as you see fit.
Here are a few notes:
- Icmp: the Setb instruction may be of use. Depending on how you
compile Cbr, you may want to ensure that the value produced by
Icmp is exactly 0 or 1.
- Load & Store: these need to dereference the pointers. Const and
Null operands aren't valid pointers. Don't forget to
Platform.mangle the global identifier.
- Alloca: needs to return a pointer into the stack
- Bitcast: does nothing interesting at the assembly level
*)
let compile_insn (ctxt:ctxt) ((uid:uid), (i:Ll.insn)) : X86.ins list =
failwith "compile_insn not implemented"
(* compiling terminators --------------------------------------------------- *)
(* prefix the function name [fn] to a label to ensure that the X86 labels are
globally unique . *)
let mk_lbl (fn:string) (l:string) = fn ^ "." ^ l
(* Compile block terminators is not too difficult:
- Ret should properly exit the function: freeing stack space,
restoring the value of %rbp, and putting the return value (if
any) in %rax.
- Br should jump
- Cbr branch should treat its operand as a boolean conditional
[fn] - the name of the function containing this terminator
*)
let compile_terminator (fn:string) (ctxt:ctxt) (t:Ll.terminator) : ins list =
failwith "compile_terminator not implemented"
(* compiling blocks --------------------------------------------------------- *)
(* We have left this helper function here for you to complete.
[fn] - the name of the function containing this block
[ctxt] - the current context
[blk] - LLVM IR code for the block
*)
let compile_block (fn:string) (ctxt:ctxt) (blk:Ll.block) : ins list =
failwith "compile_block not implemented"
let compile_lbl_block fn lbl ctxt blk : elem =
Asm.text (mk_lbl fn lbl) (compile_block fn ctxt blk)
(* compile_fdecl ------------------------------------------------------------ *)
(* Complete this helper function, which computes the location of the nth incoming
function argument: either in a register or relative to %rbp,
according to the calling conventions. We will test this function as part of
the hidden test cases.
You might find it useful for compile_fdecl.
[ NOTE: the first six arguments are numbered 0 .. 5 ]
*)
let arg_loc (n : int) : operand =
failwith "arg_loc not implemented"
(* We suggest that you create a helper function that computes the
stack layout for a given function declaration.
- each function argument should be copied into a stack slot
- in this (inefficient) compilation strategy, each local id
is also stored as a stack slot.
- see the discussion about locals
*)
let stack_layout (args : uid list) ((block, lbled_blocks):cfg) : layout =
failwith "stack_layout not implemented"
(* The code for the entry-point of a function must do several things:
- since our simple compiler maps local %uids to stack slots,
compiling the control-flow-graph body of an fdecl requires us to
compute the layout (see the discussion of locals and layout)
- the function code should also comply with the calling
conventions, typically by moving arguments out of the parameter
registers (or stack slots) into local storage space. For our
simple compilation strategy, that local storage space should be
in the stack. (So the function parameters can also be accounted
for in the layout.)
- the function entry code should allocate the stack storage needed
to hold all of the local stack slots.
*)
let compile_fdecl (tdecls:(tid * ty) list) (name:string) ({ f_param; f_cfg; _ }:fdecl) : prog =
failwith "compile_fdecl unimplemented"
(* compile_gdecl ------------------------------------------------------------ *)
(* Compile a global value into an X86 global data declaration and map
a global uid to its associated X86 label.
*)
let rec compile_ginit : ginit -> X86.data list = function
| GNull -> [Quad (Lit 0L)]
| GGid gid -> [Quad (Lbl (Platform.mangle gid))]
| GInt c -> [Quad (Lit c)]
| GString s -> [Asciz s]
| GArray gs | GStruct gs -> List.map compile_gdecl gs |> List.flatten
| GBitcast (_t1,g,_t2) -> compile_ginit g
and compile_gdecl (_, g) = compile_ginit g
(* compile_prog ------------------------------------------------------------- *)
let compile_prog {tdecls; gdecls; fdecls; _} : X86.prog =
let g = fun (lbl, gdecl) -> Asm.data (Platform.mangle lbl) (compile_gdecl gdecl) in
let f = fun (name, fdecl) -> compile_fdecl tdecls name fdecl in
(List.map g gdecls) @ (List.map f fdecls |> List.flatten)

64
hw3/bin/cinterop.c Normal file
View file

@ -0,0 +1,64 @@
#include <stdbool.h>
#include <stdint.h>
#include <stdlib.h>
#include <string.h>
#include <stdio.h>
void ll_puts(int8_t *s) {
puts((char *)s);
}
int8_t* ll_strcat(int8_t* s1, int8_t* s2) {
int l1 = strlen((char*)s1);
int l2 = strlen((char*)s2);
char* buf = (char*)calloc(l1 + l2 + 1, sizeof(char));
strncpy(buf, (char*)s1, l1);
strncpy(buf + l1, (char*)s2, l2+1);
return (int8_t*) buf;
}
int64_t ll_callback(int64_t (*fun)(int64_t, int64_t)) {
int64_t x = 19L;
return fun(x, x);
}
int8_t* ll_ltoa(int64_t i) {
// Safety: INT64_MIN is -9223372036854775808, which has 20 characters when
// represented as a string. After including the null terminator, we need to
// allocate a buffer of size 21.
char* buf = (char*)calloc(21, sizeof(char));
int t = 0;
if (i == 0) {
buf[t++] = '0';
return (int8_t*) buf;
}
bool negative = i < 0;
if (!negative) {
// Normalize to negative number to avoid overflow
i = -i;
}
// Generate the digits in reverse order, from [0..t)
while (i < 0) {
char last_digit = '0' + -(i % 10);
buf[t++] = last_digit;
i /= 10;
}
if (negative) {
buf[t++] = '-';
}
// Reverse the buffer
for (int j = 0, r = t - 1; j < r; j++, r--) {
char temp = buf[j];
buf[j] = buf[r];
buf[r] = temp;
}
return (int8_t*) buf;
}
void *ll_malloc(int64_t n, int64_t size) {
return calloc(n, size);
}

174
hw3/bin/driver.ml Normal file
View file

@ -0,0 +1,174 @@
open Printf
module Platform = Util.Platform
(* configuration flags ------------------------------------------------------ *)
let interpret_ll = ref false (* run the ll interpreter? *)
let print_ll_flag = ref false (* print the generated ll code? *)
let print_x86_flag = ref false (* print the generated x86 code? *)
let clang = ref false (* use the clang backend? *)
let assemble = ref true (* assemble the .s to .o files? *)
let link = ref true (* combine multiple .o files executable? *)
let execute_x86 = ref false (* run the resulting x86 program? *)
let executable_filename = ref "a.out"
(* files processed during this run of the compiler *)
let files : string list ref = ref []
let link_files = ref []
let add_link_file path =
link_files := path :: (!link_files)
(* terminal output ---------------------------------------------------------- *)
let print_banner s =
let rec dashes n = if n = 0 then "" else "-"^(dashes (n-1)) in
printf "%s %s\n%!" (dashes (79 - (String.length s))) s
let print_ll file ll_ast =
print_banner (file ^ ".ll");
print_endline (Llutil.string_of_prog ll_ast)
let print_x86 file asm_str =
print_banner file;
print_endline asm_str
(* file i/o ----------------------------------------------------------------- *)
let read_file (file:string) : string =
let lines = ref [] in
let channel = open_in file in
try while true; do
lines := input_line channel :: !lines
done; ""
with End_of_file ->
close_in channel;
String.concat "\n" (List.rev !lines)
let write_file (file:string) (out:string) =
let channel = open_out file in
fprintf channel "%s" out;
close_out channel
(* running the generated code ----------------------------------------------- *)
let interpret program args : string =
let result = Llinterp.interp_prog program args in
Llinterp.string_of_sval result
let string_of_file (f:in_channel) : string =
let rec _string_of_file (stream:string list) (f:in_channel) : string list=
try
let s = input_line f in
_string_of_file (s::stream) f
with
| End_of_file -> stream
in
String.concat "\n" (List.rev (_string_of_file [] f))
let run_executable timeout arg pr =
let cmd = sprintf "%s%s %s" Platform.dot_path pr arg in
Platform.timeout_sh timeout cmd (fun _ i -> i)
let run_executable_to_tmpfile timeout arg pr tmp =
let cmd = sprintf "%s%s %d > %s 2>&1" Platform.dot_path pr arg tmp in
Platform.timeout_sh timeout cmd Platform.ignore_error
let run_program (args:string) (executable:string) (tmp_out:string) : string =
let _ =
let cmd =
sprintf "%s%s %s > %s 2>&1" Platform.dot_path executable args tmp_out
in
Platform.sh cmd Platform.ignore_error
in
let fi = open_in tmp_out in
let result = string_of_file fi in
let _ = close_in fi in
result
(* compiler pipeline -------------------------------------------------------- *)
(* These functions implement the compiler pipeline for a single ll file:
- parse the file
- compile to a .s file using either clang or backend.ml
- assemble the .s to a .o file using clang
*)
let parse_ll_file filename =
let program = read_file filename |>
Lexing.from_string |>
Llparser.prog Lllexer.token
in
program
let process_ll_ast path file ll_ast =
let _ = if !print_ll_flag then print_ll file ll_ast in
(* Optionally interpret it using the cs131 reference interperter. *)
let _ = if !interpret_ll then
let result = interpret ll_ast [] in
Printf.printf "Interpreter Result: %s\n" result
in
(* generated file names *)
let dot_s_file = Platform.gen_name !Platform.output_path file ".s" in
let dot_o_file = Platform.gen_name !Platform.output_path file ".o" in
let _ =
if !clang then begin
Platform.verb "* compiling with clang";
Platform.clang_compile path dot_s_file;
if !print_x86_flag then begin
print_banner dot_s_file;
Platform.sh (Printf.sprintf "cat %s" dot_s_file) Platform.raise_error
end
end else begin
Platform.verb "* compiling with cs131 backend";
let asm_ast = Backend.compile_prog ll_ast in
let asm_str = X86.string_of_prog asm_ast in
let _ = if !print_x86_flag then print_x86 dot_s_file asm_str in
let _ = write_file dot_s_file asm_str in
()
end
in
let _ = if !assemble then Platform.assemble dot_s_file dot_o_file in
let _ = add_link_file dot_o_file in
()
let process_ll_file path file =
let _ = Platform.verb @@ Printf.sprintf "* processing file: %s\n" path in
let ll_ast = parse_ll_file path in
process_ll_ast path file ll_ast
(* process files based on extension ----------------------------------------- *)
let process_file path =
let basename, ext = Platform.path_to_basename_ext path in
begin match ext with
| "ll" -> process_ll_file path basename
| "o" -> add_link_file path
| "c" -> add_link_file path
| _ -> failwith @@ Printf.sprintf "found unsupported file type: %s" path
end
(* process each file separately and then link all of them together *)
let process_files files =
if (List.length files) > 0 then begin
List.iter process_file files;
( if !assemble && !link then
Platform.link (List.rev !link_files) !executable_filename );
( if !assemble && !link && !execute_x86 then
let ret = run_executable 10 "" !executable_filename in
print_banner @@ Printf.sprintf "Executing: %s" !executable_filename;
Printf.printf "* %s returned %d\n" !executable_filename ret )
end

24
hw3/bin/dune Normal file
View file

@ -0,0 +1,24 @@
(library
(name llbackend)
(modules backend driver)
(libraries str num util x86 ll))
(env
(dev
(flags (:standard -warn-error -A))))
(executable
(public_name main)
(name main)
(modules main)
(promote (until-clean))
(libraries
; OCaml standard libraries
; project libraries
str
num
util
x86
ll
studenttests
gradedtests))

48
hw3/bin/main.ml Normal file
View file

@ -0,0 +1,48 @@
open Arg
open Util.Assert
open Llbackend.Driver
(* testing harness ---------------------------------------------------------- *)
exception Ran_tests
let suite = ref (Studenttests.provided_tests @ Gradedtests.graded_tests)
let execute_tests () =
let outcome = run_suite !suite in
Printf.printf "%s\n" (outcome_to_string outcome);
raise Ran_tests
(* command-line arguments --------------------------------------------------- *)
let args =
[ ("--test", Unit execute_tests, "run the test suite, ignoring other files inputs")
; ("-op", Set_string Platform.output_path, "set the path to the output files directory [default='output']")
; ("-o", Set_string executable_filename, "set the name of the resulting executable [default='a.out']")
; ("-S", Clear assemble, "stop after generating .s files; do generate .o files")
; ("-c", Clear link, "stop after generating .o files; do not generate executables")
; ("--interpret-ll", Set interpret_ll, "runs each LL program through the LL interpreter")
; ("--print-ll", Set print_ll_flag, "prints the program LL code")
; ("--print-x86", Set print_x86_flag, "prints the program's assembly code")
; ("--clang", Set clang, "compiles to assembly using clang, not the 131 backend")
; ("--execute-x86", Set execute_x86, "run the resulting executable file")
; ("-v", Unit Platform.enable_verbose, "enables more verbose compilation output")
]
(* Files found on the command line *)
let files = ref []
let main () =
Platform.configure_os ();
Platform.create_output_dir ();
try
Arg.parse args (fun filename -> files := filename :: !files)
"cs131 main test harness\n\
USAGE: ./oatc [options] <files>\n\
see README for details about using the compiler";
process_files !files
with Ran_tests ->
()
;; main ()

91
hw3/bin/timeout3 Normal file
View file

@ -0,0 +1,91 @@
#!/bin/bash
#
# The Bash shell script executes a command with a time-out.
# Upon time-out expiration SIGTERM (15) is sent to the process. If the signal
# is blocked, then the subsequent SIGKILL (9) terminates it.
#
# Based on the Bash documentation example.
# Hello Chet,
# please find attached a "little easier" :-) to comprehend
# time-out example. If you find it suitable, feel free to include
# anywhere: the very same logic as in the original examples/scripts, a
# little more transparent implementation to my taste.
#
# Dmitry V Golovashkin <Dmitry.Golovashkin@sas.com>
scriptName="${0##*/}"
declare -i DEFAULT_TIMEOUT=9
declare -i DEFAULT_INTERVAL=1
declare -i DEFAULT_DELAY=1
# Timeout.
declare -i timeout=DEFAULT_TIMEOUT
# Interval between checks if the process is still alive.
declare -i interval=DEFAULT_INTERVAL
# Delay between posting the SIGTERM signal and destroying the process by SIGKILL.
declare -i delay=DEFAULT_DELAY
function printUsage() {
cat <<EOF
Synopsis
$scriptName [-t timeout] [-i interval] [-d delay] command
Execute a command with a time-out.
Upon time-out expiration SIGTERM (15) is sent to the process. If SIGTERM
signal is blocked, then the subsequent SIGKILL (9) terminates it.
-t timeout
Number of seconds to wait for command completion.
Default value: $DEFAULT_TIMEOUT seconds.
-i interval
Interval between checks if the process is still alive.
Positive integer, default value: $DEFAULT_INTERVAL seconds.
-d delay
Delay between posting the SIGTERM signal and destroying the
process by SIGKILL. Default value: $DEFAULT_DELAY seconds.
As of today, Bash does not support floating point arithmetic (sleep does),
therefore all delay/time values must be integers.
EOF
}
# Options.
while getopts ":t:i:d:" option; do
case "$option" in
t) timeout=$OPTARG ;;
i) interval=$OPTARG ;;
d) delay=$OPTARG ;;
*) printUsage; exit 1 ;;
esac
done
shift $((OPTIND - 1))
# $# should be at least 1 (the command to execute), however it may be strictly
# greater than 1 if the command itself has options.
if (($# == 0 || interval <= 0)); then
printUsage
exit 1
fi
# kill -0 pid Exit code indicates if a signal may be sent to $pid process.
(
((t = timeout))
while ((t > 0)); do
sleep $interval
kill -0 $$ || exit 0
((t -= interval))
done
# Be nice, post SIGTERM first.
# The 'exit 0' below will be executed if any preceeding command fails.
kill -s SIGTERM $$ && kill -0 $$ || exit 0
sleep $delay
kill -s SIGKILL $$
) 2> /dev/null &
exec "$@"