diff --git a/allocator.py b/allocator.py index 038d9d4..1b7dd2c 100644 --- a/allocator.py +++ b/allocator.py @@ -83,7 +83,6 @@ def __init__(self, ir: ssa.CompilationContext, be: backend.Backend): def allocate(self): stack_height = 0 - heap_height = 0 self.stack_offsets = {} for block in self.ir: if block.func.enter_block == block: diff --git a/ast.py b/ast.py index 455b74a..bb795d6 100644 --- a/ast.py +++ b/ast.py @@ -209,6 +209,7 @@ def compile(self, context): func = ssa.Function() func.name = self.ident.name func.enter_block = root + func.arg_names = [ident.name for ident in self.param_idents] root.func = func context.add_root_block(root) context.set_current_block(root) diff --git a/backend.py b/backend.py index 8e82576..8b9ee02 100644 --- a/backend.py +++ b/backend.py @@ -8,6 +8,7 @@ """ import ssa +import math class Backend: @@ -18,6 +19,8 @@ def __init__(self, ir): self.block_instrs = {} # Map block label to tuples (head_instrs, tail_instrs) self.instrs = [] # After linking: linear list of blocks self.block_offsets = {} # Map block label to offset + self.func_entry_offsets = {} # Map function name to its prologue + self.func_exit_offsets = {} # Map function name to its epilogue self.current_block = None def get_asm(self): @@ -30,19 +33,22 @@ def get_asm(self): out = "" label_length = max(len(label) for label in self.block_offsets) + num_length = math.floor(math.log10(len(self.instrs))) + 1 for offset, instr in enumerate(self.instrs): + out += "{0:{1}d} ".format(offset, num_length) if offset in inverse_block_offsets: for label in inverse_block_offsets[offset]: out += "{0:{1}s} ".format(label+":", label_length+1) out += str(instr)+"\n" else: - out += "{} {}\n".format(" "*label_length, str(instr)) + out += "{} {}\n".format(" "*(label_length), str(instr)) return out def get_machine_code(self): return b"".join(map(bytes, self.instrs)) def compile(self): + # New "program loader" block for init code self.block_instrs = {} for block in self.ir: self.block_instrs[block.label] = ([], [], []) @@ -52,18 +58,30 @@ def compile(self): self.compile_block(block) def compile_init(self): + """ + "Loader" code compiled into the program. This only gets executed once at + the start of the program before the main function. + """ pass - def compile_prelude(self, func_block): + def compile_prologue(self, func_block): + """ + Function prologue. Save callee-save registers here. + """ pass def compile_epilogue(self, func_block): + """ + Function epilogue. All function exits should jump to the stream of code + compiled here (its address offset is stored in self.func_exit_offsets). + Restore callee-save registers. 
+ """ pass def compile_block(self, block): self.current_block = block.label if block.func.enter_block == block: - self.compile_prelude(block) + self.compile_prologue(block) for instr in block.instrs: self.compile_instr(instr, context=block) if block.func.exit_block == block: @@ -79,11 +97,17 @@ def link(self): :return: """ self.instrs = [] - for block, (head_instrs, tail_instrs, term_instr) in self.block_instrs.items(): - self.block_offsets[block] = len(self.instrs) + blocks = {block.label: block for block in self.ir} # map block label to block object + for label, (head_instrs, tail_instrs, term_instr) in self.block_instrs.items(): + if blocks[label] == blocks[label].func.enter_block: # add label for function entry + self.func_entry_offsets[blocks[label].func.name] = len(self.instrs) + if blocks[label] == blocks[label].func.exit_block: + self.func_exit_offsets[blocks[label].func.name] = len(self.instrs) + self.block_offsets[label] = len(self.instrs) self.instrs.extend(head_instrs) self.instrs.extend(tail_instrs) self.instrs.extend(term_instr) + # After this, subclasses can use self.block_offsets to link the correct jump addresses def emit(self, instr, block=None): block = block or self.current_block diff --git a/dlx.py b/dlx.py index 55e2316..38bb8be 100644 --- a/dlx.py +++ b/dlx.py @@ -19,7 +19,9 @@ def __init__(self, opcode, mnemonic, *ops): self.opcode = opcode self.mnemonic = mnemonic self.ops = ops - self.jump_label = None # only used for BSR, JSR, RET + self.jump_label = None # during linking, only used for BSR, JSR, RET + self.jump_to_entry = None # replace arg during linking to jump to func prologue + self.jump_to_exit = None # replace operand during linking to jump to func epilogue def __repr__(self): return self.get_assembly() @@ -117,7 +119,7 @@ class INSTRUCTIONS: CHKI = F1Instruction(30, "CHKI") LDW = F1Instruction(32, "LDW") - LDX = F2Instruction(32, "LDX") + LDX = F2Instruction(33, "LDX") POP = F1Instruction(34, "POP") STW = F1Instruction(36, "STW") STX = F2Instruction(37, "STX") @@ -158,39 +160,73 @@ class DLXBackend(backend.Backend): WORD_SIZE = 4 STACK_SIZE = 0xFFF - ARG_REGS = [1, 2, 3, 4] - CALLEE_SAVE = [5, 6, 7, 8, 9, 10] + CALLEE_SAVE = [1, 2] def __init__(self, ir): super().__init__(ir) self.heap_height = 0 - def compile_prelude(self, func_block): + def compile_prologue(self, func_block): if func_block.func.is_main: return # no prelude required for main func - # As function is being called, stack pointer is at the bottom of calling function, - # pointing at the return address. - n_args = len(func_block.func.arg_names) - self.emit(INSTRUCTIONS.ADDI.make(self.STACK_PTR_REG, self.STACK_PTR_REG, - self.WORD_SIZE*n_args)) - # Save registers - for reg in self.CALLEE_SAVE: - self.emit(INSTRUCTIONS.PSH.make(reg, self.STACK_PTR_REG, self.WORD_SIZE)) - self.emit(INSTRUCTIONS.ADDI.make(self.FRAME_PTR_REG, self.STACK_PTR_REG, 0)) + # --- high addr --- + # * LOCAL 1 + # * LOCAL 2 + # * ... + # caller * PARAM 1 + # * PARAM 2 + # * ... + # * OLD RET ADDR + # / caller's old frame ptr <--- frame ptr + # / CALLEE SAVED REG 1 + # / CALLEE SAVED REG 2 + # / ... + # / LOCAL 1 + # callee / LOCAL 2 + # / ... 
+ # / LOCAL N <--- stack ptr + # --- low addr --- + + # Update frame pointer to point to bottom of calling functions stack + # and store old frame pointer + self.emit(INSTRUCTIONS.PSH.make(self.FRAME_PTR_REG, self.STACK_PTR_REG, -self.WORD_SIZE)) + self.emit_move(self.STACK_PTR_REG, self.FRAME_PTR_REG) - def compile_init(self): - stack_bottom = 1250 # FIXME quick fix so stack does not run into program instructions - self.emit_immediate(stack_bottom, self.FRAME_PTR_REG) + # Save registers + for i, reg in enumerate(self.CALLEE_SAVE): + self.emit(INSTRUCTIONS.STW.make(reg, self.FRAME_PTR_REG, -(i+1)*self.WORD_SIZE)) def compile_epilogue(self, func_block): - if not func_block.func.is_main: - self.emit(INSTRUCTIONS.RET.make(0, 0, self.RET_ADD_REG)) # Restore registers - for reg in self.CALLEE_SAVE: - self.emit(INSTRUCTIONS.POP.make(reg, self.STACK_PTR_REG, self.WORD_SIZE)) - self.emit(INSTRUCTIONS.LDW.make(self.RET_ADD_REG, self.STACK_PTR_REG, 0)) + for i, reg in enumerate(self.CALLEE_SAVE): + self.emit(INSTRUCTIONS.LDW.make(reg, self.FRAME_PTR_REG, -(i+1)*self.WORD_SIZE)) + # restore old frame and stack pointers + self.emit_move(self.FRAME_PTR_REG, self.STACK_PTR_REG) + self.emit(INSTRUCTIONS.POP.make(self.FRAME_PTR_REG, self.STACK_PTR_REG, +self.WORD_SIZE)) self.emit(INSTRUCTIONS.RET.make(0, 0, self.RET_ADD_REG)) + def compile_init(self): + # We iterate over all memory allocation calls to determine the heap height; + # we then allocate the stack to start right below that, to make maximum use + # of memory + heap_size = 0 + for block in self.ir: + for instr in block.instrs: + if instr.instr != "alloca": + continue + assert isinstance(instr.ops[0], ssa.ImmediateOp) # Dynamic memory allocation currently not supported + sz = instr.ops[0].val + heap_size += sz + # subtract heap size from global memory pointer address + self.emit(INSTRUCTIONS.ADDI.make(self.FRAME_PTR_REG, self.GLOBAL_MEM_PTR_REG, -heap_size*4)) + self.emit_move(self.FRAME_PTR_REG, self.STACK_PTR_REG) + + # jump to main function + #dlx_instr = INSTRUCTIONS.JSR.make(0) + #dlx_instr.jump_to_entry = "main" + #self.emit(dlx_instr) + #self.emit(INSTRUCTIONS.RET.make(0, 0, 0)) + def compile_operand(self, op: ssa.Op, context: ssa.BasicBlock, into=1, block=None, back=False): emit_fun = self.emit if not back else self.emit_back if isinstance(op, ssa.ImmediateOp): @@ -205,18 +241,13 @@ def compile_operand(self, op: ssa.Op, context: ssa.BasicBlock, into=1, block=Non return op_reg elif isinstance(op, ssa.ArgumentOp): # ABI: We pass arguments on the stack. - # In the prologue, we adjust the stack pointer to point to local variables. - # Everything below the stack pointer for #args are the args, in reverse order. + # They are in reverse order just below the return address. The return + # address is the last thing stored by the calling function on its stack, + # and the frame pointer points to the last item of the caller stack. + # Hence, arguments start at FRAME_PTR+1. 
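# A minimal standalone sketch (not part of the patch) of the offset arithmetic implied
# by the layout above; arg_slot_offset is a hypothetical name and WORD_SIZE = 4 as in
# DLXBackend. With the frame pointer addressing the saved caller frame pointer, the
# next word up holds the return address and the arguments sit above it in reverse push
# order, which is exactly what `offset = 1 + (len(arg_names) - idx)` computes below.
WORD_SIZE = 4

def arg_slot_offset(idx, n_args):
    # Byte offset of argument `idx` (0-based) relative to the callee's frame pointer:
    # one word for the saved return address, then the arguments in reverse push order.
    return (1 + (n_args - idx)) * WORD_SIZE

# For a three-argument function, arg 0 is stored highest; the last argument sits
# two words above the frame pointer, just past the saved return address.
assert [arg_slot_offset(i, 3) for i in range(3)] == [16, 12, 8]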
idx = context.func.arg_names.index(op.name) - if idx < len(self.ARG_REGS): - if into != idx: - emit_fun(INSTRUCTIONS.ADDI.make(into, idx, 0), block=block) - return into # argument passed in a register - # argument passed on stack - old_sp_offs = +len(self.CALLEE_SAVE) # frame pointer + calle saved - offs = (len(context.func.arg_names) - - idx) # arguments are pushed on stack in reverse order - emit_fun(INSTRUCTIONS.LDW.make(into, self.FRAME_PTR_REG, old_sp_offs + offs*self.WORD_SIZE), + offset = 1 + (len(context.func.arg_names) - idx) + emit_fun(INSTRUCTIONS.LDW.make(into, self.FRAME_PTR_REG, offset*self.WORD_SIZE), block=block) return into return None # Label arguments will be replaced in the linking phase @@ -240,7 +271,12 @@ def compile_instr(self, instr: ssa.Instruction, context: ssa.BasicBlock): # For most instructions, we first have to compile the operands. # We do not do it here for arithmetic F1 instructions, since we might want to use # immediate variants of these instructions instead if one of the operands are immediate. - if instr.instr not in {"phi", "alloca"} and instr.instr not in arith_f1_instrs: + # We also do not do it for phi nodes, since those are a simple move of a previous instr, + # or alloca, where we handle it ourselves. + # We do not do it for call instructions, because there we can have an unbounded number + # of arguments, that we do not want to all put into registers. Instead, we push them + # on the stack in this instruction-specific code. + if instr.instr not in {"phi", "alloca", "call", "return"} and instr.instr not in arith_f1_instrs: ops = [self.compile_operand(op, context=context, into=i+1) for i, op in enumerate(instr.ops)] # ops will contain register numbers / immediate values for all operands @@ -295,24 +331,64 @@ def compile_instr(self, instr: ssa.Instruction, context: ssa.BasicBlock): elif instr.instr == "alloca": assert isinstance(instr.ops[0], ssa.ImmediateOp) # Dynamic memory allocation currently not supported sz = instr.ops[0].val - self.heap_height += sz + self.heap_height -= sz self.emit(INSTRUCTIONS.ADDI.make(self.RES_REG, self.ZERO_REG, self.heap_height), block=context.label) self.allocator.store(instr.i, self.RES_REG) elif instr.instr == "call": assert isinstance(instr.ops[0], ssa.FunctionOp) - if instr.ops[0].func == "inputNum": + # built-in functions + if instr.ops[0].func == "InputNum": self.emit(INSTRUCTIONS.RDD.make(self.RES_REG, 0, 0)) self.allocator.store(instr.i, self.RES_REG) return - elif instr.ops[0].func == "outputNum": - self.emit(INSTRUCTIONS.WRD.make(0, ops[1], 0)) + elif instr.ops[0].func == "OutputNum": + self.compile_operand(instr.ops[1], context=context, into=self.RES_REG) + self.emit(INSTRUCTIONS.WRD.make(0, self.RES_REG, 0)) + return + elif instr.ops[0].func == "OutputNewLine": + self.emit(INSTRUCTIONS.WRL.make(0, 0, 0)) return - raise NotImplementedError() + + # UPDATE STACK POINTER + # since we do not actually use our stack pointer (instead use absolute + # offsets above frame pointer) we need to update it here so the callee + # knows where to start writing its values on the stack. 
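# A minimal sketch (not emitted by the backend) of the call sequence produced below
# for a hypothetical two-argument call, using R28 = frame pointer, R29 = stack pointer
# and R31 = return address as noted in the comments in this file; sketch_call_sequence
# and the concrete numbers are illustrative only.
def sketch_call_sequence(frame_words, arg_regs, entry_label):
    # frame_words: stack slots already used by the caller's frame (callee-saved
    # registers plus locals), so the callee starts writing below them.
    seq = ["ADDI R29, R28, -{}".format(frame_words * 4)]          # SP = FP - frame size
    seq += ["PSH {}, R29, -4".format(reg) for reg in arg_regs]    # push arguments, left to right
    seq += ["PSH R31, R29, -4",                                   # save return address (JSR clobbers R31)
            "JSR <entry of {}>".format(entry_label),              # linked to the callee's prologue offset
            "POP R31, R29, +4"]                                   # restore return address after the call
    return seq                                                    # the callee's result is left in RES_REG

print("\n".join(sketch_call_sequence(5, ["R1", "R1"], "b")))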
+ n_callee_save = len(self.CALLEE_SAVE) + stack_height = self.allocator.stack_offsets[instr.i] + n_callee_save + self.emit(INSTRUCTIONS.ADDI.make(self.STACK_PTR_REG, self.FRAME_PTR_REG, -stack_height*self.WORD_SIZE)) # R29 = R28 - this func stack height + + # PUSH ARGUMENTS ONTO STACK + for arg in instr.ops[1:]: + arg_reg = self.compile_operand(arg, context=context, into=self.RES_REG) + self.emit(INSTRUCTIONS.PSH.make(arg_reg, self.STACK_PTR_REG, -self.WORD_SIZE)) + + # CALLER SAVED REGISTERS + # return address is the very last thing we write to our stack frame + # it is caller-saved, since the jump instruction will overwrite it! + self.emit(INSTRUCTIONS.PSH.make(self.RET_ADD_REG, self.STACK_PTR_REG, -self.WORD_SIZE)) + + # JUMP + # actual function call: jump to label, storing return address in R31 + dlx_instr = INSTRUCTIONS.JSR.make(0) + dlx_instr.jump_to_entry = instr.ops[0].func + self.emit(dlx_instr) + + # RESTORE CALLER SAVED REGISTERS + # after jump instruction: this is where we end up when the function returns + # hence, restore the return address from the top of our stack + self.emit(INSTRUCTIONS.POP.make(self.RET_ADD_REG, self.STACK_PTR_REG, +self.WORD_SIZE)) + + # STORE RETURN VALUE + # Values are passed back in register RES_REG + self.allocator.store(instr.i, self.RES_REG) elif instr.instr == "return": - raise NotImplementedError() + if instr.ops: # return values are passed in register RES_REG + self.compile_operand(instr.ops[0], context=context, into=self.RES_REG) + dlx_instr = INSTRUCTIONS.JSR.make(0) + dlx_instr.jump_to_exit = context.func.name elif instr.instr == "phi": pred_a, op_a, pred_b, op_b = instr.ops @@ -331,34 +407,58 @@ def compile_instr(self, instr: ssa.Instruction, context: ssa.BasicBlock): def link(self): super().link() for i, instr in enumerate(self.instrs): - if not instr.jump_label: + if not instr.jump_label and not instr.jump_to_entry and not instr.jump_to_exit: continue - if instr.jump_label not in self.block_offsets: + target = None + target_label = instr.jump_label + lookup_map = self.block_offsets + if instr.jump_to_entry: + target_label = instr.jump_to_entry + lookup_map = self.func_entry_offsets + elif instr.jump_to_exit: + target_label = instr.jump_to_exit + lookup_map = self.func_exit_offsets + if target_label not in lookup_map: raise Exception("Unknown symbol {}".format(instr.jump_label)) # Jump instructions have their target as arg 3 (c) ops = list(instr.ops) - ops[-1] = self.block_offsets[instr.jump_label] - i # (relative offset) + target = lookup_map[target_label] + if instr.opcode == INSTRUCTIONS.BSR.opcode: + target = target - i # (relative offset) + if instr.opcode == INSTRUCTIONS.JSR.opcode: + target *= self.WORD_SIZE + ops[-1] = target instr.ops = tuple(ops) self.instrs[i] = instr def emit_stack_load(self, offset, into, block=None, back=False): + """ + We actually use the frame pointer as the base address for our stack + loads and writes. This allows us to use "absolute" offsets within + the function, whereas the stack pointer may move to stay on top of + the stack. 
+ """ emit_fun = self.emit if not back else self.emit_back + n_callee_save = len(self.CALLEE_SAVE) + offset += n_callee_save + 1 # adjust for the portion of the stack used for calle save registers in prologue # Memory is byte addressed and one word is four bytes emit_fun(INSTRUCTIONS.LDW.make(into, self.FRAME_PTR_REG, -offset*self.WORD_SIZE), block=block) def emit_stack_store(self, addr_offs, val_reg, block=None, back=False): emit_fun = self.emit if not back else self.emit_back + n_callee_save = len(self.CALLEE_SAVE) + addr_offs += n_callee_save + 1 emit_fun(INSTRUCTIONS.STW.make(val_reg, self.FRAME_PTR_REG, -addr_offs*self.WORD_SIZE), block=block) def emit_heap_load(self, addr_offs_reg, into, block=None, back=False): emit_fun = self.emit if not back else self.emit_back - emit_fun(INSTRUCTIONS.LDW.make(into, self.GLOBAL_MEM_PTR_REG, -addr_offs_reg), + emit_fun(INSTRUCTIONS.LDX.make(into, self.GLOBAL_MEM_PTR_REG, addr_offs_reg), block=block) def emit_heap_store(self, addr_offs_reg, val_reg): - self.emit(INSTRUCTIONS.STX.make(val_reg, self.GLOBAL_MEM_PTR_REG, -addr_offs_reg)) + self.emit(INSTRUCTIONS.STX.make(val_reg, self.GLOBAL_MEM_PTR_REG, addr_offs_reg)) def emit_move(self, from_reg, to_reg): self.emit(INSTRUCTIONS.ADDI.make(to_reg, from_reg, 0)) diff --git a/main.py b/main.py index b8e1995..dd99e90 100755 --- a/main.py +++ b/main.py @@ -79,10 +79,10 @@ def main(): backend.link() if args.asm: output.write(backend.get_asm().encode("ascii")) - elif args.run: + if args.run: dlx_emulator.DLX.load([instr.encode() for instr in backend.instrs]) dlx_emulator.DLX.execute() - else: + if not args.asm and not args.run: output.write(backend.get_machine_code()) diff --git a/ssa.py b/ssa.py index bc2b34d..9ecdb44 100644 --- a/ssa.py +++ b/ssa.py @@ -270,6 +270,7 @@ def emit(self, instr_index, instr_name, *args, produces_output=True, may_elimina # eliminated together or not at all, they must always appear in pairs. identical_adda = self.instrs[-1].find_dominating_identical() if identical_adda: + orig_ops = instr.ops instr.ops = (InstructionOp(identical_adda),) identical = instr.find_dominating_identical() if identical: @@ -277,6 +278,8 @@ def emit(self, instr_index, instr_name, *args, produces_output=True, may_elimina # thus eliminate both. del self.instrs[-1] return InstructionOp(identical) + else: + instr.ops = orig_ops instr.i = instr_index self.instrs.append(instr) self.dom_instr_tree[dominance_class] = instr @@ -481,7 +484,7 @@ def print_warnings(self): uninitialized_set = {op.name for block in self for instr in block.instrs - if instr.instr != "phi" + #if instr.instr != "phi" for op in instr.ops if isinstance(op, UninitializedVarOp)} for uninitialized in uninitialized_set: @@ -499,7 +502,7 @@ class CompilationContextIterator: def __init__(self, context: CompilationContext): self.context = context - self.todo = self.context.root_blocks.copy() + self.todo = list(reversed(self.context.root_blocks)) self.visited = set() def __next__(self): diff --git a/test_progs/array_fill_simple.smpl b/test_progs/array_fill_simple.smpl new file mode 100644 index 0000000..b69c2c3 --- /dev/null +++ b/test_progs/array_fill_simple.smpl @@ -0,0 +1,16 @@ +main +array[10] arr; +var v, i; +{ + let v <- 999; + let i <- 0; + while i < 10 do + let arr[i] <- v*i; + let i <- i + 1; + od; + let i <- 0; + while i < 10 do + call outputNum(arr[i]); + let i <- i + 1; + od; +}. 
\ No newline at end of file diff --git a/test_progs/function_call.smpl b/test_progs/function_call.smpl new file mode 100644 index 0000000..6d9293d --- /dev/null +++ b/test_progs/function_call.smpl @@ -0,0 +1,16 @@ +main +var out; + +function a(arga); +{ + return arga + call b(arga); +}; + +function b(arga); +{ + return arga + 2; +}; + +{ + let out <- call a(7); +}. diff --git a/test_progs/nested_function_calls.smpl b/test_progs/nested_function_calls.smpl new file mode 100644 index 0000000..d70a5d1 --- /dev/null +++ b/test_progs/nested_function_calls.smpl @@ -0,0 +1,44 @@ +main +array[4] arr; +var max; + +function minOrMax(returnMin, a1, a2, a3, a4); +array[4] arr; +var i, j, min, max; +{ + let arr[0] <- a1; + let arr[1] <- a2; + let arr[2] <- a3; + let arr[3] <- a4; + let i <- 1; + let j <- 0; + let max <- arr[0]; + let min <- arr[0]; + while i < 4 do + if arr[i] > max then + let j <- i; + let max <- arr[i]; + else + if arr[i] < min then + let j <- i; + let min <- arr[i]; + fi; + fi; + let i <- i + 1; + od; + if returnMin == 1 then + return min; + else + return max; + fi; +}; + +{ + let arr[0] <- call inputNum(); + let arr[1] <- call inputNum(); + let arr[2] <- call inputNum(); + let arr[3] <- call inputNum(); + let max <- call minOrMax(0, arr[0], arr[1], arr[2], arr[3]); + call outputNum(max); + return max +}.
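The jump fix-up added in DLXBackend.link can be summarised with a short standalone sketch (illustrative only; resolve_jump_target is a hypothetical name, WORD_SIZE = 4 as in DLXBackend). The target is looked up in block_offsets, func_entry_offsets or func_exit_offsets depending on whether jump_label, jump_to_entry or jump_to_exit is set; BSR then encodes an instruction-relative offset, while JSR encodes an absolute byte address:

WORD_SIZE = 4

def resolve_jump_target(mnemonic, target_index, site_index):
    # target_index: linked instruction index of the destination (block offset,
    # function entry or function exit); site_index: index of the jump itself.
    target = target_index
    if mnemonic == "BSR":
        target -= site_index          # relative offset, counted in instructions
    if mnemonic == "JSR":
        target *= WORD_SIZE           # absolute address, counted in bytes
    return target

assert resolve_jump_target("BSR", 40, 12) == 28
assert resolve_jump_target("JSR", 40, 12) == 160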