From d15c911cacd4ba2877cc163afc12b8924b59212b Mon Sep 17 00:00:00 2001 From: sadullah Date: Fri, 11 Oct 2019 12:39:43 -0400 Subject: [PATCH] BlackParrot initial commit w/ Litex BIOS simulation including LiteDRAM w/ Litex BIOS working on FPGA excluding LiteDRAM --- .gitmodules | 3 + litex/soc/cores/cpu/__init__.py | 2 + litex/soc/cores/cpu/blackparrot/README.md | 12 + litex/soc/cores/cpu/blackparrot/__init__.py | 1 + .../bp_fpga/ExampleBlackParrotSystem.v | 434 ++++++++++++++++++ .../cpu/blackparrot/bp_fpga/bp2wb_convertor.v | 214 +++++++++ .../bp_fpga/bsg_mem_1rw_sync_mask_write_bit.v | 55 +++ .../bp_hardware/bp_cce_mmio_cfg_loader.v | 231 ++++++++++ .../blackparrot/bp_hardware/bp_common_pkg.vh | 55 +++ .../bp_hardware/bp_nonsynth_host.v | 190 ++++++++ .../cpu/blackparrot/bp_software/cce_ucode.mem | 96 ++++ .../cpu/blackparrot/bp_software/udivmoddi4.c | 358 +++++++++++++++ litex/soc/cores/cpu/blackparrot/core.py | 174 +++++++ .../cpu/blackparrot/flist_litex.verilator | 228 +++++++++ .../cores/cpu/blackparrot/pre-alpha-release | 1 + .../cores/cpu/blackparrot/setEnvironment.sh | 97 ++++ litex/soc/cores/cpu/blackparrot/update_BP.sh | 17 + litex/soc/software/bios/Makefile | 10 +- .../software/bios/boot-helper-blackparrot.S | 4 + litex/soc/software/bios/isr.c | 15 +- litex/soc/software/bios/main.c | 10 +- litex/soc/software/bios/sdram.c | 2 + litex/soc/software/include/base/irq.h | 25 + litex/soc/software/include/base/system.h | 3 +- litex/soc/software/libbase/crt0-blackparrot.S | 77 ++++ litex/soc/software/libbase/system.c | 7 + 26 files changed, 2313 insertions(+), 8 deletions(-) create mode 100644 litex/soc/cores/cpu/blackparrot/README.md create mode 100644 litex/soc/cores/cpu/blackparrot/__init__.py create mode 100644 litex/soc/cores/cpu/blackparrot/bp_fpga/ExampleBlackParrotSystem.v create mode 100644 litex/soc/cores/cpu/blackparrot/bp_fpga/bp2wb_convertor.v create mode 100644 litex/soc/cores/cpu/blackparrot/bp_fpga/bsg_mem_1rw_sync_mask_write_bit.v create mode 
100644 litex/soc/cores/cpu/blackparrot/bp_hardware/bp_cce_mmio_cfg_loader.v create mode 100644 litex/soc/cores/cpu/blackparrot/bp_hardware/bp_common_pkg.vh create mode 100644 litex/soc/cores/cpu/blackparrot/bp_hardware/bp_nonsynth_host.v create mode 100644 litex/soc/cores/cpu/blackparrot/bp_software/cce_ucode.mem create mode 100644 litex/soc/cores/cpu/blackparrot/bp_software/udivmoddi4.c create mode 100644 litex/soc/cores/cpu/blackparrot/core.py create mode 100644 litex/soc/cores/cpu/blackparrot/flist_litex.verilator create mode 160000 litex/soc/cores/cpu/blackparrot/pre-alpha-release create mode 100755 litex/soc/cores/cpu/blackparrot/setEnvironment.sh create mode 100755 litex/soc/cores/cpu/blackparrot/update_BP.sh create mode 100644 litex/soc/software/bios/boot-helper-blackparrot.S create mode 100644 litex/soc/software/libbase/crt0-blackparrot.S diff --git a/.gitmodules b/.gitmodules index 509b98ac29..1a2bf3a312 100644 --- a/.gitmodules +++ b/.gitmodules @@ -25,3 +25,6 @@ [submodule "litex/soc/cores/cpu/microwatt/sources"] path = litex/soc/cores/cpu/microwatt/sources url = https://github.com/antonblanchard/microwatt +[submodule "litex/soc/cores/cpu/blackparrot/pre-alpha-release"] + path = litex/soc/cores/cpu/blackparrot/pre-alpha-release + url = https://github.com/black-parrot/pre-alpha-release.git diff --git a/litex/soc/cores/cpu/__init__.py b/litex/soc/cores/cpu/__init__.py index afcf6a1723..1503dfff09 100644 --- a/litex/soc/cores/cpu/__init__.py +++ b/litex/soc/cores/cpu/__init__.py @@ -33,6 +33,7 @@ class CPUNone(CPU): from litex.soc.cores.cpu.minerva import Minerva from litex.soc.cores.cpu.rocket import RocketRV64 from litex.soc.cores.cpu.microwatt import Microwatt +from litex.soc.cores.cpu.blackparrot import BlackParrotRV64 CPUS = { "lm32" : LM32, @@ -42,6 +43,7 @@ class CPUNone(CPU): "minerva" : Minerva, "rocket" : RocketRV64, "microwatt" : Microwatt, + "blackparrot" : BlackParrotRV64, } # CPU Variants/Extensions Definition 
--------------------------------------------------------------- diff --git a/litex/soc/cores/cpu/blackparrot/README.md b/litex/soc/cores/cpu/blackparrot/README.md new file mode 100644 index 0000000000..d320d4b457 --- /dev/null +++ b/litex/soc/cores/cpu/blackparrot/README.md @@ -0,0 +1,12 @@ +TODO: Edit +git submodule update --init --recursive (for blackparrot pre-alpha repo) +cd pre-alpha-release +follow getting_started to install blackparrot +cd .. +source ./setEnvironment.sh #should be sourced each time you open a terminal or just add this line to bashrc +Add $BP_TOP/external/bin to $PATH for verilator and riscv-gnu tools +./update_BP.sh #to modify some of the files in Blackparrot repo (one-time process) +Currently, we could simulate the LITEX-BIOS on BP processor. + +[![asciicast](https://asciinema.org/a/286568.svg)](https://asciinema.org/a/286568) + diff --git a/litex/soc/cores/cpu/blackparrot/__init__.py b/litex/soc/cores/cpu/blackparrot/__init__.py new file mode 100644 index 0000000000..05c9fcee48 --- /dev/null +++ b/litex/soc/cores/cpu/blackparrot/__init__.py @@ -0,0 +1 @@ +from litex.soc.cores.cpu.blackparrot.core import BlackParrotRV64 diff --git a/litex/soc/cores/cpu/blackparrot/bp_fpga/ExampleBlackParrotSystem.v b/litex/soc/cores/cpu/blackparrot/bp_fpga/ExampleBlackParrotSystem.v new file mode 100644 index 0000000000..cdd1a95e07 --- /dev/null +++ b/litex/soc/cores/cpu/blackparrot/bp_fpga/ExampleBlackParrotSystem.v @@ -0,0 +1,434 @@ +/** + * + * ExampleBlackParrotSystem For Simulating With Litex + * + */ + +`include "bsg_noc_links.vh" + +module ExampleBlackParrotSystem + import bp_common_pkg::*; + import bp_common_aviary_pkg::*; + import bp_be_pkg::*; + import bp_common_rv64_pkg::*; + import bp_cce_pkg::*; + import bp_cfg_link_pkg::*; + #(parameter bp_cfg_e cfg_p = e_bp_single_core_cfg // Replaced by the flow with a specific bp_cfg + `declare_bp_proc_params(cfg_p) + `declare_bp_me_if_widths(paddr_width_p, cce_block_width_p, num_lce_p, lce_assoc_p) + + // 
Tracing parameters + , parameter calc_trace_p = 1 + , parameter cce_trace_p = 0 + , parameter cmt_trace_p = 0 + , parameter dram_trace_p = 0 + , parameter skip_init_p = 0 + + , parameter mem_load_p = 1 + , parameter mem_file_p = "prog.mem" + , parameter mem_cap_in_bytes_p = 2**20 + , parameter [paddr_width_p-1:0] mem_offset_p = paddr_width_p'(32'h8000_0000) + + ) + (input clk_i + , input reset_i + //Wishbone interface + , input [63:0] wbm_dat_i + , output [63:0] wbm_dat_o + , input wbm_ack_i + // , input wbm_err_i + // , input wbm_rty_i + , output [36:0] wbm_adr_o //TODO parametrize this + , output wbm_stb_o + , output wbm_cyc_o + , output wbm_sel_o //TODO: how many bits ? check 3.5 table 3-1 + , output wbm_we_o + , output [2:0] wbm_cti_o //TODO: hardwire in Litex + , output [1:0] wbm_bte_o //TODO: hardwire in Litex + , output all_finished_debug_o //SC_add + , output core_passed_debug + , output core_failed_debug + , input [3:0] interrupts + ); + +`declare_bsg_ready_and_link_sif_s(mem_noc_flit_width_p, bsg_ready_and_link_sif_s); +`declare_bp_me_if(paddr_width_p, cce_block_width_p, num_lce_p, lce_assoc_p) + +bsg_ready_and_link_sif_s cmd_link_li, cmd_link_lo; +bsg_ready_and_link_sif_s resp_link_li, resp_link_lo; + +bsg_ready_and_link_sif_s mem_cmd_link_li, mem_cmd_link_lo, mem_resp_link_li, mem_resp_link_lo; +bsg_ready_and_link_sif_s cfg_cmd_link_li, cfg_cmd_link_lo, cfg_resp_link_li, cfg_resp_link_lo; + +assign mem_cmd_link_li = cmd_link_li; +assign cfg_cmd_link_li = '{ready_and_rev: cmd_link_li.ready_and_rev, default: '0}; +assign cmd_link_lo = '{data: cfg_cmd_link_lo.data + ,v : cfg_cmd_link_lo.v + ,ready_and_rev: mem_cmd_link_lo.ready_and_rev + }; + +assign mem_resp_link_li = '{ready_and_rev: resp_link_li.ready_and_rev, default: '0}; +assign cfg_resp_link_li = resp_link_li; +assign resp_link_lo = '{data: mem_resp_link_lo.data + ,v : mem_resp_link_lo.v + ,ready_and_rev: cfg_resp_link_lo.ready_and_rev + }; + +bp_cce_mem_msg_s mem_resp_li; +logic mem_resp_v_li, 
mem_resp_ready_lo; +bp_cce_mem_msg_s mem_cmd_lo; +logic mem_cmd_v_lo, mem_cmd_yumi_li; + +bp_cce_mem_msg_s dram_resp_lo; +logic dram_resp_v_lo, dram_resp_ready_li; +bp_cce_mem_msg_s dram_cmd_li; +logic dram_cmd_v_li, dram_cmd_yumi_lo; + +bp_cce_mem_msg_s host_resp_lo; +logic host_resp_v_lo, host_resp_ready_li; +bp_cce_mem_msg_s host_cmd_li; +logic host_cmd_v_li, host_cmd_yumi_lo; + +bp_cce_mem_msg_s cfg_cmd_lo; +logic cfg_cmd_v_lo, cfg_cmd_ready_li; +bp_cce_mem_msg_s cfg_resp_li; +logic cfg_resp_v_li, cfg_resp_ready_lo; + +logic [mem_noc_cord_width_p-1:0] dram_cord_lo, mmio_cord_lo, host_cord_lo; +logic [num_core_p-1:0][mem_noc_cord_width_p-1:0] tile_cord_lo; +logic [num_mem_p-1:0][mem_noc_cord_width_p-1:0] mem_cord_lo; + +assign mmio_cord_lo[0+:mem_noc_x_cord_width_p] = mmio_x_pos_p; +assign mmio_cord_lo[mem_noc_x_cord_width_p+:mem_noc_y_cord_width_p] = '0; +assign dram_cord_lo[0+:mem_noc_x_cord_width_p] = mem_noc_x_dim_p+2; +assign dram_cord_lo[mem_noc_x_cord_width_p+:mem_noc_y_cord_width_p] = '0; +assign host_cord_lo[0+:mem_noc_x_cord_width_p] = mem_noc_x_dim_p+2; +assign host_cord_lo[mem_noc_x_cord_width_p+:mem_noc_y_cord_width_p] = '0; + +for (genvar j = 0; j < mem_noc_y_dim_p; j++) + begin : y + for (genvar i = 0; i < mem_noc_x_dim_p; i++) + begin : x + localparam idx = j*mem_noc_x_dim_p + i; + assign tile_cord_lo[idx][0+:mem_noc_x_cord_width_p] = i+1; + assign tile_cord_lo[idx][mem_noc_x_cord_width_p+:mem_noc_y_cord_width_p] = j+1; + end + end +for (genvar i = 0; i < num_mem_p; i++) + begin : x + assign mem_cord_lo[i][0+:mem_noc_x_cord_width_p] = i; + assign mem_cord_lo[i][mem_noc_x_cord_width_p+:mem_noc_y_cord_width_p] = '0; + end + +// Chip +bp_chip + #(.cfg_p(cfg_p)) + chip + (.core_clk_i(clk_i) + ,.core_reset_i(reset_i) + + ,.coh_clk_i(clk_i) + ,.coh_reset_i(reset_i) + + ,.mem_clk_i(clk_i) + ,.mem_reset_i(reset_i) + + ,.mem_cord_i(mem_cord_lo) + ,.tile_cord_i(tile_cord_lo) + ,.dram_cord_i(dram_cord_lo) + ,.mmio_cord_i(mmio_cord_lo) + 
,.host_cord_i(host_cord_lo) + + ,.prev_cmd_link_i('0) + ,.prev_cmd_link_o() + + ,.prev_resp_link_i('0) + ,.prev_resp_link_o() + + ,.next_cmd_link_i(cmd_link_lo) + ,.next_cmd_link_o(cmd_link_li) + + ,.next_resp_link_i(resp_link_lo) + ,.next_resp_link_o(resp_link_li) + ); + + bind bp_be_top + bp_nonsynth_commit_tracer + #(.cfg_p(cfg_p)) + commit_tracer + (.clk_i(clk_i & (ExampleBlackParrotSystem.cmt_trace_p == 1)) + ,.reset_i(reset_i) + + ,.mhartid_i('0) + + ,.commit_v_i(be_calculator.instret_mem3_o) + ,.commit_pc_i(be_calculator.pc_mem3_o) + ,.commit_instr_i(be_calculator.instr_mem3_o) + + ,.rd_w_v_i(be_calculator.int_regfile.rd_w_v_i) + ,.rd_addr_i(be_calculator.int_regfile.rd_addr_i) + ,.rd_data_i(be_calculator.int_regfile.rd_data_i) + ); + + + bind bp_be_top + bp_be_nonsynth_tracer + #(.cfg_p(cfg_p)) + tracer + // Workaround for verilator binding by accident + // TODO: Figure out why tracing is always enabled + (.clk_i(clk_i & (ExampleBlackParrotSystem.calc_trace_p == 1)) + ,.reset_i(reset_i) + + ,.mhartid_i(be_calculator.proc_cfg.core_id) + + ,.issue_pkt_i(be_calculator.issue_pkt) + ,.issue_pkt_v_i(be_calculator.issue_pkt_v_i) + + ,.fe_nop_v_i(be_calculator.fe_nop_v) + ,.be_nop_v_i(be_calculator.be_nop_v) + ,.me_nop_v_i(be_calculator.me_nop_v) + ,.dispatch_pkt_i(be_calculator.dispatch_pkt) + + ,.ex1_br_tgt_i(be_calculator.calc_status.int1_br_tgt) + ,.ex1_btaken_i(be_calculator.calc_status.int1_btaken) + ,.iwb_result_i(be_calculator.comp_stage_n[3]) + ,.fwb_result_i(be_calculator.comp_stage_n[4]) + + ,.cmt_trace_exc_i(be_calculator.exc_stage_n[1+:5]) + + ,.trap_v_i(be_mem.csr.trap_v_o) + ,.mtvec_i(be_mem.csr.mtvec_n) + ,.mtval_i(be_mem.csr.mtval_n[0+:vaddr_width_p]) + ,.ret_v_i(be_mem.csr.ret_v_o) + ,.mepc_i(be_mem.csr.mepc_n[0+:vaddr_width_p]) + ,.mcause_i(be_mem.csr.mcause_n) + + ,.priv_mode_i(be_mem.csr.priv_mode_n) + ,.mpp_i(be_mem.csr.mstatus_n.mpp) + ); + +/*bind bp_be_top + bp_be_nonsynth_perf + #(.cfg_p(cfg_p)) + perf + (.clk_i(clk_i) + ,.reset_i(reset_i) 
+ + ,.mhartid_i(be_calculator.proc_cfg.core_id) + + ,.fe_nop_i(be_calculator.exc_stage_r[2].fe_nop_v) + ,.be_nop_i(be_calculator.exc_stage_r[2].be_nop_v) + ,.me_nop_i(be_calculator.exc_stage_r[2].me_nop_v) + ,.poison_i(be_calculator.exc_stage_r[2].poison_v) + ,.roll_i(be_calculator.exc_stage_r[2].roll_v) + ,.instr_cmt_i(be_calculator.calc_status.mem3_cmt_v) + + ,.program_finish_i(testbench.program_finish) + ); +*/ +/*if (dram_trace_p) + bp_mem_nonsynth_tracer + #(.cfg_p(cfg_p)) + bp_mem_tracer + (.clk_i(clk_i & (testbench.dram_trace_p == 1)) + ,.reset_i(reset_i) + + ,.mem_cmd_i(dram_cmd_li) + ,.mem_cmd_v_i(dram_cmd_v_li) + ,.mem_cmd_yumi_i(dram_cmd_yumi_lo) + + ,.mem_resp_i(dram_resp_lo) + ,.mem_resp_v_i(dram_resp_v_lo) + ,.mem_resp_ready_i(dram_resp_ready_li) + ); + +if (cce_trace_p) + bind bp_cce_top + bp_cce_nonsynth_tracer + #(.cfg_p(cfg_p)) + bp_cce_tracer + (.clk_i(clk_i & (testbench.cce_trace_p == 1)) + ,.reset_i(reset_i) + + ,.cce_id_i(cce_id_i) + + // To CCE + ,.lce_req_i(lce_req_to_cce) + ,.lce_req_v_i(lce_req_v_to_cce) + ,.lce_req_yumi_i(lce_req_yumi_from_cce) + ,.lce_resp_i(lce_resp_to_cce) + ,.lce_resp_v_i(lce_resp_v_to_cce) + ,.lce_resp_yumi_i(lce_resp_yumi_from_cce) + + // From CCE + ,.lce_cmd_i(lce_cmd_o) + ,.lce_cmd_v_i(lce_cmd_v_o) + ,.lce_cmd_ready_i(lce_cmd_ready_i) + + // To CCE + ,.mem_resp_i(mem_resp_to_cce) + ,.mem_resp_v_i(mem_resp_v_to_cce) + ,.mem_resp_yumi_i(mem_resp_yumi_from_cce) + + // From CCE + ,.mem_cmd_i(mem_cmd_from_cce) + ,.mem_cmd_v_i(mem_cmd_v_from_cce) + ,.mem_cmd_ready_i(mem_cmd_ready_to_cce) + ); +*/ +// DRAM + link +bp_me_cce_to_wormhole_link_client + #(.cfg_p(cfg_p)) + client_link + (.clk_i(clk_i) + ,.reset_i(reset_i) + + ,.mem_cmd_o(mem_cmd_lo) + ,.mem_cmd_v_o(mem_cmd_v_lo) + ,.mem_cmd_yumi_i(mem_cmd_yumi_li) + + ,.mem_resp_i(mem_resp_li) + ,.mem_resp_v_i(mem_resp_v_li) + ,.mem_resp_ready_o(mem_resp_ready_lo) + + ,.my_cord_i(dram_cord_lo) + ,.my_cid_i(mem_noc_cid_width_p'(0)) + + ,.cmd_link_i(mem_cmd_link_li) + 
,.cmd_link_o(mem_cmd_link_lo) + + ,.resp_link_i(mem_resp_link_li) + ,.resp_link_o(mem_resp_link_lo) + ); + +bp2wb_convertor + #(.cfg_p(cfg_p)) +bp2wb + (.clk_i(clk_i) + ,.reset_i(reset_i) + ,.mem_cmd_i(dram_cmd_li) + ,.mem_cmd_v_i(dram_cmd_v_li) + ,.mem_cmd_yumi_o(dram_cmd_yumi_lo) + ,.mem_resp_o(dram_resp_lo) + ,.mem_resp_v_o(dram_resp_v_lo) + ,.mem_resp_ready_i(dram_resp_ready_li) + ,.dat_i(wbm_dat_i) + ,.dat_o(wbm_dat_o) + ,.ack_i(wbm_ack_i) + ,.adr_o(wbm_adr_o) + ,.stb_o(wbm_stb_o) + ,.cyc_o(wbm_cyc_o) + ,.sel_o(wbm_sel_o ) + ,.we_o(wbm_we_o) + ,.cti_o(wbm_cti_o) + ,.bte_o(wbm_bte_o ) + ); + +logic [num_core_p-1:0] program_finish; + +bp_nonsynth_host + #(.cfg_p(cfg_p)) + host_mmio + (.clk_i(clk_i) + ,.reset_i(reset_i) + + ,.mem_cmd_i(host_cmd_li) + ,.mem_cmd_v_i(host_cmd_v_li) + ,.mem_cmd_yumi_o(host_cmd_yumi_lo) + + ,.mem_resp_o(host_resp_lo) + ,.mem_resp_v_o(host_resp_v_lo) + ,.mem_resp_ready_i(host_resp_ready_li) + + ,.program_finish_o(program_finish) + ,.all_finished_debug_o(all_finished_debug_o) + ,.core_passed_debug(core_passed_debug) + ,.core_failed_debug(core_failed_debug) + ); + +/*bp_nonsynth_if_verif + #(.cfg_p(cfg_p)) + if_verif + (); +*/ +// MMIO arbitration +// Should this be on its own I/O router? +logic req_outstanding_r; +bsg_dff_reset_en + #(.width_p(1)) + req_outstanding_reg + (.clk_i(clk_i) + ,.reset_i(reset_i) + ,.en_i(mem_cmd_yumi_li | mem_resp_v_li) + + ,.data_i(mem_cmd_yumi_li) + ,.data_o(req_outstanding_r) + ); + +wire host_cmd_not_dram = mem_cmd_v_lo & (mem_cmd_lo.addr < 39'h00_4000_0000 );//dram_base_addr_gp + +assign host_cmd_li = mem_cmd_lo; +assign host_cmd_v_li = mem_cmd_v_lo & host_cmd_not_dram & ~req_outstanding_r; +assign dram_cmd_li = mem_cmd_lo; +assign dram_cmd_v_li = mem_cmd_v_lo & ~host_cmd_not_dram & ~req_outstanding_r; +assign mem_cmd_yumi_li = host_cmd_not_dram + ? host_cmd_yumi_lo + : dram_cmd_yumi_lo; + +assign mem_resp_li = host_resp_v_lo ? 
host_resp_lo : dram_resp_lo; +assign mem_resp_v_li = host_resp_v_lo | dram_resp_v_lo; +assign host_resp_ready_li = mem_resp_ready_lo; +assign dram_resp_ready_li = mem_resp_ready_lo; + +// CFG loader + rom + link +bp_me_cce_to_wormhole_link_master + #(.cfg_p(cfg_p)) + master_link + (.clk_i(clk_i) + ,.reset_i(reset_i) + + ,.mem_cmd_i(cfg_cmd_lo) + ,.mem_cmd_v_i(cfg_cmd_ready_li & cfg_cmd_v_lo) + ,.mem_cmd_ready_o(cfg_cmd_ready_li) + + ,.mem_resp_o(cfg_resp_li) + ,.mem_resp_v_o(cfg_resp_v_li) + ,.mem_resp_yumi_i(cfg_resp_ready_lo & cfg_resp_v_li) + + ,.my_cord_i(dram_cord_lo) + ,.my_cid_i(mem_noc_cid_width_p'(0)) + ,.dram_cord_i(dram_cord_lo) + ,.mmio_cord_i(mmio_cord_lo) + ,.host_cord_i(host_cord_lo) + + ,.cmd_link_i(cfg_cmd_link_li) + ,.cmd_link_o(cfg_cmd_link_lo) + + ,.resp_link_i(cfg_resp_link_li) + ,.resp_link_o(cfg_resp_link_lo) + ); + +localparam cce_instr_ram_addr_width_lp = `BSG_SAFE_CLOG2(num_cce_instr_ram_els_p); +bp_cce_mmio_cfg_loader + #(.cfg_p(cfg_p) + ,.inst_width_p(`bp_cce_inst_width) + ,.inst_ram_addr_width_p(cce_instr_ram_addr_width_lp) + ,.inst_ram_els_p(num_cce_instr_ram_els_p) + ,.skip_ram_init_p(skip_init_p) + ) + cfg_loader + (.clk_i(clk_i) + ,.reset_i(reset_i) + + ,.mem_cmd_o(cfg_cmd_lo) + ,.mem_cmd_v_o(cfg_cmd_v_lo) + ,.mem_cmd_yumi_i(cfg_cmd_ready_li & cfg_cmd_v_lo) + + ,.mem_resp_i(cfg_resp_li) + ,.mem_resp_v_i(cfg_resp_v_li) + ,.mem_resp_ready_o(cfg_resp_ready_lo) + ); + +endmodule + diff --git a/litex/soc/cores/cpu/blackparrot/bp_fpga/bp2wb_convertor.v b/litex/soc/cores/cpu/blackparrot/bp_fpga/bp2wb_convertor.v new file mode 100644 index 0000000000..3780fb8ced --- /dev/null +++ b/litex/soc/cores/cpu/blackparrot/bp_fpga/bp2wb_convertor.v @@ -0,0 +1,214 @@ +/** + * bp2wb_convertor.v + * DESCRIPTION: THIS MODULE ADAPTS BP MEMORY BUS TO 64-BIT WISHBONE + */ + +module bp2wb_convertor + import bp_common_pkg::*; + import bp_common_aviary_pkg::*; + import bp_cce_pkg::*; + import bp_me_pkg::*; + #(parameter bp_cfg_e cfg_p = e_bp_single_core_cfg + 
`declare_bp_proc_params(cfg_p) + `declare_bp_me_if_widths(paddr_width_p, cce_block_width_p, num_lce_p, lce_assoc_p) + +// , parameter [paddr_width_p-1:0] dram_offset_p = '0 + , localparam num_block_words_lp = cce_block_width_p / 64 + , localparam num_block_bytes_lp = cce_block_width_p / 8 + , localparam num_word_bytes_lp = dword_width_p / 8 + , localparam block_offset_bits_lp = `BSG_SAFE_CLOG2(num_block_bytes_lp) + , localparam word_offset_bits_lp = `BSG_SAFE_CLOG2(num_block_words_lp) + , localparam byte_offset_bits_lp = `BSG_SAFE_CLOG2(num_word_bytes_lp) + , localparam wbone_data_width = 64 + , localparam wbone_addr_ubound = paddr_width_p + , localparam mem_granularity = 64 //TODO: adapt selection bit parametrized + , localparam wbone_addr_lbound = 3 //`BSG_SAFE_CLOG2(wbone_data_width / mem_granularity) //dword granularity + , localparam total_datafetch_cycle_lp = cce_block_width_p / wbone_data_width + , localparam total_datafetch_cycle_width = `BSG_SAFE_CLOG2(total_datafetch_cycle_lp) + , localparam cached_addr_base = 32'h4000_4000// 32'h5000_0000 + ) + (input clk_i + ,(* mark_debug = "true" *) input reset_i + + // BP side + ,(* mark_debug = "true" *) input [cce_mem_msg_width_lp-1:0] mem_cmd_i + ,(* mark_debug = "true" *) input mem_cmd_v_i + ,(* mark_debug = "true" *) output mem_cmd_yumi_o + + , (* mark_debug = "true" *) output [cce_mem_msg_width_lp-1:0] mem_resp_o + , (* mark_debug = "true" *) output mem_resp_v_o + , (* mark_debug = "true" *) input mem_resp_ready_i + + // Wishbone side + , (* mark_debug = "true" *) input [63:0] dat_i + , (* mark_debug = "true" *) output logic [63:0] dat_o + , (* mark_debug = "true" *) input ack_i + // , input err_i + // , input rty_i + , (* mark_debug = "true" *) output logic [wbone_addr_ubound-wbone_addr_lbound-1:0] adr_o//TODO: Double check!!! + , (* mark_debug = "true" *) output logic stb_o + , output cyc_o + , output sel_o //TODO: double check!!! 
+ , (* mark_debug = "true" *) output we_o + , output [2:0] cti_o //TODO: hardwire in Litex + , output [1:0] bte_o //TODO: hardwire in Litex + + ); + + `declare_bp_me_if(paddr_width_p, cce_block_width_p, num_lce_p, lce_assoc_p); + + //locals + (* mark_debug = "true" *) logic [total_datafetch_cycle_width:0] ack_ctr = 0; + (* mark_debug = "true" *) bp_cce_mem_msg_s mem_cmd_cast_i, mem_resp_cast_o, mem_cmd_r; + (* mark_debug = "true" *) logic ready_li, v_li, stb_justgotack; + (* mark_debug = "true" *) logic [cce_block_width_p-1:0] data_lo; + (* mark_debug = "true" *) logic [cce_block_width_p-1:0] data_li; + (* mark_debug = "true" *) wire [paddr_width_p-1:0] mem_cmd_addr_l; + (* mark_debug = "true" *) logic [paddr_width_p-1:0] addr_lo; + (* mark_debug = "true" *) logic set_stb; + (* mark_debug = "true" *) wire [63:0] data_little_end; + + + //reset + //TODO: reset ack_ctr here + //Handshaking between Wishbone and BlackParrot through convertor + //3.1.3:At every rising edge of [CLK_I] the terminating signal(ACK) is sampled. If it + //is asserted, then [STB_O] is negated. + + assign ready_li = ( ack_ctr == 0 ); + assign mem_cmd_yumi_o = mem_cmd_v_i && ready_li;//!stb_o then ready to take! 
+ // assign v_li = (ack_ctr == total_datafetch_cycle_lp-1); + assign mem_resp_v_o = mem_resp_ready_i & v_li; + assign stb_o = (set_stb) && !stb_justgotack; //since the address is taken from mem_cmd_r, it lags by 1 cycle + assign cyc_o = stb_o; + assign sel_o = 0; + assign cti_o = 0; + assign bte_o = 0; + + initial begin + ack_ctr = 0; + //stb_reset_lo =0; + end + +/* always_ff @(posedge clk_i) + if ( mem_cmd_yumi_o )// || (ack_ctr > 0)) + begin + data_li <= 0; + set_stb <= 1; + end +*/ + + +//Flip stb after each ack--->RULE 3.20: + +// Every time we get an ACK from WB, increment counter until the counter reaches to total_datafetch_cycle_lp +assign data_little_end = dat_i; + always_ff @(posedge clk_i) + begin + + if(reset_i) + begin + ack_ctr <= 0; + set_stb <= 0; + v_li <=0; + end + + else if (mem_cmd_yumi_o) + begin + data_li <= 0; + set_stb <= 1; + v_li <= 0; + stb_justgotack <= 0; + end + + else + begin + if (ack_i)//stb should be negated after ack + begin + stb_justgotack <= 1; + data_li[(ack_ctr*wbone_data_width) +: wbone_data_width] <= data_little_end; + if ((ack_ctr == total_datafetch_cycle_lp-1) || (mem_cmd_addr_l < cached_addr_base && mem_cmd_r.msg_type == e_cce_mem_uc_wr )) //if uncached store, just one cycle is fine + begin + ack_ctr <= 0; + v_li <=1; + set_stb <= 0; + end + else + ack_ctr <= ack_ctr + 1; + end + else + begin + stb_justgotack <= 0; + v_li <=0; + end + end + end + + //Packet Pass from BP to BP2WB + assign mem_cmd_cast_i = mem_cmd_i; + + bsg_dff_reset_en + #(.width_p(cce_mem_msg_width_lp)) + mshr_reg + (.clk_i(clk_i) + ,.reset_i(reset_i) + ,.en_i(mem_cmd_yumi_o)//when + ,.data_i(mem_cmd_i) + ,.data_o(mem_cmd_r) + ); + + + //Addr && Data && Command Pass from BP2WB to WB + logic [wbone_addr_lbound-1:0] throw_away; + assign mem_cmd_addr_l = mem_cmd_r.addr; + assign data_lo = mem_cmd_r.data; + logic [39:0] mem_cmd_addr_l_zero64; + logic [7:0] partial; + always_comb begin + if( mem_cmd_addr_l < cached_addr_base ) + begin + adr_o = 
mem_cmd_addr_l[wbone_addr_ubound-1:wbone_addr_lbound];//no need to change address for uncached stores/loads + dat_o = data_lo[(0*wbone_data_width) +: wbone_data_width];//unchached data is stored in LS 64bits + end + + else + begin + mem_cmd_addr_l_zero64 = mem_cmd_addr_l >> 6 << 6; + // addr_lo = + {adr_o,throw_away} = mem_cmd_addr_l_zero64 + (ack_ctr*8);//TODO:careful + // adr_o = addr_lo[wbone_addr_ubound-1:wbone_addr_lbound]; + dat_o = data_lo[(ack_ctr*wbone_data_width) +: wbone_data_width]; + end + end + + assign we_o = (mem_cmd_r.msg_type inside {e_cce_mem_uc_wr, e_cce_mem_wb}); + +//DEBUG + +wire [3:0] typean; +assign typean = mem_cmd_r.msg_type; +wire [2:0] debug1; +assign debug1 = (mem_cmd_r.addr[5:0]>>3); + +//Data Pass from BP2WB to BP + +wire [cce_block_width_p-1:0] rd_word_offset = mem_cmd_r.addr[3+:3]; +//wire [cce_block_width_p-1:0] rd_byte_offset = mem_cmd_r.addr[0+:3]; +wire [cce_block_width_p-1:0] rd_bit_shift = rd_word_offset*64; // We rely on receiver to adjust bits + +wire [cce_block_width_p-1:0] data_li_resp = (mem_cmd_r.msg_type == e_cce_mem_uc_rd) + ? data_li >> rd_bit_shift + : data_li; + +assign mem_resp_cast_o = '{data : data_li_resp + ,payload : mem_cmd_r.payload + ,size : mem_cmd_r.size + ,addr : mem_cmd_r.addr + ,msg_type: mem_cmd_r.msg_type + }; + +assign mem_resp_o = mem_resp_cast_o; + + +endmodule + diff --git a/litex/soc/cores/cpu/blackparrot/bp_fpga/bsg_mem_1rw_sync_mask_write_bit.v b/litex/soc/cores/cpu/blackparrot/bp_fpga/bsg_mem_1rw_sync_mask_write_bit.v new file mode 100644 index 0000000000..a6fdae9a60 --- /dev/null +++ b/litex/soc/cores/cpu/blackparrot/bp_fpga/bsg_mem_1rw_sync_mask_write_bit.v @@ -0,0 +1,55 @@ +/* +* bsg_mem_1rw_sync_mask_write_bit.v +* +* distributed synchronous 1-port ram for xilinx ultrascale or ultrascale plus FPGA +* Write mode: No-change | Read mode: No-change +* Note: +* There are 2 basic BRAM library primitives, RAMB18E2 and RAMB36E2 in Vivado. +* But none of them support bit-wise mask. 
They have Byte-wide write enable ports though. +* So we use the RAM_STYLE attribute to instruct the tool to infer distributed LUT RAM instead. +* +* To save resources, the code is written to be inferred as Single-port distributed ram RAM64X1S. +* https://www.xilinx.com/support/documentation/user_guides/ug574-ultrascale-clb.pdf +* +*/ + + +module bsg_mem_1rw_sync_mask_write_bit #( + parameter width_p = "inv" + , parameter els_p = "inv" + , parameter latch_last_read_p=0 + , parameter enable_clock_gating_p=0 + , localparam addr_width_lp = `BSG_SAFE_CLOG2(els_p) +) ( + input clk_i + , input reset_i + , input [ width_p-1:0] data_i + , input [addr_width_lp-1:0] addr_i + , input v_i + , input [ width_p-1:0] w_mask_i + , input w_i + , output [ width_p-1:0] data_o +); + + wire unused = reset_i; + + (* ram_style = "distributed" *) logic [width_p-1:0] mem [els_p-1:0]; + + logic [width_p-1:0] data_r; + always_ff @(posedge clk_i) begin + if (v_i & ~w_i) + data_r <= mem[addr_i]; + end + + assign data_o = data_r; + + for (genvar i=0; i> 1'b1; + + always_comb + begin + mem_cmd_v_o = cfg_v_lo; + + // uncached store + mem_cmd_cast_o.msg_type = e_cce_mem_uc_wr; + mem_cmd_cast_o.addr = bp_cfg_base_addr_gp; + mem_cmd_cast_o.payload = '0; + mem_cmd_cast_o.size = e_mem_size_8; + mem_cmd_cast_o.data = cce_block_width_p'({cfg_core_lo, cfg_addr_lo, cfg_data_lo}); + end + + always_comb + begin + ucode_cnt_clr = 1'b0; + ucode_cnt_inc = 1'b0; + + cfg_v_lo = '0; + cfg_core_lo = 8'hff; + cfg_addr_lo = '0; + cfg_data_lo = '0; + + case (state_r) + RESET: begin + state_n = skip_ram_init_p ? 
BP_FREEZE_CLR : BP_RESET_SET; + + ucode_cnt_clr = 1'b1; + end + BP_RESET_SET: begin + state_n = BP_FREEZE_SET; + + cfg_v_lo = 1'b1; + cfg_addr_lo = bp_cfg_reg_reset_gp; + cfg_data_lo = cfg_data_width_p'(1); + end + BP_FREEZE_SET: begin + state_n = BP_RESET_CLR; + + cfg_v_lo = 1'b1; + cfg_addr_lo = bp_cfg_reg_freeze_gp; + cfg_data_lo = cfg_data_width_p'(1); + end + BP_RESET_CLR: begin + state_n = SEND_RAM_LO; + + cfg_v_lo = 1'b1; + cfg_addr_lo = bp_cfg_reg_reset_gp; + cfg_data_lo = cfg_data_width_p'(0); + end + SEND_RAM_LO: begin + state_n = SEND_RAM_HI; + + cfg_v_lo = 1'b1; + cfg_addr_lo = cfg_addr_width_p'(bp_cfg_mem_base_cce_ucode_gp) + (ucode_cnt_r << 1); + cfg_data_lo = cce_inst_boot_rom_data[0+:cfg_data_width_p]; + // TODO: This is nonsynth, won't work on FPGA + cfg_data_lo = (|cfg_data_lo === 'X) ? '0 : cfg_data_lo; + end + SEND_RAM_HI: begin + state_n = ucode_prog_done ? SEND_CCE_NORMAL : SEND_RAM_LO; + + ucode_cnt_inc = 1'b1; + + cfg_v_lo = 1'b1; + cfg_addr_lo = cfg_addr_width_p'(bp_cfg_mem_base_cce_ucode_gp) + (ucode_cnt_r << 1) + 1'b1; + cfg_data_lo = cfg_data_width_p'(cce_inst_boot_rom_data[inst_width_p-1:cfg_data_width_p]); + // TODO: This is nonsynth, won't work on FPGA + cfg_data_lo = (|cfg_data_lo === 'X) ? 
'0 : cfg_data_lo; + end + SEND_CCE_NORMAL: begin + state_n = SEND_ICACHE_NORMAL; + + cfg_v_lo = 1'b1; + cfg_addr_lo = bp_cfg_reg_cce_mode_gp; + cfg_data_lo = cfg_data_width_p'(e_cce_mode_normal); + end + SEND_ICACHE_NORMAL: begin + state_n = SEND_DCACHE_NORMAL; + + cfg_v_lo = 1'b1; + cfg_addr_lo = cfg_addr_width_p'(bp_cfg_reg_icache_mode_gp); + cfg_data_lo = cfg_data_width_p'(e_dcache_lce_mode_normal); // TODO: tapeout hack, change to icache + end + SEND_DCACHE_NORMAL: begin + state_n = SEND_PC_LO; + + cfg_v_lo = 1'b1; + cfg_addr_lo = cfg_addr_width_p'(bp_cfg_reg_dcache_mode_gp); + cfg_data_lo = cfg_data_width_p'(e_dcache_lce_mode_normal); + end + SEND_PC_LO: begin + state_n = SEND_PC_HI; + + cfg_v_lo = 1'b1; + cfg_addr_lo = cfg_addr_width_p'(bp_cfg_reg_start_pc_lo_gp); + cfg_data_lo = bp_pc_entry_point_gp[0+:cfg_data_width_p]; + end + SEND_PC_HI: begin + state_n = BP_FREEZE_CLR; + + cfg_v_lo = 1'b1; + cfg_addr_lo = cfg_addr_width_p'(bp_cfg_reg_start_pc_hi_gp); + cfg_data_lo = cfg_data_width_p'(bp_pc_entry_point_gp[vaddr_width_p-1:cfg_data_width_p]); + end + BP_FREEZE_CLR: begin + state_n = DONE; + + cfg_v_lo = 1'b1; + cfg_addr_lo = cfg_addr_width_p'(bp_cfg_reg_freeze_gp); + cfg_data_lo = cfg_data_width_p'(0);; + end + DONE: begin + state_n = DONE; + end + default: begin + state_n = RESET; + end + endcase + end + +endmodule diff --git a/litex/soc/cores/cpu/blackparrot/bp_hardware/bp_common_pkg.vh b/litex/soc/cores/cpu/blackparrot/bp_hardware/bp_common_pkg.vh new file mode 100644 index 0000000000..9500673b83 --- /dev/null +++ b/litex/soc/cores/cpu/blackparrot/bp_hardware/bp_common_pkg.vh @@ -0,0 +1,55 @@ +/* + * bp_common_pkg.vh + * + * Contains the interface structures used for communicating between FE, BE, ME in BlackParrot. + * Additionally contains global parameters used to configure the system. In the future, when + * multiple configurations are supported, these global parameters will belong to groups + * e.g. SV39, VM-disabled, ... 
+ *
+ */
+
+package bp_common_pkg;
+
+  `include "bsg_defines.v"
+  `include "bp_common_defines.vh"
+  `include "bp_common_fe_be_if.vh"
+  `include "bp_common_me_if.vh"
+
+  /*
+   * RV64 specifies a 64b effective address and 32b instruction.
+   * BlackParrot supports SV39 virtual memory, which specifies 39b virtual / 56b physical address.
+   * Effective addresses must have bits 39-63 match bit 38
+   *   or a page fault exception will occur during translation.
+   * Currently, we only support a very limited number of parameter configurations.
+   * Thought: We could have a `define surrounding core instantiations of each parameter and then
+   *   when they import this package, `declare the if structs. No more casting!
+   */
+
+  localparam bp_eaddr_width_gp = 64;
+  localparam bp_instr_width_gp = 32;
+
+  parameter bp_sv39_page_table_depth_gp = 3;
+  parameter bp_sv39_pte_width_gp = 64;
+  parameter bp_sv39_vaddr_width_gp = 39;
+  parameter bp_sv39_paddr_width_gp = 56;
+  parameter bp_sv39_ppn_width_gp = 44;
+  parameter bp_page_size_in_bytes_gp = 4096;
+  parameter bp_page_offset_width_gp = `BSG_SAFE_CLOG2(bp_page_size_in_bytes_gp);
+
+  parameter bp_data_resp_num_flit_gp = 4;
+  parameter bp_data_cmd_num_flit_gp = 4;
+
+  localparam dram_base_addr_gp = 32'h5000_0000;
+
+  localparam cfg_link_dev_base_addr_gp = 32'h01??_????;
+  localparam clint_dev_base_addr_gp = 32'h02??_????;
+  localparam host_dev_base_addr_gp = 32'h03??_????;
+  localparam plic_dev_base_addr_gp = 32'h0c??_????;
+
+  localparam mipi_reg_base_addr_gp = 32'h0200_0???;
+  localparam mtimecmp_reg_base_addr_gp = 32'h0200_4???;
+  localparam mtime_reg_addr_gp = 32'h0200_bff8;
+  localparam plic_reg_base_addr_gp = 32'h0c00_0???;
+
+endpackage : bp_common_pkg
+
diff --git a/litex/soc/cores/cpu/blackparrot/bp_hardware/bp_nonsynth_host.v b/litex/soc/cores/cpu/blackparrot/bp_hardware/bp_nonsynth_host.v
new file mode 100644
index 0000000000..e64ce690a4
--- /dev/null
+++ b/litex/soc/cores/cpu/blackparrot/bp_hardware/bp_nonsynth_host.v
@@ -0,0 +1,190 @@
+// bp_nonsynth_host: decodes host MMIO commands (hex print, char print, finish) by address and acknowledges each accepted command through mem_resp_buffer.
+module bp_nonsynth_host
+  import bp_common_pkg::*;
+  import bp_common_aviary_pkg::*;
+  import bp_be_pkg::*;
+  import bp_common_rv64_pkg::*;
+  import bp_cce_pkg::*;
+  import bsg_noc_pkg::*;
+  import bp_cfg_link_pkg::*;
+  #(parameter bp_cfg_e cfg_p = e_bp_inv_cfg
+    `declare_bp_proc_params(cfg_p)
+    `declare_bp_me_if_widths(paddr_width_p, cce_block_width_p, num_lce_p, lce_assoc_p)
+    )
+  (input clk_i
+   , input reset_i
+
+   , input [cce_mem_msg_width_lp-1:0] mem_cmd_i
+   , input mem_cmd_v_i
+   , output logic mem_cmd_yumi_o
+
+   , output logic [cce_mem_msg_width_lp-1:0] mem_resp_o
+   , output logic mem_resp_v_o
+   , input mem_resp_ready_i
+
+   , output [num_core_p-1:0] program_finish_o
+   ,(* mark_debug = "true" *) output logic all_finished_debug_o //SC_add
+   , (* mark_debug = "true" *) output logic core_passed_debug
+   , (* mark_debug = "true" *) output logic core_failed_debug
+   );
+
+`declare_bp_me_if(paddr_width_p, cce_block_width_p, num_lce_p, lce_assoc_p);
+
+// HOST I/O mappings
+//localparam host_dev_base_addr_gp = 32'h03??_????;
+
+// Host I/O mappings (arbitrarily decided for now)
+// Overall host controls 32'h0300_0000-32'h03FF_FFFF
+
+localparam hprint_base_addr_gp = paddr_width_p'(32'h0300_0???); // hex print; '?' nibbles are wildcards for the casez below
+localparam cprint_base_addr_gp = paddr_width_p'(64'h0300_1???); // character print
+localparam finish_base_addr_gp = paddr_width_p'(64'h0300_2???); // finish / pass-fail report
+
+bp_cce_mem_msg_s mem_cmd_cast_i;
+
+assign mem_cmd_cast_i = mem_cmd_i;
+
+localparam lg_num_core_lp = `BSG_SAFE_CLOG2(num_core_p);
+
+logic hprint_data_cmd_v;
+logic cprint_data_cmd_v;
+logic finish_data_cmd_v;
+// Classify the command by address; a class strobe is raised only while mem_cmd_v_i is set.
+always_comb
+  begin
+    hprint_data_cmd_v = 1'b0;
+    cprint_data_cmd_v = 1'b0;
+    finish_data_cmd_v = 1'b0;
+
+    unique
+    casez (mem_cmd_cast_i.addr)
+      hprint_base_addr_gp: hprint_data_cmd_v = mem_cmd_v_i;
+      cprint_base_addr_gp: cprint_data_cmd_v = mem_cmd_v_i;
+      finish_base_addr_gp: finish_data_cmd_v = mem_cmd_v_i;
+      default: begin end
+    endcase
+  end
+
+logic [num_core_p-1:0] hprint_w_v_li;
+logic [num_core_p-1:0] cprint_w_v_li;
+logic [num_core_p-1:0] finish_w_v_li;
+
+// Memory-mapped I/O is 64 bit aligned
+localparam byte_offset_width_lp = 3;
+wire [lg_num_core_lp-1:0] mem_cmd_core_enc =
+  mem_cmd_cast_i.addr[byte_offset_width_lp+:lg_num_core_lp]; // core index = address bits just above the 8-byte offset
+// Fan each class strobe out to a per-core valid vector selected by mem_cmd_core_enc (bsg_decode_with_v is defined elsewhere -- presumably a one-hot decode, confirm).
+bsg_decode_with_v
+ #(.num_out_p(num_core_p))
+ hprint_data_cmd_decoder
+  (.v_i(hprint_data_cmd_v)
+   ,.i(mem_cmd_core_enc)
+
+   ,.o(hprint_w_v_li)
+   );
+
+bsg_decode_with_v
+ #(.num_out_p(num_core_p))
+ cprint_data_cmd_decoder
+  (.v_i(cprint_data_cmd_v)
+   ,.i(mem_cmd_core_enc)
+
+   ,.o(cprint_w_v_li)
+   );
+
+bsg_decode_with_v
+ #(.num_out_p(num_core_p))
+ finish_data_cmd_decoder
+  (.v_i(finish_data_cmd_v)
+   ,.i(mem_cmd_core_enc)
+
+   ,.o(finish_w_v_li)
+   );
+// finish_r feeds back through an OR, so a core's finish bit stays set once written (bsg_dff_reset presumably clears it on reset_i -- confirm).
+logic [num_core_p-1:0] finish_r;
+bsg_dff_reset
+ #(.width_p(num_core_p))
+ finish_accumulator
+  (.clk_i(clk_i)
+   ,.reset_i(reset_i)
+
+   ,.data_i(finish_r | finish_w_v_li)
+   ,.data_o(finish_r)
+   );
+// all_finished_r registers the AND-reduction of finish_r.
+logic all_finished_r;
+bsg_dff_reset
+ #(.width_p(1))
+ all_finished_reg
+  (.clk_i(clk_i)
+   ,.reset_i(reset_i)
+
+   ,.data_i(&finish_r)
+   ,.data_o(all_finished_r)
+   );
+
+assign program_finish_o = finish_r;
+// Reporting runs on the falling clock edge; only commands actually accepted (mem_cmd_yumi_o) are printed.
+always_ff @(negedge clk_i)
+  begin
+    for (integer i = 0; i < num_core_p; i++)
+      begin
+        if (hprint_w_v_li[i] & mem_cmd_yumi_o)
+          $display("[CORE%0x PRT] %x", i, mem_cmd_cast_i.data[0+:8]);
+        if (cprint_w_v_li[i] & mem_cmd_yumi_o)
+          $display("[CORE%0x PRT] %c", i, mem_cmd_cast_i.data[0+:8]);
+        if (finish_w_v_li[i] & mem_cmd_yumi_o & ~mem_cmd_cast_i.data[0])
+          begin
+            $display("[CORE%0x FSH] PASS", i);
+            core_passed_debug <= 1;
+          end
+        if (finish_w_v_li[i] & mem_cmd_yumi_o & mem_cmd_cast_i.data[0])
+          begin
+            $display("[CORE%0x FSH] FAIL", i);
+            core_failed_debug <=1;
+          end
+      end
+
+    if (all_finished_r)
+      begin
+        $display("All cores finished! Terminating...");
+        $finish();
+        all_finished_debug_o <= 1; // NOTE(review): scheduled after $finish -- confirm this assignment is still observed where it is needed
+      end
+    if (reset_i) // last in the block, so these assignments win over the ones above while reset_i is high
+      begin
+        all_finished_debug_o <= 0;
+        core_passed_debug <= 0;
+        core_failed_debug <= 0;
+      end
+  end
+bp_cce_mem_msg_s mem_resp_lo;
+logic mem_resp_v_lo, mem_resp_ready_lo;
+assign mem_cmd_yumi_o = mem_cmd_v_i & mem_resp_ready_lo; // accept a command only when mem_resp_buffer reports ready_o
+bsg_one_fifo
+ #(.width_p(cce_mem_msg_width_lp))
+ mem_resp_buffer
+  (.clk_i(clk_i)
+   ,.reset_i(reset_i)
+
+   ,.data_i(mem_resp_lo)
+   ,.v_i(mem_cmd_yumi_o)
+   ,.ready_o(mem_resp_ready_lo)
+
+   ,.data_o(mem_resp_o)
+   ,.v_o(mem_resp_v_lo)
+   ,.yumi_i(mem_resp_ready_i & mem_resp_v_lo)
+   );
+assign mem_resp_v_o = mem_resp_v_lo & mem_resp_ready_i;
+
+assign mem_resp_lo =
+  '{msg_type : mem_cmd_cast_i.msg_type
+    ,addr    : mem_cmd_cast_i.addr
+    ,payload : mem_cmd_cast_i.payload
+    ,size    : mem_cmd_cast_i.size
+    ,data    : '0 // the response echoes the command header with zeroed data
+    };
+
+
+endmodule : bp_nonsynth_host
+
diff --git a/litex/soc/cores/cpu/blackparrot/bp_software/cce_ucode.mem b/litex/soc/cores/cpu/blackparrot/bp_software/cce_ucode.mem
new file mode 100644
index 0000000000..5815a73238
--- /dev/null
+++ b/litex/soc/cores/cpu/blackparrot/bp_software/cce_ucode.mem
@@ -0,0 +1,96 @@
+010001000001111100000000000000000000000001000000
+001000000001111100000000000010110000000000000000
+000001000000000011111000000000000000100000000000
+010001000011111100000000000000000000000000000010
+001000000011111100000000000000010000000000000000
+000001000010000111111000000000000000100000000000
+010001000101111100000000000000000000000000001000
+001000000101111100000000000001000000000000000000
+000001000100001011111000000000000000100000000000
+101001000000010010100110100000000000000000000000
+001111000000000000000000000001110000000000000000
+010001000001111100000000000000000000000000000000
+010001000011111100000000000000000000000000000010
+001101000010000000000000000110000000000000000000
+010001000101111100000000000000000000000000000000
+010001000111111100000000000000000000000001000000
+010001001001111100000000000000000000000000000000
+001101000110001000000000000101100000000000000000 +111001000001000001001100100100000000000000000000 +000000000100001011111000000000000000100000000000 +000000001000010011111000000000100000000000000000 +001111000000000000000000000100010000000000000000 +000000000000000011111000000000000000100000000000 +001111000000000000000000000011010000000000000000 +010001000001111100000000000000000000000000000000 +010001000011111100000000000000000000000000000010 +010001000111111100000000000000000000000000000000 +001101000010000000000000001000010000000000000000 +111001000000000010101100100100000000000000000000 +111010011001000000000000000000000000000000000000 +001001000110010000000000010111110000000000000000 +000000000000000011111000000000000000100000000000 +001111000000000000000000000110110000000000000000 +111000010000000000000000000000000000000000000000 +110001000000000000000000000000000000000000000000 +111011000000000000000000000000000000000000000000 +001010000011111100000000010110010000000000000001 +100000100000000000000000000000000000000000000000 +001010001001111100000000001000010000000000000001 +111010000000000000000000000000000000000000000000 +100001100010000000100000000000000000000000000000 +110000000000000000000000000000000000000000000000 +001010000001111100000000001100010000000000000001 +001010000101111100000000001011110000000000000001 +001010001101111100000000001011110000000000000001 +010100000001111100000000000000000000000000000010 +001111000000000000000000001100100000000000000000 +010100000001111100000000000000000000000000000001 +001111000000000000000000001100100000000000000000 +010100000001111100000000000000000000000000000110 +001010011011111100000000010000110000000000000000 +010001000001111100000000000000000000000000000000 +010001000011111100000000000000000000000000000010 +010001000101111100000000000000000000000000000000 +010001000111111100000000000000000000000000000001 +001101000010000000000000001111110000000000000000 +001011000001111100000000001111010000000000000000 
+001011010000000000000000001111010000000000000000 +000000000100001011111000000000000000100000000000 +111001000110000010001010100100000000000000000000 +101010100000001010100100000000000000000000000000 +000000000000000011111000000000000000100000000000 +001111000000000000000000001101110000000000000000 +001000000101111100000000010000110000000000000000 +111010011001000000000000000000000000000000000000 +000001000100001011111000000000000000100000000000 +001111000000000000000000001111110000000000000000 +001010011001111100000000010001110000000000000000 +101010100010001000100000000000000000000000000000 +111001000101100010001000100100000000000000000000 +001111000000000000000000001000010000000000000000 +101001100010001001100010000000000000000000000000 +001010010111111100000000010011110000000000000000 +111001000011100010011011100100000000000000000000 +111011011000000000000000000000000000000000000000 +001010011101111100000000010011010000000000000001 +111001010100101010101100100000000000000000000000 +111010011000000000000000000000000000000000000000 +011000010110000000000000000000000000000000000000 +001010010101111100000000010101110000000000000000 +111001000010100110001001100100000000000000000000 +111001000011100110001001100100000000000000000000 +111011011000000000000000000000000000000000000000 +001010011101111100000000010101010000000000000001 +111001010100101010101100100100000000000000000000 +111010011000000000000000000000000000000000000000 +001111000000000000000000001000010000000000000000 +111001010000101010101100100100000000000000000000 +001111000000000000000000001000010000000000000000 +111010000000000000000000000000000000000000000000 +001010000001111100000000010111010000000000000001 +111001010000101010101100100100000000000000000000 +001111000000000000000000001000010000000000000000 +111001010000101010101100100100000000000000000000 +001111000000000000000000001000010000000000000000 +110111000000000000000000000000000000000000000000 diff --git 
a/litex/soc/cores/cpu/blackparrot/bp_software/udivmoddi4.c b/litex/soc/cores/cpu/blackparrot/bp_software/udivmoddi4.c
new file mode 100644
index 0000000000..a57c6e0e6e
--- /dev/null
+++ b/litex/soc/cores/cpu/blackparrot/bp_software/udivmoddi4.c
@@ -0,0 +1,375 @@
+/* ===-- udivmoddi4.c - Implement __udivmoddi4 -----------------------------===
+ *
+ *                     The LLVM Compiler Infrastructure
+ *
+ * This file is dual licensed under the MIT and the University of Illinois Open
+ * Source Licenses. See LICENSE.TXT for details.
+ *
+ * ===----------------------------------------------------------------------===
+ *
+ * This file implements __udivmoddi4 for the compiler_rt library.
+ *
+ * ===----------------------------------------------------------------------===
+ */
+
+#ifndef __blackparrot__
+#include "int_lib.h"
+
+/* Effects: if rem != 0, *rem = a % b
+ * Returns: a / b
+ */
+
+/* Translated from Figure 3-40 of The PowerPC Compiler Writer's Guide */
+
+COMPILER_RT_ABI du_int
+__udivmoddi4(du_int a, du_int b, du_int* rem)
+{
+    const unsigned n_uword_bits = sizeof(su_int) * CHAR_BIT;
+    const unsigned n_udword_bits = sizeof(du_int) * CHAR_BIT;
+    udwords n;
+    n.all = a;
+    udwords d;
+    d.all = b;
+    udwords q;
+    udwords r;
+    unsigned sr;
+    /* special cases, X is unknown, K != 0 */
+    if (n.s.high == 0)
+    {
+        if (d.s.high == 0)
+        {
+            /* 0 X
+             * ---
+             * 0 X
+             */
+            if (rem)
+                *rem = n.s.low % d.s.low;
+            return n.s.low / d.s.low;
+        }
+        /* 0 X
+         * ---
+         * K X
+         */
+        if (rem)
+            *rem = n.s.low;
+        return 0;
+    }
+    /* n.s.high != 0 */
+    if (d.s.low == 0)
+    {
+        if (d.s.high == 0)
+        {
+            /* K X
+             * ---
+             * 0 0
+             */
+            if (rem)
+                *rem = n.s.high % d.s.low;
+            return n.s.high / d.s.low;
+        }
+        /* d.s.high != 0 */
+        if (n.s.low == 0)
+        {
+            /* K 0
+             * ---
+             * K 0
+             */
+            if (rem)
+            {
+                r.s.high = n.s.high % d.s.high;
+                r.s.low = 0;
+                *rem = r.all;
+            }
+            return n.s.high / d.s.high;
+        }
+        /* K K
+         * ---
+         * K 0
+         */
+        if ((d.s.high & (d.s.high - 1)) == 0) /* if d is a power of 2 */
+        {
+            if (rem)
+            {
+                r.s.low = n.s.low;
+                r.s.high = n.s.high & (d.s.high - 1);
+                *rem = r.all;
+            }
+            return n.s.high >> __builtin_ctz(d.s.high);
+        }
+        /* K K
+         * ---
+         * K 0
+         */
+        sr = __builtin_clz(d.s.high) - __builtin_clz(n.s.high);
+        /* 0 <= sr <= n_uword_bits - 2 or sr large */
+        if (sr > n_uword_bits - 2)
+        {
+            if (rem)
+                *rem = n.all;
+            return 0;
+        }
+        ++sr;
+        /* 1 <= sr <= n_uword_bits - 1 */
+        /* q.all = n.all << (n_udword_bits - sr); */
+        q.s.low = 0;
+        q.s.high = n.s.low << (n_uword_bits - sr);
+        /* r.all = n.all >> sr; */
+        r.s.high = n.s.high >> sr;
+        r.s.low = (n.s.high << (n_uword_bits - sr)) | (n.s.low >> sr);
+    }
+    else /* d.s.low != 0 */
+    {
+        if (d.s.high == 0)
+        {
+            /* K X
+             * ---
+             * 0 K
+             */
+            if ((d.s.low & (d.s.low - 1)) == 0) /* if d is a power of 2 */
+            {
+                if (rem)
+                    *rem = n.s.low & (d.s.low - 1);
+                if (d.s.low == 1)
+                    return n.all;
+                sr = __builtin_ctz(d.s.low);
+                q.s.high = n.s.high >> sr;
+                q.s.low = (n.s.high << (n_uword_bits - sr)) | (n.s.low >> sr);
+                return q.all;
+            }
+            /* K X
+             * ---
+             * 0 K
+             */
+            sr = 1 + n_uword_bits + __builtin_clz(d.s.low) - __builtin_clz(n.s.high);
+            /* 2 <= sr <= n_udword_bits - 1
+             * q.all = n.all << (n_udword_bits - sr);
+             * r.all = n.all >> sr;
+             */
+            if (sr == n_uword_bits)
+            {
+                q.s.low = 0;
+                q.s.high = n.s.low;
+                r.s.high = 0;
+                r.s.low = n.s.high;
+            }
+            else if (sr < n_uword_bits) // 2 <= sr <= n_uword_bits - 1
+            {
+                q.s.low = 0;
+                q.s.high = n.s.low << (n_uword_bits - sr);
+                r.s.high = n.s.high >> sr;
+                r.s.low = (n.s.high << (n_uword_bits - sr)) | (n.s.low >> sr);
+            }
+            else // n_uword_bits + 1 <= sr <= n_udword_bits - 1
+            {
+                q.s.low = n.s.low << (n_udword_bits - sr);
+                q.s.high = (n.s.high << (n_udword_bits - sr)) |
+                           (n.s.low >> (sr - n_uword_bits));
+                r.s.high = 0;
+                r.s.low = n.s.high >> (sr - n_uword_bits);
+            }
+        }
+        else
+        {
+            /* K X
+             * ---
+             * K K
+             */
+            sr = __builtin_clz(d.s.high) - __builtin_clz(n.s.high);
+            /* 0 <= sr <= n_uword_bits - 1 or sr large */
+            if (sr > n_uword_bits - 1)
+            {
+                if (rem)
+                    *rem = n.all;
+                return 0;
+            }
+            ++sr;
+            /* 1 <= sr <= n_uword_bits */
+            /* q.all = n.all << (n_udword_bits - sr); */
+            q.s.low = 0;
+            if (sr == n_uword_bits)
+            {
+                q.s.high = n.s.low;
+                r.s.high = 0;
+                r.s.low = n.s.high;
+            }
+            else
+            {
+                q.s.high = n.s.low << (n_uword_bits - sr);
+                r.s.high = n.s.high >> sr;
+                r.s.low = (n.s.high << (n_uword_bits - sr)) | (n.s.low >> sr);
+            }
+        }
+    }
+    /* Not a special case
+     * q and r are initialized with:
+     * q.all = n.all << (n_udword_bits - sr);
+     * r.all = n.all >> sr;
+     * 1 <= sr <= n_udword_bits - 1
+     */
+    su_int carry = 0;
+    for (; sr > 0; --sr)
+    {
+        /* r:q = ((r:q) << 1) | carry */
+        r.s.high = (r.s.high << 1) | (r.s.low >> (n_uword_bits - 1));
+        r.s.low = (r.s.low << 1) | (q.s.high >> (n_uword_bits - 1));
+        q.s.high = (q.s.high << 1) | (q.s.low >> (n_uword_bits - 1));
+        q.s.low = (q.s.low << 1) | carry;
+        /* carry = 0;
+         * if (r.all >= d.all)
+         * {
+         * r.all -= d.all;
+         * carry = 1;
+         * }
+         */
+        const di_int s = (di_int)(d.all - r.all - 1) >> (n_udword_bits - 1);
+        carry = s & 1;
+        r.all -= d.all & s;
+    }
+    q.all = (q.all << 1) | carry;
+    if (rem)
+        *rem = r.all;
+    return q.all;
+}
+#else /* __blackparrot__ */
+
+/* More subroutines needed by GCC output code on some machines. */
+/* Compile this one with gcc. */
+/* Copyright (C) 1989-2014 Free Software Foundation, Inc.
+
+This file is part of GCC.
+
+GCC is free software; you can redistribute it and/or modify it under
+the terms of the GNU General Public License as published by the Free
+Software Foundation; either version 3, or (at your option) any later
+version.
+
+GCC is distributed in the hope that it will be useful, but WITHOUT ANY
+WARRANTY; without even the implied warranty of MERCHANTABILITY or
+FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License
+for more details.
+
+Under Section 7 of GPL version 3, you are granted additional
+permissions described in the GCC Runtime Library Exception, version
+3.1, as published by the Free Software Foundation.
+
+You should have received a copy of the GNU General Public License and
+a copy of the GCC Runtime Library Exception along with this program;
+see the files COPYING3 and COPYING.RUNTIME respectively. If not, see
+<http://www.gnu.org/licenses/>. */
+
+/* This is extracted from gcc's libgcc/libgcc2.c with these typedefs added: */
+typedef short Wtype;
+typedef int DWtype;
+typedef unsigned int UWtype;
+typedef unsigned long long UDWtype;
+#if __BYTE_ORDER__ != __ORDER_LITTLE_ENDIAN__
+struct DWstruct {Wtype high, low;};
+#else
+struct DWstruct {Wtype low, high;};
+#endif
+typedef union {
+  struct DWstruct s;
+  DWtype ll;
+} DWunion;
+
+/* Unsigned division: returns n / d and, when rp is non-null, stores n % d
+   in *rp.  d must be non-zero, since a zero divisor reaches
+   __builtin_clzll (0), whose result is undefined.  */
+UDWtype
+__udivmoddi4 (UDWtype n, UDWtype d, UDWtype *rp)
+{
+  UDWtype q = 0, r = n, y = d;
+  UWtype lz1, lz2, i, k;
+
+  /* Implements align divisor shift dividend method. This algorithm
+     aligns the divisor under the dividend and then perform number of
+     test-subtract iterations which shift the dividend left. Number of
+     iterations is k + 1 where k is the number of bit positions the
+     divisor must be shifted left to align it under the dividend.
+     quotient bits can be saved in the rightmost positions of the dividend
+     as it shifts left on each test-subtract iteration. */
+
+  if (y <= r)
+    {
+      lz1 = __builtin_clzll (d);
+      lz2 = __builtin_clzll (n);
+
+      k = lz1 - lz2;
+      y = (y << k);
+
+      /* Dividend can exceed 2 ^ (width − 1) − 1 but still be less than the
+         aligned divisor. Normal iteration can drops the high order bit
+         of the dividend. Therefore, first test-subtract iteration is a
+         special case, saving its quotient bit in a separate location and
+         not shifting the dividend. */
+      if (r >= y)
+        {
+          r = r - y;
+          q = (1ULL << k);
+        }
+
+      if (k > 0)
+        {
+          y = y >> 1;
+
+          /* k additional iterations where k regular test subtract shift
+             dividend iterations are done. */
+          i = k;
+          do
+            {
+              if (r >= y)
+                r = ((r - y) << 1) + 1;
+              else
+                r = (r << 1);
+              i = i - 1;
+            } while (i != 0);
+
+          /* First quotient bit is combined with the quotient bits resulting
+             from the k regular iterations. */
+          q = q + r;
+          r = r >> k;
+          q = q - (r << k);
+        }
+    }
+
+  if (rp)
+    *rp = r;
+  return q;
+}
+
+/* Signed remainder: returns u % v, carrying the sign of the dividend u.
+
+   The operands are made non-negative, the unsigned remainder is obtained
+   from __udivmoddi4 and the sign is restored afterwards.  The remainder is
+   received in a UDWtype temporary because __udivmoddi4 stores a full
+   UDWtype through its third argument; pointing it at the DWtype result
+   (an int here) would make it write past the end of that object wherever
+   int is narrower than unsigned long long.
+
+   NOTE(review): DWtype is typedef'd to int above although the "di" suffix
+   conventionally denotes 64-bit operands -- confirm the typedefs are the
+   ones intended for this target.  */
+DWtype
+__moddi3 (DWtype u, DWtype v)
+{
+  Wtype c = 0;
+  DWunion uu = {.ll = u};
+  DWunion vv = {.ll = v};
+  UDWtype rem = 0;
+  DWtype w;
+
+  if (uu.s.high < 0)
+    c = ~c,
+    uu.ll = -uu.ll;
+  if (vv.s.high < 0)
+    vv.ll = -vv.ll;
+
+  (void) __udivmoddi4 (uu.ll, vv.ll, &rem);
+  w = (DWtype) rem;
+  if (c)
+    w = -w;
+
+  return w;
+}
+
+#endif /* __blackparrot__ */
diff --git a/litex/soc/cores/cpu/blackparrot/core.py b/litex/soc/cores/cpu/blackparrot/core.py
new file mode 100644
index 0000000000..884b0791d3
--- /dev/null
+++ b/litex/soc/cores/cpu/blackparrot/core.py
@@ -0,0 +1,174 @@
+# litex/soc/cores/cpu/blackparrot/core.py
+# BlackParrot Chip core support for the LiteX SoC.
+#
+# Authors: Sadullah Canakci & Cansu Demirkiran <{scanakci,cansu}@bu.edu>
+# Copyright (c) 2019, Boston University
+# All rights reserved.
+#
+# Redistribution and use in source and binary forms, with or without
+# modification, are permitted provided that the following conditions are
+# met:
+#
+#     * Redistributions of source code must retain the above copyright
+#       notice, this list of conditions and the following disclaimer.
+#
+#     * Redistributions in binary form must reproduce the above
+#       copyright notice, this list of conditions and the following
+#       disclaimer in the documentation and/or other materials provided
+#       with the distribution.
+#
+# THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
+# "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
+# LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
+# A PARTICULAR PURPOSE ARE DISCLAIMED.
IN NO EVENT SHALL THE COPYRIGHT
+# OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
+# SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
+# LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
+# DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
+# THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
+# (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
+# OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+
+import os
+
+from migen import *
+
+from litex.soc.interconnect import axi
+from litex.soc.interconnect import wishbone
+from litex.soc.cores.cpu import CPU
+
+# Supported variants. Only the keys are consulted in this file.
+CPU_VARIANTS = {
+    "standard": "freechips.rocketchip.system.LitexConfig",
+#    "linux":    "freechips.rocketchip.system.LitexLinuxConfig",
+#    "full":     "freechips.rocketchip.system.LitexFullConfig",
+}
+
+# Per-variant GCC flags (see BlackParrotRV64.gcc_flags).
+GCC_FLAGS = {
+    "standard": "-march=rv64ia -mabi=lp64 -O0 ",
+#    "linux":    "-march=rv64imac -mabi=lp64 ",
+#    "full":     "-march=rv64imafdc -mabi=lp64 ",
+}
+
+class BlackParrotRV64(Module):
+    """BlackParrot RV64 CPU wrapper with a single 64-bit Wishbone master.
+
+    The Verilog sources listed in flist_litex.verilator are registered with
+    the platform on construction; the ExampleBlackParrotSystem instance is
+    added in do_finalize(), which requires set_reset_address() beforehand.
+    """
+    name                 = "blackparrot"
+    data_width           = 64
+    endianness           = "little"
+    gcc_triple           = ("riscv64-unknown-elf")
+    linker_output_format = "elf64-littleriscv"
+    io_regions           = {0x30000000: 0x20000000} # origin, length
+
+    @property
+    def mem_map(self):
+        """Return the base address of each SoC region, keyed by region name."""
+        return {
+            "ethmac"   : 0x30000000,
+            "csr"      : 0x40000000,
+            "rom"      : 0x50000000,
+            "sram"     : 0x51000000,
+            "main_ram" : 0x80000000,
+        }
+
+    @property
+    def gcc_flags(self):
+        """Return the GCC flags of the selected variant plus -D__blackparrot__."""
+        flags =  "-mno-save-restore "
+        flags += GCC_FLAGS[self.variant]
+        flags += "-D__blackparrot__ "
+        return flags
+
+    def __init__(self, platform, variant="standard"):
+        """Create the CPU signals and Wishbone bus, then register the sources.
+
+        platform -- LiteX platform that receives the Verilog sources.
+        variant  -- key of CPU_VARIANTS (only "standard" is defined).
+        """
+        assert variant in CPU_VARIANTS, "Unsupported variant %s" % variant
+        print("SC: Check how to get cpu_reset_addr properly!!!!!!!!")
+
+        self.platform  = platform
+        self.variant   = variant
+        self.reset     = Signal()
+        self.interrupt = Signal(4) # TODO: how interrupts work?
+        # NOTE: an earlier revision used adr_width=40.
+        self.wbone = wbn = wishbone.Interface(data_width=64, adr_width=37)
+
+        # TODO: find out why this is needed; without it soc_core.py fails on a missing "interrupts" attribute.
+        self.interrupts = {}
+
+        self.buses = [wbn]
+        # # #
+        # connect BP adaptor to Wishbone
+        self.cpu_params = dict(
+            # clock, reset
+            i_clk_i      = ClockSignal(),
+            i_reset_i    = ResetSignal() | self.reset,
+            # irq
+            i_interrupts = self.interrupt,
+            i_wbm_dat_i  = wbn.dat_r,
+            o_wbm_dat_o  = wbn.dat_w,
+            i_wbm_ack_i  = wbn.ack,
+            # i_wbm_err_i = wbn.err,
+            # i_wbm_rty_i = wbn.try,
+            o_wbm_adr_o  = wbn.adr,
+            o_wbm_stb_o  = wbn.stb,
+            o_wbm_cyc_o  = wbn.cyc,
+            o_wbm_sel_o  = wbn.sel,
+            o_wbm_we_o   = wbn.we,
+            o_wbm_cti_o  = wbn.cti,
+            o_wbm_bte_o  = wbn.bte,
+        )
+
+        # add verilog sources
+        self.add_sources(platform, variant)
+
+    def set_reset_address(self, reset_address):
+        """Record the reset address; may be called only once.
+
+        NOTE(review): not sure whether the reset address needs to change for BlackParrot.
+        """
+        assert not hasattr(self, "reset_address")
+        self.reset_address = reset_address
+        print(hex(reset_address))
+
+    @staticmethod
+    def add_sources(platform, variant="standard"):
+        """Register every entry of flist_litex.verilator with `platform`.
+
+        `//` lines are comments, `+incdir+<dir>` lines add a Verilog include
+        directory and `$VAR/...` lines add a source file.  Environment
+        variables (exported by setEnvironment.sh) are expanded with
+        os.path.expandvars(); blank lines are skipped.
+
+        Raises NotImplementedError for an entry that is an absolute path.
+        """
+        print("Adding the sources")
+        filename = os.path.join(os.path.abspath(os.path.dirname(__file__)), "flist_litex.verilator")
+        print(filename)
+        with open(filename) as flist:
+            for line in flist:
+                entry = line.strip()
+                if not entry or entry.startswith("//"):
+                    continue
+                if "+incdir+" in entry:
+                    vdir = os.path.expandvars(entry.split("+incdir+", 1)[1])
+                    print("INCDIR" + vdir)
+                    platform.add_verilog_include_path(vdir)
+                elif entry.startswith("$"):
+                    vdir = os.path.expandvars(entry)
+                    print(vdir)
+                    platform.add_source(vdir)
+                elif entry.startswith("/"):
+                    raise NotImplementedError("No support for absolute path for now")
+
+    def do_finalize(self):
+        """Add the ExampleBlackParrotSystem instance; set_reset_address() must have run."""
+        assert hasattr(self, "reset_address")
+        self.specials += Instance("ExampleBlackParrotSystem", **self.cpu_params)
diff --git a/litex/soc/cores/cpu/blackparrot/flist_litex.verilator b/litex/soc/cores/cpu/blackparrot/flist_litex.verilator
new file mode 100644
index 0000000000..65e8e1c4ec
--- /dev/null
+++ b/litex/soc/cores/cpu/blackparrot/flist_litex.verilator
@@ -0,0 +1,228 @@
+//// Includes
+// bsg_ip_cores includes
++incdir+$BASEJUMP_STL_DIR/bsg_dataflow
++incdir+$BASEJUMP_STL_DIR/bsg_mem ++incdir+$BASEJUMP_STL_DIR/bsg_misc ++incdir+$BASEJUMP_STL_DIR/bsg_test ++incdir+$BASEJUMP_STL_DIR/bsg_noc +// common includes ++incdir+$BP_COMMON_DIR/src/include +// fe includes ++incdir+$BP_FE_DIR/src/include +// be includes ++incdir+$BP_BE_DIR/src/include ++incdir+$BP_BE_DIR/src/include/bp_be_dcache +// me includes ++incdir+$BP_ME_DIR/src/include/v +// top includes ++incdir+$BP_TOP_DIR/src/include +//// Packages +// bsg_ip_cores packages +$BASEJUMP_STL_DIR/bsg_noc/bsg_noc_pkg.v +$BASEJUMP_STL_DIR/bsg_noc/bsg_wormhole_router_pkg.v +// Interface packages +$BP_COMMON_DIR/src/include/bp_common_rv64_pkg.vh +$BP_COMMON_DIR/src/include/bp_common_pkg.vh +$BP_COMMON_DIR/src/include/bp_common_aviary_pkg.vh +// FE packages +$BP_FE_DIR/src/include/bp_fe_icache_pkg.vh +$BP_FE_DIR/src/include/bp_fe_pkg.vh +// BE packages +$BP_BE_DIR/src/include/bp_be_pkg.vh +$BP_BE_DIR/src/include/bp_be_dcache/bp_be_dcache_pkg.vh +// ME packages +$BP_ME_DIR/src/include/v/bp_cce_pkg.v +$BP_ME_DIR/src/include/v/bp_me_pkg.vh +// Top packages +$BP_TOP_DIR/src/include/bp_cfg_link_pkg.vh +//// bsg_ip_cores files +$BASEJUMP_STL_DIR/bsg_async/bsg_async_fifo.v +$BASEJUMP_STL_DIR/bsg_async/bsg_launch_sync_sync.v +$BASEJUMP_STL_DIR/bsg_async/bsg_async_ptr_gray.v +$BASEJUMP_STL_DIR/bsg_dataflow/bsg_channel_tunnel.v +$BASEJUMP_STL_DIR/bsg_dataflow/bsg_channel_tunnel_in.v +$BASEJUMP_STL_DIR/bsg_dataflow/bsg_channel_tunnel_out.v +$BASEJUMP_STL_DIR/bsg_dataflow/bsg_1_to_n_tagged_fifo.v +$BASEJUMP_STL_DIR/bsg_dataflow/bsg_1_to_n_tagged.v +$BASEJUMP_STL_DIR/bsg_dataflow/bsg_fifo_1r1w_large.v +$BASEJUMP_STL_DIR/bsg_dataflow/bsg_fifo_1rw_large.v +$BASEJUMP_STL_DIR/bsg_dataflow/bsg_serial_in_parallel_out.v +$BASEJUMP_STL_DIR/bsg_dataflow/bsg_one_fifo.v +$BASEJUMP_STL_DIR/bsg_dataflow/bsg_round_robin_2_to_2.v +$BASEJUMP_STL_DIR/bsg_dataflow/bsg_fifo_1r1w_pseudo_large.v +$BASEJUMP_STL_DIR/bsg_dataflow/bsg_fifo_1r1w_small.v +$BASEJUMP_STL_DIR/bsg_dataflow/bsg_fifo_tracker.v 
+$BASEJUMP_STL_DIR/bsg_dataflow/bsg_flow_counter.v +$BASEJUMP_STL_DIR/bsg_dataflow/bsg_parallel_in_serial_out_dynamic.v +$BASEJUMP_STL_DIR/bsg_dataflow/bsg_round_robin_n_to_1.v +$BASEJUMP_STL_DIR/bsg_dataflow/bsg_serial_in_parallel_out_dynamic.v +$BASEJUMP_STL_DIR/bsg_dataflow/bsg_shift_reg.v +$BASEJUMP_STL_DIR/bsg_dataflow/bsg_two_fifo.v +$BASEJUMP_STL_DIR/bsg_mem/bsg_cam_1r1w.v +$BASEJUMP_STL_DIR/bsg_mem/bsg_mem_1r1w.v +$BASEJUMP_STL_DIR/bsg_mem/bsg_mem_1r1w_sync.v +$BASEJUMP_STL_DIR/bsg_mem/bsg_mem_1r1w_sync_synth.v +$BASEJUMP_STL_DIR/bsg_mem/bsg_mem_1r1w_synth.v +$BASEJUMP_STL_DIR/bsg_mem/bsg_mem_1rw_sync.v +// $BASEJUMP_STL_DIR/bsg_mem/bsg_mem_1rw_sync_mask_write_bit.v +$BASEJUMP_STL_DIR/bsg_mem/bsg_mem_1rw_sync_mask_write_bit_synth.v +$BASEJUMP_STL_DIR/bsg_mem/bsg_mem_1rw_sync_mask_write_byte.v +$BASEJUMP_STL_DIR/bsg_mem/bsg_mem_1rw_sync_mask_write_byte_synth.v +$BASEJUMP_STL_DIR/bsg_mem/bsg_mem_1rw_sync_synth.v +$BASEJUMP_STL_DIR/bsg_mem/bsg_mem_2r1w_sync.v +$BASEJUMP_STL_DIR/bsg_mem/bsg_mem_2r1w_sync_synth.v +$BASEJUMP_STL_DIR/bsg_misc/bsg_adder_ripple_carry.v +$BASEJUMP_STL_DIR/bsg_misc/bsg_arb_fixed.v +$BASEJUMP_STL_DIR/bsg_misc/bsg_array_concentrate_static.v +$BASEJUMP_STL_DIR/bsg_misc/bsg_circular_ptr.v +$BASEJUMP_STL_DIR/bsg_misc/bsg_concentrate_static.v +$BASEJUMP_STL_DIR/bsg_misc/bsg_counter_clear_up.v +$BASEJUMP_STL_DIR/bsg_misc/bsg_counter_set_down.v +$BASEJUMP_STL_DIR/bsg_misc/bsg_counter_up_down.v +$BASEJUMP_STL_DIR/bsg_misc/bsg_counter_up_down_variable.v +$BASEJUMP_STL_DIR/bsg_misc/bsg_crossbar_o_by_i.v +$BASEJUMP_STL_DIR/bsg_misc/bsg_cycle_counter.v +$BASEJUMP_STL_DIR/bsg_misc/bsg_decode.v +$BASEJUMP_STL_DIR/bsg_misc/bsg_decode_with_v.v +$BASEJUMP_STL_DIR/bsg_misc/bsg_dff.v +$BASEJUMP_STL_DIR/bsg_misc/bsg_dff_en_bypass.v +$BASEJUMP_STL_DIR/bsg_misc/bsg_dff_chain.v +$BASEJUMP_STL_DIR/bsg_misc/bsg_dff_en.v +$BASEJUMP_STL_DIR/bsg_misc/bsg_dff_reset.v +$BASEJUMP_STL_DIR/bsg_misc/bsg_dff_reset_en.v +$BASEJUMP_STL_DIR/bsg_misc/bsg_encode_one_hot.v 
+$BASEJUMP_STL_DIR/bsg_misc/bsg_lfsr.v +$BASEJUMP_STL_DIR/bsg_misc/bsg_lru_pseudo_tree_decode.v +$BASEJUMP_STL_DIR/bsg_misc/bsg_lru_pseudo_tree_encode.v +$BASEJUMP_STL_DIR/bsg_misc/bsg_mux.v +$BASEJUMP_STL_DIR/bsg_misc/bsg_mux_butterfly.v +$BASEJUMP_STL_DIR/bsg_misc/bsg_mux_one_hot.v +$BASEJUMP_STL_DIR/bsg_misc/bsg_mux_segmented.v +$BASEJUMP_STL_DIR/bsg_misc/bsg_priority_encode.v +$BASEJUMP_STL_DIR/bsg_misc/bsg_priority_encode_one_hot_out.v +$BASEJUMP_STL_DIR/bsg_misc/bsg_round_robin_arb.v +$BASEJUMP_STL_DIR/bsg_misc/bsg_scan.v +$BASEJUMP_STL_DIR/bsg_misc/bsg_swap.v +$BASEJUMP_STL_DIR/bsg_misc/bsg_thermometer_count.v +$BASEJUMP_STL_DIR/bsg_misc/bsg_transpose.v +$BASEJUMP_STL_DIR/bsg_misc/bsg_unconcentrate_static.v +$BASEJUMP_STL_DIR/bsg_noc/bsg_mesh_router.v +$BASEJUMP_STL_DIR/bsg_noc/bsg_mesh_router_buffered.v +$BASEJUMP_STL_DIR/bsg_noc/bsg_noc_repeater_node.v +$BASEJUMP_STL_DIR/bsg_noc/bsg_wormhole_concentrator.v +$BASEJUMP_STL_DIR/bsg_noc/bsg_wormhole_concentrator_in.v +$BASEJUMP_STL_DIR/bsg_noc/bsg_wormhole_concentrator_out.v +$BASEJUMP_STL_DIR/bsg_noc/bsg_wormhole_router.v +$BASEJUMP_STL_DIR/bsg_noc/bsg_wormhole_router_adapter.v +$BASEJUMP_STL_DIR/bsg_noc/bsg_wormhole_router_adapter_in.v +$BASEJUMP_STL_DIR/bsg_noc/bsg_wormhole_router_adapter_out.v +$BASEJUMP_STL_DIR/bsg_noc/bsg_wormhole_router_decoder_dor.v +$BASEJUMP_STL_DIR/bsg_noc/bsg_wormhole_router_input_control.v +$BASEJUMP_STL_DIR/bsg_noc/bsg_wormhole_router_output_control.v +// Common files +$BP_COMMON_DIR/src/v/bsg_fifo_1r1w_fence.v +$BP_COMMON_DIR/src/v/bsg_fifo_1r1w_rolly.v +$BP_COMMON_DIR/src/v/bp_tlb.v +$BP_COMMON_DIR/src/v/bp_tlb_replacement.v +// BE files +$BP_BE_DIR/src/v/bp_be_top.v +// Calculator +$BP_BE_DIR/src/v/bp_be_calculator/bp_be_bypass.v +$BP_BE_DIR/src/v/bp_be_calculator/bp_be_calculator_top.v +$BP_BE_DIR/src/v/bp_be_calculator/bp_be_instr_decoder.v +$BP_BE_DIR/src/v/bp_be_calculator/bp_be_int_alu.v +$BP_BE_DIR/src/v/bp_be_calculator/bp_be_pipe_fp.v 
+$BP_BE_DIR/src/v/bp_be_calculator/bp_be_pipe_int.v +$BP_BE_DIR/src/v/bp_be_calculator/bp_be_pipe_mem.v +$BP_BE_DIR/src/v/bp_be_calculator/bp_be_pipe_mul.v +$BP_BE_DIR/src/v/bp_be_calculator/bp_be_regfile.v +// Checker +$BP_BE_DIR/src/v/bp_be_checker/bp_be_checker_top.v +$BP_BE_DIR/src/v/bp_be_checker/bp_be_detector.v +$BP_BE_DIR/src/v/bp_be_checker/bp_be_director.v +$BP_BE_DIR/src/v/bp_be_checker/bp_be_scheduler.v +// MMU +$BP_BE_DIR/src/v/bp_be_mem/bp_be_ptw.v +$BP_BE_DIR/src/v/bp_be_mem/bp_be_csr.v +$BP_BE_DIR/src/v/bp_be_mem/bp_be_dcache/bp_be_dcache.v +$BP_BE_DIR/src/v/bp_be_mem/bp_be_dcache/bp_be_dcache_lce_cmd.v +$BP_BE_DIR/src/v/bp_be_mem/bp_be_dcache/bp_be_dcache_lce.v +$BP_BE_DIR/src/v/bp_be_mem/bp_be_dcache/bp_be_dcache_lce_req.v +$BP_BE_DIR/src/v/bp_be_mem/bp_be_dcache/bp_be_dcache_wbuf.v +$BP_BE_DIR/src/v/bp_be_mem/bp_be_dcache/bp_be_dcache_wbuf_queue.v +$BP_BE_DIR/src/v/bp_be_mem/bp_be_mem_top.v +//// FE files +$BP_FE_DIR/src/v/bp_fe_bht.v +$BP_FE_DIR/src/v/bp_fe_btb.v +$BP_FE_DIR/src/v/bp_fe_lce_cmd.v +$BP_FE_DIR/src/v/bp_fe_icache.v +$BP_FE_DIR/src/v/bp_fe_instr_scan.v +$BP_FE_DIR/src/v/bp_fe_lce.v +$BP_FE_DIR/src/v/bp_fe_lce_req.v +$BP_FE_DIR/src/v/bp_fe_mem.v +$BP_FE_DIR/src/v/bp_fe_pc_gen.v +$BP_FE_DIR/src/v/bp_fe_top.v +//// ME files +// CCE +$BP_ME_DIR/src/v/cce/bp_cce.v +$BP_ME_DIR/src/v/cce/bp_cce_alu.v +$BP_ME_DIR/src/v/cce/bp_cce_dir.v +$BP_ME_DIR/src/v/cce/bp_cce_dir_tag_checker.v +$BP_ME_DIR/src/v/cce/bp_cce_dir_lru_extract.v +$BP_ME_DIR/src/v/cce/bp_cce_gad.v +$BP_ME_DIR/src/v/cce/bp_cce_inst_decode.v +$BP_ME_DIR/src/v/cce/bp_cce_msg.v +$BP_ME_DIR/src/v/cce/bp_cce_msg_cached.v +$BP_ME_DIR/src/v/cce/bp_cce_msg_uncached.v +$BP_ME_DIR/src/v/cce/bp_cce_pc.v +$BP_ME_DIR/src/v/cce/bp_cce_pending.v +$BP_ME_DIR/src/v/cce/bp_cce_reg.v +$BP_ME_DIR/src/v/cce/bp_cce_top.v +// Network +$BP_ME_DIR/src/v/wormhole/bp_me_cce_id_to_cord.v +$BP_ME_DIR/src/v/wormhole/bp_me_cce_to_wormhole_link_client.v 
+$BP_ME_DIR/src/v/wormhole/bp_me_cce_to_wormhole_link_master.v +$BP_ME_DIR/src/v/wormhole/bp_me_lce_id_to_cord.v +$BP_ME_DIR/src/v/wormhole/bp_me_wormhole_packet_encode_lce_cmd.v +$BP_ME_DIR/src/v/wormhole/bp_me_wormhole_packet_encode_lce_req.v +$BP_ME_DIR/src/v/wormhole/bp_me_wormhole_packet_encode_lce_resp.v +$BP_ME_DIR/src/v/wormhole/bp_me_wormhole_packet_encode_mem_cmd.v +$BP_ME_DIR/src/v/wormhole/bp_me_wormhole_packet_encode_mem_resp.v +//// TOP +$BP_TOP_DIR/src/v/bp_chip.v +$BP_TOP_DIR/src/v/bp_core.v +$BP_TOP_DIR/src/v/bp_core_complex.v +$BP_TOP_DIR/src/v/bp_mem_complex.v +$BP_TOP_DIR/src/v/bp_mmio_enclave.v +$BP_TOP_DIR/src/v/bp_mmio_node.v +$BP_TOP_DIR/src/v/bp_tile.v +$BP_TOP_DIR/src/v/bp_tile_node.v +//// Common +$BP_COMMON_DIR/src/v/bp_addr_map.v + +// bsg_ip_cores files +$BASEJUMP_STL_DIR/bsg_fsb/bsg_fsb_node_trace_replay.v +// be files +$BP_BE_DIR/test/common/bp_be_nonsynth_tracer.v +// $BP_BE_DIR/test/common/bp_be_nonsynth_perf.v +// me files +// $BP_ME_DIR/test/common/bp_mem.v +// $BP_ME_DIR/test/common/bp_mem_delay_model.v +// $BP_ME_DIR/test/common/bp_mem_transducer.v +// $BP_ME_DIR/test/common/bp_mem_storage_sync.v +// $BP_ME_DIR/test/common/dramsim2_wrapper.cpp +$BP_ME_DIR/test/common/bp_cce_mmio_cfg_loader.v +// $BP_ME_DIR/test/common/bp_mem_nonsynth_tracer.v +// $BP_ME_DIR/test/common/bp_cce_nonsynth_tracer.v +// $BP_ME_DIR/test/common/bp_mem_utils.cpp +// top files +$BP_TOP_DIR/test/common/bp_nonsynth_host.v +// $BP_TOP_DIR/test/common/bp_nonsynth_if_verif.v +$BP_TOP_DIR/test/common/bp_nonsynth_commit_tracer.v +// /home/scanakci/Research_sado/litex/litex/litex/soc/cores/cpu/blackparrot/pre-alpha-release/bp_top/syn/results/verilator/bp_top_trace_demo.e_bp_single_core_cfg.build/wrapper.v +// /home/scanakci/Research_sado/litex/litex/litex/soc/cores/cpu/blackparrot/pre-alpha-release/bp_top/syn/results/verilator/bp_top_trace_demo.e_bp_single_core_cfg.build/test_bp.cpp +$BP_FPGA_DIR/bp2wb_convertor.v +$BP_FPGA_DIR/ExampleBlackParrotSystem.v 
+$BP_FPGA_DIR/bsg_mem_1rw_sync_mask_write_bit.v +// Recent +$BASEJUMP_STL_DIR/bsg_noc/bsg_mesh_stitch.v diff --git a/litex/soc/cores/cpu/blackparrot/pre-alpha-release b/litex/soc/cores/cpu/blackparrot/pre-alpha-release new file mode 160000 index 0000000000..8aa6b62593 --- /dev/null +++ b/litex/soc/cores/cpu/blackparrot/pre-alpha-release @@ -0,0 +1 @@ +Subproject commit 8aa6b6259308105872e19675c1cd5aee22283913 diff --git a/litex/soc/cores/cpu/blackparrot/setEnvironment.sh b/litex/soc/cores/cpu/blackparrot/setEnvironment.sh new file mode 100755 index 0000000000..d818ec52b9 --- /dev/null +++ b/litex/soc/cores/cpu/blackparrot/setEnvironment.sh @@ -0,0 +1,97 @@ +#!/bin/bash +## Set common environment variables. NOTE(review): the relative paths and the exports below presumably require this file to be sourced from the blackparrot directory -- confirm. +export LITEX=$(git rev-parse --show-toplevel) +export BP=$PWD +cp bp_software/cce_ucode.mem /tmp/. +cd pre-alpha-release +TOP=$(git rev-parse --show-toplevel) +export BP_COMMON_DIR=$TOP/bp_common +export BP_FE_DIR=$TOP/bp_fe +export BP_BE_DIR=$TOP/bp_be +export BP_ME_DIR=$TOP/bp_me +export BP_TOP_DIR=$TOP/bp_top +export BP_EXTERNAL_DIR=$TOP/external +export BASEJUMP_STL_DIR=$BP_EXTERNAL_DIR/basejump_stl +export BP_FPGA_DIR=$TOP/bp_fpga +## Set up CAD tools + +# If the machine you are working on is bsg_cadenv compliant, then you do not +# need to set up the CAD tools; simply put bsg_cadenv in the same root dir.
+#BSG_CADENV_DIR=$(TOP)/external/bsg_cadenv +#-include $(BSG_CADENV_DIR)/cadenv.mk + +## Specify license path if needed +#LM_LICENSE_FILE ?= + +## Override tool paths if needed (the commented-out lines below are Makefile syntax, not shell) +#GCC ?= gcc +#VCS_HOME ?= +#VCS ?= vcs +#URG ?= urg +#VERILATOR ?= verilator +#DC_SHELL ?= dc_shell +#DVE ?= dve +#PYTHON ?= python + +## Needed for verilator g++ compilations +export SYSTEMC_INCLUDE=$BP_EXTERNAL_DIR/include +export SYSTEMC_LIBDIR=$BP_EXTERNAL_DIR/lib-linux64 + +## Add external tools and libraries to environment +export LD_LIBRARY_PATH=$SYSTEMC_LIBDIR:$LD_LIBRARY_PATH +#export PATH=$(BP_EXTERNAL_DIR)/bin:$(PATH) +#export SYN_PATH=$(BP_TOP_DIR)/syn +#export TB_PATH=$(BP_TOP_DIR)/test/tb +#export MEM_PATH=$(BP_COMMON_DIR)/test/mem + +#export LOG_PATH=$(BP_TOP_DIR)/syn/logs +#export RESULTS_PATH=$(BP_TOP_DIR)/syn/results +#export REPORT_PATH=$(BP_TOP_DIR)/syn/reports + +TB="bp_top_trace_demo" +CFG="e_bp_single_core_cfg" +START_PC=0x80000000 +TOLERANCE=2 + +# Select CCE ROM based on CFG and Coherence Protocol +# TODO: is there a more scalable way to do this?
+if [ "$CFG" = "e_bp_half_core_cfg" ] +then + NUM_LCE_P=1 + N_WG=64 +elif [ "$CFG" = "e_bp_single_core_cfg" ] +then + NUM_LCE_P=2 + N_WG=64 + #echo "Single Core config" +#elif ($CFG -eq e_bp_dual_core_cfg) +# NUM_LCE_P=4 +# N_WG=32 +#elif ($CFG -eq e_bp_quad_core_cfg) +# NUM_LCE_P=8 +# N_WG=16 +#elif ($CFG -eq e_bp_oct_core_cfg) +# NUM_LCE_P=16 +# N_WG=8 +#elif ($(CFG), e_bp_sexta_core_cfg) +# NUM_LCE_P=32 +# N_WG=4 +#elif ($(CFG), e_bp_quad_core_2d_cfg) +# NUM_LCE_P=8 +# N_WG=16 +#elif ($(CFG), e_bp_oct_core_2d_cfg) +# NUM_LCE_P=16 +# N_WG=8 +fi + +COH_PROTO="mesi" +CCE_MEM_PATH=$BP_ME_DIR/src/asm/roms/$COH_PROTO +CCE_MEM=bp_cce_inst_rom_${COH_PROTO}_lce${NUM_LCE_P}_wg${N_WG}_assoc8.mem # braces required: '_' is a valid variable-name character +#DRAMSIM_CH_CFG=DDR2_micron_16M_8b_x8_sg3E.ini +#DRAMSIM_SYS_CFG=system.ini +#$include $BP_COMMON_DIR/syn/Makefile.verilator +#iinclude $(BP_COMMON_DIR)/syn/Makefile.common +#include $(BP_COMMON_DIR)/syn/Makefile.dc +#include $(BP_COMMON_DIR)/syn/Makefile.regress +#include $(BP_COMMON_DIR)/syn/Makefile.vcs +cd ../ diff --git a/litex/soc/cores/cpu/blackparrot/update_BP.sh b/litex/soc/cores/cpu/blackparrot/update_BP.sh new file mode 100755 index 0000000000..c6ddde977c --- /dev/null +++ b/litex/soc/cores/cpu/blackparrot/update_BP.sh @@ -0,0 +1,17 @@ +#!/bin/bash + + +##SOFTWARE CHANGES## + +#For some reason, the provided udivmoddi4.c is not functionally correct when used with either BP or Rocket under the IA extension; this alternative udivmoddi4.c is a workaround to run the BIOS on these architectures. +cp bp_software/udivmoddi4.c $LITEX/litex/soc/software/compiler_rt/lib/builtins/. +cp bp_software/cce_ucode.mem /tmp/. + +##HARDWARE CHANGES## +#Need to change some files because of memory map differences and proper synthesis +cp bp_hardware/bp_common_pkg.vh $BP_COMMON_DIR/src/include/. +cp bp_hardware/bp_cce_mmio_cfg_loader.v $BP_ME_DIR/test/common/. +cp bp_hardware/bp_nonsynth_host.v $BP_TOP_DIR/test/common/.
+ +# Necessary files for FPGA Implementations (the Verilator file list reads them from $BP_FPGA_DIR) +mkdir -p "$BP_FPGA_DIR" && cp -r bp_fpga/. "$BP_FPGA_DIR"/ diff --git a/litex/soc/software/bios/Makefile b/litex/soc/software/bios/Makefile index a454a17ea8..7c34c9872c 100755 --- a/litex/soc/software/bios/Makefile +++ b/litex/soc/software/bios/Makefile @@ -1,6 +1,10 @@ include ../include/generated/variables.mak include $(SOC_DIRECTORY)/software/common.mak +ifeq ($(CPU),blackparrot) +BP_LIBS = -L$(BP_EXTERNAL_DIR)/lib/gcc/riscv64-unknown-elf/8.3.0 +BP_FLAGS = -lgcc +endif # Permit TFTP_SERVER_PORT override from shell environment / command line ifdef TFTP_SERVER_PORT CFLAGS += -DTFTP_SERVER_PORT=$(TFTP_SERVER_PORT) @@ -23,6 +27,7 @@ endif bios.elf: $(BIOS_DIRECTORY)/linker.ld $(OBJECTS) + %.elf: ../libbase/crt0-$(CPU)-ctr.o ../libnet/libnet.a ../libbase/libbase-nofloat.a ../libcompiler_rt/libcompiler_rt.a $(LD) $(LDFLAGS) -T $(BIOS_DIRECTORY)/linker.ld -N -o $@ \ ../libbase/crt0-$(CPU)-ctr.o \ @@ -30,7 +35,10 @@ bios.elf: $(BIOS_DIRECTORY)/linker.ld $(OBJECTS) -L../libnet \ -L../libbase \ -L../libcompiler_rt \ - -lnet -lbase-nofloat -lcompiler_rt + $(BP_LIBS) \ + -lnet -lbase-nofloat -lcompiler_rt \ + $(BP_FLAGS) + ifneq ($(OS),Windows_NT) chmod -x $@ endif diff --git a/litex/soc/software/bios/boot-helper-blackparrot.S b/litex/soc/software/bios/boot-helper-blackparrot.S new file mode 100644 index 0000000000..6dd74aaeb9 --- /dev/null +++ b/litex/soc/software/bios/boot-helper-blackparrot.S @@ -0,0 +1,4 @@ +.section .text, "ax", @progbits +.global boot_helper +boot_helper: + jr x13 diff --git a/litex/soc/software/bios/isr.c b/litex/soc/software/bios/isr.c index 93c231bb1d..971bd37b6c 100644 --- a/litex/soc/software/bios/isr.c +++ b/litex/soc/software/bios/isr.c @@ -8,7 +8,20 @@ #include #include -#ifdef __rocket__ + +#if defined(__blackparrot__) /* TODO: Update this function for BP; for now it only reports the first trap */ + +void isr(void); +void isr(void) +{ + static int onetime = 0; + if ( onetime == 0){ + printf("ISR blackparrot\n"); + printf("TRAP!!\n"); + onetime++; + } +} +#elif
defined(__rocket__) void plic_init(void); void plic_init(void) { diff --git a/litex/soc/software/bios/main.c b/litex/soc/software/bios/main.c index 621d377aca..9aa9ff912d 100644 --- a/litex/soc/software/bios/main.c +++ b/litex/soc/software/bios/main.c @@ -459,7 +459,6 @@ static void do_command(char *c) #endif else if(strcmp(token, "memtest") == 0) memtest(); #endif - else if(strcmp(token, "") != 0) printf("Command not found\n"); } @@ -589,6 +588,8 @@ int main(int i, char **c) printf("Minerva"); #elif __rocket__ printf("RocketRV64[imac]"); +#elif __blackparrot__ + printf("BlackParrotRV64[ia]"); #else printf("Unknown"); #endif @@ -603,9 +604,10 @@ int main(int i, char **c) #endif printf("\n"); - sdr_ok = 1; + sdr_ok = 1; + #if defined(CSR_ETHMAC_BASE) || defined(CSR_SDRAM_BASE) - printf("--========== \e[1mInitialization\e[0m ============--\n"); + printf("--========== \e[1mInitialization\e[0m ============--\n"); #ifdef CSR_ETHMAC_BASE eth_init(); #endif @@ -628,7 +630,7 @@ int main(int i, char **c) } printf("--============= \e[1mConsole\e[0m ================--\n"); - while(1) { + while(1) { putsnonl("\e[92;1mlitex\e[0m> "); readstr(buffer, 64); do_command(buffer); diff --git a/litex/soc/software/bios/sdram.c b/litex/soc/software/bios/sdram.c index f7d8458c8e..01f66e6b06 100644 --- a/litex/soc/software/bios/sdram.c +++ b/litex/soc/software/bios/sdram.c @@ -46,6 +46,8 @@ __attribute__((unused)) static void cdelay(int i) __asm__ volatile("nop"); #elif defined (__microwatt__) __asm__ volatile("nop"); +#elif defined (__blackparrot__) + __asm__ volatile("nop"); #else #error Unsupported architecture #endif diff --git a/litex/soc/software/include/base/irq.h b/litex/soc/software/include/base/irq.h index babc5424f7..7ff9b40338 100644 --- a/litex/soc/software/include/base/irq.h +++ b/litex/soc/software/include/base/irq.h @@ -40,6 +40,21 @@ extern void _irq_setmask(unsigned int); #define PLIC_CLAIM 0x0c200004L // Claim & completion register address #endif /* __rocket__ */ + 
+#ifdef __blackparrot__ +// NOTE(review): these addresses appear to be copied from the Rocket port, which uses a +// Platform-Level Interrupt Controller (PLIC) programmed and queried via MMIO registers. +// TODO: confirm what BlackParrot provides; probably needed for the Linux version + +#define PLIC_BASE 0x0c000000L // Base address and per-pin priority array +#define PLIC_PENDING 0x0c001000L // Bit field matching currently pending pins +#define PLIC_ENABLED 0x0c002000L // Bit field corresponding to the current mask +#define PLIC_THRSHLD 0x0c200000L // Per-pin priority must be >= this to trigger +#define PLIC_CLAIM 0x0c200004L // Claim & completion register address +#endif /* __blackparrot__ */ + + + static inline unsigned int irq_getie(void) { #if defined (__lm32__) @@ -58,6 +73,8 @@ static inline unsigned int irq_getie(void) return (csrr(mstatus) & CSR_MSTATUS_MIE) != 0; #elif defined (__microwatt__) return 0; // FIXME +#elif defined (__blackparrot__) + return (csrr(mstatus) & CSR_MSTATUS_MIE) != 0;//TODO #else #error Unsupported architecture #endif @@ -85,6 +102,8 @@ static inline void irq_setie(unsigned int ie) if(ie) csrs(mstatus,CSR_MSTATUS_MIE); else csrc(mstatus,CSR_MSTATUS_MIE); #elif defined (__microwatt__) // FIXME +#elif defined (__blackparrot__) + if(ie) csrs(mstatus,CSR_MSTATUS_MIE); else csrc(mstatus,CSR_MSTATUS_MIE);//TODO:BP #else #error Unsupported architecture #endif @@ -114,6 +133,8 @@ static inline unsigned int irq_getmask(void) return *((unsigned int *)PLIC_ENABLED) >> 1; #elif defined (__microwatt__) return 0; // FIXME +#elif defined (__blackparrot__) + return 0; // TODO:BP - no mask support yet; return a defined value instead of falling off a non-void function #else #error Unsupported architecture #endif @@ -137,6 +158,8 @@ static inline void irq_setmask(unsigned int mask) *((unsigned int *)PLIC_ENABLED) = mask << 1; #elif defined (__microwatt__) // FIXME +#elif defined (__blackparrot__) + //TODO:BP #else #error Unsupported architecture #endif @@ -164,6 +187,8 @@ static inline unsigned int irq_pending(void) return *((unsigned int *)PLIC_PENDING) >> 1; #elif defined (__microwatt__) return
0; // FIXME +#elif defined (__blackparrot__) + return csr_readl(PLIC_PENDING) >> 1;//TODO:BP #else #error Unsupported architecture #endif diff --git a/litex/soc/software/include/base/system.h b/litex/soc/software/include/base/system.h index 9b41a737ec..18753548e1 100644 --- a/litex/soc/software/include/base/system.h +++ b/litex/soc/software/include/base/system.h @@ -26,8 +26,7 @@ static inline void mtspr(unsigned long add, unsigned long val) } #endif - -#if defined(__vexriscv__) || defined(__minerva__) || defined(__rocket__) +#if defined(__vexriscv__) || defined(__minerva__) || defined(__rocket__) || defined(__blackparrot__) #include #define csrr(reg) ({ unsigned long __tmp; \ asm volatile ("csrr %0, " #reg : "=r"(__tmp)); \ diff --git a/litex/soc/software/libbase/crt0-blackparrot.S b/litex/soc/software/libbase/crt0-blackparrot.S new file mode 100644 index 0000000000..9badaa48ab --- /dev/null +++ b/litex/soc/software/libbase/crt0-blackparrot.S @@ -0,0 +1,77 @@ +.global main +.global isr +.global _start + +_start: // entry point: jump over trap_entry to crt_init + j crt_init + nop + nop + nop + nop + nop + nop + nop + +trap_entry: // save ra, t0-t2, a0-a7, t3-t6 below sp, call isr, restore them and mret + sd x1, - 1*8(sp) + sd x5, - 2*8(sp) + sd x6, - 3*8(sp) + sd x7, - 4*8(sp) + sd x10, - 5*8(sp) + sd x11, - 6*8(sp) + sd x12, - 7*8(sp) + sd x13, - 8*8(sp) + sd x14, - 9*8(sp) + sd x15, -10*8(sp) + sd x16, -11*8(sp) + sd x17, -12*8(sp) + sd x28, -13*8(sp) + sd x29, -14*8(sp) + sd x30, -15*8(sp) + sd x31, -16*8(sp) + addi sp,sp,-16*8 + call isr + ld x1 , 15*8(sp) + ld x5, 14*8(sp) + ld x6, 13*8(sp) + ld x7, 12*8(sp) + ld x10, 11*8(sp) + ld x11, 10*8(sp) + ld x12, 9*8(sp) + ld x13, 8*8(sp) + ld x14, 7*8(sp) + ld x15, 6*8(sp) + ld x16, 5*8(sp) + ld x17, 4*8(sp) + ld x28, 3*8(sp) + ld x29, 2*8(sp) + ld x30, 1*8(sp) + ld x31, 0*8(sp) + addi sp,sp,16*8 + mret + .text + + +crt_init: // set the stack pointer and install trap_entry in mtvec + la sp, _fstack + 8 + la a0, trap_entry + csrw mtvec, a0 + +bss_init: // zero [_fbss, _ebss) one doubleword at a time + la a0, _fbss + la a1, _ebss +bss_loop: // NOTE(review): terminates only if _ebss - _fbss is a multiple of 8 -- confirm in the linker script + beq a0,a1,bss_done + sd zero,0(a0) + add a0,a0,8 + j bss_loop +bss_done: + +// call plic_init //
initialize external interrupt controller +// li a0, 0x800 // external interrupt sources only (using LiteX timer); + // NOTE: must still enable mstatus.MIE! + csrw mie,zero // keep every interrupt source masked: a0 holds the _ebss pointer here, not an interrupt mask + + call main +inf_loop: + j inf_loop diff --git a/litex/soc/software/libbase/system.c b/litex/soc/software/libbase/system.c index 6e7bfafdfd..83ecd40866 100644 --- a/litex/soc/software/libbase/system.c +++ b/litex/soc/software/libbase/system.c @@ -58,6 +58,9 @@ void flush_cpu_icache(void) asm volatile("nop"); #elif defined (__microwatt__) /* FIXME: do something useful here! */ + asm volatile("nop"); +#elif defined (__blackparrot__) + /* TODO: BP do something useful here! */ asm volatile("nop"); #else #error Unsupported architecture @@ -107,6 +110,10 @@ void flush_cpu_dcache(void) #elif defined (__microwatt__) /* FIXME: do something useful here! */ asm volatile("nop"); +/*SC_add: What BB does here?*/ +#elif defined (__blackparrot__) + /* FIXME: do something useful here! */ + asm volatile("nop"); #else #error Unsupported architecture #endif