From 509076c09ab067831c38c1cf933fa56055467a6e Mon Sep 17 00:00:00 2001 From: sadullah Date: Wed, 15 Jan 2020 18:36:37 -0500 Subject: [PATCH] code cleaning --- litex/soc/cores/cpu/blackparrot/README.md | 5 +- .../bp_fpga/.bp2wb_convertor.v.swp | Bin 20480 -> 0 bytes .../bp_fpga/ExampleBlackParrotSystem.v | 6 +- .../cpu/blackparrot/bp_fpga/bp2wb_convertor.v | 2 +- .../blackparrot/bp_fpga/bp2wb_convertor_v2.v | 156 -------- .../bp_fpga/bsg_mem_1rw_sync_mask_write_bit.v | 55 +++ .../cores/cpu/blackparrot/bp_fpga/mem.init | 357 ----------------- .../bp_hardware/bp_cce_mmio_cfg_loader.v | 231 +++++++++++ .../blackparrot/bp_hardware/bp_common_pkg.vh | 55 +++ .../bp_hardware/bp_nonsynth_host.v | 190 ++++++++++ .../cpu/blackparrot/bp_software/cce_ucode.mem | 96 +++++ .../cpu/blackparrot/bp_software/udivmoddi4.c | 358 ++++++++++++++++++ litex/soc/cores/cpu/blackparrot/core.py | 14 +- .../cpu/blackparrot/flist_litex.verilator | 23 +- .../cores/cpu/blackparrot/setEnvironment.sh | 8 +- litex/soc/cores/cpu/blackparrot/update_BP.sh | 17 + litex/soc/software/bios/Makefile | 10 +- litex/soc/software/bios/isr.c | 18 +- litex/soc/software/bios/main.c | 15 +- litex/soc/software/libbase/crt0-blackparrot.S | 4 +- 20 files changed, 1064 insertions(+), 556 deletions(-) delete mode 100644 litex/soc/cores/cpu/blackparrot/bp_fpga/.bp2wb_convertor.v.swp delete mode 100644 litex/soc/cores/cpu/blackparrot/bp_fpga/bp2wb_convertor_v2.v create mode 100644 litex/soc/cores/cpu/blackparrot/bp_fpga/bsg_mem_1rw_sync_mask_write_bit.v delete mode 100644 litex/soc/cores/cpu/blackparrot/bp_fpga/mem.init create mode 100644 litex/soc/cores/cpu/blackparrot/bp_hardware/bp_cce_mmio_cfg_loader.v create mode 100644 litex/soc/cores/cpu/blackparrot/bp_hardware/bp_common_pkg.vh create mode 100644 litex/soc/cores/cpu/blackparrot/bp_hardware/bp_nonsynth_host.v create mode 100644 litex/soc/cores/cpu/blackparrot/bp_software/cce_ucode.mem create mode 100644 litex/soc/cores/cpu/blackparrot/bp_software/udivmoddi4.c create 
mode 100755 litex/soc/cores/cpu/blackparrot/update_BP.sh diff --git a/litex/soc/cores/cpu/blackparrot/README.md b/litex/soc/cores/cpu/blackparrot/README.md index e08e4b649c..d320d4b457 100644 --- a/litex/soc/cores/cpu/blackparrot/README.md +++ b/litex/soc/cores/cpu/blackparrot/README.md @@ -3,10 +3,9 @@ git submodule update --init --recursive (for blackparrot pre-alpha repo) cd pre_alpha_release follow getting_started to install blackparrot cd .. -source ./setEnvironment.sh #required before running build_dut.sh +source ./setEnvironment.sh #should be sourced each time you open a terminal or just add this line to bashrc Add $BP_TOP/external/bin to $PATH for verilator and riscv-gnu tools - - +./update_BP.sh #to modify some of the files in Blackparrot repo (one-time process) Currently, we could simulate the LITEX-BIOS on BP processor. [![asciicast](https://asciinema.org/a/286568.svg)](https://asciinema.org/a/286568) diff --git a/litex/soc/cores/cpu/blackparrot/bp_fpga/.bp2wb_convertor.v.swp b/litex/soc/cores/cpu/blackparrot/bp_fpga/.bp2wb_convertor.v.swp deleted file mode 100644 index 6178e7bb44cc56acdc17c17d0e89525fcf571b63..0000000000000000000000000000000000000000 GIT binary patch literal 0 HcmV?d00001 literal 20480 zcmeHNTZ|k>6>V@H)qIEd$X&ZSzBgV+CAMhGo9_3 zZn}GRGQ`Gw1R+8IDJUQz2tpvCeDVWC2*d|L0ttzS1X7TQ5pDUx^?PSRoAU5O`cs^rVr<4HC*>;+Phx-(dPbJKB4{MeVP_pX5HMf z9DcRiHEO=kq#5-!5ehS~szzwqp6)u4c&z7t=vlhu1tQcfzoD00)7tXQAn+pH55$D& z`c-ox5Uwyop_hE4>{m?Or<*0i^6J|nh`b=Ty_?DEonqi7WZ)icW_oIpE57gjd+CAu z*Ka}&RFxG2iUGxdVn8vV7*Gr-1{4D~F9VUkN4t#bzdfPgRC0e?-~E>4`JUvy-1mM@ z@_a10pYMC0JgS>wKrx^gPz)#r6a$I@#eiZ!F`yVw3@8Q^1OEpGOk2}l$6|ky7x38s zPuKr%-m7V^0^bF`4Ll2c9&mwEz%=k);MI3(+6#aO)PY058we8o6nFyo2ypdoP5T~j z2{;X0y-U-+2`m7|fct=#?$oru0KWyk1yq4aKnJeffii$E0#5?lzzN{pz-#Z&v|E8c z-mYoi2Oa}X0S^LyzfIHr2D}2i3_J~d1$Y8j0qzCzJ zXe@2!X`@h_H&!;Yv~m3Was0>;-iDDG38F?la$KUQir;|~I`s-6+nLi4BocN7$x+pe zh{~Qj;d;$bX03~g8982^yfReb72dEYt1-ig5CxCi_N^cQ4 
z$RbOY`h*Q#MQG^gTb?Tv7uP6sDs|HhW7(3RBs;+_7EkO>&1S2iX>I+?BF*HcC-ZPX zB3-9N9N=hCvq-1z>lxm`8SOcjUnOsA?uYdI^Oht`WqvoK9ix(+_<0^&~1@uvx92=wk`)My1 zkK-&NGF-2n@Mt59Dqe)#hv5osqu3VY^^NVe#qtrAvRj5j`)S*7ox>dgenizeko7njMNnjJ ziM<^|((EuMLn#R`W|Fv2hN~BckG85xXc*q49%MF3Oh%%R3;~qiciAH!>YN(*-Ij73e@VH z6IM%J9bE$*V4lw$t;#b;Ff;HPl`6a|Ia?%K3!PwPVI>bT(5&s+Rz)CW2{M^IIuI4E z6&oG90Q28;0zn(TY1@I(bnK{Vpk?xt=*(%8$X9MQzIh!P%MwP(_26MWa@^8zgDk~A zyu(Z&7oE0AR(efVb_8h~JTZRiazn`AI29McX4JxpfhI{Uvdl0tPy#)S?!q9=A_`6^W%QXYk(}x{~Q_l1dm8Ag=vQ%%>3>S9$ zS>hXf%(Dz1ZU=oG@8hAvZ(vl|^$H?%GmWhpEx!l?nV+)cHKOb4ZwkYM_7QW-f$Ijc z3-?B}V_4|RzR~19U`W4flrX&76J058oD#b#>L4!@PVA-K0Rkexyn#OCl;P*_ZSI6P z%@+tO!lV%(G3jU+qpdCGj5g*L){K=*J(rsv+ay2mN@mGz(d=Y?2Hm-C2|74CQF74F zaB2|81+A!o4%Bp@ywt)3SqaQ~13u$KEpoj|=HRS;XpC0p*5;NMi)%|CU({gfXsyY< z$*tdMpN)evvTGu3@Irq}8H{~v*WBDaW9^ve+5=AL*g`t$Xco3RU@{|Uuxd2dYfEBk z>5CRI!;Dp~;4=U@_CY^Vq_zBwgLFaiEliW28oa8)H!`0) z=hu*NYQ7gl{7Hn*5Pz$Z&yVxhN++4g8Jh0ZYF^ziw;eNR?Uo}Iu6cID6)36a3_JK5 z*8y5sEF523S}iWEJd&s4$)y4MU|wOHWsZh3KeW$ir8uNSDe!n5W4QjyLr z6;95tJc8M8yk9rY$Lr(c8n6HVina1Jtf6`RpYGrP8*BTQfj<$+CLCV>i+%KRreM|bSVyu+_?5WbNU{KRvdY{ zvdKDXqALZAr4{GUS?>c9hH}}LYf!-mZrCHVQ$fAKuE9pziP`@2?&mB=)}od#^6iyt zkW15G$nKYn338ThX^z)uyUQU+5IUd6PC>6Er(IvtH(Rb}+C60^X%OyjqS(XvoBwfM z&2f>ClnXo8L4N${`U1aMg=ieupI@CmH_t)k`PHr-l*nwPG4=8O<>(k4$PX-ZH&HYs z4Tr;^X=Ey@I`u6w5y5JQYY&s@Vs}7pCDgFhflZ5M;2|b_U}9>hk*S3+waZA3=%K{+ zRzRmQ5pU!`1U-%=1=b2Y1T-w88Dz2KNE4c+Y}Z3@X$;Z!IJ7($ZqWiZKbqAR>_XUU zqUM0EKzynp!id9abuS_gIp@G=(g<8{wkBrIyHV!Y!#kP~Q*Q5?U|S%_vDU6EGW3VS zk}&m<;Net$Ajq)UqX5Cfo-$%pbfCSRF@C7ch=r(dxq*$axN+v-^t?^6z@CKraV)#! 
z?$r>lLd&6Moyw(grlh#e)<=-zB$$Z2e98S1{*jfHH@r?W%YZ`XjnjVPbl!-%(MgYk zVPm@9#5Rh?N?C0Zd&(sDL^hL?A*PJ{AE(vAYJku?FHQGla!Dj0lll=4AaNQ(9V}2n zf?Y1hyAB2l)Y6tsD<6mS(SB)y!%d^>s|bQ(Q=^;4xMwDQk3Gk_jhuXSe(_Tp**}s* z_BjnJ=5o6=Y4^>kly#Qk(06BZf_ha7H|z{sJKB(%bAYC}X6ZGkx$AXFRGhSNI;J~l zHHcAI^%}0tBSm|Av&<0?l+-SO)yItq&+az6TXk0jy3Fqv?XoMa%T5ZhPtxN-XSHq=g8jx&H-lu zDWAxyn_@sQpcqgLCGw>nN{{qIL9smFU diff --git a/litex/soc/cores/cpu/blackparrot/bp_fpga/ExampleBlackParrotSystem.v b/litex/soc/cores/cpu/blackparrot/bp_fpga/ExampleBlackParrotSystem.v index 9b2da6935c..cdd1a95e07 100644 --- a/litex/soc/cores/cpu/blackparrot/bp_fpga/ExampleBlackParrotSystem.v +++ b/litex/soc/cores/cpu/blackparrot/bp_fpga/ExampleBlackParrotSystem.v @@ -20,7 +20,7 @@ module ExampleBlackParrotSystem // Tracing parameters , parameter calc_trace_p = 1 , parameter cce_trace_p = 0 - , parameter cmt_trace_p = 1 + , parameter cmt_trace_p = 0 , parameter dram_trace_p = 0 , parameter skip_init_p = 0 @@ -171,7 +171,7 @@ bp_chip ); -/* bind bp_be_top + bind bp_be_top bp_be_nonsynth_tracer #(.cfg_p(cfg_p)) tracer @@ -207,7 +207,7 @@ bp_chip ,.priv_mode_i(be_mem.csr.priv_mode_n) ,.mpp_i(be_mem.csr.mstatus_n.mpp) ); -*/ + /*bind bp_be_top bp_be_nonsynth_perf #(.cfg_p(cfg_p)) diff --git a/litex/soc/cores/cpu/blackparrot/bp_fpga/bp2wb_convertor.v b/litex/soc/cores/cpu/blackparrot/bp_fpga/bp2wb_convertor.v index e523c4aff3..3780fb8ced 100644 --- a/litex/soc/cores/cpu/blackparrot/bp_fpga/bp2wb_convertor.v +++ b/litex/soc/cores/cpu/blackparrot/bp_fpga/bp2wb_convertor.v @@ -25,7 +25,7 @@ module bp2wb_convertor , localparam wbone_addr_lbound = 3 //`BSG_SAFE_CLOG2(wbone_data_width / mem_granularity) //dword granularity , localparam total_datafetch_cycle_lp = cce_block_width_p / wbone_data_width , localparam total_datafetch_cycle_width = `BSG_SAFE_CLOG2(total_datafetch_cycle_lp) - , localparam cached_addr_base = 32'h5000_0000 + , localparam cached_addr_base = 32'h4000_4000// 32'h5000_0000 ) (input clk_i ,(* mark_debug = "true" *) input 
reset_i diff --git a/litex/soc/cores/cpu/blackparrot/bp_fpga/bp2wb_convertor_v2.v b/litex/soc/cores/cpu/blackparrot/bp_fpga/bp2wb_convertor_v2.v deleted file mode 100644 index bb0fac1724..0000000000 --- a/litex/soc/cores/cpu/blackparrot/bp_fpga/bp2wb_convertor_v2.v +++ /dev/null @@ -1,156 +0,0 @@ -/** - * bp2wb_convertor.v - * DESCRIPTION: THIS MODULE ADAPTS BP MEMORY BUS TO 64-BIT WISHBONE - */ - -module bp2wb_convertor - import bp_common_pkg::*; - import bp_common_aviary_pkg::*; - import bp_cce_pkg::*; - import bp_me_pkg::*; - #(parameter bp_cfg_e cfg_p = e_bp_single_core_cfg - `declare_bp_proc_params(cfg_p) - `declare_bp_me_if_widths(paddr_width_p, cce_block_width_p, num_lce_p, lce_assoc_p) - - , parameter [paddr_width_p-1:0] dram_offset_p = '0 - , localparam num_block_words_lp = cce_block_width_p / 64 - , localparam num_block_bytes_lp = cce_block_width_p / 8 - , localparam num_word_bytes_lp = dword_width_p / 8 - , localparam block_offset_bits_lp = `BSG_SAFE_CLOG2(num_block_bytes_lp) - , localparam word_offset_bits_lp = `BSG_SAFE_CLOG2(num_block_words_lp) - , localparam byte_offset_bits_lp = `BSG_SAFE_CLOG2(num_word_bytes_lp) - , localparam wbone_data_width = 64 - , localparam wbone_addr_ubound = paddr_width_p - , localparam wbone_addr_lbound = `BSG_SAFE_CLOG2(wbone_data_width / 8) //byte granularity - , localparam total_datafetch_cycle_lp = cce_block_width_p / wbone_data_width - , localparam total_datafetch_cycle_width = `BSG_SAFE_CLOG2(total_datafetch_cycle_lp) - ) - (input clk_i - , input reset_i - - // BP side - , input [cce_mem_msg_width_lp-1:0] mem_cmd_i - , input mem_cmd_v_i - , output mem_cmd_yumi_o - - , output [cce_mem_msg_width_lp-1:0] mem_resp_o - , output mem_resp_v_o - , input mem_resp_ready_i - - // Wishbone side - , input [63:0] dat_i - , output [63:0] dat_o - , input ack_i - , output [wbone_addr_ubound-wbone_addr_lbound-1:0] adr_o//TODO: understand if width is correct for wishbone check out [ADR_O(n..2)] probably 40:3 since 64-bit architecture 
with byte granularity log(64/8) PARAMETRIZE - , output cyc_o - , output sel_o - , output stb_o - , output we_o - //, input err_i - //, input rty_i - - ); - - `declare_bp_me_if(paddr_width_p, cce_block_width_p, num_lce_p, lce_assoc_p); - - //locals - reg [total_datafetch_cycle_width:0] ack_ctr = 0; - bp_cce_mem_msg_s mem_cmd_cast_i, mem_resp_cast_o, mem_cmd_r; - reg ready_li, v_li, stb_reset_lo, stb_justgotack; - wire [cce_block_width_p-1:0] data_lo; - reg [cce_block_width_p-1:0]data_li; - wire [paddr_width_p-1:0] mem_cmd_addr_l,addr_lo; - reg negedge_reset; - - //reset - //TODO: reset ack_ctr here - //Handshaking between Wishbone and BlackParrot through convertor - //3.1.3:At every rising edge of [CLK_I] the terminating signal(ACK) is sampled. If it - //is asserted, then [STB_O] is negated. - - assign ready_li = ( ack_ctr == 0 ); - assign mem_cmd_yumi_o = mem_cmd_v_i && ready_li;//!stb_o then ready to take! - assign v_li = (ack_ctr == total_datafetch_cycle_lp-1); - assign mem_resp_v_o = mem_resp_ready_i & v_li; - assign stb_o = (mem_cmd_yumi_o || (ack_ctr > 0)) && !stb_reset_lo && !stb_justgotack; - assign cyc_o = stb_o; - assign sel_o = 0; - - // Every time we get an ACK from WB, increment counter untill counter reaches to total_datafetch_cycle_lp - - -//RULE 3.20: -//The following MASTER signals MUST be negated at the rising [CLK_I] edge -//following the as-sertion of [RST_I], and MUST stay in the negated -//state until the rising [CLK_I] edge that fol-lows the negation of -//[RST_I]: [STB_O], [CYC_O]. 
- -initial begin -ack_ctr = 0; -stb_reset_lo =0; -end - - always_ff @(posedge clk_i) - begin - if(reset_i) - begin - ack_ctr <= 0; - stb_reset_lo <= 1; - end - else - begin - stb_reset_lo <= 0; - if (ack_i)//stb should be negated after ack - begin - stb_justgotack <= 1; - ack_ctr <= ack_ctr + 1; - data_li[(ack_ctr*wbone_data_width) +: wbone_data_width] <= dat_i; //Data Pass from WB to BP2WB - //TODO: think about ack_ctr; alignment may be wrong +-1 - - end - else - stb_justgotack <= 0; - if (ack_ctr == total_datafetch_cycle_lp-1) begin - ack_ctr <= 0; - data_li[(ack_ctr*wbone_data_width) +: wbone_data_width] <= dat_i; - end - end - - end - - //Packet Pass from BP to BP2WB - assign mem_cmd_cast_i = mem_cmd_i; - - bsg_dff_reset_en - #(.width_p(cce_mem_msg_width_lp)) - mshr_reg - (.clk_i(clk_i) - ,.reset_i(reset_i) - ,.en_i(mem_cmd_yumi_o)//when - ,.data_i(mem_cmd_i) - ,.data_o(mem_cmd_r) - ); - - assign mem_cmd_addr_l = mem_cmd_r.addr; - - //Addr && Data && Command Pass from BP2WB to WB - assign addr_lo = mem_cmd_addr_l + (ack_ctr*8);//TODO:careful - assign adr_o = addr_lo[wbone_addr_ubound-1:wbone_addr_lbound]; - assign data_lo = mem_cmd_r.data; - assign dat_o = data_lo[(ack_ctr*wbone_data_width) +: wbone_data_width]; - assign we_o = (mem_cmd_r.msg_type inside {e_cce_mem_uc_wr, e_cce_mem_wb,e_cce_mem_wr});//TODO: Ask Dan why write-miss request is not here. 
- - - //Data Pass from BP2WB to BP - - assign mem_resp_cast_o = '{data : data_li - ,payload : mem_cmd_r.payload - ,size : mem_cmd_r.size - ,addr : mem_cmd_r.addr - ,msg_type: mem_cmd_r.msg_type - - }; - assign mem_resp_o = mem_resp_cast_o; - - -endmodule diff --git a/litex/soc/cores/cpu/blackparrot/bp_fpga/bsg_mem_1rw_sync_mask_write_bit.v b/litex/soc/cores/cpu/blackparrot/bp_fpga/bsg_mem_1rw_sync_mask_write_bit.v new file mode 100644 index 0000000000..a6fdae9a60 --- /dev/null +++ b/litex/soc/cores/cpu/blackparrot/bp_fpga/bsg_mem_1rw_sync_mask_write_bit.v @@ -0,0 +1,55 @@ +/* +* bsg_mem_1rw_sync_mask_write_bit.v +* +* distributed synchronous 1-port ram for xilinx ultrascale or ultrascale plus FPGA +* Write mode: No-change | Read mode: No-change +* Note: +* There are 2 basic BRAM library primitives, RAMB18E2 and RAMB36E2 in Vivado. +* But none of them support bit-wise mask. They have Byte-wide write enable ports though. +* So we use the RAM_STYLE attribute to instruct the tool to infer distributed LUT RAM instead. +* +* To save resources, the code is written to be inferred as Signle-port distributed ram RAM64X1S. 
+* https://www.xilinx.com/support/documentation/user_guides/ug574-ultrascale-clb.pdf +* +*/ + + +module bsg_mem_1rw_sync_mask_write_bit #( + parameter width_p = "inv" + , parameter els_p = "inv" + , parameter latch_last_read_p=0 + , parameter enable_clock_gating_p=0 + , localparam addr_width_lp = `BSG_SAFE_CLOG2(els_p) +) ( + input clk_i + , input reset_i + , input [ width_p-1:0] data_i + , input [addr_width_lp-1:0] addr_i + , input v_i + , input [ width_p-1:0] w_mask_i + , input w_i + , output [ width_p-1:0] data_o +); + + wire unused = reset_i; + + (* ram_style = "distributed" *) logic [width_p-1:0] mem [els_p-1:0]; + + logic [width_p-1:0] data_r; + always_ff @(posedge clk_i) begin + if (v_i & ~w_i) + data_r <= mem[addr_i]; + end + + assign data_o = data_r; + + for (genvar i=0; i> 1'b1; + + always_comb + begin + mem_cmd_v_o = cfg_v_lo; + + // uncached store + mem_cmd_cast_o.msg_type = e_cce_mem_uc_wr; + mem_cmd_cast_o.addr = bp_cfg_base_addr_gp; + mem_cmd_cast_o.payload = '0; + mem_cmd_cast_o.size = e_mem_size_8; + mem_cmd_cast_o.data = cce_block_width_p'({cfg_core_lo, cfg_addr_lo, cfg_data_lo}); + end + + always_comb + begin + ucode_cnt_clr = 1'b0; + ucode_cnt_inc = 1'b0; + + cfg_v_lo = '0; + cfg_core_lo = 8'hff; + cfg_addr_lo = '0; + cfg_data_lo = '0; + + case (state_r) + RESET: begin + state_n = skip_ram_init_p ? 
BP_FREEZE_CLR : BP_RESET_SET; + + ucode_cnt_clr = 1'b1; + end + BP_RESET_SET: begin + state_n = BP_FREEZE_SET; + + cfg_v_lo = 1'b1; + cfg_addr_lo = bp_cfg_reg_reset_gp; + cfg_data_lo = cfg_data_width_p'(1); + end + BP_FREEZE_SET: begin + state_n = BP_RESET_CLR; + + cfg_v_lo = 1'b1; + cfg_addr_lo = bp_cfg_reg_freeze_gp; + cfg_data_lo = cfg_data_width_p'(1); + end + BP_RESET_CLR: begin + state_n = SEND_RAM_LO; + + cfg_v_lo = 1'b1; + cfg_addr_lo = bp_cfg_reg_reset_gp; + cfg_data_lo = cfg_data_width_p'(0); + end + SEND_RAM_LO: begin + state_n = SEND_RAM_HI; + + cfg_v_lo = 1'b1; + cfg_addr_lo = cfg_addr_width_p'(bp_cfg_mem_base_cce_ucode_gp) + (ucode_cnt_r << 1); + cfg_data_lo = cce_inst_boot_rom_data[0+:cfg_data_width_p]; + // TODO: This is nonsynth, won't work on FPGA + cfg_data_lo = (|cfg_data_lo === 'X) ? '0 : cfg_data_lo; + end + SEND_RAM_HI: begin + state_n = ucode_prog_done ? SEND_CCE_NORMAL : SEND_RAM_LO; + + ucode_cnt_inc = 1'b1; + + cfg_v_lo = 1'b1; + cfg_addr_lo = cfg_addr_width_p'(bp_cfg_mem_base_cce_ucode_gp) + (ucode_cnt_r << 1) + 1'b1; + cfg_data_lo = cfg_data_width_p'(cce_inst_boot_rom_data[inst_width_p-1:cfg_data_width_p]); + // TODO: This is nonsynth, won't work on FPGA + cfg_data_lo = (|cfg_data_lo === 'X) ? 
'0 : cfg_data_lo; + end + SEND_CCE_NORMAL: begin + state_n = SEND_ICACHE_NORMAL; + + cfg_v_lo = 1'b1; + cfg_addr_lo = bp_cfg_reg_cce_mode_gp; + cfg_data_lo = cfg_data_width_p'(e_cce_mode_normal); + end + SEND_ICACHE_NORMAL: begin + state_n = SEND_DCACHE_NORMAL; + + cfg_v_lo = 1'b1; + cfg_addr_lo = cfg_addr_width_p'(bp_cfg_reg_icache_mode_gp); + cfg_data_lo = cfg_data_width_p'(e_dcache_lce_mode_normal); // TODO: tapeout hack, change to icache + end + SEND_DCACHE_NORMAL: begin + state_n = SEND_PC_LO; + + cfg_v_lo = 1'b1; + cfg_addr_lo = cfg_addr_width_p'(bp_cfg_reg_dcache_mode_gp); + cfg_data_lo = cfg_data_width_p'(e_dcache_lce_mode_normal); + end + SEND_PC_LO: begin + state_n = SEND_PC_HI; + + cfg_v_lo = 1'b1; + cfg_addr_lo = cfg_addr_width_p'(bp_cfg_reg_start_pc_lo_gp); + cfg_data_lo = bp_pc_entry_point_gp[0+:cfg_data_width_p]; + end + SEND_PC_HI: begin + state_n = BP_FREEZE_CLR; + + cfg_v_lo = 1'b1; + cfg_addr_lo = cfg_addr_width_p'(bp_cfg_reg_start_pc_hi_gp); + cfg_data_lo = cfg_data_width_p'(bp_pc_entry_point_gp[vaddr_width_p-1:cfg_data_width_p]); + end + BP_FREEZE_CLR: begin + state_n = DONE; + + cfg_v_lo = 1'b1; + cfg_addr_lo = cfg_addr_width_p'(bp_cfg_reg_freeze_gp); + cfg_data_lo = cfg_data_width_p'(0);; + end + DONE: begin + state_n = DONE; + end + default: begin + state_n = RESET; + end + endcase + end + +endmodule diff --git a/litex/soc/cores/cpu/blackparrot/bp_hardware/bp_common_pkg.vh b/litex/soc/cores/cpu/blackparrot/bp_hardware/bp_common_pkg.vh new file mode 100644 index 0000000000..9500673b83 --- /dev/null +++ b/litex/soc/cores/cpu/blackparrot/bp_hardware/bp_common_pkg.vh @@ -0,0 +1,55 @@ +/* + * bp_common_pkg.vh + * + * Contains the interface structures used for communicating between FE, BE, ME in BlackParrot. + * Additionally contains global parameters used to configure the system. In the future, when + * multiple configurations are supported, these global parameters will belong to groups + * e.g. SV39, VM-disabled, ... 
+ * + */ + +package bp_common_pkg; + + `include "bsg_defines.v" + `include "bp_common_defines.vh" + `include "bp_common_fe_be_if.vh" + `include "bp_common_me_if.vh" + + /* + * RV64 specifies a 64b effective address and 32b instruction. + * BlackParrot supports SV39 virtual memory, which specifies 39b virtual / 56b physical address. + * Effective addresses must have bits 39-63 match bit 38 + * or a page fault exception will occur during translation. + * Currently, we only support a very limited number of parameter configurations. + * Thought: We could have a `define surrounding core instantiations of each parameter and then + * when they import this package, `declare the if structs. No more casting! + */ + + localparam bp_eaddr_width_gp = 64; + localparam bp_instr_width_gp = 32; + + parameter bp_sv39_page_table_depth_gp = 3; + parameter bp_sv39_pte_width_gp = 64; + parameter bp_sv39_vaddr_width_gp = 39; + parameter bp_sv39_paddr_width_gp = 56; + parameter bp_sv39_ppn_width_gp = 44; + parameter bp_page_size_in_bytes_gp = 4096; + parameter bp_page_offset_width_gp = `BSG_SAFE_CLOG2(bp_page_size_in_bytes_gp); + + parameter bp_data_resp_num_flit_gp = 4; + parameter bp_data_cmd_num_flit_gp = 4; + + localparam dram_base_addr_gp = 32'h5000_0000; + + localparam cfg_link_dev_base_addr_gp = 32'h01??_????; + localparam clint_dev_base_addr_gp = 32'h02??_????; + localparam host_dev_base_addr_gp = 32'h03??_????; + localparam plic_dev_base_addr_gp = 32'h0c??_????; + + localparam mipi_reg_base_addr_gp = 32'h0200_0???; + localparam mtimecmp_reg_base_addr_gp = 32'h0200_4???; + localparam mtime_reg_addr_gp = 32'h0200_bff8; + localparam plic_reg_base_addr_gp = 32'h0c00_0???; + +endpackage : bp_common_pkg + diff --git a/litex/soc/cores/cpu/blackparrot/bp_hardware/bp_nonsynth_host.v b/litex/soc/cores/cpu/blackparrot/bp_hardware/bp_nonsynth_host.v new file mode 100644 index 0000000000..e64ce690a4 --- /dev/null +++ b/litex/soc/cores/cpu/blackparrot/bp_hardware/bp_nonsynth_host.v @@ -0,0 
+1,190 @@ + +module bp_nonsynth_host + import bp_common_pkg::*; + import bp_common_aviary_pkg::*; + import bp_be_pkg::*; + import bp_common_rv64_pkg::*; + import bp_cce_pkg::*; + import bsg_noc_pkg::*; + import bp_cfg_link_pkg::*; + #(parameter bp_cfg_e cfg_p = e_bp_inv_cfg + `declare_bp_proc_params(cfg_p) + `declare_bp_me_if_widths(paddr_width_p, cce_block_width_p, num_lce_p, lce_assoc_p) + ) + (input clk_i + , input reset_i + + , input [cce_mem_msg_width_lp-1:0] mem_cmd_i + , input mem_cmd_v_i + , output logic mem_cmd_yumi_o + + , output logic [cce_mem_msg_width_lp-1:0] mem_resp_o + , output logic mem_resp_v_o + , input mem_resp_ready_i + + , output [num_core_p-1:0] program_finish_o + ,(* mark_debug = "true" *) output logic all_finished_debug_o //SC_add + , (* mark_debug = "true" *) output logic core_passed_debug + , (* mark_debug = "true" *) output logic core_failed_debug + ); + +`declare_bp_me_if(paddr_width_p, cce_block_width_p, num_lce_p, lce_assoc_p); + +// HOST I/O mappings +//localparam host_dev_base_addr_gp = 32'h03??_????; + +// Host I/O mappings (arbitrarily decided for now) +// Overall host controls 32'h0300_0000-32'h03FF_FFFF + +localparam hprint_base_addr_gp = paddr_width_p'(32'h0300_0???); +localparam cprint_base_addr_gp = paddr_width_p'(64'h0300_1???); +localparam finish_base_addr_gp = paddr_width_p'(64'h0300_2???); + +bp_cce_mem_msg_s mem_cmd_cast_i; + +assign mem_cmd_cast_i = mem_cmd_i; + +localparam lg_num_core_lp = `BSG_SAFE_CLOG2(num_core_p); + +logic hprint_data_cmd_v; +logic cprint_data_cmd_v; +logic finish_data_cmd_v; + +always_comb + begin + hprint_data_cmd_v = 1'b0; + cprint_data_cmd_v = 1'b0; + finish_data_cmd_v = 1'b0; + + unique + casez (mem_cmd_cast_i.addr) + hprint_base_addr_gp: hprint_data_cmd_v = mem_cmd_v_i; + cprint_base_addr_gp: cprint_data_cmd_v = mem_cmd_v_i; + finish_base_addr_gp: finish_data_cmd_v = mem_cmd_v_i; + default: begin end + endcase + end + +logic [num_core_p-1:0] hprint_w_v_li; +logic [num_core_p-1:0] 
cprint_w_v_li; +logic [num_core_p-1:0] finish_w_v_li; + +// Memory-mapped I/O is 64 bit aligned +localparam byte_offset_width_lp = 3; +wire [lg_num_core_lp-1:0] mem_cmd_core_enc = + mem_cmd_cast_i.addr[byte_offset_width_lp+:lg_num_core_lp]; + +bsg_decode_with_v + #(.num_out_p(num_core_p)) + hprint_data_cmd_decoder + (.v_i(hprint_data_cmd_v) + ,.i(mem_cmd_core_enc) + + ,.o(hprint_w_v_li) + ); + +bsg_decode_with_v + #(.num_out_p(num_core_p)) + cprint_data_cmd_decoder + (.v_i(cprint_data_cmd_v) + ,.i(mem_cmd_core_enc) + + ,.o(cprint_w_v_li) + ); + +bsg_decode_with_v + #(.num_out_p(num_core_p)) + finish_data_cmd_decoder + (.v_i(finish_data_cmd_v) + ,.i(mem_cmd_core_enc) + + ,.o(finish_w_v_li) + ); + +logic [num_core_p-1:0] finish_r; +bsg_dff_reset + #(.width_p(num_core_p)) + finish_accumulator + (.clk_i(clk_i) + ,.reset_i(reset_i) + + ,.data_i(finish_r | finish_w_v_li) + ,.data_o(finish_r) + ); + +logic all_finished_r; +bsg_dff_reset + #(.width_p(1)) + all_finished_reg + (.clk_i(clk_i) + ,.reset_i(reset_i) + + ,.data_i(&finish_r) + ,.data_o(all_finished_r) + ); + +assign program_finish_o = finish_r; + +always_ff @(negedge clk_i) + begin + for (integer i = 0; i < num_core_p; i++) + begin + if (hprint_w_v_li[i] & mem_cmd_yumi_o) + $display("[CORE%0x PRT] %x", i, mem_cmd_cast_i.data[0+:8]); + if (cprint_w_v_li[i] & mem_cmd_yumi_o) + $display("[CORE%0x PRT] %c", i, mem_cmd_cast_i.data[0+:8]); + if (finish_w_v_li[i] & mem_cmd_yumi_o & ~mem_cmd_cast_i.data[0]) + begin + $display("[CORE%0x FSH] PASS", i); + core_passed_debug <= 1; + end + if (finish_w_v_li[i] & mem_cmd_yumi_o & mem_cmd_cast_i.data[0]) + begin + $display("[CORE%0x FSH] FAIL", i); + core_failed_debug <=1; + end + end + + if (all_finished_r) + begin + $display("All cores finished! 
Terminating..."); + $finish(); + all_finished_debug_o <= 1; + end + if (reset_i) + begin + all_finished_debug_o <= 0; + core_passed_debug <= 0; + core_failed_debug <= 0; + end + end +bp_cce_mem_msg_s mem_resp_lo; +logic mem_resp_v_lo, mem_resp_ready_lo; +assign mem_cmd_yumi_o = mem_cmd_v_i & mem_resp_ready_lo; +bsg_one_fifo + #(.width_p(cce_mem_msg_width_lp)) + mem_resp_buffer + (.clk_i(clk_i) + ,.reset_i(reset_i) + + ,.data_i(mem_resp_lo) + ,.v_i(mem_cmd_yumi_o) + ,.ready_o(mem_resp_ready_lo) + + ,.data_o(mem_resp_o) + ,.v_o(mem_resp_v_lo) + ,.yumi_i(mem_resp_ready_i & mem_resp_v_lo) + ); +assign mem_resp_v_o = mem_resp_v_lo & mem_resp_ready_i; + +assign mem_resp_lo = + '{msg_type : mem_cmd_cast_i.msg_type + ,addr : mem_cmd_cast_i.addr + ,payload : mem_cmd_cast_i.payload + ,size : mem_cmd_cast_i.size + ,data : '0 + }; + + +endmodule : bp_nonsynth_host + diff --git a/litex/soc/cores/cpu/blackparrot/bp_software/cce_ucode.mem b/litex/soc/cores/cpu/blackparrot/bp_software/cce_ucode.mem new file mode 100644 index 0000000000..5815a73238 --- /dev/null +++ b/litex/soc/cores/cpu/blackparrot/bp_software/cce_ucode.mem @@ -0,0 +1,96 @@ +010001000001111100000000000000000000000001000000 +001000000001111100000000000010110000000000000000 +000001000000000011111000000000000000100000000000 +010001000011111100000000000000000000000000000010 +001000000011111100000000000000010000000000000000 +000001000010000111111000000000000000100000000000 +010001000101111100000000000000000000000000001000 +001000000101111100000000000001000000000000000000 +000001000100001011111000000000000000100000000000 +101001000000010010100110100000000000000000000000 +001111000000000000000000000001110000000000000000 +010001000001111100000000000000000000000000000000 +010001000011111100000000000000000000000000000010 +001101000010000000000000000110000000000000000000 +010001000101111100000000000000000000000000000000 +010001000111111100000000000000000000000001000000 +010001001001111100000000000000000000000000000000 
+001101000110001000000000000101100000000000000000 +111001000001000001001100100100000000000000000000 +000000000100001011111000000000000000100000000000 +000000001000010011111000000000100000000000000000 +001111000000000000000000000100010000000000000000 +000000000000000011111000000000000000100000000000 +001111000000000000000000000011010000000000000000 +010001000001111100000000000000000000000000000000 +010001000011111100000000000000000000000000000010 +010001000111111100000000000000000000000000000000 +001101000010000000000000001000010000000000000000 +111001000000000010101100100100000000000000000000 +111010011001000000000000000000000000000000000000 +001001000110010000000000010111110000000000000000 +000000000000000011111000000000000000100000000000 +001111000000000000000000000110110000000000000000 +111000010000000000000000000000000000000000000000 +110001000000000000000000000000000000000000000000 +111011000000000000000000000000000000000000000000 +001010000011111100000000010110010000000000000001 +100000100000000000000000000000000000000000000000 +001010001001111100000000001000010000000000000001 +111010000000000000000000000000000000000000000000 +100001100010000000100000000000000000000000000000 +110000000000000000000000000000000000000000000000 +001010000001111100000000001100010000000000000001 +001010000101111100000000001011110000000000000001 +001010001101111100000000001011110000000000000001 +010100000001111100000000000000000000000000000010 +001111000000000000000000001100100000000000000000 +010100000001111100000000000000000000000000000001 +001111000000000000000000001100100000000000000000 +010100000001111100000000000000000000000000000110 +001010011011111100000000010000110000000000000000 +010001000001111100000000000000000000000000000000 +010001000011111100000000000000000000000000000010 +010001000101111100000000000000000000000000000000 +010001000111111100000000000000000000000000000001 +001101000010000000000000001111110000000000000000 +001011000001111100000000001111010000000000000000 
+001011010000000000000000001111010000000000000000 +000000000100001011111000000000000000100000000000 +111001000110000010001010100100000000000000000000 +101010100000001010100100000000000000000000000000 +000000000000000011111000000000000000100000000000 +001111000000000000000000001101110000000000000000 +001000000101111100000000010000110000000000000000 +111010011001000000000000000000000000000000000000 +000001000100001011111000000000000000100000000000 +001111000000000000000000001111110000000000000000 +001010011001111100000000010001110000000000000000 +101010100010001000100000000000000000000000000000 +111001000101100010001000100100000000000000000000 +001111000000000000000000001000010000000000000000 +101001100010001001100010000000000000000000000000 +001010010111111100000000010011110000000000000000 +111001000011100010011011100100000000000000000000 +111011011000000000000000000000000000000000000000 +001010011101111100000000010011010000000000000001 +111001010100101010101100100000000000000000000000 +111010011000000000000000000000000000000000000000 +011000010110000000000000000000000000000000000000 +001010010101111100000000010101110000000000000000 +111001000010100110001001100100000000000000000000 +111001000011100110001001100100000000000000000000 +111011011000000000000000000000000000000000000000 +001010011101111100000000010101010000000000000001 +111001010100101010101100100100000000000000000000 +111010011000000000000000000000000000000000000000 +001111000000000000000000001000010000000000000000 +111001010000101010101100100100000000000000000000 +001111000000000000000000001000010000000000000000 +111010000000000000000000000000000000000000000000 +001010000001111100000000010111010000000000000001 +111001010000101010101100100100000000000000000000 +001111000000000000000000001000010000000000000000 +111001010000101010101100100100000000000000000000 +001111000000000000000000001000010000000000000000 +110111000000000000000000000000000000000000000000 diff --git 
a/litex/soc/cores/cpu/blackparrot/bp_software/udivmoddi4.c b/litex/soc/cores/cpu/blackparrot/bp_software/udivmoddi4.c new file mode 100644 index 0000000000..a57c6e0e6e --- /dev/null +++ b/litex/soc/cores/cpu/blackparrot/bp_software/udivmoddi4.c @@ -0,0 +1,358 @@ +/* ===-- udivmoddi4.c - Implement __udivmoddi4 -----------------------------=== + * + * The LLVM Compiler Infrastructure + * + * This file is dual licensed under the MIT and the University of Illinois Open + * Source Licenses. See LICENSE.TXT for details. + * + * ===----------------------------------------------------------------------=== + * + * This file implements __udivmoddi4 for the compiler_rt library. + * + * ===----------------------------------------------------------------------=== + */ + +#ifndef __blackparrot__ +#include "int_lib.h" + +/* Effects: if rem != 0, *rem = a % b + * Returns: a / b + */ + +/* Translated from Figure 3-40 of The PowerPC Compiler Writer's Guide */ + +COMPILER_RT_ABI du_int +__udivmoddi4(du_int a, du_int b, du_int* rem) +{ + const unsigned n_uword_bits = sizeof(su_int) * CHAR_BIT; + const unsigned n_udword_bits = sizeof(du_int) * CHAR_BIT; + udwords n; + n.all = a; + udwords d; + d.all = b; + udwords q; + udwords r; + unsigned sr; + /* special cases, X is unknown, K != 0 */ + if (n.s.high == 0) + { + if (d.s.high == 0) + { + /* 0 X + * --- + * 0 X + */ + if (rem) + *rem = n.s.low % d.s.low; + return n.s.low / d.s.low; + } + /* 0 X + * --- + * K X + */ + if (rem) + *rem = n.s.low; + return 0; + } + /* n.s.high != 0 */ + if (d.s.low == 0) + { + if (d.s.high == 0) + { + /* K X + * --- + * 0 0 + */ + if (rem) + *rem = n.s.high % d.s.low; + return n.s.high / d.s.low; + } + /* d.s.high != 0 */ + if (n.s.low == 0) + { + /* K 0 + * --- + * K 0 + */ + if (rem) + { + r.s.high = n.s.high % d.s.high; + r.s.low = 0; + *rem = r.all; + } + return n.s.high / d.s.high; + } + /* K K + * --- + * K 0 + */ + if ((d.s.high & (d.s.high - 1)) == 0) /* if d is a power of 2 */ + { + if (rem) + { + 
r.s.low = n.s.low; + r.s.high = n.s.high & (d.s.high - 1); + *rem = r.all; + } + return n.s.high >> __builtin_ctz(d.s.high); + } + /* K K + * --- + * K 0 + */ + sr = __builtin_clz(d.s.high) - __builtin_clz(n.s.high); + /* 0 <= sr <= n_uword_bits - 2 or sr large */ + if (sr > n_uword_bits - 2) + { + if (rem) + *rem = n.all; + return 0; + } + ++sr; + /* 1 <= sr <= n_uword_bits - 1 */ + /* q.all = n.all << (n_udword_bits - sr); */ + q.s.low = 0; + q.s.high = n.s.low << (n_uword_bits - sr); + /* r.all = n.all >> sr; */ + r.s.high = n.s.high >> sr; + r.s.low = (n.s.high << (n_uword_bits - sr)) | (n.s.low >> sr); + } + else /* d.s.low != 0 */ + { + if (d.s.high == 0) + { + /* K X + * --- + * 0 K + */ + if ((d.s.low & (d.s.low - 1)) == 0) /* if d is a power of 2 */ + { + if (rem) + *rem = n.s.low & (d.s.low - 1); + if (d.s.low == 1) + return n.all; + sr = __builtin_ctz(d.s.low); + q.s.high = n.s.high >> sr; + q.s.low = (n.s.high << (n_uword_bits - sr)) | (n.s.low >> sr); + return q.all; + } + /* K X + * --- + * 0 K + */ + sr = 1 + n_uword_bits + __builtin_clz(d.s.low) - __builtin_clz(n.s.high); + /* 2 <= sr <= n_udword_bits - 1 + * q.all = n.all << (n_udword_bits - sr); + * r.all = n.all >> sr; + */ + if (sr == n_uword_bits) + { + q.s.low = 0; + q.s.high = n.s.low; + r.s.high = 0; + r.s.low = n.s.high; + } + else if (sr < n_uword_bits) // 2 <= sr <= n_uword_bits - 1 + { + q.s.low = 0; + q.s.high = n.s.low << (n_uword_bits - sr); + r.s.high = n.s.high >> sr; + r.s.low = (n.s.high << (n_uword_bits - sr)) | (n.s.low >> sr); + } + else // n_uword_bits + 1 <= sr <= n_udword_bits - 1 + { + q.s.low = n.s.low << (n_udword_bits - sr); + q.s.high = (n.s.high << (n_udword_bits - sr)) | + (n.s.low >> (sr - n_uword_bits)); + r.s.high = 0; + r.s.low = n.s.high >> (sr - n_uword_bits); + } + } + else + { + /* K X + * --- + * K K + */ + sr = __builtin_clz(d.s.high) - __builtin_clz(n.s.high); + /* 0 <= sr <= n_uword_bits - 1 or sr large */ + if (sr > n_uword_bits - 1) + { + if (rem) + *rem 
= n.all; + return 0; + } + ++sr; + /* 1 <= sr <= n_uword_bits */ + /* q.all = n.all << (n_udword_bits - sr); */ + q.s.low = 0; + if (sr == n_uword_bits) + { + q.s.high = n.s.low; + r.s.high = 0; + r.s.low = n.s.high; + } + else + { + q.s.high = n.s.low << (n_uword_bits - sr); + r.s.high = n.s.high >> sr; + r.s.low = (n.s.high << (n_uword_bits - sr)) | (n.s.low >> sr); + } + } + } + /* Not a special case + * q and r are initialized with: + * q.all = n.all << (n_udword_bits - sr); + * r.all = n.all >> sr; + * 1 <= sr <= n_udword_bits - 1 + */ + su_int carry = 0; + for (; sr > 0; --sr) + { + /* r:q = ((r:q) << 1) | carry */ + r.s.high = (r.s.high << 1) | (r.s.low >> (n_uword_bits - 1)); + r.s.low = (r.s.low << 1) | (q.s.high >> (n_uword_bits - 1)); + q.s.high = (q.s.high << 1) | (q.s.low >> (n_uword_bits - 1)); + q.s.low = (q.s.low << 1) | carry; + /* carry = 0; + * if (r.all >= d.all) + * { + * r.all -= d.all; + * carry = 1; + * } + */ + const di_int s = (di_int)(d.all - r.all - 1) >> (n_udword_bits - 1); + carry = s & 1; + r.all -= d.all & s; + } + q.all = (q.all << 1) | carry; + if (rem) + *rem = r.all; + return q.all; +} +#else + +/* More subroutines needed by GCC output code on some machines. */ +/* Compile this one with gcc. */ +/* Copyright (C) 1989-2014 Free Software Foundation, Inc. + +This file is part of GCC. + +GCC is free software; you can redistribute it and/or modify it under +the terms of the GNU General Public License as published by the Free +Software Foundation; either version 3, or (at your option) any later +version. + +GCC is distributed in the hope that it will be useful, but WITHOUT ANY +WARRANTY; without even the implied warranty of MERCHANTABILITY or +FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License +for more details. + +Under Section 7 of GPL version 3, you are granted additional +permissions described in the GCC Runtime Library Exception, version +3.1, as published by the Free Software Foundation. 
+ +You should have received a copy of the GNU General Public License and +a copy of the GCC Runtime Library Exception along with this program; +see the files COPYING3 and COPYING.RUNTIME respectively. If not, see +<http://www.gnu.org/licenses/>. */ + +/* This is extracted from gcc's libgcc/libgcc2.c with these typedefs added: */ +typedef int Wtype; /* signed word: half the width of DWtype */ +typedef long long DWtype; /* signed double word: must be as wide as UDWtype, since __moddi3 stores a UDWtype remainder into a DWtype */ +typedef unsigned int UWtype; +typedef unsigned long long UDWtype; +#if __BYTE_ORDER__ != __ORDER_LITTLE_ENDIAN__ +struct DWstruct {Wtype high, low;}; +#else +struct DWstruct {Wtype low, high;}; +#endif +typedef union { + struct DWstruct s; + DWtype ll; +} DWunion; + +UDWtype /* returns n / d and, when rp is non-NULL, stores n % d in *rp; d must be non-zero (__builtin_clzll (0) is undefined) */ +__udivmoddi4 (UDWtype n, UDWtype d, UDWtype *rp) +{ + UDWtype q = 0, r = n, y = d; + UWtype lz1, lz2, i, k; + + /* Implements align divisor shift dividend method. This algorithm + aligns the divisor under the dividend and then performs a number of + test-subtract iterations which shift the dividend left. Number of + iterations is k + 1 where k is the number of bit positions the + divisor must be shifted left to align it under the dividend. + quotient bits can be saved in the rightmost positions of the dividend + as it shifts left on each test-subtract iteration. */ + + if (y <= r) + { + lz1 = __builtin_clzll (d); + lz2 = __builtin_clzll (n); + + k = lz1 - lz2; + y = (y << k); + + /* Dividend can exceed 2 ^ (width − 1) − 1 but still be less than the + aligned divisor. Normal iteration can drop the high order bit + of the dividend. Therefore, first test-subtract iteration is a + special case, saving its quotient bit in a separate location and + not shifting the dividend. */ + if (r >= y) + { + r = r - y; + q = (1ULL << k); + } + + if (k > 0) + { + y = y >> 1; + + /* k additional iterations where k regular test subtract shift + dividend iterations are done.
*/ + i = k; + do + { + if (r >= y) + r = ((r - y) << 1) + 1; + else + r = (r << 1); + i = i - 1; + } while (i != 0); + + /* First quotient bit is combined with the quotient bits resulting + from the k regular iterations. */ + q = q + r; + r = r >> k; + q = q - (r << k); + } + } + + if (rp) + *rp = r; + return q; +} + +DWtype /* returns the remainder of |u| / |v|, negated when u is negative (the sign of v does not affect the result) */ +__moddi3 (DWtype u, DWtype v) +{ + Wtype c = 0; + DWunion uu = {.ll = u}; + DWunion vv = {.ll = v}; + DWtype w; + + if (uu.s.high < 0) + c = ~c, + uu.ll = -uu.ll; + if (vv.s.high < 0) + vv.ll = -vv.ll; + + (void) __udivmoddi4 (uu.ll, vv.ll, (UDWtype*)&w); + if (c) + w = -w; + + return w; +} + +#endif diff --git a/litex/soc/cores/cpu/blackparrot/core.py b/litex/soc/cores/cpu/blackparrot/core.py index 35ecad5546..884b0791d3 100644 --- a/litex/soc/cores/cpu/blackparrot/core.py +++ b/litex/soc/cores/cpu/blackparrot/core.py @@ -44,7 +44,7 @@ } GCC_FLAGS = { - "standard": "-march=rv64ima -mabi=lp64 ", + "standard": "-march=rv64ia -mabi=lp64 -O0 ", # "linux": "-march=rv64imac -mabi=lp64 ", # "full": "-march=rv64imafdc -mabi=lp64 ", } @@ -56,23 +56,16 @@ class BlackParrotRV64(Module): gcc_triple = ("riscv64-unknown-elf") linker_output_format = "elf64-littleriscv" # io_regions = {0x10000000: 0x70000000} # origin, length - io_regions = {0x40000000: 0x10000000} # origin, length + io_regions = {0x30000000: 0x20000000} # origin, length @property def mem_map(self): - # We do not know yet how blackparrot memory map looks like. For now, assume same as rocket.
- print("MEMORY MAPPED") return { - # "rom" : 0x10000000, - # "sram" : 0x11000000, - # "main_ram" : 0x40000000, -# "ethmac" : 0x30000000, + "ethmac" : 0x30000000, "csr" : 0x40000000, "rom" : 0x50000000, "sram" : 0x51000000, - "ethmac" : 0x60000000, "main_ram" : 0x80000000, - } @property @@ -127,7 +120,6 @@ def __init__(self, platform, variant="standard"): def set_reset_address(self, reset_address):#note sure if reset address needs to be changed for BB assert not hasattr(self, "reset_address") self.reset_address = reset_address - print("HI") print(hex(reset_address)) #assert reset_address == 0x10000000, "cpu_reset_addr hardcoded in during elaboration!" diff --git a/litex/soc/cores/cpu/blackparrot/flist_litex.verilator b/litex/soc/cores/cpu/blackparrot/flist_litex.verilator index d7f6d4e505..65e8e1c4ec 100644 --- a/litex/soc/cores/cpu/blackparrot/flist_litex.verilator +++ b/litex/soc/cores/cpu/blackparrot/flist_litex.verilator @@ -64,7 +64,7 @@ $BASEJUMP_STL_DIR/bsg_mem/bsg_mem_1r1w_sync.v $BASEJUMP_STL_DIR/bsg_mem/bsg_mem_1r1w_sync_synth.v $BASEJUMP_STL_DIR/bsg_mem/bsg_mem_1r1w_synth.v $BASEJUMP_STL_DIR/bsg_mem/bsg_mem_1rw_sync.v -$BASEJUMP_STL_DIR/bsg_mem/bsg_mem_1rw_sync_mask_write_bit.v +// $BASEJUMP_STL_DIR/bsg_mem/bsg_mem_1rw_sync_mask_write_bit.v $BASEJUMP_STL_DIR/bsg_mem/bsg_mem_1rw_sync_mask_write_bit_synth.v $BASEJUMP_STL_DIR/bsg_mem/bsg_mem_1rw_sync_mask_write_byte.v $BASEJUMP_STL_DIR/bsg_mem/bsg_mem_1rw_sync_mask_write_byte_synth.v @@ -203,23 +203,26 @@ $BP_COMMON_DIR/src/v/bp_addr_map.v // bsg_ip_cores files $BASEJUMP_STL_DIR/bsg_fsb/bsg_fsb_node_trace_replay.v // be files -// $BP_BE_DIR/test/common/bp_be_nonsynth_tracer.v +$BP_BE_DIR/test/common/bp_be_nonsynth_tracer.v // $BP_BE_DIR/test/common/bp_be_nonsynth_perf.v // me files -$BP_ME_DIR/test/common/bp_mem.v -$BP_ME_DIR/test/common/bp_mem_delay_model.v -$BP_ME_DIR/test/common/bp_mem_transducer.v -$BP_ME_DIR/test/common/bp_mem_storage_sync.v +// $BP_ME_DIR/test/common/bp_mem.v +// 
$BP_ME_DIR/test/common/bp_mem_delay_model.v +// $BP_ME_DIR/test/common/bp_mem_transducer.v +// $BP_ME_DIR/test/common/bp_mem_storage_sync.v // $BP_ME_DIR/test/common/dramsim2_wrapper.cpp $BP_ME_DIR/test/common/bp_cce_mmio_cfg_loader.v -$BP_ME_DIR/test/common/bp_mem_nonsynth_tracer.v -$BP_ME_DIR/test/common/bp_cce_nonsynth_tracer.v -$BP_ME_DIR/test/common/bp_mem_utils.cpp +// $BP_ME_DIR/test/common/bp_mem_nonsynth_tracer.v +// $BP_ME_DIR/test/common/bp_cce_nonsynth_tracer.v +// $BP_ME_DIR/test/common/bp_mem_utils.cpp // top files $BP_TOP_DIR/test/common/bp_nonsynth_host.v // $BP_TOP_DIR/test/common/bp_nonsynth_if_verif.v -// $BP_TOP_DIR/test/common/bp_nonsynth_commit_tracer.v +$BP_TOP_DIR/test/common/bp_nonsynth_commit_tracer.v // /home/scanakci/Research_sado/litex/litex/litex/soc/cores/cpu/blackparrot/pre-alpha-release/bp_top/syn/results/verilator/bp_top_trace_demo.e_bp_single_core_cfg.build/wrapper.v // /home/scanakci/Research_sado/litex/litex/litex/soc/cores/cpu/blackparrot/pre-alpha-release/bp_top/syn/results/verilator/bp_top_trace_demo.e_bp_single_core_cfg.build/test_bp.cpp $BP_FPGA_DIR/bp2wb_convertor.v $BP_FPGA_DIR/ExampleBlackParrotSystem.v +$BP_FPGA_DIR/bsg_mem_1rw_sync_mask_write_bit.v +// Recent +$BASEJUMP_STL_DIR/bsg_noc/bsg_mesh_stitch.v diff --git a/litex/soc/cores/cpu/blackparrot/setEnvironment.sh b/litex/soc/cores/cpu/blackparrot/setEnvironment.sh index 030c49a5bc..d818ec52b9 100755 --- a/litex/soc/cores/cpu/blackparrot/setEnvironment.sh +++ b/litex/soc/cores/cpu/blackparrot/setEnvironment.sh @@ -1,8 +1,10 @@ #!/bin/bash ## Set common environment variables +export LITEX=$(git rev-parse --show-toplevel) +export BP=$PWD +cp bp_software/cce_ucode.mem /tmp/. 
cd pre-alpha-release TOP=$(git rev-parse --show-toplevel) - export BP_COMMON_DIR=$TOP/bp_common export BP_FE_DIR=$TOP/bp_fe export BP_BE_DIR=$TOP/bp_be @@ -61,7 +63,7 @@ elif [ $CFG = "e_bp_single_core_cfg" ] then NUM_LCE_P=2 N_WG=64 - echo "Single Core config" + #echo "Single Core config" #elif ($CFG -eq e_bp_dual_core_cfg) # NUM_LCE_P=4 # N_WG=32 @@ -87,7 +89,7 @@ CCE_MEM_PATH=$BP_ME_DIR/src/asm/roms/$COH_PROTO CCE_MEM=bp_cce_inst_rom_$COH_PROTO_lce$NUM_LCE_P_wg$N_WG_assoc8.mem #DRAMSIM_CH_CFG=DDR2_micron_16M_8b_x8_sg3E.ini #DRAMSIM_SYS_CFG=system.ini -$include $BP_COMMON_DIR/syn/Makefile.verilator +#$include $BP_COMMON_DIR/syn/Makefile.verilator #iinclude $(BP_COMMON_DIR)/syn/Makefile.common #include $(BP_COMMON_DIR)/syn/Makefile.dc #include $(BP_COMMON_DIR)/syn/Makefile.regress diff --git a/litex/soc/cores/cpu/blackparrot/update_BP.sh b/litex/soc/cores/cpu/blackparrot/update_BP.sh new file mode 100755 index 0000000000..c6ddde977c --- /dev/null +++ b/litex/soc/cores/cpu/blackparrot/update_BP.sh @@ -0,0 +1,17 @@ +#!/bin/bash + + +##SOFTWARE CHANGES## + +#For some reason, the provided udivmoddi4.c is not functionally correct when used with either BP or Rocket under the IA extension. Another version of udivmoddi4.c is used as a workaround to run the BIOS on these architectures. +cp bp_software/udivmoddi4.c $LITEX/litex/soc/software/compiler_rt/lib/builtins/. +cp bp_software/cce_ucode.mem /tmp/. + +##HARDWARE CHANGES## +#Need to change some files because of memory map differences and proper synthesis +cp bp_hardware/bp_common_pkg.vh $BP_COMMON_DIR/src/include/. +cp bp_hardware/bp_cce_mmio_cfg_loader.v $BP_ME_DIR/test/common/. +cp bp_hardware/bp_nonsynth_host.v $BP_TOP_DIR/test/common/.
+ +# Necessary files for FPGA implementations. NOTE(review): the destination was written as $BP_TOP/DIR, presumably a typo for $BP_TOP_DIR (the variable used by the other copies above) - confirm the intended target directory. +cp -r bp_fpga $BP_TOP_DIR diff --git a/litex/soc/software/bios/Makefile b/litex/soc/software/bios/Makefile index a454a17ea8..7c34c9872c 100755 --- a/litex/soc/software/bios/Makefile +++ b/litex/soc/software/bios/Makefile @@ -1,6 +1,10 @@ include ../include/generated/variables.mak include $(SOC_DIRECTORY)/software/common.mak +ifeq ($(CPU),blackparrot) +BP_LIBS = -L$(BP_EXTERNAL_DIR)/lib/gcc/riscv64-unknown-elf/8.3.0 +BP_FLAGS = -lgcc +endif # Permit TFTP_SERVER_PORT override from shell environment / command line ifdef TFTP_SERVER_PORT CFLAGS += -DTFTP_SERVER_PORT=$(TFTP_SERVER_PORT) @@ -23,6 +27,7 @@ endif bios.elf: $(BIOS_DIRECTORY)/linker.ld $(OBJECTS) + %.elf: ../libbase/crt0-$(CPU)-ctr.o ../libnet/libnet.a ../libbase/libbase-nofloat.a ../libcompiler_rt/libcompiler_rt.a $(LD) $(LDFLAGS) -T $(BIOS_DIRECTORY)/linker.ld -N -o $@ \ ../libbase/crt0-$(CPU)-ctr.o \ @@ -30,7 +35,10 @@ bios.elf: $(BIOS_DIRECTORY)/linker.ld $(OBJECTS) -L../libnet \ -L../libbase \ -L../libcompiler_rt \ - -lnet -lbase-nofloat -lcompiler_rt + $(BP_LIBS) \ + -lnet -lbase-nofloat -lcompiler_rt \ + $(BP_FLAGS) + ifneq ($(OS),Windows_NT) chmod -x $@ endif diff --git a/litex/soc/software/bios/isr.c b/litex/soc/software/bios/isr.c index 39a2020ada..42456b8dcc 100644 --- a/litex/soc/software/bios/isr.c +++ b/litex/soc/software/bios/isr.c @@ -8,11 +8,24 @@ #include #include -/*SC_add: Does BB support PLIC?*/ -#if defined(__rocket__) || defined(__blackparrot__) + +/*TODO: Update this function for BP*/ // +#if defined(__blackparrot__) +void isr(void); +void isr(void) +{ + static int onetime = 0; + if ( onetime == 0){ + printf("ISR blackparrot\n"); + printf("TRAP!!\n"); + onetime++; + } +} +#elif defined(__rocket__) void plic_init(void); void plic_init(void) { + int i; // priorities for interrupt pins 1..4 @@ -27,6 +40,7 @@ void plic_init(void) void isr(void); void isr(void) { + unsigned int claim; while ((claim = *((unsigned int *)PLIC_CLAIM))) { diff
--git a/litex/soc/software/bios/main.c b/litex/soc/software/bios/main.c index f0e1187433..2e6b5612fa 100644 --- a/litex/soc/software/bios/main.c +++ b/litex/soc/software/bios/main.c @@ -459,7 +459,6 @@ static void do_command(char *c) #endif else if(strcmp(token, "memtest") == 0) memtest(); #endif - else if(strcmp(token, "") != 0) printf("Command not found\n"); } @@ -588,9 +587,9 @@ int main(int i, char **c) #elif __minerva__ printf("Minerva"); #elif __rocket__ - printf("RocketRV64[imac] HI"); -#elif __blackparrot - printf("BlackParrotRV64[imac] Hello World"); + printf("RocketRV64[imac]"); +#elif __blackparrot__ + printf("BlackParrotRV64[ia]"); #else printf("Unknown"); #endif @@ -605,9 +604,10 @@ int main(int i, char **c) #endif printf("\n"); - sdr_ok = 1; + sdr_ok = 1; + #if defined(CSR_ETHMAC_BASE) || defined(CSR_SDRAM_BASE) - printf("--========== \e[1mInitialization\e[0m ============--\n"); + printf("--========== \e[1mInitialization\e[0m ============--\n"); #ifdef CSR_ETHMAC_BASE eth_init(); #endif @@ -620,6 +620,7 @@ int main(int i, char **c) #endif if (sdr_ok !=1) printf("Memory initialization failed\n"); + printf("\n"); #endif @@ -630,7 +631,7 @@ int main(int i, char **c) } printf("--============= \e[1mConsole\e[0m ================--\n"); - while(1) { + while(1) { putsnonl("\e[92;1mlitex\e[0m> "); readstr(buffer, 64); do_command(buffer); diff --git a/litex/soc/software/libbase/crt0-blackparrot.S b/litex/soc/software/libbase/crt0-blackparrot.S index 69b9d574c6..9badaa48ab 100644 --- a/litex/soc/software/libbase/crt0-blackparrot.S +++ b/litex/soc/software/libbase/crt0-blackparrot.S @@ -67,8 +67,8 @@ bss_loop: j bss_loop bss_done: - call plic_init // initialize external interrupt controller - li a0, 0x800 // external interrupt sources only (using LiteX timer); +// call plic_init // initialize external interrupt controller +# li a0, 0x800 // external interrupt sources only (using LiteX timer); // NOTE: must still enable mstatus.MIE! csrw mie,a0