Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

[rtl] optimize CPU barrel shifter timing #301

Merged
merged 2 commits into from
Apr 14, 2022
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
1 change: 1 addition & 0 deletions CHANGELOG.md
Original file line number Diff line number Diff line change
Expand Up @@ -32,6 +32,7 @@ mimpid = 0x01040312 => 01.04.03.12 => Version 01.04.03.12 => v1.4.3.12

| Date (*dd.mm.yyyy*) | Version | Comment |
|:----------:|:-------:|:--------|
| 13.04.2022 | 1.7.0.4 | improve timing of CPU's barrel shifter (`FAST_SHIFT_EN` = true) by moving the register stage in front of the final left-shift re-conversion (bit reversal) and output gate; [#301](https://github.com/stnolting/neorv32/pull/301) |
| 12.04.2022 | 1.7.0.3 | CPU front-end is now controlled by a _synchronous_ state machine (all outgoing signals are driven by registers), reducing critical path of memory system & reducing area costs; :warning: `CPU_IPB_ENTRIES` now has to be >= 2; [#300](https://github.com/stnolting/neorv32/pull/300) |
| 11.04.2022 | 1.7.0.2 | cleanup of CPU front-end (instruction fetch); cleaner code, lower area costs; [#299](https://github.com/stnolting/neorv32/pull/299) |
| 10.04.2022 | 1.7.0.1 | rework handling of `x0` register (`zero`): shortens critical path and reduces area costs; [#298](https://github.com/stnolting/neorv32/pull/298) |
Expand Down
80 changes: 35 additions & 45 deletions rtl/core/neorv32_cpu_cp_shifter.vhd
Original file line number Diff line number Diff line change
@@ -1,9 +1,8 @@
-- #################################################################################################
-- # << NEORV32 - CPU Co-Processor: Shifter (CPU Core ISA) >> #
-- # << NEORV32 - CPU Co-Processor: Shifter (CPU Base ISA) >> #
-- # ********************************************************************************************* #
-- # Bit-shift unit for base ISA. #
-- # FAST_SHIFT_EN = false (default): Use bit-serial shifter architecture (small but slow) #
-- # FAST_SHIFT_EN = true: Use barrel shifter architecture (large but fast) #
-- # FAST_SHIFT_EN = false (default) : Use bit-serial shifter architecture (small but slow) #
-- # FAST_SHIFT_EN = true : Use barrel shifter architecture (large but fast) #
-- # ********************************************************************************************* #
-- # BSD 3-Clause License #
-- # #
Expand Down Expand Up @@ -45,7 +44,7 @@ use neorv32.neorv32_package.all;

entity neorv32_cpu_cp_shifter is
generic (
FAST_SHIFT_EN : boolean -- use barrel shifter for shift operations
FAST_SHIFT_EN : boolean -- implement fast but large barrel shifter
);
port (
-- global control --
Expand Down Expand Up @@ -78,21 +77,24 @@ architecture neorv32_cpu_cp_shifter_rtl of neorv32_cpu_cp_shifter is
-- barrel shifter --
type bs_level_t is array (index_size_f(data_width_c) downto 0) of std_ulogic_vector(data_width_c-1 downto 0);
signal bs_level : bs_level_t;
signal bs_start : std_ulogic;
signal bs_result : std_ulogic_vector(data_width_c-1 downto 0);

begin

-- Iterative Shifter Core (small but slow) ------------------------------------------------
-- Serial Shifter (small but slow) --------------------------------------------------------
-- -------------------------------------------------------------------------------------------
serial_shifter_sync:
serial_shifter:
if (FAST_SHIFT_EN = false) generate
shifter_unit_sync: process(rstn_i, clk_i)

-- shifter core --
serial_shifter_core: process(rstn_i, clk_i)
begin
if (rstn_i = '0') then
shifter.busy <= '0';
shifter.busy_ff <= def_rst_val_c;
shifter.sreg <= (others => def_rst_val_c);
shifter.busy <= '0';
shifter.cnt <= (others => def_rst_val_c);
shifter.sreg <= (others => def_rst_val_c);
elsif rising_edge(clk_i) then
shifter.busy_ff <= shifter.busy;
if (start_i = '1') then
Expand All @@ -102,8 +104,8 @@ begin
end if;
--
if (start_i = '1') then -- trigger new shift
shifter.sreg <= rs1_i; -- shift operand
shifter.cnt <= shamt_i; -- shift amount
shifter.sreg <= rs1_i; -- shift data
elsif (or_reduce_f(shifter.cnt) = '1') then -- running shift (cnt != 0)
shifter.cnt <= std_ulogic_vector(unsigned(shifter.cnt) - 1);
if (ctrl_i(ctrl_ir_funct3_2_c) = '0') then -- SLL: shift left logical
Expand All @@ -113,32 +115,31 @@ begin
end if;
end if;
end if;
end process shifter_unit_sync;
end generate;
end process serial_shifter_core;

-- shift control/output --
serial_shifter_ctrl:
if (FAST_SHIFT_EN = false) generate
-- shift control/output --
shifter.done <= '1' when (or_reduce_f(shifter.cnt(shifter.cnt'left downto 1)) = '0') else '0';
valid_o <= shifter.busy and shifter.done;
res_o <= shifter.sreg when (shifter.busy = '0') and (shifter.busy_ff = '1') else (others => '0');
end generate;

end generate; -- /serial_shifter


-- Barrel Shifter Core (fast but large) ---------------------------------------------------
-- Barrel Shifter (fast but large) --------------------------------------------------------
-- -------------------------------------------------------------------------------------------
barrel_shifter_async:
barrel_shifter:
if (FAST_SHIFT_EN = true) generate
shifter_unit_async: process(rs1_i, shamt_i, ctrl_i, bs_level)

-- shifter core --
barrel_shifter_core: process(rs1_i, shamt_i, ctrl_i, bs_level)
begin
-- input level: convert left shifts to right shifts --
if (ctrl_i(ctrl_ir_funct3_2_c) = '0') then -- is left shift?
bs_level(index_size_f(data_width_c)) <= bit_rev_f(rs1_i); -- reverse bit order of input operand
else
bs_level(index_size_f(data_width_c)) <= rs1_i;
end if;

-- shifter array --
-- shifter array (right-shifts only) --
for i in index_size_f(data_width_c)-1 downto 0 loop
if (shamt_i(i) = '1') then
bs_level(i)(data_width_c-1 downto data_width_c-(2**i)) <= (others => (bs_level(i+1)(data_width_c-1) and ctrl_i(ctrl_ir_funct12_10_c)));
Expand All @@ -147,35 +148,24 @@ begin
bs_level(i) <= bs_level(i+1);
end if;
end loop;
end process barrel_shifter_core;

-- re-convert original left shifts --
if (ctrl_i(ctrl_ir_funct3_2_c) = '0') then
bs_result <= bit_rev_f(bs_level(0));
else
bs_result <= bs_level(0);
end if;
end process shifter_unit_async;
end generate;

-- output register --
barrel_shifter_sync:
if (FAST_SHIFT_EN = true) generate
shifter_unit_sync: process(clk_i)
-- pipeline register --
barrel_shifter_buf: process(clk_i)
begin
if rising_edge(clk_i) then
res_o <= (others => '0');
if (start_i = '1') then
res_o <= bs_result;
end if;
bs_start <= start_i;
bs_result <= bs_level(0); -- this register can be moved by the register balancing
end if;
end process shifter_unit_sync;
end generate;
end process barrel_shifter_buf;

-- shift control/output --
barrel_shifter_ctrl:
if (FAST_SHIFT_EN = true) generate
-- output gate and re-convert original left shifts --
res_o <= (others => '0') when (bs_start = '0') else bit_rev_f(bs_result) when (ctrl_i(ctrl_ir_funct3_2_c) = '0') else bs_result;

-- processing done --
valid_o <= start_i;
end generate;

end generate; -- /barrel_shifter


end neorv32_cpu_cp_shifter_rtl;
2 changes: 1 addition & 1 deletion rtl/core/neorv32_package.vhd
Original file line number Diff line number Diff line change
Expand Up @@ -68,7 +68,7 @@ package neorv32_package is
-- Architecture Constants (do not modify!) ------------------------------------------------
-- -------------------------------------------------------------------------------------------
constant data_width_c : natural := 32; -- native data path width - do not change!
constant hw_version_c : std_ulogic_vector(31 downto 0) := x"01070003"; -- NEORV32 version - no touchy!
constant hw_version_c : std_ulogic_vector(31 downto 0) := x"01070004"; -- NEORV32 version - no touchy!
constant archid_c : natural := 19; -- official NEORV32 architecture ID - hands off!

-- Check if we're inside the Matrix -------------------------------------------------------
Expand Down