I am trying to implement a simple ALU:
library IEEE;
use IEEE.STD_LOGIC_1164.ALL;
use IEEE.NUMERIC_STD.ALL;
entity simple_alu is
Port (
clk : in std_logic;
rst : in std_logic;
op : in std_logic_vector (1 downto 0);
in0 : in std_logic_vector (31 downto 0);
in1 : in std_logic_vector (31 downto 0);
res : out std_logic_vector (31 downto 0);
done_flag : out std_logic
);
end simple_alu;
architecture Behavioral of simple_alu is
type state_type is (start, add, sub, mul, pow, done);
signal state, next_state : state_type := start;
signal status : std_logic := '0';
signal inter, shift_reg, zero : std_logic_vector (31 downto 0) := (others => '0');
begin
SYNC_PROC: process (clk, rst)
begin
if (rst = '0') then
if rising_edge(clk) then
state <= next_state;
res <= inter;
done_flag <= status;
end if;
else
state <= start;
res <= (others => '0');
done_flag <= '0';
end if;
end process;
--MEALY State-Machine - Outputs based on state and inputs
OUTPUT_DECODE: process (state, in0, in1, shift_reg)
variable result, temp : std_logic_vector (31 downto 0);
variable flag : std_logic := '0';
begin
shift_reg <= in1;
temp := temp;
flag := flag;
result := result;
case state is
when start =>
result := std_logic_vector(to_signed(1, 32));
temp := in0;
flag := '0';
when add => result := std_logic_vector(signed(in0) + signed(in1));
when sub => result := std_logic_vector(signed(in0) - signed(in1));
when mul => result := std_logic_vector(resize(signed(in0) * signed(in1), 32));
when pow =>
if (shift_reg(shift_reg'low) = '1') then
result := std_logic_vector(resize(signed(result) * signed(temp), 32));
else
result := result;
end if;
temp := std_logic_vector(resize(signed(temp) * signed(temp), 32));
shift_reg <= std_logic_vector(shift_right(signed(shift_reg), 1));
when done =>
result := result;
flag := '1';
when others =>
end case;
inter <= result;
status <= flag;
end process;
NEXT_STATE_DECODE: process (state, op, shift_reg, zero, rst) -- rst indicates that one input (op, in0 or in1) changed
begin
--declare default state for next_state to avoid latches
next_state <= state; --default is to stay in current state
case (state) is
when start =>
case (op) is
when "00" => next_state <= add;
when "01" => next_state <= sub;
when "10" => next_state <= mul;
when "11" => next_state <= pow;
when others => next_state <= done;
end case;
when add => next_state <= done;
when sub => next_state <= done;
when mul => next_state <= done;
when pow =>
if (shift_reg = zero) then
next_state <= done;
else
next_state <= pow;
end if;
when done =>
if (rst = '1') then
next_state <= start;
end if;
when others =>
end case;
end process;
end Behavioral;
This seems to be working, at least in this testbench:
library IEEE;
use IEEE.STD_LOGIC_1164.ALL;
use IEEE.NUMERIC_STD.ALL;
entity simple_alu_tb is
end simple_alu_tb;
architecture Behavioral of simple_alu_tb is
component simple_alu is
Port (
clk : in std_logic;
rst : in std_logic;
op : in std_logic_vector (1 downto 0);
in0 : in std_logic_vector (31 downto 0);
in1 : in std_logic_vector (31 downto 0);
res : out std_logic_vector (31 downto 0);
done_flag : out std_logic
);
end component;
signal clk : std_logic := '0';
signal rst : std_logic := '0';
signal op : std_logic_vector (1 downto 0) := (others => '0');
signal in0 : std_logic_vector (31 downto 0) := (others => '0');
signal in1 : std_logic_vector (31 downto 0) := (others => '0');
signal res : std_logic_vector (31 downto 0) := (others => '0');
signal done_flag : std_logic := '0';
constant clk_period : time := 1 ns;
begin
--Instantiate the Unit Under Test (UUT)
uut: simple_alu Port Map (
clk => clk,
rst => rst,
op => op,
in0 => in0,
in1 => in1,
res => res,
done_flag => done_flag
);
-- Clock process definitions
clk_process :process
begin
clk <= '1';
wait for clk_period/2;
clk <= '0';
wait for clk_period/2;
end process;
stim_proc: process
begin
wait for 2*clk_period;
rst <= '1';
wait for clk_period;
rst <= '0';
wait for 4*clk_period;
op <= "00"; -- add
in0 <= std_logic_vector(to_signed(12, 32));
in1 <= std_logic_vector(to_signed(3, 32));
rst <= '1';
wait for clk_period;
rst <= '0';
wait for 2*clk_period;
assert (res = std_logic_vector(to_signed(15, 32))) report "addition failed" severity failure;
wait for 4*clk_period;
op <= "01"; -- sub
in0 <= std_logic_vector(to_signed(12, 32));
in1 <= std_logic_vector(to_signed(3, 32));
rst <= '1';
wait for clk_period;
rst <= '0';
wait for 2*clk_period;
assert (res = std_logic_vector(to_signed(9, 32))) report "subtraction failed" severity failure;
wait for 4*clk_period;
op <= "10"; -- mul
in0 <= std_logic_vector(to_signed(12, 32));
in1 <= std_logic_vector(to_signed(3, 32));
rst <= '1';
wait for clk_period;
rst <= '0';
wait for 2*clk_period;
assert (res = std_logic_vector(to_signed(36, 32))) report "multiplication failed" severity failure;
wait for 4*clk_period;
op <= "11"; -- pow
in0 <= std_logic_vector(to_signed(12, 32));
in1 <= std_logic_vector(to_signed(7, 32));
rst <= '1';
wait for clk_period;
rst <= '0';
wait for 4*clk_period;
assert (res = std_logic_vector(to_signed(35831808, 32))) report "power failed" severity failure;
wait for 4*clk_period;
op <= "11"; -- pow
in0 <= std_logic_vector(to_signed(12, 32));
in1 <= std_logic_vector(to_signed(6, 32));
rst <= '1';
wait for clk_period;
rst <= '0';
wait for 4*clk_period;
assert (res = std_logic_vector(to_signed(2985984, 32))) report "power failed" severity failure;
wait;
end process;
end Behavioral;
I would like to implement that as AXI4-lite component. So I generate the wrapper, adapt the write process and instantiate my module as follows:
...
process (S_AXI_ACLK)
variable loc_addr :std_logic_vector(OPT_MEM_ADDR_BITS downto 0);
begin
if rising_edge(S_AXI_ACLK) then
if S_AXI_ARESETN = '0' then
-- command_reg <= (others => '0');
-- done_flag <= '0';
slv_reg1 <= (others => '0');
slv_reg2 <= (others => '0');
-- slv_reg3 <= (others => '0');
else
loc_addr := axi_awaddr(ADDR_LSB + OPT_MEM_ADDR_BITS downto ADDR_LSB);
if (slv_reg_wren = '1') then
case loc_addr is
when b"00" =>
for byte_index in 0 to (C_S_AXI_DATA_WIDTH/8-4) loop -- write to command register only
if ( S_AXI_WSTRB(byte_index) = '1' ) then
-- Respective byte enables are asserted as per write strobes
-- slave registor 0
command_reg(byte_index*8+7 downto byte_index*8) <= S_AXI_WDATA(byte_index*8+7 downto byte_index*8);
end if;
end loop;
when b"01" =>
for byte_index in 0 to (C_S_AXI_DATA_WIDTH/8-1) loop
if ( S_AXI_WSTRB(byte_index) = '1' ) then
-- Respective byte enables are asserted as per write strobes
-- slave registor 1
slv_reg1(byte_index*8+7 downto byte_index*8) <= S_AXI_WDATA(byte_index*8+7 downto byte_index*8);
end if;
end loop;
when b"10" =>
for byte_index in 0 to (C_S_AXI_DATA_WIDTH/8-1) loop
if ( S_AXI_WSTRB(byte_index) = '1' ) then
-- Respective byte enables are asserted as per write strobes
-- slave registor 2
slv_reg2(byte_index*8+7 downto byte_index*8) <= S_AXI_WDATA(byte_index*8+7 downto byte_index*8);
end if;
end loop;
-- when b"11" => -- do not write to reg3
-- for byte_index in 0 to (C_S_AXI_DATA_WIDTH/8-1) loop
-- if ( S_AXI_WSTRB(byte_index) = '1' ) then
-- -- Respective byte enables are asserted as per write strobes
-- -- slave registor 3
-- slv_reg3(byte_index*8+7 downto byte_index*8) <= S_AXI_WDATA(byte_index*8+7 downto byte_index*8);
-- end if;
-- end loop;
when others =>
command_reg <= command_reg;
done_flag <= done_flag;
slv_reg1 <= slv_reg1;
slv_reg2 <= slv_reg2;
-- slv_reg3 <= slv_reg3;
end case;
end if;
end if;
end if;
end process;
...
-- Add user logic here
-- byte0 byte1 byte2 byte3
slv_reg0 <= done_flag & "0000000" & "00000000" & "00000000" & command_reg;
alu : simple_alu
port map (
clk => S_AXI_ACLK,
rst => slv_reg_wren, -- reset on every write to a register, high active
op => command_reg(1 downto 0),
in0 => slv_reg1,
in1 => slv_reg2,
res => slv_reg3,
done_flag => done_flag
);
-- User logic ends
But when I try to generate the bitstream for my wrapper design which includes the Zync UltraScale+ MPSoC, AXI Interconnect, Processor System Reset and my AXI Peripheral I get the following error:
ERROR: [DRC LUTLP-1] Combinatorial Loop Alert: 1 LUT cells form a combinatorial loop. This can create a race condition. Timing analysis may not be accurate. The preferred resolution is to modify the design to remove combinatorial logic loops. If the loop is known and understood, this DRC can be bypassed by acknowledging the condition and setting the following XDC constraint on any one of the nets in the loop: 'set_property ALLOW_COMBINATORIAL_LOOPS TRUE [get_nets <myHier/myNet>]'. One net in the loop is design_1_i/simple_alu_0/U0/simple_alu_v1_0_S00_AXI_inst/alu/state[0]_i_2_n_0. Please evaluate your design. The cells in the loop are: design_1_i/simple_alu_0/U0/simple_alu_v1_0_S00_AXI_inst/alu/state[0]_i_2.
ERROR: [DRC LUTLP-1] Combinatorial Loop Alert: 1 LUT cells form a combinatorial loop. This can create a race condition. Timing analysis may not be accurate. The preferred resolution is to modify the design to remove combinatorial logic loops. If the loop is known and understood, this DRC can be bypassed by acknowledging the condition and setting the following XDC constraint on any one of the nets in the loop: 'set_property ALLOW_COMBINATORIAL_LOOPS TRUE [get_nets <myHier/myNet>]'. One net in the loop is design_1_i/simple_alu_0/U0/simple_alu_v1_0_S00_AXI_inst/alu/state[1]_i_3_n_0. Please evaluate your design. The cells in the loop are: design_1_i/simple_alu_0/U0/simple_alu_v1_0_S00_AXI_inst/alu/state[1]_i_3.
Please excuse the huge amount of code, I couldn't find a way to show the error with a smaller example.
I tried the solution proposed here:
set_property SEVERITY {Warning} [get_drc_checks LUTLP-1]
But that did nothing. I also tried setting set_property ALLOW_COMBINATORIAL_LOOPS TRUE
for the two nets but that leaves me unsure about the functionality of my circuit. I am using Vivado v2018.3, my target is the Ultra96 from Avnet. Any clues?
EDIT: I have updated the code to reflect the current implementation, I get warnings about latches for result_reg
, flag_reg
and temp_reg
. How do I resolve those?
After a long struggle I finally came up with this solution:
library IEEE;
use IEEE.STD_LOGIC_1164.ALL;
use IEEE.NUMERIC_STD.ALL;
entity simple_alu is
Port (
clk : in std_logic;
rst : in std_logic;
op : in std_logic_vector (1 downto 0);
in0 : in std_logic_vector (31 downto 0);
in1 : in std_logic_vector (31 downto 0);
res : out std_logic_vector (31 downto 0);
done_flag : out std_logic
);
end simple_alu;
architecture Behavioral of simple_alu is
type state_type is (start, add, sub, mul, pow, done);
signal state, next_state : state_type := start;
signal result, next_result, temp, next_temp, shift_reg, next_shift_reg, zero : std_logic_vector (31 downto 0) := (others => '0');
signal next_done_flag : std_logic := '0';
begin
SYNC_PROC: process (clk, rst)
begin
if rising_edge(clk) then
if (rst = '1') then
state <= start;
else
state <= next_state;
res <= next_result;
result <= next_result;
temp <= next_temp;
shift_reg <= next_shift_reg;
done_flag <= next_done_flag;
end if;
end if;
end process;
--MEALY State-Machine - Outputs based on state and inputs
OUTPUT_DECODE: process (state, result, in0, in1, temp, shift_reg)
begin
next_done_flag <= '0';
next_result <= result;
next_shift_reg <= shift_reg;
next_temp <= temp;
case state is
when start =>
next_result <= std_logic_vector(to_signed(1, 32));
next_temp <= in0;
next_shift_reg <= in1;
when add => next_result <= std_logic_vector(signed(in0) + signed(in1));
when sub => next_result <= std_logic_vector(signed(in0) - signed(in1));
when mul => next_result <= std_logic_vector(resize(signed(in0) * signed(in1), 32));
when pow =>
if (shift_reg(shift_reg'low) = '1') then
next_result <= std_logic_vector(resize(signed(result) * signed(temp), 32));
else
next_result <= result;
end if;
next_temp <= std_logic_vector(resize(signed(temp) * signed(temp), 32));
next_shift_reg <= std_logic_vector(shift_right(signed(shift_reg), 1));
when done => next_done_flag <= '1';
when others =>
end case;
end process;
NEXT_STATE_DECODE: process (state, op, shift_reg, zero)
begin
--declare default state for next_state to avoid latches
next_state <= state; --default is to stay in current state
case (state) is
when start =>
case (op) is
when "00" => next_state <= add;
when "01" => next_state <= sub;
when "10" => next_state <= mul;
when "11" => next_state <= pow;
when others => next_state <= done;
end case;
when add => next_state <= done;
when sub => next_state <= done;
when mul => next_state <= done;
when pow =>
if (shift_reg = zero) then
next_state <= done;
else
next_state <= pow;
end if;
when done =>
when others =>
end case;
end process;
end Behavioral;
The problem was that I did not understand how hardware description works, now I know a little (at least I hope so..). Especially how clocked and unclocked processes are connected (save intermediate results in registers). I will leave this question up just in case another beginner stumbles upon the same issue. If you think I should remove it, please state that in a comment and I will do so.
Here are some resources that helped me:
this question and in particular the accepted answer
some rules I picked up somewhere: