Search code examples
vhdlvivado

VHDL Vivado Combinatorial Loop Alert


I am trying to implement a simple ALU:

library IEEE;
use IEEE.STD_LOGIC_1164.ALL;
use IEEE.NUMERIC_STD.ALL;


entity simple_alu is
  Port (
    clk : in std_logic;
    rst : in std_logic;
    op : in std_logic_vector (1 downto 0);
    in0 : in std_logic_vector (31 downto 0);
    in1 : in std_logic_vector (31 downto 0);
    res : out std_logic_vector (31 downto 0);
    done_flag : out std_logic
  );
end simple_alu;

architecture Behavioral of simple_alu is
    type state_type is (start, add, sub, mul, pow, done);
    signal state, next_state : state_type := start;
    signal status : std_logic := '0';
    signal inter, shift_reg, zero : std_logic_vector (31 downto 0) := (others => '0');
begin

   SYNC_PROC: process (clk, rst)
    begin
        if (rst = '0') then
            if rising_edge(clk) then
                state <= next_state;
                res <= inter;
                done_flag <= status;
            end if;
        else
            state <= start;
            res <= (others => '0');
            done_flag <= '0';
        end if;
    end process;

    --MEALY State-Machine - Outputs based on state and inputs
    OUTPUT_DECODE: process (state, in0, in1, shift_reg)
    variable result, temp : std_logic_vector (31 downto 0);
    variable flag : std_logic := '0';
    begin
        shift_reg <= in1;
        temp := temp;
        flag := flag;
        result := result;
        case state is
            when start =>
                result := std_logic_vector(to_signed(1, 32));
                temp := in0;
                flag := '0';
            when add => result := std_logic_vector(signed(in0) + signed(in1));
            when sub => result := std_logic_vector(signed(in0) - signed(in1));
            when mul => result := std_logic_vector(resize(signed(in0) * signed(in1), 32));
            when pow =>
                if (shift_reg(shift_reg'low) = '1') then
                    result := std_logic_vector(resize(signed(result) * signed(temp), 32));
                else
                    result := result;
                end if;
                temp := std_logic_vector(resize(signed(temp) * signed(temp), 32));
                shift_reg <= std_logic_vector(shift_right(signed(shift_reg), 1));
            when done =>
                result := result;
                flag := '1';
            when others =>
        end case;
        inter <= result;
        status <= flag;
    end process;

    NEXT_STATE_DECODE: process (state, op, shift_reg, zero, rst) -- rst indicates that one input (op, in0 or in1) changed
    begin
        --declare default state for next_state to avoid latches
        next_state <= state;  --default is to stay in current state
        case (state) is
            when start =>
                case (op) is
                    when "00" => next_state <= add;
                    when "01" => next_state <= sub;
                    when "10" => next_state <= mul;
                    when "11" => next_state <= pow;
                    when others => next_state <= done;
                end case;
            when add => next_state <= done;
            when sub => next_state <= done;
            when mul => next_state <= done;
            when pow =>
                if (shift_reg = zero) then
                    next_state <= done;
                else
                    next_state <= pow;
                end if;
            when done =>
                if (rst = '1') then
                    next_state <= start;
                end if;
            when others =>
        end case;
    end process;

end Behavioral;

This seems to be working, at least in this testbench:

library IEEE;
use IEEE.STD_LOGIC_1164.ALL;
use IEEE.NUMERIC_STD.ALL;

entity simple_alu_tb is
end simple_alu_tb;

architecture Behavioral of simple_alu_tb is
  component simple_alu is
    Port (
      clk : in std_logic;
      rst : in std_logic;
      op : in std_logic_vector (1 downto 0);
      in0 : in std_logic_vector (31 downto 0);
      in1 : in std_logic_vector (31 downto 0);
      res : out std_logic_vector (31 downto 0);
      done_flag : out std_logic
    );
  end component;

  signal clk : std_logic := '0';
  signal rst : std_logic := '0';
  signal op : std_logic_vector (1 downto 0) := (others => '0');
  signal in0 : std_logic_vector (31 downto 0) := (others => '0');
  signal in1 : std_logic_vector (31 downto 0) := (others => '0');
  signal res : std_logic_vector (31 downto 0) := (others => '0');
  signal done_flag : std_logic := '0';
  constant clk_period : time := 1 ns;
begin

    --Instantiate the Unit Under Test (UUT)
    uut: simple_alu Port Map (
        clk => clk,
        rst => rst,
        op => op,
        in0 => in0,
        in1 => in1,
        res => res,
        done_flag => done_flag
    );

    -- Clock process definitions
    clk_process :process
    begin
        clk <= '1';
        wait for clk_period/2;
        clk <= '0';
        wait for clk_period/2;
    end process;

    stim_proc: process
    begin
        wait for 2*clk_period;
        rst <= '1';
        wait for clk_period;
        rst <= '0';

        wait for 4*clk_period;
        op <= "00"; -- add
        in0 <= std_logic_vector(to_signed(12, 32));
        in1 <= std_logic_vector(to_signed(3, 32));
        rst <= '1';
        wait for clk_period;
        rst <= '0';
        wait for 2*clk_period;
        assert (res = std_logic_vector(to_signed(15, 32))) report "addition failed" severity failure;

        wait for 4*clk_period;
        op <= "01"; -- sub
        in0 <= std_logic_vector(to_signed(12, 32));
        in1 <= std_logic_vector(to_signed(3, 32));
        rst <= '1';
        wait for clk_period;
        rst <= '0';
        wait for 2*clk_period;
        assert (res = std_logic_vector(to_signed(9, 32))) report "subtraction failed" severity failure;

        wait for 4*clk_period;
        op <= "10"; -- mul
        in0 <= std_logic_vector(to_signed(12, 32));
        in1 <= std_logic_vector(to_signed(3, 32));
        rst <= '1';
        wait for clk_period;
        rst <= '0';
        wait for 2*clk_period;
        assert (res = std_logic_vector(to_signed(36, 32))) report "multiplication failed" severity failure;

        wait for 4*clk_period;
        op <= "11"; -- pow
        in0 <= std_logic_vector(to_signed(12, 32));
        in1 <= std_logic_vector(to_signed(7, 32));
        rst <= '1';
        wait for clk_period;
        rst <= '0';
        wait for 4*clk_period;
        assert (res = std_logic_vector(to_signed(35831808, 32))) report "power failed" severity failure;

        wait for 4*clk_period;
        op <= "11"; -- pow
        in0 <= std_logic_vector(to_signed(12, 32));
        in1 <= std_logic_vector(to_signed(6, 32));
        rst <= '1';
        wait for clk_period;
        rst <= '0';
        wait for 4*clk_period;
        assert (res = std_logic_vector(to_signed(2985984, 32))) report "power failed" severity failure;

        wait;
    end process;

end Behavioral;

I would like to implement that as AXI4-lite component. So I generate the wrapper, adapt the write process and instantiate my module as follows:

...

    process (S_AXI_ACLK)
    variable loc_addr :std_logic_vector(OPT_MEM_ADDR_BITS downto 0); 
    begin
      if rising_edge(S_AXI_ACLK) then 
        if S_AXI_ARESETN = '0' then
--        command_reg <= (others => '0');
--        done_flag <= '0';
          slv_reg1 <= (others => '0');
          slv_reg2 <= (others => '0');
--        slv_reg3 <= (others => '0');
        else
          loc_addr := axi_awaddr(ADDR_LSB + OPT_MEM_ADDR_BITS downto ADDR_LSB);
          if (slv_reg_wren = '1') then
            case loc_addr is
              when b"00" =>
                for byte_index in 0 to (C_S_AXI_DATA_WIDTH/8-4) loop -- write to command register only
                  if ( S_AXI_WSTRB(byte_index) = '1' ) then
                    -- Respective byte enables are asserted as per write strobes                   
                    -- slave registor 0
                    command_reg(byte_index*8+7 downto byte_index*8) <= S_AXI_WDATA(byte_index*8+7 downto byte_index*8);
                  end if;
                end loop;
              when b"01" =>
                for byte_index in 0 to (C_S_AXI_DATA_WIDTH/8-1) loop
                  if ( S_AXI_WSTRB(byte_index) = '1' ) then
                    -- Respective byte enables are asserted as per write strobes                   
                    -- slave registor 1
                    slv_reg1(byte_index*8+7 downto byte_index*8) <= S_AXI_WDATA(byte_index*8+7 downto byte_index*8);
                  end if;
                end loop;
              when b"10" =>
                for byte_index in 0 to (C_S_AXI_DATA_WIDTH/8-1) loop
                  if ( S_AXI_WSTRB(byte_index) = '1' ) then
                    -- Respective byte enables are asserted as per write strobes                   
                    -- slave registor 2
                    slv_reg2(byte_index*8+7 downto byte_index*8) <= S_AXI_WDATA(byte_index*8+7 downto byte_index*8);
                  end if;
                end loop;
--            when b"11" => -- do not write to reg3
--              for byte_index in 0 to (C_S_AXI_DATA_WIDTH/8-1) loop
--                if ( S_AXI_WSTRB(byte_index) = '1' ) then
--                  -- Respective byte enables are asserted as per write strobes                   
--                  -- slave registor 3
--                  slv_reg3(byte_index*8+7 downto byte_index*8) <= S_AXI_WDATA(byte_index*8+7 downto byte_index*8);
--                end if;
--              end loop;
              when others =>
                command_reg <= command_reg;
                done_flag <= done_flag;
                slv_reg1 <= slv_reg1;
                slv_reg2 <= slv_reg2;
--              slv_reg3 <= slv_reg3;
            end case;
          end if;
        end if;
      end if;                   
    end process; 

...

    -- Add user logic here
    --          byte0                   byte1        byte2        byte3
    slv_reg0 <= done_flag & "0000000" & "00000000" & "00000000" & command_reg;
    alu : simple_alu
    port map (
        clk => S_AXI_ACLK,
        rst => slv_reg_wren, -- reset on every write to a register, high active
        op => command_reg(1 downto 0),
        in0 => slv_reg1,
        in1 => slv_reg2,
        res => slv_reg3,
        done_flag => done_flag
    );
    -- User logic ends

But when I try to generate the bitstream for my wrapper design which includes the Zync UltraScale+ MPSoC, AXI Interconnect, Processor System Reset and my AXI Peripheral I get the following error:

ERROR: [DRC LUTLP-1] Combinatorial Loop Alert: 1 LUT cells form a combinatorial loop. This can create a race condition. Timing analysis may not be accurate. The preferred resolution is to modify the design to remove combinatorial logic loops. If the loop is known and understood, this DRC can be bypassed by acknowledging the condition and setting the following XDC constraint on any one of the nets in the loop: 'set_property ALLOW_COMBINATORIAL_LOOPS TRUE [get_nets <myHier/myNet>]'. One net in the loop is design_1_i/simple_alu_0/U0/simple_alu_v1_0_S00_AXI_inst/alu/state[0]_i_2_n_0. Please evaluate your design. The cells in the loop are: design_1_i/simple_alu_0/U0/simple_alu_v1_0_S00_AXI_inst/alu/state[0]_i_2.
ERROR: [DRC LUTLP-1] Combinatorial Loop Alert: 1 LUT cells form a combinatorial loop. This can create a race condition. Timing analysis may not be accurate. The preferred resolution is to modify the design to remove combinatorial logic loops. If the loop is known and understood, this DRC can be bypassed by acknowledging the condition and setting the following XDC constraint on any one of the nets in the loop: 'set_property ALLOW_COMBINATORIAL_LOOPS TRUE [get_nets <myHier/myNet>]'. One net in the loop is design_1_i/simple_alu_0/U0/simple_alu_v1_0_S00_AXI_inst/alu/state[1]_i_3_n_0. Please evaluate your design. The cells in the loop are: design_1_i/simple_alu_0/U0/simple_alu_v1_0_S00_AXI_inst/alu/state[1]_i_3.

Please excuse the huge amount of code, I couldn't find a way to show the error with a smaller example.

I tried the solution proposed here:

set_property SEVERITY {Warning} [get_drc_checks LUTLP-1]

But that did nothing. I also tried setting set_property ALLOW_COMBINATORIAL_LOOPS TRUE for the two nets but that leaves me unsure about the functionality of my circuit. I am using Vivado v2018.3, my target is the Ultra96 from Avnet. Any clues?

EDIT: I have updated the code to reflect the current implementation, I get warnings about latches for result_reg, flag_reg and temp_reg. How do I resolve those?


Solution

  • After a long struggle I finally came up with this solution:

    library IEEE;
    use IEEE.STD_LOGIC_1164.ALL;
    use IEEE.NUMERIC_STD.ALL;
    
    
    entity simple_alu is
      Port (
        clk : in std_logic;
        rst : in std_logic;
        op : in std_logic_vector (1 downto 0);
        in0 : in std_logic_vector (31 downto 0);
        in1 : in std_logic_vector (31 downto 0);
        res : out std_logic_vector (31 downto 0);
        done_flag : out std_logic
      );
    end simple_alu;
    
    architecture Behavioral of simple_alu is
        type state_type is (start, add, sub, mul, pow, done);
        signal state, next_state : state_type := start;
        signal result, next_result, temp, next_temp, shift_reg, next_shift_reg, zero : std_logic_vector (31 downto 0) := (others => '0');
        signal next_done_flag : std_logic := '0';
    begin
    
       SYNC_PROC: process (clk, rst)
        begin
            if rising_edge(clk) then
                if (rst = '1') then
                    state <= start;
                else
                    state <= next_state;
                    res <= next_result;
                    result <= next_result;
                    temp <= next_temp;
                    shift_reg <= next_shift_reg;
                    done_flag <= next_done_flag;
                end if;
            end if;
        end process;
    
        --MEALY State-Machine - Outputs based on state and inputs
        OUTPUT_DECODE: process (state, result, in0, in1, temp, shift_reg)
        begin
            next_done_flag <= '0';
            next_result <= result;
            next_shift_reg <= shift_reg;
            next_temp <= temp;
            case state is
                when start =>
                    next_result <= std_logic_vector(to_signed(1, 32));
                    next_temp <= in0;
                    next_shift_reg <= in1;
                when add => next_result <= std_logic_vector(signed(in0) + signed(in1));
                when sub => next_result <= std_logic_vector(signed(in0) - signed(in1));
                when mul => next_result <= std_logic_vector(resize(signed(in0) * signed(in1), 32));
                when pow =>
                    if (shift_reg(shift_reg'low) = '1') then
                        next_result <= std_logic_vector(resize(signed(result) * signed(temp), 32));
                    else
                        next_result <= result;
                    end if;
                    next_temp <= std_logic_vector(resize(signed(temp) * signed(temp), 32));
                    next_shift_reg <= std_logic_vector(shift_right(signed(shift_reg), 1));
                when done => next_done_flag <= '1';
                when others =>
            end case;
        end process;
    
        NEXT_STATE_DECODE: process (state, op, shift_reg, zero)
        begin
            --declare default state for next_state to avoid latches
            next_state <= state;  --default is to stay in current state
            case (state) is
                when start =>
                    case (op) is
                        when "00" => next_state <= add;
                        when "01" => next_state <= sub;
                        when "10" => next_state <= mul;
                        when "11" => next_state <= pow;
                        when others => next_state <= done;
                    end case;
                when add => next_state <= done;
                when sub => next_state <= done;
                when mul => next_state <= done;
                when pow =>
                    if (shift_reg = zero) then
                        next_state <= done;
                    else
                        next_state <= pow;
                    end if;
                when done =>
                when others =>
            end case;
        end process;
    
    end Behavioral;
    

    The problem was that I did not understand how hardware description works, now I know a little (at least I hope so..). Especially how clocked and unclocked processes are connected (save intermediate results in registers). I will leave this question up just in case another beginner stumbles upon the same issue. If you think I should remove it, please state that in a comment and I will do so.

    Here are some resources that helped me:

    this question and in particular the accepted answer

    some rules I picked up somewhere:

    • Don't read from the signals to which you write.
    • Have a correct sensitivity list (all signals that you read should be in the sensitivity list)
    • Make sure that all signals to which your write are assigned in every path. (for example: in each branch of an if-else-statement)
    • For processes which use variable, make sure every variable is initialized a default value before reading it (in another variable or signal ).