Search code examples
vhdlsignal-processingxilinx-ise

Resetting Preg of Dsp slice in virtex 6 FPGA


Here is the VHDL code where I have used a DSP slice as a MACC (multiply-accumulate) unit, using the primitives that are available in the language templates. At every 7th clock cycle I am resetting the P register (Preg); when I do that, the multiplied output of that cycle is lost. How do I reset Preg without losing any data?

I have attached a screenshot of the output waveform.

output waveform

---------------------------------code---------------------------------------

library IEEE;
use IEEE.STD_LOGIC_1164.ALL;


library UNISIM;
use UNISIM.VComponents.all;

use IEEE.STD_LOGIC_ARITH.ALL;
use IEEE.STD_LOGIC_UNSIGNED.ALL;


-- dsp12: wraps a single DSP48E1 primitive configured as a multiply-accumulate
-- unit, together with a small counter that periodically pulses the reset of
-- the DSP's P (output) register.
entity dsp12 is
    -- clk1    : clock for both the DSP48E1 and the counter process.
    -- a_in1   : 30-bit operand wired to the DSP's A input.
    -- b_in1   : 18-bit operand wired to the DSP's B input.
    -- p_out   : 48-bit result taken from the DSP's P output.
    -- reset_p : driven by the counter process below and fed to RSTP; declared
    --           inout, presumably so it can be both driven and read inside the
    --           architecture -- TODO confirm no external driver is intended.
    -- count   : 4-bit free-running counter, starts at "0000"; inout for the
    --           same apparent reason (it is read back when incremented).
    Port ( clk1 : in  STD_LOGIC;
           a_in1 : in  STD_LOGIC_vector(29 downto 0);
           b_in1 : in  STD_LOGIC_vector(17 downto 0);
           p_out : out  STD_LOGIC_vector(47 downto 0);
              reset_p: inout  std_logic;
              count :inout std_logic_vector(3 downto 0):="0000"
);
end dsp12;

architecture Behavioral of dsp12 is
-- Never assigned anywhere in this architecture, so it stays at its initial
-- value '0': every DSP reset tied to it is permanently de-asserted.
signal reset: std_logic:='0';

begin
dsp1: DSP48E1

generic map(

-- Feature Control Attributes: Data Path Selection
-- A and B come from the fabric inputs (not the cascade), the D port /
-- pre-adder is unused and the multiplier is enabled.
A_INPUT => "DIRECT", 
B_INPUT => "DIRECT", 
USE_DPORT => FALSE, 
USE_MULT => "MULTIPLY",


-- Pattern detector: not used (NO_PATDET), remaining values are don't-cares.
AUTORESET_PATDET => "NO_RESET", 
MASK => X"ffffffffffff" , 
PATTERN => X"000000000000", 
SEL_MASK => "MASK", 
SEL_PATTERN => "PATTERN", 
USE_PATTERN_DETECT => "NO_PATDET", 


-- Register control: one register stage on A, B, the multiplier output (M)
-- and the result (P); the C and D input registers are not used.
ACASCREG => 1, 
ADREG => 0,
ALUMODEREG => 1, 
AREG => 1,
BCASCREG => 1,
BREG => 1,
CARRYINREG => 1, 
CARRYINSELREG => 1, 
CREG =>0, 
DREG => 0, 
INMODEREG => 1, 
MREG => 1,
OPMODEREG => 1, 
PREG => 1, 
USE_SIMD => "ONE48" 
)


port map (


-- Cascade outputs: all left unconnected.
ACOUT =>open ,--ACOUT(i) ,
BCOUT =>open,--1,--BCOUT(i) , 
CARRYCASCOUT => open, 
MULTSIGNOUT => open,
PCOUT => open , 


-- Status outputs: all left unconnected.
OVERFLOW => open, 
PATTERNBDETECT => open, 
PATTERNDETECT => open, 
UNDERFLOW => open, 

-- Data outputs: carry-out is unused; P drives the 48-bit p_out port.
CARRYOUT => open, 
P => P_out,--P(i) , 

-- Cascade inputs: unused, tied to zero.
ACIN =>"000000000000000000000000000000",
BCIN =>"000000000000000000", 
CARRYCASCIN => '0', 
MULTSIGNIN => '0', 
PCIN => X"000000000000" ,

-- Control inputs (all constant).
-- OPMODE bits 6-4 = "010" feed the P register back into the post-adder, so
-- together with the multiplier selection in the low bits the slice computes
-- P <= A*B + P (see the DSP48E1 user guide, OPMODE tables).
ALUMODE => "0000", 
CARRYINSEL => "000", 
CEINMODE => '0', 
CLK => clk1, 
INMODE => "00000", 
OPMODE => "0100101", 
RSTINMODE => '0', 

-- Data inputs: A and B come from the entity ports; C, D and CARRYIN are
-- tied to zero.
A => A_in1,
B => B_in1,
C => X"000000000000", 
CARRYIN => '0',
D => "0000000000000000000000000", 

-- Clock enables: the registers on the A/B/M/P path and the control
-- registers are always enabled; the unused C, D and AD registers are held
-- disabled.
CEA1 => '1', 
CEA2 => '1',
CEAD =>'0',
CEALUMODE => '1',
CEB1 => '1', 
CEB2 => '1', 
CEC => '0', 
CECARRYIN => '1',
CECTRL => '1',
CED =>'0' ,
CEM => '1', 
CEP => '1', 
-- Resets: everything except the P register is tied to the constant-'0'
-- signal "reset"; only RSTP is driven dynamically, by reset_p.
RSTA => Reset, 
RSTALLCARRYIN => Reset, 
RSTALUMODE => Reset, 
RSTB => Reset,
RSTC => Reset,
RSTCTRL => Reset, 
RSTD =>Reset, 
RSTM =>Reset,
-- Asserting RSTP clears the accumulator, which is what discards the product
-- of that cycle (the behaviour described in the question).
RSTP =>Reset_p
);


-- Counter and P-register reset generator.
-- On every rising clock edge the 4-bit counter is incremented, and reset_p is
-- registered: '1' when the three low bits of count (the value before this
-- edge's increment takes effect) are "111", '0' otherwise. count(3) does not
-- take part in the comparison, so reset_p is high for one clock in every
-- eight.
process(clk1)
begin

if clk1' event and clk1='1' then
count<=count+"0001";


if count(2 downto 0)="111" then
reset_p<='1';
else reset_p<='0';
end if;
end if;
end process;

end Behavioral;

Solution

  • Let's put my comment into an answer.

    From your description, it seems you do not want to reset register P. Instead, it seems you want to accumulate every 8 values.

    If you reset the register, you will always get the effect you see. Instead, you want to dynamically change the operational mode of the DSP to:

    • 1 cycle of the multiply mode: P_out = A*B (+0)
    • 7 cycles of the MAC mode: P_out = A*B + P_in

    As shown in the Xilinx DSP48E1 user guide, the OPMODE you are now using is "0100101", of which bits 6-4 are important for your purpose (table 2-9). "010" means the output of register P is the input for the post-adder. You want to set these bits to "000" instead, which sets that adder input to zero (0).

    A simple solution would therefore be to modify your code:

    OPMODE => "0100101", 
    RSTP =>Reset_p
    

    To:

    OPMODE => "0"&not(Reset_p)&"00101", 
    RSTP =>Reset
    

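    But you could probably clean that up. Note that an expression such as this as a port-map actual is only accepted from VHDL-2008 on; with an older language setting, build the OPMODE vector in a separate signal and connect that signal instead.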


    Different solution

    Instead of instantiating the DSP as a component, you can just as well write an RTL description. The synthesis tool will understand this and produce your MAC. Example: this is a VHDL-2008 description. It will synthesize.

    library ieee;
    use ieee.std_logic_1164.all;
    
    -- Accumulate8: multiply-accumulate over windows of eight samples.
    --
    -- Ports:
    --   clk : clock; everything is registered on its rising edge.
    --   rst : synchronous, active-high reset (clears the sum, restarts the window).
    --   A   : 30-bit operand, interpreted as unsigned.
    --   B   : 18-bit operand, interpreted as unsigned.
    --   P   : 48-bit running sum of A*B for the current window.
    entity Accumulate8 is
        port(
            clk : in std_logic;
            rst : in std_logic;
            A : in std_logic_vector(29 downto 0);
            B : in std_logic_vector(17 downto 0);
            P : out std_logic_vector(47 downto 0)
        );
    end entity;
    
    architecture rtl of Accumulate8 is
        use ieee.numeric_std.all;
        -- Down-counter marking the position inside the window.
        signal count : integer range 0 to 7 := 7;
        -- Internal accumulator. The sum is kept here rather than read back
        -- from the output port P: reading an "out" port is only legal from
        -- VHDL-2008 on, and this form also compiles as VHDL-93.
        signal acc : unsigned(47 downto 0);
    begin
        -- The output port simply mirrors the accumulator register.
        P <= std_logic_vector(acc);
    
        mac: process(clk)
        begin
            if rising_edge(clk) then
                if count = 0 then
                    -- Start of a new window: load the product instead of
                    -- adding it, so no sample is lost to a register reset.
                    count <= 7;
                    acc <= unsigned(A) * unsigned(B);
                else
                    -- Inside a window: 30 x 18 bits gives a 48-bit product,
                    -- which is added to the 48-bit running sum.
                    count <= count - 1;
                    acc <= unsigned(A) * unsigned(B) + acc;
                end if;
                -- Synchronous reset; placed last so that it overrides the
                -- assignments made above in the same clock cycle.
                if rst = '1' then
                    count <= 7;
                    acc <= (others => '0');
                end if;
            end if;
        end process;
    end architecture;
    

    test bench

    -- Stimulus-only test bench for Accumulate8: free-running 2 ns clock,
    -- a reset held for the first 4 ns, and operands that step on every
    -- rising clock edge. The result is meant to be inspected in the waveform
    -- viewer; nothing is checked automatically.
    entity Accumulate8_tb is
    end entity;
    
    library ieee;
    
    architecture rtl of Accumulate8_tb is
        use ieee.std_logic_1164.all;
        use ieee.numeric_std.all;
    
        constant HALF_PERIOD : time := 1 ns;
    
        signal clk : std_logic;
        signal rst : std_logic;
        signal A : unsigned(29 downto 0) := (others => '0');
        signal B : unsigned(17 downto 0) := (others => '0');
        signal P : std_logic_vector(47 downto 0);
    begin
    
        -- Clock: low for the first half period, high for the second.
        clk_proc: process
        begin
            clk <= '0';
            wait for HALF_PERIOD;
            clk <= '1';
            wait for HALF_PERIOD;
        end process;
    
        -- Reset: asserted from time zero, released for good after 4 ns.
        rst_proc: rst <= '1', '0' after 4 ns;
    
        -- Operands: A advances by 3 and B by 7 on each rising clock edge.
        cnt_proc: process
        begin
            wait until rising_edge(clk);
            A <= A + 3;
            B <= B + 7;
        end process;
    
        -- Device under test; the unsigned operands are converted at the ports.
        DUT: entity work.Accumulate8
            port map(
                clk => clk,
                rst => rst,
                A   => std_logic_vector(A),
                B   => std_logic_vector(B),
                P   => P
            );
    end architecture;