Search code examples
Tags: verilog, system-verilog, modelsim

Verilog always @(posedge clk) doesn't work


I want to introduce a one-cycle delay to valid_dat by registering it with @(posedge clk), but it doesn't work. Does anyone know why? The counter works fine with the clock and increments by one every clock cycle, but valid_delay changes simultaneously with valid_dat. I don't know what is wrong. Could someone please let me know the reason?

// Butterfly
// 1. U <- (X + Y) MOD P
// 2. V <- ((X - Y) * W * INV) MOD P

// Macro for number of input data, `width groups = 2 x `width numbers in total.
// In module BF it sets the depth of every buffer, the counter widths (through
// $clog2), and the thresholds the packet-end / output logic compares against.
`define width 4

// Top-Level Design
//
// BF: control unit and buffering around the Add256, Sub256 and Mont256
// components (see the butterfly equations in the file header).
//
// Parameters (passed straight through to the components):
//   inv, Mp - 256-bit constants forwarded to Mont256 only
//   M       - 256-bit constant forwarded to Add256, Sub256 and Mont256
//
// Ports:
//   clk       - clock for all registered logic
//   nrst      - active-low asynchronous reset
//   valid_dat - when high, dat_in is shifted into A_buffer / B_buffer
//   valid_tw  - when high, dat_in is shifted into TW_buffer
//   dat_in    - 256-bit input word shared by both input paths
//   valid_out - registered flag, set when the control unit enters state_out
//   dat_out   - U_buffer / V_buffer tail, selected by cnt_out[0]; zero when
//               valid_out is low
//   calc_done - registered flag, set in state_out when cnt_out == 2*`width-2
//
// NOTE(review): items deliberately left unchanged because the intent cannot be
// determined from this file alone:
//   * pkt_start is declared but never assigned inside this module, so the
//     counters are never cleared and the FSM cannot leave state_rst as written.
//   * valid_delay is written but never read inside this module.
//   * AddStart is driven by the FSM but not connected; Add256.start is tied
//     to SubStart.
//   * Several sensitivity lists mix clock edges with level events, and the
//     buffers are updated from level-sensitive processes. This simulates but
//     is unlikely to synthesize as intended.
//   * U_buffer and V_buffer are written from two different always blocks.
module BF
#(parameter inv = 256'h8fac2de6599e1a9fa53ddf925918fe6fff2d6e9209dacc5e13f75b68fe75c041,
            M = 256'h73eda753299d7d483339d80809a1d80553bda402fffe5bfeffffffff00000001,
            Mp = 256'h3d443ab0d7bf2839181b2c170004ec0653ba5bfffffe5bfdfffffffeffffffff
    )
(input clk, input nrst, input valid_dat, input valid_tw, input[255:0] dat_in, output valid_out, output [255:0] dat_out, output calc_done);

// Counter width derived from the buffer depth
localparam wd = $clog2(`width);
// Control unit state encoding (state_halt is defined but not used below)
localparam state_rst = 3'b000,
           state_ld = 3'b001,
           state_sub = 3'b010,
           state_mont = 3'b011,
           state_out = 3'b100,
           state_done = 3'b101,
           state_halt = 3'b111;

// Indicators
reg pkt_end;
reg pkt_start, valid_delay;
// For loop index (shared by every shift loop in this module)
integer i;
// Buffers: each is a `width-deep shift register of 256-bit words
reg [255:0] A_buffer[`width-1:0], B_buffer[`width-1:0], TW_buffer[`width-1:0], U_buffer[`width-1:0], V_buffer[`width-1:0];
// Counters
reg [wd-1:0] cnt_tw, cnt_calc;
reg [wd:0] cnt_dat, cnt_out;
// Start signals
reg AddStart, SubStart, MontStart;
// Component inputs
wire [255:0] AddA, AddB, SubA, SubB, MontA, MontB;
// Component outputs -- busy/done signals
wire AddBusy, AddDone, SubBusy, SubDone, MontBusy, MontDone;
// Component outputs -- calculation results
wire [255:0] AddC, SubC, MontC;
// Outputs
reg valid_out_s;
reg calc_done_s;
reg [255:0] dat_out_s;
// Control unit state
reg [2:0] state;

// Test signal (not referenced inside this module)
reg [255:0] dat_test;

// Instantiation of calculation components
Mont256 #(.inv(inv), .M(M), .Mp(Mp)) mont (.clk(clk), .start(MontStart), .A(MontA), .B(MontB), .busy(MontBusy), .done(MontDone), .C(MontC));
// NOTE(review): the adder is started by SubStart, not AddStart -- confirm this is intended.
Add256 #(.M(M)) add (.clk(clk), .start(SubStart), .A(AddA), .B(AddB), .busy(AddBusy), .done(AddDone), .C(AddC));
Sub256 #(.M(M)) sub (.clk(clk), .start(SubStart), .A(SubA), .B(SubB), .busy(SubBusy), .done(SubDone), .C(SubC));

// Indicators
// valid_delay is valid_dat registered on the rising clock edge (one-cycle delay).
// If a testbench changes valid_dat with a blocking assignment exactly at a
// rising edge, the new value may be sampled on that same edge, which makes the
// two signals look simultaneous in a waveform.
always @(posedge clk or negedge nrst) begin
    if (!nrst) begin
        valid_delay <= 0;
    end
    else begin
        valid_delay <= valid_dat;
    end
end

// pkt_end: combinational flag, high while loading once both input counters
// have reached their thresholds. Blocking assignment is used because this is
// purely combinational logic.
always @(*) begin
    if (state == state_ld && cnt_tw == `width/2 && cnt_dat == `width) begin
        pkt_end = 1;
    end
    else begin
        pkt_end = 0;
    end
end

// Counters
// NOTE(review): level-sensitive pkt_start in an edge-triggered list means the
// else branch also runs whenever pkt_start changes to a non-true value.
always @(posedge clk or pkt_start) begin
    if (pkt_start) begin
        cnt_dat <= 0;
        cnt_tw <= 0;
        cnt_calc <= 0;
        cnt_out <= 0;
    end
    else begin
        if (valid_dat) begin
            cnt_dat <= cnt_dat + 1;
        end
        if (valid_tw) begin
            cnt_tw <= cnt_tw + 1;
        end
        if (MontDone) begin
            cnt_calc <= cnt_calc + 1;
        end
        if (state == state_out) begin
            cnt_out <= cnt_out + 1;
        end
    end
end

// Input Buffers
// Note to readers: this may cause issues in on-board testing since the status
// of cnt_dat is unknown when it is not reset, but it should work in simulation.
// The block runs whenever cnt_dat or MontDone changes; cnt_dat[0] selects
// whether dat_in is shifted into A_buffer (0) or B_buffer (1).
always @(cnt_dat or MontDone) begin
    if (valid_dat) begin
        if (cnt_dat[0] == 0) begin
            for (i=1; i<`width; i=i+1) begin
                A_buffer[i] <= A_buffer[i-1];
            end
            A_buffer[0] <= dat_in;
        end
        else begin
            if (cnt_dat[0] == 1) begin
                for (i=1; i<`width; i=i+1) begin
                    B_buffer[i] <= B_buffer[i-1];
                end
                B_buffer[0] <= dat_in;
            end
        end
    end
    // When a Montgomery result is flagged, advance both operand buffers and
    // shift in zeros. This comes last so it overrides the load above if both
    // conditions hold.
    if (MontDone) begin
        for (i=1; i<`width; i=i+1) begin
            A_buffer[i] <= A_buffer[i-1];
            B_buffer[i] <= B_buffer[i-1];
        end
        A_buffer[0] <= '0;
        B_buffer[0] <= '0;
    end
end

// Note to readers: this may cause issues in on-board testing since the status
// of cnt_tw is unknown when it is not reset, but it should work in simulation.
// TW_buffer: loaded from dat_in while valid_tw is high, advanced on MontDone.
always @(cnt_tw or MontDone) begin
    if (valid_tw) begin
        for (i=1; i<`width; i=i+1) begin
            TW_buffer[i] <= TW_buffer[i-1];
        end
        TW_buffer[0] <= dat_in;
    end
    if (MontDone) begin
        for (i=1; i<`width; i=i+1) begin
            TW_buffer[i] <= TW_buffer[i-1];
        end
        TW_buffer[0] <= '0;
    end
end

// Inputs for Adder, Subtractor and Multiplier: the oldest buffer entries feed
// the adder/subtractor; the subtractor result and the oldest TW entry feed the
// multiplier.
assign AddA = A_buffer[`width-1];
assign AddB = B_buffer[`width-1];
assign SubA = A_buffer[`width-1];
assign SubB = B_buffer[`width-1];
assign MontA = SubC;
assign MontB = TW_buffer[`width-1];

// Control Unit
// state_rst -> state_ld -> (state_sub <-> state_mont)* -> state_out -> state_done
// NOTE(review): level-sensitive pkt_start in this list makes the FSM step
// whenever pkt_start changes, not only on clock edges.
always @(posedge clk or pkt_start or negedge nrst) begin
    if (!nrst) begin
        valid_out_s <= 0;
        calc_done_s <= 0;
        state <= state_rst;
        AddStart <= 0;
        SubStart <= 0;
        MontStart <= 0;
    end
    else begin
        case (state)
            state_rst: begin
                // Wait for the start-of-packet indicator
                if (pkt_start == 1) begin
                    state <= state_ld;
                end
            end
            state_ld: begin
                // All inputs loaded: kick off add/subtract
                if (pkt_end == 1) begin
                    state <= state_sub;
                    AddStart <= 1;
                    SubStart <= 1;
                    MontStart <= 0;
                end
            end
            state_sub: begin
                // Subtraction finished: hand its result to the multiplier
                if (SubDone == 1) begin
                    state <= state_mont;
                    SubStart <= 0;
                    MontStart <= 1;
                end
            end
            state_mont: begin
                if (MontDone == 1 && cnt_calc == '1) begin
                    // cnt_calc is at its maximum: start output
                    state <= state_out;
                    SubStart <= 0;
                    MontStart <= 0;
                    valid_out_s <= 1;
                end
                else begin
                    if (MontDone == 1 && cnt_calc != '1) begin
                        // More operands pending: next subtract
                        state <= state_sub;
                        SubStart <= 1;
                        MontStart <= 0;
                    end
                    else begin
                        // Multiplier still running
                        state <= state_mont;
                        SubStart <= 0;
                        MontStart <= 1;
                    end
                end
            end
            state_out: begin
                // Flag completion one count before the counter saturates
                if (cnt_out == 2*`width-2) begin
                    calc_done_s <= 1;
                end
                if (cnt_out == '1) begin
                    state <= state_done;
                    calc_done_s <= 0;
                    valid_out_s <= 0;
                end
                else begin
                    state <= state_out;
                end
            end
            state_done: begin
                // Terminal state until the next reset
                state <= state_done;
            end
        endcase
    end
end

// Output Buffers
// Capture each adder result into U_buffer when AddDone changes to high.
always @(AddDone) begin
    if (AddDone) begin
        for (i=1; i<`width; i=i+1) begin
            U_buffer[i] <= U_buffer[i-1];
        end
        U_buffer[0] <= AddC;
    end
end

// Capture each multiplier result into V_buffer when MontDone changes to high.
always @(MontDone) begin
    if (MontDone) begin
        for (i=1; i<`width; i=i+1) begin
            V_buffer[i] <= V_buffer[i-1];
        end
        V_buffer[0] <= MontC;
    end
end

// While outputting, advance U_buffer on even cnt_out and V_buffer on odd
// cnt_out, shifting in zeros.
always @(posedge clk) begin
    if (valid_out_s == 1 && cnt_out[0] == 0) begin
        for (i=1; i<`width; i=i+1) begin
            U_buffer[i] <= U_buffer[i-1];
        end
        U_buffer[0] <= '0;
    end
    if (valid_out_s == 1 && cnt_out[0] == 1) begin
        for (i=1; i<`width; i=i+1) begin
            V_buffer[i] <= V_buffer[i-1];
        end
        V_buffer[0] <= '0;
    end
end

// Outputs
// Combinational output mux (blocking assignments): U on even cnt_out, V on odd
// cnt_out, zero when valid_out_s is low.
always @(*) begin
    if (valid_out_s == 1 && cnt_out[0] == 0) begin
        dat_out_s = U_buffer[`width-1];
    end
    else begin
        if (valid_out_s == 1 && cnt_out[0] == 1) begin
            dat_out_s = V_buffer[`width-1];
        end
        else begin
            dat_out_s = '0;
        end
    end
end
assign dat_out = dat_out_s;
assign valid_out = valid_out_s;
assign calc_done = calc_done_s;

endmodule

Waveform


Solution

  • The signal is delayed, but because your valid_dat changed exactly at the clock edge, the new value was sampled on that same edge, so valid_delay looks identical to valid_dat. You can change the testbench code so that it drives valid_dat right after the clock edge, not before or on it. The attached waveform shows both scenarios. The first time, I drove valid_dat high after a fixed delay that happens to fall on a positive clock edge (the clock period is 10):

        initial
    begin
        nrst = 0;
        valid_dat = 0;
        #10;
        nrst = 1;
        #15;
        valid_dat = 1;
        #10;
        valid_dat = 0;
    

    The second time, I drove it after the clock edge:

        @(posedge clk);
        valid_dat = 1;
    

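    Functionally they are the same, but if you want to see the delay in the waveform, make that change to your testbench. (Driving the signal with a nonblocking assignment in the testbench, e.g. valid_dat <= 1;, avoids any simulator-dependent race with the design's always @(posedge clk) block.) [Image: waveform showing both scenarios]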