Search code examples
verilog · hardware-acceleration · register-transfer-level

How to remove unwanted output?


I am working on a very large module in which this multiplier and adder module is a small part, but it will help me to express my question here.

RTL code:

// Signed multiply-and-add with a fixed right shift of the product.
//
// Parameters:
//   BITS  - width of each operand and of o_result.
//   SHIFT - number of bit positions the 2*BITS-wide product is shifted right
//           before the addition.
//
// Timing: two register stages. mul_result captures the product on one rising
// clock edge; add_result captures i_adder plus the shifted, previously
// captured product on the following edge. i_adder itself is not delayed, so
// each add_result pairs the current i_adder with the product of the operands
// sampled one edge earlier.
module mul_and_add #(parameter BITS = 32,
parameter SHIFT = 15
)

(
  clk,
  i_multiplicand,
  i_multiplier,
  i_adder,
  o_result
);

input clk;
input signed  [BITS-1:0]    i_multiplicand;
input signed  [BITS-1:0]    i_multiplier;
input signed  [BITS-1:0]    i_adder;
output signed [BITS-1:0]    o_result;

reg signed    [2*BITS-1:0]  mul_result;   // full-width registered product
reg signed    [BITS:0]      add_result;   // one extra bit above the output width
wire signed   [BITS-1:0]    o_result;

always @(posedge clk)

begin

mul_result <= i_multiplicand * i_multiplier;
// Non-blocking semantics: the mul_result read here is the value registered on
// the previous edge, not the product computed on the line above.
// Arithmetic shift (>>>) keeps the sign of the product for every SHIFT value;
// a logical shift (>>) would zero-fill and corrupt negative products once
// SHIFT exceeds BITS-1.
add_result <=  i_adder + (mul_result >>> SHIFT);

end

// Only the low BITS bits are driven out; the extra top bit of add_result is dropped.
assign o_result = add_result[BITS-1:0];

endmodule

TB code:

// Testbench for mul_and_add: drives a free-running clock, applies two sets of
// operands, and ends the simulation at time 100 (30 + 70 time units).
module tb_mul_and_add (
                        );

parameter BITS = 32;

reg  clk;
reg  signed [ BITS - 1 : 0 ] i_multiplicand;
reg  signed [ BITS - 1 : 0 ] i_multiplier;
reg  signed [ BITS - 1 : 0 ] i_adder;

wire signed [ BITS - 1 : 0 ] o_result;

// Device under test. No parameter override is given, so the instance uses the
// defaults declared in mul_and_add rather than this testbench's BITS.
mul_and_add mul_and_add_i (
    .clk(clk),
    .i_multiplicand(i_multiplicand),
    .i_multiplier(i_multiplier),
    .i_adder(i_adder),
    .o_result(o_result)
                  );

parameter CLKPERIODE = 10;

// Clock starts high and toggles every CLKPERIODE/2 = 5 time units, so rising
// edges occur at times 10, 20, 30, ...
initial clk = 1'b1;
always #(CLKPERIODE/2) clk = !clk;

initial begin
  // First operand set, applied at time 0.
  i_multiplicand =  32'h00010000;
  i_multiplier =    32'h00010000;
  i_adder =     32'h00010000;
  // Time 30 coincides with a rising clock edge, and the updates below are
  // blocking assignments.
  // NOTE(review): whether the DUT samples the old or the new values on that
  // edge appears to depend on simulator process ordering - confirm.
  #30
  // Second operand set.
  i_multiplicand = 32'h00008000;
  i_multiplier = 32'h00010000;
  i_adder = 32'h00020000;
  // Let the second result propagate, then stop.
  #70
  $finish();
end

endmodule

Output: Cadence SimVision

Output from Cadence SimVision

The data marked with the red rectangle is unwanted, and I want to get rid of it: when I instantiate this module several times, many unwanted values appear before the correct one, so when I have to sort the data to plot a graph there is a lot to go through.

What magic trick is there which I am not aware of to get rid of the unwanted data?

Also, if you have an idea for better optimization or any criticism, please feel free to share.


Solution

  • Change the RTL code to make mul_result a wire, instead of having a one cycle delay for the calculation:

    // Combinational product: with no register stage here, mul_result always
    // reflects the operands currently applied to the module.
    wire signed   [2*BITS-1:0]  mul_result = i_multiplicand * i_multiplier;
    // Single register stage: the shifted product and i_adder are sampled on
    // the same rising edge.
    always @(posedge clk) begin
        // Arithmetic shift (>>>) preserves the sign of the product for any
        // SHIFT; a logical shift (>>) zero-fills and would corrupt negative
        // products once SHIFT exceeds BITS-1.
        add_result <= i_adder + (mul_result >>> SHIFT);
    end
    

    Change the TB code to align input changes to the clock edge and use non-blocking assignments to avoid race conditions:

    // Stimulus: apply the first operand set at time 0, wait for three rising
    // clock edges, switch to the second set, then stop 70 time units later.
    initial begin
      i_multiplicand =  32'h00010000;
      i_multiplier =    32'h00010000;
      i_adder =     32'h00010000;
      // Synchronize to the clock instead of using a fixed delay.
      // NOTE(review): if clk is driven to 1'b1 by another initial block at
      // time 0, whether that x->1 transition counts as the first of these
      // three edges depends on process ordering - confirm in the simulator.
      repeat (3) @(posedge clk);
      // Non-blocking updates take effect after the blocks triggered by this
      // edge have read their inputs, so the DUT samples the old values here.
      i_multiplicand <= 32'h00008000;
      i_multiplier <= 32'h00010000;
      i_adder <= 32'h00020000;
      #70
      $finish();
    end
    

    As a coding style note, you can reduce clutter by using ANSI module ports:

    // Signed multiply-and-add with a fixed right shift of the product:
    //   add_result <= i_adder + ((i_multiplicand * i_multiplier) >>> SHIFT)
    //
    // Parameters:
    //   BITS  - width of each operand and of o_result.
    //   SHIFT - number of bit positions the 2*BITS-wide product is shifted
    //           right before the addition.
    //
    // Timing: the product is combinational and the sum is registered, so
    // o_result updates on the rising clock edge that samples the inputs.
    module mul_and_add #(
        parameter BITS  = 32,
        parameter SHIFT = 15
    )
    (
       input clk,
       input signed  [BITS-1:0] i_multiplicand,
       input signed  [BITS-1:0] i_multiplier,
       input signed  [BITS-1:0] i_adder,
       output signed [BITS-1:0] o_result
    );

    reg signed    [BITS:0]      add_result;   // one extra bit above the output width
    // Full-width product, computed combinationally from the current inputs.
    wire signed   [2*BITS-1:0]  mul_result = i_multiplicand * i_multiplier;
    always @(posedge clk) begin
        // Arithmetic shift (>>>) preserves the sign of the product for any
        // SHIFT; a logical shift (>>) zero-fills and would corrupt negative
        // products once SHIFT exceeds BITS-1.
        add_result <= i_adder + (mul_result >>> SHIFT);
    end

    // Only the low BITS bits are driven out; the extra top bit is dropped.
    assign o_result = add_result[BITS-1:0];

    endmodule