verilog hardware-acceleration register-transfer-level

How to remove unwanted output?

I am working on a very large design in which this multiplier-and-adder module is only a small part, but it is enough to illustrate my question.

RTL code:

// mul_and_add: clocked multiply-then-add.
//   BITS  - width of the three operand inputs and of o_result.
//   SHIFT - number of bit positions the product is shifted right before the add.
// On every rising edge of clk the module registers the product of the two
// multiplier inputs and, in the same edge, registers i_adder plus the
// *previously* registered product shifted right by SHIFT.
module mul_and_add #(parameter BITS = 32,
parameter SHIFT = 15
)

(
  clk,
  i_multiplicand,
  i_multiplier,
  i_adder,
  o_result
);

input clk;
input signed  [BITS-1:0]    i_multiplicand;
input signed  [BITS-1:0]    i_multiplier;
input signed  [BITS-1:0]    i_adder;
output signed [BITS-1:0]    o_result;

// Full-width product register (2*BITS bits).
reg signed    [2*BITS-1:0]  mul_result;
// Sum register, one bit wider than the operands.
reg signed    [BITS:0]      add_result;
wire signed   [BITS-1:0]    o_result;

always @(posedge clk)

begin

// Both assignments are non-blocking, so the second line reads the value
// mul_result held before this edge. A change on the multiplier inputs
// therefore needs two edges to reach add_result, while a change on i_adder
// needs only one; for one cycle add_result combines the new i_adder with
// the old product.
mul_result <= i_multiplicand * i_multiplier;
// NOTE(review): `>>` is a logical (zero-fill) shift even on a signed
// operand; `>>>` is the arithmetic form.
add_result <=  i_adder + (mul_result >> SHIFT);

end

// Only the low BITS bits of the sum are driven out; the extra top bit of
// add_result is dropped.
assign o_result = add_result[BITS-1:0];

endmodule

TB code:

// Testbench for mul_and_add: drives a free-running clock and two sets of
// input values, then ends the simulation.
module tb_mul_and_add (
                        );

parameter BITS = 32;

reg  clk;
reg  signed [ BITS - 1 : 0 ] i_multiplicand;
reg  signed [ BITS - 1 : 0 ] i_multiplier;
reg  signed [ BITS - 1 : 0 ] i_adder;

wire signed [ BITS - 1 : 0 ] o_result;

// The instance has no parameter override, so the DUT uses its own default
// BITS/SHIFT values; the BITS parameter above only sizes the testbench
// signals.
mul_and_add mul_and_add_i (
    .clk(clk),
    .i_multiplicand(i_multiplicand),
    .i_multiplier(i_multiplier),
    .i_adder(i_adder),
    .o_result(o_result)
                  );

parameter CLKPERIODE = 10;

// Clock starts high at time 0 and toggles every CLKPERIODE/2 time units,
// so rising edges fall on multiples of CLKPERIODE (10, 20, 30, ...).
initial clk = 1'b1;
always #(CLKPERIODE/2) clk = !clk;

initial begin
  // First stimulus set, applied at time 0.
  i_multiplicand =  32'h00010000;
  i_multiplier =    32'h00010000;
  i_adder =     32'h00010000;
  // Second stimulus set at t=30, which coincides with a rising clock edge.
  // These are blocking assignments, so whether the DUT samples the old or
  // the new values at that edge depends on simulator event ordering.
  #30
  i_multiplicand = 32'h00008000;
  i_multiplier = 32'h00010000;
  i_adder = 32'h00020000;
  // Run 70 more time units (until t=100), then stop.
  #70
  $finish();
end

endmodule

Output: Cadence SimVision

The data marked with a red rectangle is the unwanted data that I want to get rid of. When I use this module several times, a lot of unwanted data appears before the correct value, so when I have to sort out the data to plot a graph, there is a lot to go through.

What magic trick is there which I am not aware of to get rid of the unwanted data?

Also, if you have an idea for better optimization or any criticism, please feel free to share.

Solution

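Change the RTL code to make mul_result a wire instead of a register. In the original, the registered product reaches the adder one clock cycle after i_adder does, so for one cycle the sum mixes the new i_adder with the old product; with a wire, the product and i_adder are added in the same cycle: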

// mul_result is now a net driven continuously by the product of the current
// inputs, so it is no longer delayed by a clock edge.
wire signed   [2*BITS-1:0]  mul_result = i_multiplicand * i_multiplier;
// The sum is the only registered stage: at each rising edge add_result takes
// i_adder plus the shifted product of the inputs present at that edge.
always @(posedge clk) begin
    add_result <= i_adder + (mul_result >> SHIFT);
end

Change the TB code to align input changes to the clock edge and use non-blocking assignments to avoid race conditions:

// Stimulus: apply the first input set at time 0, switch to the second set
// on a clock edge, then let the simulation run before finishing.
initial begin
  // First stimulus set, applied with blocking assignments at time 0.
  i_multiplicand =  32'h00010000;
  i_multiplier =    32'h00010000;
  i_adder =     32'h00010000;
  // Wait for three rising edges of clk so the change below is aligned to
  // the clock rather than to an absolute delay.
  repeat (3) @(posedge clk);
  // Non-blocking assignments: the new values are applied after the
  // processes triggered by this edge have read the old ones, so a block
  // sampling on the same edge sees the previous stimulus.
  i_multiplicand <= 32'h00008000;
  i_multiplier <= 32'h00010000;
  i_adder <= 32'h00020000;
  // Run 70 more time units, then stop.
  #70
  $finish();
end

As a coding style note, you can reduce clutter by using ANSI module ports:

// mul_and_add: signed multiply, scale and add with one register stage.
//
// Parameters:
//   BITS  - width of the three operand inputs and of o_result.
//   SHIFT - number of bit positions the full-width product is shifted right
//           before it is added to i_adder.
// Ports:
//   clk            - clock; the sum is registered on its rising edge.
//   i_multiplicand - signed multiplicand.
//   i_multiplier   - signed multiplier.
//   i_adder        - signed addend.
//   o_result       - low BITS bits of the registered sum.
module mul_and_add #(
    parameter BITS  = 32,
    parameter SHIFT = 15
)
(
   input clk,
   input signed  [BITS-1:0] i_multiplicand,
   input signed  [BITS-1:0] i_multiplier,
   input signed  [BITS-1:0] i_adder,
   output signed [BITS-1:0] o_result
);

// Registered sum, one bit wider than the operands.
reg signed    [BITS:0]      add_result;
// Combinational full-width product of the current inputs.
wire signed   [2*BITS-1:0]  mul_result = i_multiplicand * i_multiplier;
always @(posedge clk) begin
    // Arithmetic shift (>>>) keeps the sign of a negative product. The
    // logical shift (>>) zero-fills from the top, which corrupts o_result
    // for negative products once SHIFT is larger than BITS. For SHIFT <= BITS
    // o_result is the same with either operator.
    add_result <= i_adder + (mul_result >>> SHIFT);
end

// Drop the extra top bit of the sum; only the low BITS bits are output.
assign o_result = add_result[BITS-1:0];

endmodule