This is code for ALU that does addition and multiplication only. An addition is handled in same clock cycle but the multiplication result has to be delayed by 3 clock cycles.
module my_addmul(
//control signals
input i_clk,
input i_rst,
input i_en,
//add=01, mul=10
input [1:0] i_op,
//input and output registers
input [31:0] i_A,
input [31:0] i_B,
output [31:0] o_D,
//to signal if output is valid
output o_done
);
//registers to save output
reg [31:0] r_D;
reg [63:0] r_mul;//(*keep="true"*)
reg r_mul_done;
reg r_mul_done2;
reg r_done;
//updating outputs
assign o_D = r_D;
assign o_done = r_done;
always @ (posedge i_clk)
begin
r_done <= 0;
r_mul_done <= 0;
if (i_rst) begin
r_D <= 0;
r_mul <= 0;
r_mul_done <= 0;
r_mul_done2 <= 0;
end else if (i_clk == 1) begin
if (i_en == 1) begin
//addition - assignment directly to OP registers
if (i_op == 01) begin
r_done <= 1;
r_D <= i_A + i_B;
//multiplication - indirect assignment to OP registers
end else if (i_op == 2'b10) begin
r_mul <= i_A * i_B;
r_mul_done <= 1;
end
end
//1-clock cycle delay
r_mul_done2 <= (r_mul_done == 1) ? 1 : 0;
//updating outputs in the 3rd cycle
if (r_mul_done2 == 1) begin
r_D <= r_mul[31:0];
r_done <= 1;
end
end
end
endmodule
The problem is that if the keep
attribute is not used, the r_mul
register that stores the multiplication output until 3rd clock cycle is optimised out. I read on the problem and realised that Vivado is thinking like this: "If the multiplication happens every clock cycle, the r_mul
is over-written before it is sent to output. Therefore, it is a register being written but not read, Lets remove it!" Since I insert the 3 clock cycle wait in test bench, the simulation result is always accurate. I want to know what is the "Proper" way of doing this so I don't have to use the keep
attribute. It is an ok solution but I think useful techniques should be learned so hacks don't have to be used. Any ideas or discussion welcome.
If I want to delay a signal, I'd probably insert flops for that. You can probably flop your mul_output
like the way you do for the mul_done
signal. Also, it is better to have different always
blocks for doing the same. You can check the code below but it might be buggy since I haven't simulated/synthesized it -
module my_addmul(
//control signals
input i_clk,
input i_rst,
input i_en,
//add=01, mul=10
input [1:0] i_op,
//input and output registers
input [31:0] i_A,
input [31:0] i_B,
output [31:0] o_D,
//to signal if output is valid
output o_done
);
//registers to save output
reg [31:0] r_D;
reg [63:0] r_mul;//(*keep="true"*)
reg r_mul_1;
reg r_mul_2;
reg r_mul_done;
reg r_mul_done2;
reg r_done;
//updating outputs
assign o_D = r_D;
assign o_done = r_done;
always @ (posedge i_clk)
begin
r_done <= 0;
r_mul_done <= 0;
if (i_rst) begin
r_D <= 0;
r_mul <= 0;
r_mul_done <= 0;
r_mul_done2 <= 0;
end else if (i_clk == 1) begin
if (i_en == 1) begin
//addition - assignment directly to OP registers
if (i_op == 01) begin
r_done <= 1;
r_D <= i_A + i_B;
//multiplication - indirect assignment to OP registers
end else if (i_op == 2'b10) begin
r_mul <= i_A * i_B;
r_mul_done <= 1;
end
end
end
end
always @ (posedge i_clk)
begin
if (i_rst)
begin
r_mul_1 <= 0;
r_mul_done2 <= 0;
end
else
begin
r_mul_1 <= r_mul;
r_mul_done2 <= r_mul_done;
end
end
always @ (posedge i_clk)
begin
if (i_rst)
begin
r_D <= 0;
r_done <= 0;
end
else
begin
r_D <= r_mul_1;
r_done <= r_mul_done2;
end
end
endmodule