I have built a block that computes a simple moving average over "factor" samples of a vector, and it works well for my needs.
My problem is that I think my syntax is a bit dumb.
I have an array into which I push my data, and depending on the factor input (which can be 2, 4, 8, 16 or 32) I accumulate the data samples and then right-shift the sum to divide by the factor value.
I read that using for loops is not recommended, but maybe that is just nonsense and for loops are perfectly fine for synthesis.
Can someone confirm that using a for loop for my needs is fine, and show how it can be done here so that it can be synthesized?
My code:
// Moving-average filter over the most recent `factor` input samples.
//
// Samples are pushed into a 32-deep delay line (din_dly). When `factor` is one
// of the supported window sizes (2, 4, 8, 16, 32) the first `factor` taps are
// summed into dout_sum, and dout_shift receives the previously registered
// dout_sum shifted right by log2(factor). Any other `factor` value leaves the
// delay line, dout_sum and dout_shift unchanged.
//
// Lines of the form --...-- are placeholders for code elided by the author
// and are kept verbatim.
module average #
(
--parameters--
)
(
input clk,
input rst_n,
input [FACTOR_WIDTH-1 : 0] factor, // Average (2, 4, 8, 16, 32)
input [INPUT_WIDTH-1 : 0] din,
--more inputs outputs--
);

    // Depth of the delay line == largest supported averaging window.
    // All for-loops below use this constant bound so they can be unrolled
    // by a synthesis tool.
    localparam MAX_FACTOR = 32;

    reg [INPUT_WIDTH-1 :0] din_dly [0:MAX_FACTOR-1];
    reg [OUTPUT_WIDTH-1:0] dout_sum;
    reg [OUTPUT_WIDTH-1:0] dout_shift;

    // Loop variable for the initial block (it was used but never declared
    // in the visible code).
    integer index;

    // Decoded view of `factor`: whether it is a supported window size and,
    // if so, how many bits to shift the sum right to divide by it.
    reg       factor_valid;
    reg [2:0] shift_amt;

    always @(*) begin
        case (factor)
            2:       begin factor_valid = 1'b1; shift_amt = 3'd1; end
            4:       begin factor_valid = 1'b1; shift_amt = 3'd2; end
            8:       begin factor_valid = 1'b1; shift_amt = 3'd3; end
            16:      begin factor_valid = 1'b1; shift_amt = 3'd4; end
            32:      begin factor_valid = 1'b1; shift_amt = 3'd5; end
            default: begin factor_valid = 1'b0; shift_amt = 3'd0; end
        endcase
    end

    // Power-up values (simulation / FPGA initial state).
    initial begin
        dout_sum   = {OUTPUT_WIDTH{1'b0}};
        dout_shift = {OUTPUT_WIDTH{1'b0}};
        for (index = 0; index < MAX_FACTOR; index = index + 1) begin
            din_dly[index] = {INPUT_WIDTH{1'b0}};
        end
    end

    // Accumulate the first `factor` taps and scale the previous sum.
    always @(posedge clk or negedge rst_n) begin : average_logic
        integer tap;
        // Scratch accumulator: always written before it is read, so it
        // represents combinational logic rather than a register.
        reg [OUTPUT_WIDTH-1:0] tap_sum;

        if (~rst_n) begin
            dout_sum   <= {OUTPUT_WIDTH{1'b0}};
            dout_shift <= {OUTPUT_WIDTH{1'b0}};
--flags=0--
        end else begin
            if (--flags--) begin
                if (factor_valid) begin
                    // Fixed 32 iterations; taps at or beyond `factor`
                    // contribute nothing to the sum.
                    tap_sum = {OUTPUT_WIDTH{1'b0}};
                    for (tap = 0; tap < MAX_FACTOR; tap = tap + 1) begin
                        if (tap < factor) begin
                            tap_sum = tap_sum + din_dly[tap];
                        end
                    end
                    dout_sum   <= tap_sum;
                    // Non-blocking read: this shifts the dout_sum value
                    // registered on an earlier clock edge, not tap_sum.
                    dout_shift <= dout_sum >> shift_amt; // dout_sum / factor
                end
--logic--
            end else begin
--logic--
            end
        end
    end

    // Delay line: shift `din` through the first `factor` taps.
    always @(posedge clk or negedge rst_n) begin : delay_line
        integer tap;

        if (~rst_n) begin
            for (tap = 0; tap < MAX_FACTOR; tap = tap + 1) begin
                din_dly[tap] <= {INPUT_WIDTH{1'b0}};
            end
        end else begin
            if (--flag--) begin
                if (factor_valid) begin
                    din_dly[0] <= din;
                    // Only taps below `factor` move; deeper taps hold.
                    for (tap = 1; tap < MAX_FACTOR; tap = tap + 1) begin
                        if (tap < factor) begin
                            din_dly[tap] <= din_dly[tap-1];
                        end
                    end
                end
            end
            // Synchronous clear. It is placed after the shift so that, when
            // both conditions hold, these later non-blocking assignments win.
            if (--some flags--) begin
                for (tap = 0; tap < MAX_FACTOR; tap = tap + 1) begin
                    din_dly[tap] <= {INPUT_WIDTH{1'b0}};
                end
            end
        end
    end

endmodule
thanks!
Best Answer
For loops in Verilog can be used for synthesis purposes. However, the caveat is that the number of loop iterations must be known at synthesis time, because the tools will unroll the loop into hardware. This means your loop limit must be a constant, a `parameter`/`localparam`, or a `genvar`.
As such, if you want to use for loops in your code, you'll need to find a way to factor it such that you have a constant number of loop iterations. One such example might be:
    sum = 0;
    for (i = 0; i < 32; i = i + 1)
        sum = sum + ((i < factor) ? din_dly[i] : 0);
That way you always have the same number of loop iterations, but you simply add (32 - factor) zeros in some of the iterations. This will result in a chain of adders and multiplexers, which may not give a high f-max.
You would have to reconcile how to do `dout_shift`. This could be done with a simple lookup table to convert `factor` into how many bits to shift.
An alternate solution would be a `generate for` block which makes one set of logic for each different factor. This would produce more logic but would be faster, as it is more parallel and pipelineable.