// -----------------------------------------------------------------------------
// average
//
// For every channel, three input words are processed one after another through
// a single shared subtractor / accumulator pair:
//
//   sub = (zero-extended new sample) - (zero-extended sample read from the RAM)
//   acc = acc + sub (the sub value registered on the previous frame)
//
// The RAM (256 words) holds the sample history. Address bits [7:6] select one
// of the three input streams, bits [5:0] are a 6-bit position that advances by
// one on every frame. The read position is (write position - del_data field of
// the selected stream).
//
// Parameters:
//   size   number of channels
//   width  bit width of the input data
//
// Ports:
//   clock     clock, all registers update on the rising edge
//   frame     sampled in state 2; when high, one 3-step processing pass starts
//   reset     synchronous reset; restarts the state machine in state 0
//   del_data  3*size 6-bit delay fields, field (3*j+k) belongs to stream k of
//             channel j
//   inp_data  3*size input words of `width` bits, same ordering as del_data
//   out_data  {stream 2, stream 1, stream 0} accumulator outputs, each
//             size*(width+7) bits wide
//
// NOTE(review): mux_unit_1 and add_unit_1 are instantiated once per generate
// iteration but drive the shared nets int_addr_wire / del_addr_wire, and the
// RAM has a single read address. With size > 1 these nets get several drivers.
// The module looks correct only for size = 1 - confirm before using size > 1.
// -----------------------------------------------------------------------------
module average
  #(
    parameter size = 1, // number of channels
    parameter width = 16 // bit width of the input data
  )
  (
    input  wire                          clock, frame, reset,
    input  wire [3*size*6-1:0]           del_data,
    input  wire [3*size*width-1:0]       inp_data,
    // (width+6+1) is spelled out here because the localparam width1 is only
    // declared after the port list; referencing it here would be a forward
    // reference that strict Verilog tools reject.
    output wire [3*size*(width+6+1)-1:0] out_data
  );

  // internal data width: input width + 6 bits of growth + 1 sign bit
  localparam width1 = width + 6 + 1;

  // state machine registers (xxx_reg) and their combinational next values
  reg int_wren_reg, int_wren_next;           // RAM write enable
  reg [1:0] int_chan_reg, int_chan_next;     // stream selected for the RAM read
  reg [2:0] int_case_reg, int_case_next;     // state
  reg [7:0] int_addr_reg, int_addr_next;     // RAM write address
  reg [5:0] del_addr_reg, del_addr_next;     // current 6-bit position

  wire [5:0] del_addr_wire;                  // del_addr_reg - selected delay
  wire [7:0] int_addr_wire;                  // {stream index, selected delay}

  // index 0..2 of inp_data_wire are the three input streams, index 3 is the
  // RAM read data
  reg  [size*width-1:0] inp_data_reg [2:0], inp_data_next [2:0];
  wire [size*width-1:0] inp_data_wire [3:0];

  reg  [size*width1-1:0] out_data_reg [2:0], out_data_next [2:0];

  // index 0 is the working register feeding the adder, index 1..3 keep the
  // per-stream values between frames
  reg  [size*width1-1:0] acc_data_reg [3:0], acc_data_next [3:0];
  wire [size*width1-1:0] acc_data_wire;

  reg  [size*width1-1:0] sub_data_reg [3:0], sub_data_next [3:0];
  wire [size*width1-1:0] sub_data_wire;

  // separate loop variables so that the clocked and the combinational
  // always blocks do not share one integer
  integer i_seq;
  integer i_comb;

  genvar j;

  generate
    for (j = 0; j < size; j = j + 1)
    begin : INT_DATA
      // de-interleave the three input streams of channel j
      assign inp_data_wire[0][j*width +: width] = inp_data[(3*j+0)*width +: width];
      assign inp_data_wire[1][j*width +: width] = inp_data[(3*j+1)*width +: width];
      assign inp_data_wire[2][j*width +: width] = inp_data[(3*j+2)*width +: width];

      // select {stream index, delay} for the stream that is read next
      lpm_mux #(
        .lpm_size(3),
        .lpm_type("LPM_MUX"),
        .lpm_width(8),
        .lpm_widths(2)) mux_unit_1 (
        .sel(int_chan_next),
        .data({
          2'd2, del_data[(3*j+2)*6 +: 6],
          2'd1, del_data[(3*j+1)*6 +: 6],
          2'd0, del_data[(3*j+0)*6 +: 6]}),
        .result(int_addr_wire));

      // read position = current position - selected delay (modulo 64)
      lpm_add_sub #(
        .lpm_direction("SUB"),
        .lpm_hint("ONE_INPUT_IS_CONSTANT=NO,CIN_USED=NO"),
        .lpm_representation("UNSIGNED"),
        .lpm_type("LPM_ADD_SUB"),
        .lpm_width(6)) add_unit_1 (
        .dataa(del_addr_reg),
        .datab(int_addr_wire[5:0]),
        .result(del_addr_wire));

      // new sample - delayed sample, both zero-extended to width1 bits
      lpm_add_sub #(
        .lpm_direction("SUB"),
        .lpm_hint("ONE_INPUT_IS_CONSTANT=NO,CIN_USED=NO"),
        .lpm_representation("SIGNED"),
        .lpm_type("LPM_ADD_SUB"),
        .lpm_width(width1)) sub_unit_1 (
        .dataa({{(width1-width){1'b0}}, inp_data_reg[0][j*width +: width]}),
        .datab({{(width1-width){1'b0}}, inp_data_wire[3][j*width +: width]}),
        .result(sub_data_wire[j*width1 +: width1]));

      // accumulator: working difference + working accumulator.
      // The operand is the plain width1-bit slice; the former concatenation
      // {msb, {0{1'b0}}, rest} contained a zero-count replication and
      // evaluated to exactly this slice.
      lpm_add_sub #(
        .lpm_direction("ADD"),
        .lpm_hint("ONE_INPUT_IS_CONSTANT=NO,CIN_USED=NO"),
        .lpm_representation("SIGNED"),
        .lpm_type("LPM_ADD_SUB"),
        .lpm_width(width1)) acc_unit_1 (
        .dataa(sub_data_reg[0][j*width1 +: width1]),
        .datab(acc_data_reg[0][j*width1 +: width1]),
        .result(acc_data_wire[j*width1 +: width1]));
    end
  endgenerate

  // sample history: port A writes, port B reads (address and output registered)
  altsyncram #(
    .address_aclr_b("NONE"),
    .address_reg_b("CLOCK0"),
    .clock_enable_input_a("BYPASS"),
    .clock_enable_input_b("BYPASS"),
    .clock_enable_output_b("BYPASS"),
    .intended_device_family("Cyclone III"),
    .lpm_type("altsyncram"),
    .numwords_a(256),
    .numwords_b(256),
    .operation_mode("DUAL_PORT"),
    .outdata_aclr_b("NONE"),
    .outdata_reg_b("CLOCK0"),
    .power_up_uninitialized("FALSE"),
    .read_during_write_mode_mixed_ports("DONT_CARE"),
    .widthad_a(8),
    .widthad_b(8),
    .width_a(size*width),
    .width_b(size*width),
    .width_byteena_a(1)) ram_unit_1 (
    .wren_a(int_wren_reg),
    .clock0(clock),
    .address_a(int_addr_reg),
    .address_b({int_addr_wire[7:6], del_addr_wire}),
    .data_a(inp_data_reg[0]),
    .q_b(inp_data_wire[3]),
    .aclr0(1'b0),
    .aclr1(1'b0),
    .addressstall_a(1'b0),
    .addressstall_b(1'b0),
    .byteena_a(1'b1),
    .byteena_b(1'b1),
    .clock1(1'b1),
    .clocken0(1'b1),
    .clocken1(1'b1),
    .clocken2(1'b1),
    .clocken3(1'b1),
    .data_b({(size*width){1'b1}}),
    .eccstatus(),
    .q_a(),
    .rden_a(1'b1),
    .rden_b(1'b1),
    .wren_b(1'b0));

  // state registers with synchronous reset
  always @(posedge clock)
  begin
    if (reset)
    begin
      int_wren_reg <= 1'b1;
      int_chan_reg <= 2'd0;
      int_case_reg <= 3'd0;
      del_addr_reg <= 6'd0;
      int_addr_reg <= 8'd0;
      for (i_seq = 0; i_seq <= 2; i_seq = i_seq + 1)
      begin
        inp_data_reg[i_seq] <= {(size*width){1'b0}};
        out_data_reg[i_seq] <= {(size*width1){1'b0}};
      end
      for (i_seq = 0; i_seq <= 3; i_seq = i_seq + 1)
      begin
        sub_data_reg[i_seq] <= {(size*width1){1'b0}};
        acc_data_reg[i_seq] <= {(size*width1){1'b0}};
      end
    end
    else
    begin
      int_wren_reg <= int_wren_next;
      int_chan_reg <= int_chan_next;
      int_case_reg <= int_case_next;
      del_addr_reg <= del_addr_next;
      int_addr_reg <= int_addr_next;
      for (i_seq = 0; i_seq <= 2; i_seq = i_seq + 1)
      begin
        inp_data_reg[i_seq] <= inp_data_next[i_seq];
        out_data_reg[i_seq] <= out_data_next[i_seq];
      end
      for (i_seq = 0; i_seq <= 3; i_seq = i_seq + 1)
      begin
        sub_data_reg[i_seq] <= sub_data_next[i_seq];
        acc_data_reg[i_seq] <= acc_data_next[i_seq];
      end
    end
  end

  // next-state logic
  always @*
  begin
    // default: hold every register
    int_wren_next = int_wren_reg;
    int_chan_next = int_chan_reg;
    int_case_next = int_case_reg;
    del_addr_next = del_addr_reg;
    int_addr_next = int_addr_reg;
    for (i_comb = 0; i_comb <= 2; i_comb = i_comb + 1)
    begin
      inp_data_next[i_comb] = inp_data_reg[i_comb];
      out_data_next[i_comb] = out_data_reg[i_comb];
    end
    for (i_comb = 0; i_comb <= 3; i_comb = i_comb + 1)
    begin
      sub_data_next[i_comb] = sub_data_reg[i_comb];
      acc_data_next[i_comb] = acc_data_reg[i_comb];
    end

    case (int_case_reg)
      0:
      begin
        // write zeros: clear all data registers and start at address 0
        int_wren_next = 1'b1;
        del_addr_next = 6'd0;
        int_addr_next = 8'd0;
        for (i_comb = 0; i_comb <= 2; i_comb = i_comb + 1)
        begin
          inp_data_next[i_comb] = {(size*width){1'b0}};
          out_data_next[i_comb] = {(size*width1){1'b0}};
        end
        for (i_comb = 0; i_comb <= 3; i_comb = i_comb + 1)
        begin
          sub_data_next[i_comb] = {(size*width1){1'b0}};
          acc_data_next[i_comb] = {(size*width1){1'b0}};
        end

        int_case_next = 3'd1;
      end

      1:
      begin
        // write zeros: step through all 256 RAM addresses
        int_addr_next = int_addr_reg + 8'd1;
        if (&int_addr_reg)
        begin
          int_wren_next = 1'b0;
          int_chan_next = 2'd0;
          int_case_next = 3'd2;
        end
      end

      2: // frame
      begin
        if (frame)
        begin
          int_wren_next = 1'b1;
          int_addr_next[7:6] = 2'd0;

          // set read addr for 2nd pipeline
          int_chan_next = 2'd1;

          // register input data for 2nd and 3rd sums
          inp_data_next[1] = inp_data_wire[1];
          inp_data_next[2] = inp_data_wire[2];

          // prepare registers for 1st sum
          inp_data_next[0] = inp_data_wire[0];
          sub_data_next[0] = sub_data_reg[1];
          acc_data_next[0] = acc_data_reg[1];

          int_case_next = 3'd3;
        end
      end

      3: // 1st sum
      begin
        int_addr_next[7:6] = 2'd1;

        // set read addr for 3rd pipeline
        int_chan_next = 2'd2;

        // prepare registers for 2nd sum
        inp_data_next[0] = inp_data_reg[1];
        sub_data_next[0] = sub_data_reg[2];
        acc_data_next[0] = acc_data_reg[2];

        // register 1st sum
        sub_data_next[1] = sub_data_wire;
        acc_data_next[1] = acc_data_wire;
        out_data_next[0] = acc_data_wire;

        int_case_next = 3'd4;
      end

      4: // 2nd sum
      begin
        int_addr_next[7:6] = 2'd2;

        // prepare registers for 3rd sum
        inp_data_next[0] = inp_data_reg[2];
        sub_data_next[0] = sub_data_reg[3];
        acc_data_next[0] = acc_data_reg[3];

        // register 2nd sum
        sub_data_next[2] = sub_data_wire;
        acc_data_next[2] = acc_data_wire;
        out_data_next[1] = acc_data_wire;

        // advance the position for the next frame
        del_addr_next = del_addr_reg + 6'd1;

        int_case_next = 3'd5;
      end

      5: // 3rd sum
      begin
        int_wren_next = 1'b0;

        // set read addr for 1st pipeline
        int_chan_next = 2'd0;

        // register 3rd sum
        sub_data_next[3] = sub_data_wire;
        acc_data_next[3] = acc_data_wire;
        out_data_next[2] = acc_data_wire;

        // del_addr_reg already holds the position incremented in state 4
        int_addr_next[5:0] = del_addr_reg;

        int_case_next = 3'd2;
      end

      default:
      begin
        int_case_next = 3'd0;
      end
    endcase
  end

  assign out_data = {out_data_reg[2], out_data_reg[1], out_data_reg[0]};

endmodule