// deconv
//
// Time-multiplexed arithmetic pipeline built from Altera LPM primitives.
// Every channel supplies three input words (inp_data) and six 16-bit
// coefficient words (mul_data). A small state machine feeds the three inputs,
// one after another, through one shared set of subtract / add / multiply
// units and a 256-word dual-port RAM, and latches the results of each pass
// into int_data_reg.
//
// Parameters:
//   size  - number of channels
//   width - bit width of each input word
//
// Ports:
//   clock    - clock for all registers, the multipliers and the RAM
//   frame    - sampled in state 2; when high, starts one three-pass sequence
//   reset    - synchronous reset; returns the state machine to state 0
//   del_data - three 5-bit values, one per pass, added to the low six bits of
//              the RAM write address to form the RAM read address
//   mul_data - 6*size coefficient words of 16 bits each (3*size*32 bits)
//   inp_data - 3*size input words of `width` bits each
//   out_data - {int_data_next[16], int_data_next[11], int_data_next[6]},
//              i.e. the add_unit_2 results captured in states 5, 4 and 3
module deconv
  #(
    parameter size = 1, // number of channels
    parameter width = 16 // bit width of the input data
  )
  (
    input wire clock, frame, reset,
    input wire [14:0] del_data,
    input wire [3*size*32-1:0] mul_data,
    input wire [3*size*width-1:0] inp_data,
    // Width equals 3*size*widthr. It is spelled out here because widthr is a
    // localparam declared after the port list and must not be referenced
    // before its declaration.
    output wire [3*size*2*(width+8)-1:0] out_data
  );

  localparam width1 = width + 6 + 1;
  localparam width2 = width + 6 + 6;
  localparam widthr = 2*(width + 8); // internal data path width per channel

  // State machine registers and their combinational next-state values.
  reg int_wren_reg, int_wren_next;        // RAM write enable / multiplier clock enable
  reg [1:0] int_chan_reg, int_chan_next;  // selects which del_data field is used
  reg [2:0] int_case_reg, int_case_next;  // state
  reg [7:0] int_addr_reg, int_addr_next;  // RAM write address

  wire [7:0] int_addr_wire; // output of mux_unit_1
  wire [5:0] del_addr_wire; // int_addr_reg[5:0] + int_addr_wire[5:0]

  // Operand registers of the shared arithmetic units.
  reg [size*widthr-1:0] acc_data_reg [6:0], acc_data_next [6:0];
  // Holding registers for inputs and per-pass results.
  reg [size*widthr-1:0] int_data_reg [17:0], int_data_next [17:0];

  // int_data_wire[0..2]: zero-extended inputs
  // int_data_wire[3]   : RAM read data
  // int_data_wire[4..8]: outputs of sub_unit_1, acc_unit_1, mult_unit_1,
  //                      mult_unit_2 and add_unit_2
  wire [size*widthr-1:0] int_data_wire [8:0];
  // Zero-extended coefficients.
  wire [size*widthr-1:0] mul_data_wire [5:0];

  integer i;
  genvar j;

  generate
    for (j = 0; j < size; j = j + 1)
    begin : INT_DATA
      // Zero-extend the three input words of channel j to widthr bits.
      assign int_data_wire[0][j*widthr+widthr-1:j*widthr] = {{(widthr-width){1'b0}}, inp_data[(3*j+0)*width+width-1:(3*j+0)*width]};
      assign int_data_wire[1][j*widthr+widthr-1:j*widthr] = {{(widthr-width){1'b0}}, inp_data[(3*j+1)*width+width-1:(3*j+1)*width]};
      assign int_data_wire[2][j*widthr+widthr-1:j*widthr] = {{(widthr-width){1'b0}}, inp_data[(3*j+2)*width+width-1:(3*j+2)*width]};

      // Zero-extend the six 16-bit coefficient words of channel j.
      // Each channel owns six consecutive words (3*32 bits), so the word
      // index advances by 6 per channel; a stride of 3 would make adjacent
      // channels share words and leave the upper half of mul_data unused.
      assign mul_data_wire[0][j*widthr+widthr-1:j*widthr] = {{(widthr-16){1'b0}}, mul_data[(6*j+0)*16+16-1:(6*j+0)*16]};
      assign mul_data_wire[1][j*widthr+widthr-1:j*widthr] = {{(widthr-16){1'b0}}, mul_data[(6*j+1)*16+16-1:(6*j+1)*16]};
      assign mul_data_wire[2][j*widthr+widthr-1:j*widthr] = {{(widthr-16){1'b0}}, mul_data[(6*j+2)*16+16-1:(6*j+2)*16]};
      assign mul_data_wire[3][j*widthr+widthr-1:j*widthr] = {{(widthr-16){1'b0}}, mul_data[(6*j+3)*16+16-1:(6*j+3)*16]};
      assign mul_data_wire[4][j*widthr+widthr-1:j*widthr] = {{(widthr-16){1'b0}}, mul_data[(6*j+4)*16+16-1:(6*j+4)*16]};
      assign mul_data_wire[5][j*widthr+widthr-1:j*widthr] = {{(widthr-16){1'b0}}, mul_data[(6*j+5)*16+16-1:(6*j+5)*16]};

      // int_data_wire[4] = acc_data_reg[0] - RAM read data
      lpm_add_sub #(
        .lpm_direction("SUB"),
        .lpm_hint("ONE_INPUT_IS_CONSTANT=NO,CIN_USED=NO"),
        .lpm_representation("SIGNED"),
        .lpm_type("LPM_ADD_SUB"),
        .lpm_width(widthr)) sub_unit_1 (
        .dataa(acc_data_reg[0][j*widthr+widthr-1:j*widthr]),
        .datab(int_data_wire[3][j*widthr+widthr-1:j*widthr]),
        .result(int_data_wire[4][j*widthr+widthr-1:j*widthr]));

      // int_data_wire[5] = acc_data_reg[1] + acc_data_reg[2]
      lpm_add_sub #(
        .lpm_direction("ADD"),
        .lpm_hint("ONE_INPUT_IS_CONSTANT=NO,CIN_USED=NO"),
        .lpm_representation("SIGNED"),
        .lpm_type("LPM_ADD_SUB"),
        .lpm_width(widthr)) acc_unit_1 (
        .dataa(acc_data_reg[1][j*widthr+widthr-1:j*widthr]),
        .datab(acc_data_reg[2][j*widthr+widthr-1:j*widthr]),
        .result(int_data_wire[5][j*widthr+widthr-1:j*widthr]));

      // int_data_wire[6] = acc_data_reg[1] * acc_data_reg[5], 3-stage pipeline.
      // NOTE(review): this multiplier is configured 18x18->36 while its ports
      // are connected to widthr-wide slices (mult_unit_2 uses widthr for all
      // three widths). Left unchanged because the intended width cannot be
      // determined from this file - confirm and align.
      lpm_mult #(
        .lpm_hint("MAXIMIZE_SPEED=9"),
        .lpm_representation("SIGNED"),
        .lpm_type("LPM_MULT"),
        .lpm_pipeline(3),
        .lpm_widtha(18),
        .lpm_widthb(18),
        .lpm_widthp(36)) mult_unit_1 (
        .clock(clock),
        .clken(int_wren_reg),
        // .dataa(int_data_wire[4][j*widthr+widthr-1:j*widthr]),
        .dataa(acc_data_reg[1][j*widthr+widthr-1:j*widthr]),
        .datab(acc_data_reg[5][j*widthr+widthr-1:j*widthr]),
        .result(int_data_wire[6][j*widthr+widthr-1:j*widthr]));

      // int_data_wire[7] = acc_data_reg[2] * acc_data_reg[6], 3-stage pipeline.
      lpm_mult #(
        .lpm_hint("MAXIMIZE_SPEED=9"),
        .lpm_representation("SIGNED"),
        .lpm_type("LPM_MULT"),
        .lpm_pipeline(3),
        .lpm_widtha(widthr),
        .lpm_widthb(widthr),
        .lpm_widthp(widthr)) mult_unit_2 (
        .clock(clock),
        .clken(int_wren_reg),
        // .dataa(int_data_wire[5][j*widthr+widthr-1:j*widthr]),
        .dataa(acc_data_reg[2][j*widthr+widthr-1:j*widthr]),
        .datab(acc_data_reg[6][j*widthr+widthr-1:j*widthr]),
        .result(int_data_wire[7][j*widthr+widthr-1:j*widthr]));

      // int_data_wire[8] = acc_data_reg[3] + acc_data_reg[4]
      lpm_add_sub #(
        .lpm_direction("ADD"),
        .lpm_hint("ONE_INPUT_IS_CONSTANT=NO,CIN_USED=NO"),
        .lpm_representation("SIGNED"),
        .lpm_type("LPM_ADD_SUB"),
        .lpm_width(widthr)) add_unit_2 (
        .dataa(acc_data_reg[3][j*widthr+widthr-1:j*widthr]),
        .datab(acc_data_reg[4][j*widthr+widthr-1:j*widthr]),
        .result(int_data_wire[8][j*widthr+widthr-1:j*widthr]));
    end
  endgenerate

  // RAM read address offset: low six bits of the write address plus the
  // selected delay. This adder does not depend on the channel index, so it is
  // instantiated once, outside the generate loop; inside the loop it would
  // place one driver per channel on del_addr_wire.
  lpm_add_sub #(
    .lpm_direction("ADD"),
    .lpm_hint("ONE_INPUT_IS_CONSTANT=NO,CIN_USED=NO"),
    .lpm_representation("UNSIGNED"),
    .lpm_type("LPM_ADD_SUB"),
    .lpm_width(6)) add_unit_1 (
    .dataa(int_addr_reg[5:0]),
    .datab(int_addr_wire[5:0]),
    .result(del_addr_wire));

  // 256-word dual-port RAM shared by all channels (one word holds all
  // channels). Port A writes acc_data_reg[0] at int_addr_reg while
  // int_wren_reg is high; port B reads at {int_addr_wire[7:6], del_addr_wire}.
  altsyncram #(
    .address_aclr_b("NONE"),
    .address_reg_b("CLOCK0"),
    .clock_enable_input_a("BYPASS"),
    .clock_enable_input_b("BYPASS"),
    .clock_enable_output_b("BYPASS"),
    .intended_device_family("Cyclone III"),
    .lpm_type("altsyncram"),
    .numwords_a(256),
    .numwords_b(256),
    .operation_mode("DUAL_PORT"),
    .outdata_aclr_b("NONE"),
    .outdata_reg_b("CLOCK0"),
    .power_up_uninitialized("FALSE"),
    .read_during_write_mode_mixed_ports("DONT_CARE"),
    .widthad_a(8),
    .widthad_b(8),
    .width_a(size*widthr),
    .width_b(size*widthr),
    .width_byteena_a(1)) ram_unit_1 (
    .wren_a(int_wren_reg),
    .clock0(clock),
    .address_a(int_addr_reg),
    .address_b({int_addr_wire[7:6], del_addr_wire}),
    .data_a(acc_data_reg[0]),
    .q_b(int_data_wire[3]),
    .aclr0(1'b0),
    .aclr1(1'b0),
    .addressstall_a(1'b0),
    .addressstall_b(1'b0),
    .byteena_a(1'b1),
    .byteena_b(1'b1),
    .clock1(1'b1),
    .clocken0(1'b1),
    .clocken1(1'b1),
    .clocken2(1'b1),
    .clocken3(1'b1),
    // Port B is never written (wren_b is tied low); the constant is sized
    // to width_b so the connection has no width mismatch for size > 1.
    .data_b({(size*widthr){1'b1}}),
    .eccstatus(),
    .q_a(),
    .rden_a(1'b1),
    .rden_b(1'b1),
    .wren_b(1'b0));

  // Selects, by int_chan_next, one of three 8-bit words of the form
  // {pass index (2 bits), 1'b0, 5-bit field of del_data}.
  lpm_mux #(
    .lpm_size(3),
    .lpm_type("LPM_MUX"),
    .lpm_width(8),
    .lpm_widths(2)) mux_unit_1 (
    .sel(int_chan_next),
    .data({
      2'd2, 1'b0, del_data[14:10],
      2'd1, 1'b0, del_data[9:5],
      2'd0, 1'b0, del_data[4:0]}),
    .result(int_addr_wire));

  // Sequential part: synchronous reset, otherwise load the next-state values.
  always @(posedge clock)
  begin
    if (reset)
    begin
      int_wren_reg <= 1'b1;
      int_chan_reg <= 2'd0;
      int_case_reg <= 3'd0;
      int_addr_reg <= 8'd0;
      for(i = 0; i <= 6; i = i + 1)
      begin
        acc_data_reg[i] <= {(size*widthr){1'b0}};
      end
      for(i = 0; i <= 17; i = i + 1)
      begin
        int_data_reg[i] <= {(size*widthr){1'b0}};
      end
    end
    else
    begin
      int_wren_reg <= int_wren_next;
      int_chan_reg <= int_chan_next;
      int_case_reg <= int_case_next;
      int_addr_reg <= int_addr_next;
      for(i = 0; i <= 6; i = i + 1)
      begin
        acc_data_reg[i] <= acc_data_next[i];
      end
      for(i = 0; i <= 17; i = i + 1)
      begin
        int_data_reg[i] <= int_data_next[i];
      end
    end
  end

  // Combinational next-state logic. Every *_next value defaults to the
  // current register value so that no latches are inferred.
  always @*
  begin
    int_wren_next = int_wren_reg;
    int_chan_next = int_chan_reg;
    int_case_next = int_case_reg;
    int_addr_next = int_addr_reg;
    for(i = 0; i <= 6; i = i + 1)
    begin
      acc_data_next[i] = acc_data_reg[i];
    end
    for(i = 0; i <= 17; i = i + 1)
    begin
      int_data_next[i] = int_data_reg[i];
    end

    case (int_case_reg)
      0:
      begin
        // write zeros: clear all data registers and start at address 0
        int_wren_next = 1'b1;
        int_addr_next = 8'd0;
        for(i = 0; i <= 6; i = i + 1)
        begin
          acc_data_next[i] = {(size*widthr){1'b0}};
        end
        for(i = 0; i <= 17; i = i + 1)
        begin
          int_data_next[i] = {(size*widthr){1'b0}};
        end
        int_case_next = 3'd1;
      end
      1:
      begin
        // write zeros: step through all 256 addresses, then stop writing
        int_addr_next = int_addr_reg + 8'd1;
        if (&int_addr_reg)
        begin
          int_wren_next = 1'b0;
          int_chan_next = 2'd0;
          int_case_next = 3'd2;
        end
      end
      2: // frame
      begin
        // idle until frame is asserted
        if (frame)
        begin
          int_wren_next = 1'b1;

          int_addr_next[7:6] = 2'd0;

          // set read addr for 2nd pipeline
          int_chan_next = 2'd1;

          // register input data for 2nd and 3rd sums
          int_data_next[0] = int_data_wire[1];
          int_data_next[1] = int_data_wire[2];

          // prepare registers for 1st sum
          acc_data_next[0] = int_data_wire[0];
          acc_data_next[1] = int_data_reg[2];
          acc_data_next[2] = int_data_reg[3];
          acc_data_next[3] = int_data_reg[4];
          acc_data_next[4] = int_data_reg[5];
          acc_data_next[5] = mul_data_wire[0];
          acc_data_next[6] = mul_data_wire[1];

          int_case_next = 3'd3;
        end
      end
      3: // 1st sum
      begin
        int_addr_next[7:6] = 2'd1;

        // set read addr for 3rd pipeline
        int_chan_next = 2'd2;

        // prepare registers for 2nd sum
        acc_data_next[0] = int_data_reg[0];
        acc_data_next[1] = int_data_reg[7];
        acc_data_next[2] = int_data_reg[8];
        acc_data_next[3] = int_data_reg[9];
        acc_data_next[4] = int_data_reg[10];
        acc_data_next[5] = mul_data_wire[2];
        acc_data_next[6] = mul_data_wire[3];

        // register 1st sum
        int_data_next[2] = int_data_wire[4];
        int_data_next[3] = int_data_wire[5];
        int_data_next[4] = int_data_wire[6];
        int_data_next[5] = int_data_wire[7];
        int_data_next[6] = int_data_wire[8];

        int_case_next = 3'd4;
      end
      4: // 2nd sum
      begin
        int_addr_next[7:6] = 2'd2;

        // prepare registers for 3rd sum
        acc_data_next[0] = int_data_reg[1];
        acc_data_next[1] = int_data_reg[12];
        acc_data_next[2] = int_data_reg[13];
        acc_data_next[3] = int_data_reg[14];
        acc_data_next[4] = int_data_reg[15];
        acc_data_next[5] = mul_data_wire[4];
        acc_data_next[6] = mul_data_wire[5];

        // register 2nd sum
        int_data_next[7] = int_data_wire[4];
        int_data_next[8] = int_data_wire[5];
        int_data_next[9] = int_data_wire[6];
        int_data_next[10] = int_data_wire[7];
        int_data_next[11] = int_data_wire[8];

        int_case_next = 3'd5;
      end
      5: // 3rd sum
      begin
        int_wren_next = 1'b0;

        // set read addr for 1st pipeline
        int_chan_next = 2'd0;

        // register 3rd sum
        int_data_next[12] = int_data_wire[4];
        int_data_next[13] = int_data_wire[5];
        int_data_next[14] = int_data_wire[6];
        int_data_next[15] = int_data_wire[7];
        int_data_next[16] = int_data_wire[8];

        // advance the low six bits of the write address for the next frame
        int_addr_next[5:0] = int_addr_reg[5:0] + 6'd1;

        int_case_next = 3'd2;
      end
      default:
      begin
        int_case_next = 3'd0;
      end
    endcase
  end

  // Output is taken from the combinational next values, not the registers.
  assign out_data = {int_data_next[16], int_data_next[11], int_data_next[6]};

endmodule