// clip: four-channel, time-multiplexed "scale, subtract delayed copy, clip" stage.
//
// For the channel currently being processed the datapath computes
//
//   add_data_wire = ((sample * amp) << (width1 - width2)) - (delayed_sample * tau)
//
// where `sample` is inp_data_reg[0], `delayed_sample` is read back from a
// 256-word RAM (4 channels x 64 words) and amp/tau are the per-channel
// coefficients latched from amp_data / tau_data.  The registered difference is
// then turned into a widthr-bit result (out_data_wire): 0 when the MSB of the
// difference is set (negative), otherwise bits [shift+widthr-1:shift] rounded
// with bit [shift-1].
//
// A single pair of multipliers and one RAM are shared by the four channels; the
// state machine (int_case_reg) walks through the channels after every `frame`.
//
// Parameters
//   shift  : index of the LSB of the output slice.  Must be >= 1 because bit
//            [shift-1] is used as the rounding bit.
//   width  : bit width of each input sample.
//   widthr : bit width of each output sample.  shift + widthr must not exceed
//            width + 18 (the width of the internal difference).
//
// Ports
//   clock    : clock for all registers, both multipliers and the RAM.
//   frame    : sampled while the state machine idles in state 2; when high, the
//              four input samples are latched and states 3, 4, 5 are executed.
//   reset    : synchronous, active high.  Forces state 0, after which states
//              0-1 write zeros to all 256 RAM addresses.
//   del_data : 4 x 6-bit delays, one per channel (channel 0 in the low bits).
//   amp_data : 4 x 6-bit coefficients applied to the current sample.
//   tau_data : 4 x 16-bit coefficients applied to the delayed sample.
//   inp_data : 4 x width-bit input samples.
//   out_data : 4 x widthr-bit results.
//
// NOTE(review): only negative results are clipped.  Bits above
// [shift+widthr-1] of a positive difference are discarded, and the rounding
// addition can wrap from all-ones to zero -- confirm that the upstream scaling
// guarantees this cannot happen.
module clip
  #(
    parameter shift  = 24, // right shift of the result
    parameter width  = 27, // bit width of the input data
    parameter widthr = 12  // bit width of the output data
  )
  (
    input  wire                  clock, frame, reset,
    input  wire [4*6-1:0]        del_data,
    input  wire [4*6-1:0]        amp_data,
    input  wire [4*16-1:0]       tau_data,
    input  wire [4*width-1:0]    inp_data,
    output wire [4*widthr-1:0]   out_data
  );

  localparam width1 = width + 16;  // width of delayed_sample * tau
  localparam width2 = width + 6;   // width of sample * amp
  localparam width3 = width1 + 2;  // width of the difference (2 guard bits, MSB used as sign)

  // Control state.
  reg int_wren_reg, int_wren_next;              // RAM write enable and multiplier clock enable
  reg int_flag_reg, int_flag_next;              // set in state 5, consumed in state 2
  reg [1:0] int_chan_reg, int_chan_next;        // channel selected on the read-address mux
  reg [2:0] int_case_reg, int_case_next;        // state machine state

  // RAM addressing: bits [7:6] select the channel, bits [5:0] the position.
  reg [7:0] int_addr_reg, int_addr_next;        // write address
  reg [5:0] del_addr_reg, del_addr_next;        // running position, incremented in state 4
  wire [5:0] del_addr_wire;                     // read position = del_addr_reg - channel delay
  wire [7:0] int_addr_wire;                     // mux output: {channel index, channel delay}

  // Results.  Index 0 is the working/output copy, 1..4 hold per-channel values.
  reg [widthr-1:0] out_data_reg [4:0], out_data_next [4:0];
  wire [widthr-1:0] out_data_wire;

  reg [width3-1:0] add_data_reg [4:0], add_data_next [4:0];
  wire [width3-1:0] add_data_wire;

  wire [width1-1:0] mul_data_wire1;             // delayed_sample * tau
  wire [width2-1:0] mul_data_wire2;             // sample * amp

  // inp_data_wire[0..3] are the unpacked inputs, inp_data_wire[4] is the RAM read data.
  reg [width-1:0] inp_data_reg [3:0], inp_data_next [3:0];
  wire [width-1:0] inp_data_wire [4:0];

  reg [5:0] amp_data_reg, amp_data_next;
  wire [5:0] amp_data_wire [3:0];

  reg [15:0] tau_data_reg, tau_data_next;
  wire [15:0] tau_data_wire [3:0];

  // Each process gets its own loop index so that no variable is written from
  // two always blocks.
  integer i; // loop index of the clocked process
  integer k; // loop index of the combinational process
  genvar j;

  // Unpack the per-channel fields of the packed input buses.
  generate
    for (j = 0; j < 4; j = j + 1)
    begin : INT_DATA
      assign inp_data_wire[j] = inp_data[j*width+width-1:j*width];
      assign amp_data_wire[j] = amp_data[j*6+6-1:j*6];
      assign tau_data_wire[j] = tau_data[j*16+16-1:j*16];
    end
  endgenerate

  // Select {channel index, channel delay} for the channel chosen by
  // int_chan_next (the next-state value, so the selection is made one clock early).
  lpm_mux #(
    .lpm_size(4),
    .lpm_type("LPM_MUX"),
    .lpm_width(8),
    .lpm_widths(2)) mux_unit_1 (
    .sel(int_chan_next),
    .data({
      2'd3, del_data[3*6+6-1:3*6],
      2'd2, del_data[2*6+6-1:2*6],
      2'd1, del_data[1*6+6-1:1*6],
      2'd0, del_data[0*6+6-1:0*6]}),
    .result(int_addr_wire));

  // Read position inside the channel's 64-word region (wraps modulo 64).
  assign del_addr_wire = del_addr_reg - int_addr_wire[5:0];

  // delayed_sample * tau
  lpm_mult #(
    .lpm_hint("MAXIMIZE_SPEED=9"),
    .lpm_representation("UNSIGNED"),
    .lpm_type("LPM_MULT"),
    .lpm_pipeline(3),
    .lpm_widtha(width),
    .lpm_widthb(16),
    .lpm_widthp(width1)) mult_unit_1 (
    .clock(clock),
    .clken(int_wren_reg),
    .dataa(inp_data_wire[4]),
    .datab(tau_data_reg),
    .result(mul_data_wire1));

  // sample * amp
  lpm_mult #(
    .lpm_hint("MAXIMIZE_SPEED=9"),
    .lpm_representation("UNSIGNED"),
    .lpm_type("LPM_MULT"),
    .lpm_pipeline(3),
    .lpm_widtha(width),
    .lpm_widthb(6),
    .lpm_widthp(width2)) mult_unit_2 (
    .clock(clock),
    .clken(int_wren_reg),
    .dataa(inp_data_reg[0]),
    .datab(amp_data_reg),
    .result(mul_data_wire2));

  // Align the amp product to the tau product (left shift by width1 - width2)
  // and subtract; two leading zero bits leave room for the sign of the result.
  assign add_data_wire =
    {2'b0, mul_data_wire2, {(width1-width2){1'b0}}} -
    {2'b0, mul_data_wire1};

  // Clip negative values to zero; otherwise take the slice starting at `shift`
  // and round by adding bit [shift-1].  The replicated MSB is always zero on
  // this branch of the conditional.
  assign out_data_wire = add_data_reg[0][width3-1] ? {(widthr){1'b0}} :
    add_data_reg[0][shift+widthr-1:shift] +
    {{(widthr-1){add_data_reg[0][width3-1]}}, add_data_reg[0][shift-1]};

  // Sample history: port A writes inp_data_reg[0] at int_addr_reg, port B reads
  // the delayed sample of the channel selected by the mux.
  altsyncram #(
    .address_aclr_b("NONE"),
    .address_reg_b("CLOCK0"),
    .clock_enable_input_a("BYPASS"),
    .clock_enable_input_b("BYPASS"),
    .clock_enable_output_b("BYPASS"),
    .intended_device_family("Cyclone III"),
    .lpm_type("altsyncram"),
    .numwords_a(256),
    .numwords_b(256),
    .operation_mode("DUAL_PORT"),
    .outdata_aclr_b("NONE"),
    .outdata_reg_b("CLOCK0"),
    .power_up_uninitialized("FALSE"),
    .read_during_write_mode_mixed_ports("DONT_CARE"),
    .widthad_a(8),
    .widthad_b(8),
    .width_a(width),
    .width_b(width),
    .width_byteena_a(1)) ram_unit_1 (
    .wren_a(int_wren_reg),
    .clock0(clock),
    .address_a(int_addr_reg),
    .address_b({int_addr_wire[7:6], del_addr_wire}),
    .data_a(inp_data_reg[0]),
    .q_b(inp_data_wire[4]),
    .aclr0(1'b0),
    .aclr1(1'b0),
    .addressstall_a(1'b0),
    .addressstall_b(1'b0),
    .byteena_a(1'b1),
    .byteena_b(1'b1),
    .clock1(1'b1),
    .clocken0(1'b1),
    .clocken1(1'b1),
    .clocken2(1'b1),
    .clocken3(1'b1),
    .data_b({(width){1'b1}}),
    .eccstatus(),
    .q_a(),
    .rden_a(1'b1),
    .rden_b(1'b1),
    .wren_b(1'b0));

  // State registers with synchronous reset.
  always @(posedge clock)
  begin
    if (reset)
    begin
      int_wren_reg <= 1'b1;
      int_flag_reg <= 1'b0;
      int_chan_reg <= 2'd0;
      int_case_reg <= 3'd0;
      del_addr_reg <= 6'd0;
      int_addr_reg <= 8'd0;
      amp_data_reg <= 6'd0;
      tau_data_reg <= 16'd0;
      for (i = 0; i <= 3; i = i + 1)
      begin
        inp_data_reg[i] <= {(width){1'b0}};
      end
      for (i = 0; i <= 4; i = i + 1)
      begin
        out_data_reg[i] <= {(widthr){1'b0}};
        add_data_reg[i] <= {(width3){1'b0}};
      end
    end
    else
    begin
      int_wren_reg <= int_wren_next;
      int_flag_reg <= int_flag_next;
      int_chan_reg <= int_chan_next;
      int_case_reg <= int_case_next;
      del_addr_reg <= del_addr_next;
      int_addr_reg <= int_addr_next;
      amp_data_reg <= amp_data_next;
      tau_data_reg <= tau_data_next;
      for (i = 0; i <= 3; i = i + 1)
      begin
        inp_data_reg[i] <= inp_data_next[i];
      end
      for (i = 0; i <= 4; i = i + 1)
      begin
        out_data_reg[i] <= out_data_next[i];
        add_data_reg[i] <= add_data_next[i];
      end
    end
  end

  // Next-state logic.
  always @*
  begin
    // Default: hold every register.
    int_wren_next = int_wren_reg;
    int_flag_next = int_flag_reg;
    int_chan_next = int_chan_reg;
    int_case_next = int_case_reg;
    del_addr_next = del_addr_reg;
    int_addr_next = int_addr_reg;
    amp_data_next = amp_data_reg;
    tau_data_next = tau_data_reg;
    for (k = 0; k <= 3; k = k + 1)
    begin
      inp_data_next[k] = inp_data_reg[k];
    end
    for (k = 0; k <= 4; k = k + 1)
    begin
      out_data_next[k] = out_data_reg[k];
      add_data_next[k] = add_data_reg[k];
    end

    case (int_case_reg)
      0:
      begin
        // write zeros: clear all data registers and start at RAM address 0
        int_wren_next = 1'b1;
        del_addr_next = 6'd0;
        int_addr_next = 8'd0;
        amp_data_next = 6'd0;
        tau_data_next = 16'd0;
        for (k = 0; k <= 3; k = k + 1)
        begin
          inp_data_next[k] = {(width){1'b0}};
        end
        for (k = 0; k <= 4; k = k + 1)
        begin
          out_data_next[k] = {(widthr){1'b0}};
          add_data_next[k] = {(width3){1'b0}};
        end

        int_case_next = 3'd1;
      end
      1:
      begin
        // write zeros: step through the RAM until the last address is reached
        int_addr_next = int_addr_reg + 8'd1;
        if (&int_addr_reg)
        begin
          int_wren_next = 1'b0;
          int_flag_next = 1'b0;
          int_chan_next = 2'd0;
          int_case_next = 3'd2;
        end
      end
      2: // frame
      begin
        int_flag_next = 1'b0;
        // enable RAM writes and the multipliers only while a frame is processed
        int_wren_next = frame;
        if (frame)
        begin
          int_addr_next[7:6] = 2'd0;

          // set read addr for 2nd pipeline
          int_chan_next = 2'd1;

          // register input data for 2nd, 3rd and 4th sums
          inp_data_next[1] = inp_data_wire[1];
          inp_data_next[2] = inp_data_wire[2];
          inp_data_next[3] = inp_data_wire[3];

          // prepare registers for 1st sum
          inp_data_next[0] = inp_data_wire[0];

          // prepare registers for 2nd shift
          add_data_next[0] = add_data_reg[2];

          tau_data_next = tau_data_wire[0];
          amp_data_next = amp_data_wire[0];

          int_case_next = 3'd3;
        end
        if (int_flag_reg) // register 4th sum
        begin
          // first cycle after state 5: move the write position to del_addr_reg
          int_addr_next[5:0] = del_addr_reg;

          // register 1st product
          add_data_next[1] = add_data_wire;
          out_data_next[1] = out_data_wire;
        end
      end
      3: // 1st sum
      begin
        int_addr_next[7:6] = 2'd1;

        // set read addr for 3rd pipeline
        int_chan_next = 2'd2;

        // prepare registers for 2nd sum
        inp_data_next[0] = inp_data_reg[1];

        // prepare registers for 3rd shift
        add_data_next[0] = add_data_reg[3];

        tau_data_next = tau_data_wire[1];
        amp_data_next = amp_data_wire[1];

        // register 2nd product
        add_data_next[2] = add_data_wire;
        out_data_next[2] = out_data_wire;

        int_case_next = 3'd4;
      end
      4: // 2nd sum
      begin
        int_addr_next[7:6] = 2'd2;

        // set read addr for 4th pipeline
        int_chan_next = 2'd3;

        // prepare registers for 3rd sum
        inp_data_next[0] = inp_data_reg[2];

        // prepare registers for 4th shift
        add_data_next[0] = add_data_reg[4];

        tau_data_next = tau_data_wire[2];
        amp_data_next = amp_data_wire[2];

        // register 3rd product
        add_data_next[3] = add_data_wire;
        out_data_next[3] = out_data_wire;

        // advance the running position once per frame
        del_addr_next = del_addr_reg + 6'd1;

        int_case_next = 3'd5;
      end
      5: // 3rd sum
      begin
        int_flag_next = 1'b1;

        int_addr_next[7:6] = 2'd3;

        // set read addr for 1st pipeline
        int_chan_next = 2'd0;

        // prepare registers for 4th sum
        inp_data_next[0] = inp_data_reg[3];

        // prepare registers for 1st shift
        add_data_next[0] = add_data_reg[1];

        tau_data_next = tau_data_wire[3];
        amp_data_next = amp_data_wire[3];

        // register 4th product
        add_data_next[4] = add_data_wire;
        out_data_next[4] = out_data_wire;

        // register 4th output
        out_data_next[0] = out_data_reg[1];

        int_case_next = 3'd2;
      end
      default:
      begin
        // unused encodings (6, 7): restart with the RAM clear sequence
        int_case_next = 3'd0;
      end
    endcase
  end

  // The lowest field is out_data_reg[0], the copy of out_data_reg[1] taken in
  // state 5; the upper three fields are out_data_reg[2..4].
  assign out_data = {out_data_reg[4], out_data_reg[3], out_data_reg[2], out_data_reg[0]};

endmodule