Index: /sandbox/MultiChannelUSB/clip.v
===================================================================
--- /sandbox/MultiChannelUSB/clip.v	(revision 139)
+++ /sandbox/MultiChannelUSB/clip.v	(revision 139)
@@ -0,0 +1,390 @@
+module clip // 4-lane time-multiplexed datapath: per lane (inp*amp) minus (delayed inp*tau), then a widthr-bit slice taken at bit 'shift'
+	#(
+		parameter	size	=	1, // number of channels
+		parameter	shift	=	24, // right shift of the result
+		parameter	width	=	27, // bit width of the input data
+		parameter	widthr	=	12 // bit width of the output data
+	)
+	(
+		input	wire						clock, frame, reset, // reset is synchronous; frame starts one pass over the four lanes
+		input	wire	[4*size*6-1:0]		del_data, // 6-bit read-back delay per lane, subtracted from the slot counter
+		input	wire	[4*size*6-1:0]		amp_data, // 6-bit factor multiplied with the current sample
+		input	wire	[4*size*16-1:0]		tau_data, // 16-bit factor multiplied with the delayed sample
+		input	wire	[4*size*width-1:0]	inp_data, // lane k of channel j occupies field index 4*j+k
+		output	wire	[4*size*widthr-1:0]	out_data // registered results, size*widthr bits per lane
+	);
+
+	localparam	width1	=	width + 16; // width of the (delayed sample x tau) product
+	localparam	width2	=	width + 6; // width of the (current sample x amp) product
+	localparam	width3	=	width1 + 2; // width of the difference: two extra zero-padded bits on top
+
+	reg							int_wren_reg, int_wren_next; // RAM write enable, also the multipliers' clock enable
+	reg							int_flag_reg, int_flag_next; // set in state 5, consumed in state 2
+	reg		[1:0]				int_chan_reg, int_chan_next; // lane select for the delay mux
+	reg		[2:0]				int_case_reg, int_case_next; // state of the control FSM
+	reg		[7:0]				int_addr_reg, int_addr_next; // RAM write address: [7:6] lane, [5:0] slot
+
+	reg		[5:0]				del_addr_reg, del_addr_next; // slot counter, incremented in state 4
+	wire	[5:0]				del_addr_wire; // slot counter minus the selected lane's delay
+	wire	[7:0]				int_addr_wire; // delay mux result: {lane number, lane delay}
+
+	reg		[size*widthr-1:0]	out_data_reg [4:0], out_data_next [4:0]; // [0..3] captured results, [4] later copy of [0]
+	wire	[size*widthr-1:0]	out_data_wire; // combinational result of the datapath
+
+	wire	[size*width3-1:0]	add_data_wire; // difference of the two products
+
+	wire	[size*width1-1:0]	mul_data_wire1; // delayed sample x tau
+	wire	[size*width2-1:0]	mul_data_wire2; // current sample x amp
+
+	reg		[size*width-1:0]	inp_data_reg [3:0], inp_data_next [3:0]; // [0] feeds RAM write port and amp multiplier; [1..3] hold lanes 1-3
+	wire	[size*width-1:0]	inp_data_wire [4:0]; // [0..3] de-interleaved input lanes, [4] RAM read data
+
+	reg		[size*6-1:0]		amp_data_reg, amp_data_next; // amp factor of the lane being processed
+	wire	[size*6-1:0]		amp_data_wire [3:0]; // de-interleaved amp_data, one entry per lane
+
+	reg		[size*16-1:0]		tau_data_reg, tau_data_next; // tau factor of the lane being processed
+	wire	[size*16-1:0]		tau_data_wire [3:0]; // de-interleaved tau_data, one entry per lane
+
+	integer i;
+	genvar j;
+
+	generate
+		for (j = 0; j < size; j = j + 1) // one iteration per channel
+		begin : INT_DATA
+			assign inp_data_wire[0][j*width+width-1:j*width] = inp_data[(4*j+0)*width+width-1:(4*j+0)*width]; // regroup fields by lane
+			assign inp_data_wire[1][j*width+width-1:j*width] = inp_data[(4*j+1)*width+width-1:(4*j+1)*width];
+			assign inp_data_wire[2][j*width+width-1:j*width] = inp_data[(4*j+2)*width+width-1:(4*j+2)*width];
+			assign inp_data_wire[3][j*width+width-1:j*width] = inp_data[(4*j+3)*width+width-1:(4*j+3)*width];
+			assign amp_data_wire[0][j*6+6-1:j*6] = amp_data[(4*j+0)*6+6-1:(4*j+0)*6];
+			assign amp_data_wire[1][j*6+6-1:j*6] = amp_data[(4*j+1)*6+6-1:(4*j+1)*6];
+			assign amp_data_wire[2][j*6+6-1:j*6] = amp_data[(4*j+2)*6+6-1:(4*j+2)*6];
+			assign amp_data_wire[3][j*6+6-1:j*6] = amp_data[(4*j+3)*6+6-1:(4*j+3)*6];
+			assign tau_data_wire[0][j*16+16-1:j*16] = tau_data[(4*j+0)*16+16-1:(4*j+0)*16];
+			assign tau_data_wire[1][j*16+16-1:j*16] = tau_data[(4*j+1)*16+16-1:(4*j+1)*16];
+			assign tau_data_wire[2][j*16+16-1:j*16] = tau_data[(4*j+2)*16+16-1:(4*j+2)*16];
+			assign tau_data_wire[3][j*16+16-1:j*16] = tau_data[(4*j+3)*16+16-1:(4*j+3)*16];
+                                                                                         
+			lpm_mux #( // picks the {lane, delay} word of the lane named by int_chan_next
+				.lpm_size(4),
+				.lpm_type("LPM_MUX"),
+				.lpm_width(8),
+				.lpm_widths(2)) mux_unit_1 (
+				.sel(int_chan_next), // driven by the next-state value, not the registered one
+				.data({ // word k = {lane number k, 6-bit delay of lane k}
+					2'd3, del_data[(4*j+3)*6+6-1:(4*j+3)*6],
+					2'd2, del_data[(4*j+2)*6+6-1:(4*j+2)*6],
+					2'd1, del_data[(4*j+1)*6+6-1:(4*j+1)*6],
+					2'd0, del_data[(4*j+0)*6+6-1:(4*j+0)*6]}),
+				.result(int_addr_wire)); // NOTE(review): not indexed by j, so size > 1 gives this wire several drivers - confirm size = 1 is the only intended use
+/*
+			lpm_add_sub	#(
+				.lpm_direction("SUB"),
+				.lpm_hint("ONE_INPUT_IS_CONSTANT=NO,CIN_USED=NO"),
+				.lpm_representation("UNSIGNED"),
+				.lpm_type("LPM_ADD_SUB"),
+				.lpm_width(6)) add_unit_1 (
+				.dataa(del_addr_reg),
+				.datab(int_addr_wire[5:0]),
+				.result(del_addr_wire));
+*/
+			assign del_addr_wire = del_addr_reg - int_addr_wire[5:0]; // 6-bit subtraction; NOTE(review): also repeated for every j
+
+			lpm_mult #( // delayed sample (RAM read data) x tau
+				.lpm_hint("MAXIMIZE_SPEED=9"),
+				.lpm_representation("UNSIGNED"),
+				.lpm_type("LPM_MULT"),
+				.lpm_pipeline(3),
+				.lpm_widtha(width),
+				.lpm_widthb(16),
+				.lpm_widthp(width1)) mult_unit_1 (
+				.clock(clock),
+				.clken(int_wren_reg), // clock enable is the same signal as the RAM write enable
+				.dataa(inp_data_wire[4][j*width+width-1:j*width]),
+				.datab(tau_data_reg[j*16+16-1:j*16]),
+				.result(mul_data_wire1[j*width1+width1-1:j*width1]));
+
+			lpm_mult #( // current sample x amp
+				.lpm_hint("MAXIMIZE_SPEED=9"),
+				.lpm_representation("UNSIGNED"),
+				.lpm_type("LPM_MULT"),
+				.lpm_pipeline(3),
+				.lpm_widtha(width),
+				.lpm_widthb(6),
+				.lpm_widthp(width2)) mult_unit_2 (
+				.clock(clock),
+				.clken(int_wren_reg), // clock enable is the same signal as the RAM write enable
+				.dataa(inp_data_reg[0][j*width+width-1:j*width]),
+				.datab(amp_data_reg[j*6+6-1:j*6]),
+				.result(mul_data_wire2[j*width2+width2-1:j*width2]));
+/*
+			lpm_add_sub	#(
+				.lpm_direction("SUB"),
+				.lpm_hint("ONE_INPUT_IS_CONSTANT=NO,CIN_USED=NO"),
+				.lpm_representation("UNSIGNED"),
+				.lpm_type("LPM_ADD_SUB"),
+				.lpm_width(width3)) add_unit_2 (
+				.dataa({2'b0, mul_data_wire2[j*width2+width2-1:j*width2], {(width1-width2){1'b0}}}),
+				.datab({2'b0, mul_data_wire1[j*width1+width1-1:j*width1]}),
+				.result(add_data_wire[j*width3+width3-1:j*width3]));
+*/
+			assign add_data_wire[j*width3+width3-1:j*width3] = 
+				  {2'b0, mul_data_wire2[j*width2+width2-1:j*width2], {(width1-width2){1'b0}}} // amp product padded on the right to width1 bits
+				- {2'b0, mul_data_wire1[j*width1+width1-1:j*width1]}; // minus the tau product; both operands zero-extended by two bits
+/*
+			lpm_add_sub	#(
+				.lpm_direction("ADD"),
+				.lpm_hint("ONE_INPUT_IS_CONSTANT=NO,CIN_USED=NO"),
+				.lpm_representation("UNSIGNED"),
+				.lpm_type("LPM_ADD_SUB"),
+				.lpm_width(widthr)) add_unit_3 (
+				.dataa(add_data_wire[j*width3+shift+widthr-1:j*width3+shift]),
+				.datab({{(widthr-1){add_data_wire[j*width3+width3-1]}}, add_data_wire[j*width3+shift-1]}),
+				.result(out_data_wire[j*widthr+widthr-1:j*widthr]));
+*/
+			assign out_data_wire[j*widthr+widthr-1:j*widthr] = 
+				  add_data_wire[j*width3+shift+widthr-1:j*width3+shift] // widthr bits of the difference starting at bit 'shift'
+				+ {{(widthr-1){add_data_wire[j*width3+width3-1]}}, add_data_wire[j*width3+shift-1]}; // plus {copies of the difference MSB, bit shift-1}
+
+		end
+	endgenerate
+
+
+	altsyncram #( // 256-word dual-port RAM: port A writes samples, port B reads them back
+		.address_aclr_b("NONE"),
+		.address_reg_b("CLOCK0"),
+		.clock_enable_input_a("BYPASS"),
+		.clock_enable_input_b("BYPASS"),
+		.clock_enable_output_b("BYPASS"),
+		.intended_device_family("Cyclone III"),
+		.lpm_type("altsyncram"),
+		.numwords_a(256),
+		.numwords_b(256),
+		.operation_mode("DUAL_PORT"),
+		.outdata_aclr_b("NONE"),
+		.outdata_reg_b("CLOCK0"), // read data is registered on clock0
+		.power_up_uninitialized("FALSE"),
+		.read_during_write_mode_mixed_ports("DONT_CARE"),
+		.widthad_a(8),
+		.widthad_b(8),
+		.width_a(size*width),
+		.width_b(size*width),
+		.width_byteena_a(1)) ram_unit_1 (
+		.wren_a(int_wren_reg),
+		.clock0(clock),
+		.address_a(int_addr_reg), // write address: {lane, slot}
+		.address_b({int_addr_wire[7:6], del_addr_wire}), // read address: {selected lane, slot counter minus that lane's delay}
+		.data_a(inp_data_reg[0]), // sample of the lane being processed
+		.q_b(inp_data_wire[4]), // delayed sample, input of mult_unit_1
+		.aclr0(1'b0),
+		.aclr1(1'b0),
+		.addressstall_a(1'b0),
+		.addressstall_b(1'b0),
+		.byteena_a(1'b1),
+		.byteena_b(1'b1),
+		.clock1(1'b1),
+		.clocken0(1'b1),
+		.clocken1(1'b1),
+		.clocken2(1'b1),
+		.clocken3(1'b1),
+		.data_b({(size*width){1'b1}}), // port B never writes (wren_b is tied low)
+		.eccstatus(),
+		.q_a(),
+		.rden_a(1'b1),
+		.rden_b(1'b1),
+		.wren_b(1'b0));
+
+	always @(posedge clock) // state registers with synchronous reset
+	begin
+		if (reset)
+        begin
+			int_wren_reg <= 1'b1;
+			int_flag_reg <= 1'b0;
+			int_chan_reg <= 2'd0;
+			int_case_reg <= 3'd0; // restart from state 0, which begins the RAM clear
+			del_addr_reg <= 6'd0;
+			int_addr_reg <= 8'd0;
+			amp_data_reg <= 6'd0; // literal is zero-extended to size*6 bits
+			tau_data_reg <= 16'd0; // literal is zero-extended to size*16 bits
+			for(i = 0; i <= 3; i = i + 1)
+			begin
+				inp_data_reg[i] <= {(size*width){1'b0}};
+			end
+			for(i = 0; i <= 4; i = i + 1)
+			begin
+				out_data_reg[i] <= {(size*widthr){1'b0}};
+			end
+		end
+		else
+		begin
+			int_wren_reg <= int_wren_next; // load every register from its *_next value
+			int_flag_reg <= int_flag_next;
+			int_chan_reg <= int_chan_next;
+			int_case_reg <= int_case_next;
+			del_addr_reg <= del_addr_next;
+			int_addr_reg <= int_addr_next;
+			amp_data_reg <= amp_data_next;
+			tau_data_reg <= tau_data_next;
+			for(i = 0; i <= 3; i = i + 1)
+			begin
+				inp_data_reg[i] <= inp_data_next[i];
+			end                  
+			for(i = 0; i <= 4; i = i + 1)
+			begin
+				out_data_reg[i] <= out_data_next[i];
+			end                  
+		end             
+	end
+	
+	always @* // next-state logic; later assignments in a state override the defaults below
+	begin
+		int_wren_next = int_wren_reg; // defaults: every register holds its value
+		int_flag_next = int_flag_reg;
+		int_chan_next = int_chan_reg;
+		int_case_next = int_case_reg;
+		del_addr_next = del_addr_reg;
+		int_addr_next = int_addr_reg;
+		amp_data_next = amp_data_reg;
+		tau_data_next = tau_data_reg;
+		for(i = 0; i <= 3; i = i + 1)
+		begin
+			inp_data_next[i] = inp_data_reg[i];
+		end                  
+		for(i = 0; i <= 4; i = i + 1)
+		begin
+			out_data_next[i] = out_data_reg[i];
+		end                  
+
+		case (int_case_reg)		
+			0:
+			begin
+				// prepare the RAM clear: enable writes, zero the addresses and all data registers
+				int_wren_next = 1'b1;
+				del_addr_next = 6'd0;
+				int_addr_next = 8'd0;
+				amp_data_next = 6'd0;
+				tau_data_next = 16'd0;
+				for(i = 0; i <= 3; i = i + 1)
+				begin
+					inp_data_next[i] = {(size*width){1'b0}};
+				end                  
+				for(i = 0; i <= 4; i = i + 1)
+				begin
+					out_data_next[i] = {(size*widthr){1'b0}};
+				end                  
+
+				int_case_next = 3'd1;
+			end	
+			1:
+			begin
+				// step the write address through all 256 words while inp_data_reg[0] is zero
+				int_addr_next = int_addr_reg + 8'd1;
+				if (&int_addr_reg) // last address reached: stop writing and wait for frames
+				begin
+					int_wren_next = 1'b0;
+					int_flag_next = 1'b0;
+					int_chan_next = 2'd0;
+					int_case_next = 3'd2;
+				end
+			end	
+			2: // frame
+			begin
+				int_flag_next = 1'b0;
+				int_wren_next = frame; // int_wren_reg follows frame while waiting in this state
+				if (frame)
+				begin
+					int_addr_next[7:6] = 2'd0; // write into the lane-0 quarter of the RAM
+					
+					// set read addr for 2nd pipeline
+					int_chan_next = 2'd1;
+
+					// register input data for 2nd, 3rd and 4th sums
+					inp_data_next[1] = inp_data_wire[1];
+					inp_data_next[2] = inp_data_wire[2];
+					inp_data_next[3] = inp_data_wire[3];
+
+					// prepare registers for 1st sum					
+					inp_data_next[0] = inp_data_wire[0];
+
+					tau_data_next = tau_data_wire[0];
+					amp_data_next = amp_data_wire[0];
+					
+					int_case_next = 3'd3;
+				end
+				if (int_flag_reg) // register 4th sum (int_flag_reg is set by state 5)
+				begin
+					int_addr_next[5:0] = del_addr_reg; // write slot takes the current slot counter value
+					// register 1st product
+					out_data_next[0] = out_data_wire;
+				end
+			end
+			3:  // 1st sum
+			begin				
+				int_addr_next[7:6] = 2'd1; // write into the lane-1 quarter of the RAM
+
+				// set read addr for 3rd pipeline
+				int_chan_next = 2'd2;
+
+				// prepare registers for 2nd sum
+				inp_data_next[0] = inp_data_reg[1];
+				
+				tau_data_next = tau_data_wire[1]; // factors are read from the input ports here, not from a copy latched at frame
+				amp_data_next = amp_data_wire[1];
+
+				// register 2nd product
+				out_data_next[1] = out_data_wire;
+
+				int_case_next = 3'd4;
+			end
+			4: // 2nd sum
+			begin
+				int_addr_next[7:6] = 2'd2; // write into the lane-2 quarter of the RAM
+
+				// set read addr for 4th pipeline
+				int_chan_next = 2'd3;
+
+				// prepare registers for 3rd sum	
+				inp_data_next[0] = inp_data_reg[2];
+
+				tau_data_next = tau_data_wire[2];
+				amp_data_next = amp_data_wire[2];
+				
+				// register 3rd product
+				out_data_next[2] = out_data_wire;
+				
+				del_addr_next = del_addr_reg + 6'd1; // advance the slot counter once per pass; 6-bit value wraps
+
+				int_case_next = 3'd5;
+			end
+			5:  // 3rd sum
+			begin				
+				int_flag_next = 1'b1; // tells state 2 to capture out_data_wire and update the write slot
+
+				int_addr_next[7:6] = 2'd3; // write into the lane-3 quarter of the RAM
+					
+				// set read addr for 1st pipeline
+				int_chan_next = 2'd0;
+
+				// prepare registers for 4th sum	
+				inp_data_next[0] = inp_data_reg[3];
+
+				tau_data_next = tau_data_wire[3];
+				amp_data_next = amp_data_wire[3];
+				
+				// register 4th product
+				out_data_next[3] = out_data_wire;
+                                             
+				// register 4th output
+				out_data_next[4] = out_data_reg[0]; // copy of out_data_reg[0]; this copy drives the lowest output field
+
+				int_case_next = 3'd2;
+			end
+			default: // unused state encodings 6 and 7 fall back to state 0
+			begin
+				int_case_next = 3'd0;
+			end
+		endcase
+	end
+
+	assign out_data = {out_data_reg[3], out_data_reg[2], out_data_reg[1], out_data_reg[4]}; // NOTE(review): packed lane-major, inputs are indexed 4*j+k; the two agree only for size = 1 - confirm
+
+endmodule
