/*
Multi-channel filter built from two cascaded accumulate-and-subtract stages.

Each stage computes, per channel,
	sum_next = new_sample - delayed_sample + running_sum
where delayed_sample is read back from a 64-word dual-port RAM. The lower
32 words (address MSB = 0) hold the delay line of the first stage, the upper
32 words (address MSB = 1) hold the delay line of the second stage.

RAM addresses are taken from a shift register driven by a 5-bit LFSR: the
write address uses the most recent 5 bits, the read address uses a 5-bit
window further up the shift register, i.e. an address that was written some
frames earlier.

Ports:
	clock    - single clock for all registers and for the RAM
	frame    - when high while the state machine is waiting (state 2),
	           a new input sample set is taken from inp_data
	reset    - synchronous, active-high; restarts the RAM clearing sequence
	inp_data - size unsigned samples, width bits each, channel j in
	           bits [j*width +: width]
	out_data - size results, (width+7) bits each, channel j in
	           bits [j*(width+7) +: (width+7)]
*/
module filter
	#(
		parameter	size	=	3, // number of channels
		parameter	width	=	12 // bit width of the input data (unsigned)
	)
	(
		input	wire						clock, frame, reset,
		input	wire	[size*width-1:0]	inp_data,
		// The width is spelled out as (width+7) because widthr is only
		// declared after the port list; keep in sync with widthr below.
		output	wire	[size*(width+7)-1:0]	out_data
	);

	// Per-channel width of the internal sums and of the output.
	// NOTE(review): the 7 extra bits presumably cover the growth of the
	// two cascaded sums - confirm against the intended delay lengths.
	localparam	widthr	=	width + 7;

	/*
	5-bit LFSR with additional bits to keep track of previous values:
	bit 0 receives the new LFSR bit on every accepted frame, so bits [k+4:k]
	hold the 5-bit LFSR value from k frames ago.
	*/
	reg		[23:0]				int_lfsr_reg, int_lfsr_next;

	reg							int_wren_reg, int_wren_next; // RAM write enable
	reg							int_flag_reg, int_flag_next; // set in state 3: 2nd sum is ready to be registered
	reg							int_chan_reg, int_chan_next; // selects which stage's read address is used
	reg		[1:0]				int_case_reg, int_case_next; // state of the control state machine
	reg		[5:0]				int_addr_reg, int_addr_next; // RAM write address

	wire	[5:0]				int_addr_wire;               // RAM read address (output of the mux)

	// acc_data_reg[0]: value entering the current stage (also the RAM write data)
	// acc_data_reg[1]: running sum of the current stage
	reg		[size*widthr-1:0]	acc_data_reg [1:0], acc_data_next [1:0];
	// int_data_reg[0]: registered 1st sum
	// int_data_reg[1]: registered 2nd sum
	// int_data_reg[2]: registered output
	reg		[size*widthr-1:0]	int_data_reg [2:0], int_data_next [2:0];

	// acc_data_wire[0]: zero-extended inputs, acc_data_wire[1]: updated sum
	wire	[size*widthr-1:0]	acc_data_wire [1:0], del_data_wire;

	// Separate loop indices for the clocked and the combinational process,
	// so that no variable is written from two always blocks.
	integer i;
	integer k;
	genvar j;

	generate
		for (j = 0; j < size; j = j + 1)
		begin : INT_DATA
			// zero-extend each unsigned input channel from width to widthr bits
			assign acc_data_wire[0][j*widthr+widthr-1:j*widthr] = {{(widthr-width){1'b0}}, inp_data[j*width+width-1:j*width]};

			// per-channel update: new value - delayed value (from RAM) + running sum
			assign acc_data_wire[1][j*widthr+widthr-1:j*widthr] =
				  acc_data_reg[0][j*widthr+widthr-1:j*widthr]
				- del_data_wire[j*widthr+widthr-1:j*widthr]
				+ acc_data_reg[1][j*widthr+widthr-1:j*widthr];

		end
	endgenerate

	// 64 x (size*widthr) simple dual-port RAM holding both delay lines.
	// Port A writes acc_data_reg[0]; port B reads the delayed value with
	// registered address and registered output.
	altsyncram #(
		.address_aclr_b("NONE"),
		.address_reg_b("CLOCK0"),
		.clock_enable_input_a("BYPASS"),
		.clock_enable_input_b("BYPASS"),
		.clock_enable_output_b("BYPASS"),
		.intended_device_family("Cyclone III"),
		.lpm_type("altsyncram"),
		.numwords_a(64),
		.numwords_b(64),
		.operation_mode("DUAL_PORT"),
		.outdata_aclr_b("NONE"),
		.outdata_reg_b("CLOCK0"),
		.power_up_uninitialized("FALSE"),
		.read_during_write_mode_mixed_ports("DONT_CARE"),
		.widthad_a(6),
		.widthad_b(6),
		.width_a(size*widthr),
		.width_b(size*widthr),
		.width_byteena_a(1)) ram_unit_1 (
		.wren_a(int_wren_reg),
		.clock0(clock),
		.address_a(int_addr_reg),
		.address_b(int_addr_wire),
		.data_a(acc_data_reg[0]),
		.q_b(del_data_wire),
		.aclr0(1'b0),
		.aclr1(1'b0),
		.addressstall_a(1'b0),
		.addressstall_b(1'b0),
		.byteena_a(1'b1),
		.byteena_b(1'b1),
		.clock1(1'b1),
		.clocken0(1'b1),
		.clocken1(1'b1),
		.clocken2(1'b1),
		.clocken3(1'b1),
		.data_b({(size*widthr){1'b1}}),
		.eccstatus(),
		.q_a(),
		.rden_a(1'b1),
		.rden_b(1'b1),
		.wren_b(1'b0));

	// Read address selection, driven by the combinational int_chan_next:
	//   sel = 0 -> {1'b0, int_lfsr_reg[8:4]}   (lower half of the RAM)
	//   sel = 1 -> {1'b1, int_lfsr_reg[20:16]} (upper half of the RAM)
	lpm_mux #(
		.lpm_size(2),
		.lpm_type("LPM_MUX"),
		.lpm_width(6),
		.lpm_widths(1)) mux_unit_1 (
		.sel(int_chan_next),
		.data({
			1'b1, int_lfsr_reg[16+4:16],
			1'b0, int_lfsr_reg[4+4:4]}),
		.result(int_addr_wire));

	// State registers with synchronous, active-high reset.
	always @(posedge clock)
	begin
		if (reset)
		begin
			// write enable is asserted so that the RAM clearing
			// sequence (states 0 and 1) starts right after reset
			int_wren_reg <= 1'b1;
			int_flag_reg <= 1'b0;
			int_chan_reg <= 1'b0;
			int_case_reg <= 2'd0;
			int_addr_reg <= 6'd0;
			for(i = 0; i <= 1; i = i + 1)
			begin
				acc_data_reg[i] <= {(size*widthr){1'b0}};
			end
			for(i = 0; i <= 2; i = i + 1)
			begin
				int_data_reg[i] <= {(size*widthr){1'b0}};
			end
			int_lfsr_reg <= 24'd0;
		end
		else
		begin
			int_wren_reg <= int_wren_next;
			int_flag_reg <= int_flag_next;
			int_chan_reg <= int_chan_next;
			int_case_reg <= int_case_next;
			int_addr_reg <= int_addr_next;
			for(i = 0; i <= 1; i = i + 1)
			begin
				acc_data_reg[i] <= acc_data_next[i];
			end
			for(i = 0; i <= 2; i = i + 1)
			begin
				int_data_reg[i] <= int_data_next[i];
			end
			int_lfsr_reg <= int_lfsr_next;
		end
	end

	// Next-state logic.
	//   state 0, 1 : fill all 64 RAM words with zeros
	//   state 2    : wait for frame, start the 1st sum
	//   state 3    : register the 1st sum, start the 2nd sum
	always @*
	begin
		// default: hold every register
		int_wren_next = int_wren_reg;
		int_flag_next = int_flag_reg;
		int_chan_next = int_chan_reg;
		int_case_next = int_case_reg;
		int_addr_next = int_addr_reg;
		for(k = 0; k <= 1; k = k + 1)
		begin
			acc_data_next[k] = acc_data_reg[k];
		end
		for(k = 0; k <= 2; k = k + 1)
		begin
			int_data_next[k] = int_data_reg[k];
		end
		int_lfsr_next = int_lfsr_reg;

		case (int_case_reg)
			2'd0:
			begin
				// write zeros: start at address 0 with cleared data registers
				int_wren_next = 1'b1;
				int_addr_next = 6'd0;
				for(k = 0; k <= 1; k = k + 1)
				begin
					acc_data_next[k] = {(size*widthr){1'b0}};
				end
				for(k = 0; k <= 2; k = k + 1)
				begin
					int_data_next[k] = {(size*widthr){1'b0}};
				end
				int_case_next = 2'd1;
			end
			2'd1:
			begin
				// write zeros: step through all addresses
				int_addr_next = int_addr_reg + 6'd1;
				if (&int_addr_reg)
				begin
					// last address (all ones) reached: stop writing and
					// load the LFSR shift register with its start value
					int_wren_next = 1'b0;
					int_flag_next = 1'b0;
					int_chan_next = 1'b0;
					int_lfsr_next = 24'h0722BD;
					int_case_next = 2'd2;
				end
			end
			2'd2: // frame
			begin
				int_flag_next = 1'b0;
				if (frame)
				begin
					int_wren_next = 1'b1;

					// write addr for 1st pipeline (lower half of the RAM)
					int_addr_next = {1'b0, int_lfsr_reg[4:0]};

					// set read addr for 2nd pipeline
					int_chan_next = 1'b1;

					// prepare registers for 1st sum
					acc_data_next[0] = acc_data_wire[0];
					acc_data_next[1] = int_data_reg[0];

					// advance the LFSR: XNOR of bits 2 and 4 is shifted in at bit 0
					int_lfsr_next = {int_lfsr_reg[22:0], int_lfsr_reg[2] ~^ int_lfsr_reg[4]};

					int_case_next = 2'd3;
				end
				if (int_flag_reg) // set only in the cycle right after state 3
				begin
					// register 2nd sum
					int_data_next[1] = acc_data_wire[1];
				end
			end
			2'd3:  // 2nd sum
			begin
				int_flag_next = 1'b1;

				// write addr for 2nd pipeline (upper half of the RAM);
				// bits [5:1] because the LFSR was already shifted in state 2
				int_addr_next = {1'b1, int_lfsr_reg[5:1]};

				// set read addr for 1st pipeline
				int_chan_next = 1'b0;

				// prepare registers for 2nd sum
				acc_data_next[0] = int_data_reg[0];
				acc_data_next[1] = int_data_reg[1];

				// register 1st sum
				int_data_next[0] = acc_data_wire[1];

				// register 2nd output
				int_data_next[2] = int_data_reg[1];

				int_case_next = 2'd2;
			end
			default:
			begin
				// all four 2-bit states are listed above; kept as a safe fallback
				int_case_next = 2'd0;
			end
		endcase
	end

	assign out_data = int_data_reg[2];

endmodule
