// Source: sandbox/MultiChannelUSB/deconv.v @ 115
//
// Last change on this file since 115 was 114, checked in by demin, 14 years ago
//
// Optimize multipliers
//
// File size: 12.0 KB
// Rev Line
// deconv: time-multiplexed datapath that processes three input words per
// channel group on every `frame` pulse.
//
// For each group j (0 <= j < size) and each of its three input words k
// (inp_data word 3*j+k) one shared set of arithmetic units computes:
//   sub = current sample - sample read back from the delay RAM
//   acc = acc + sub
//   out = sub * tau + acc * amp
// The three words are pushed through the units one after another (states
// 3, 4 and 5 of the state machine); the intermediate values belonging to
// each word are parked in sub_data_reg[1..3], acc_data_reg[1..3] and
// mul_data_reg[2..7] between frames and copied back into the working
// registers at index 0 (and mul_data_reg[0..1]) when that word's turn comes.
//
// Parameters:
//   size  - number of channel groups (each group carries three words)
//   width - bit width of one input sample
//
// Ports:
//   clock, reset - synchronous reset (sampled on the rising clock edge)
//   frame    - starts one processing round while the state machine is idle
//   del_data - 3*size delay values, 6 bits each
//   amp_data - 3*size multipliers for the accumulator, 8 bits each
//   tau_data - 3*size multipliers for the difference, 16 bits each
//   inp_data - 3*size input samples, `width` bits each
//   out_data - {3rd, 2nd, 1st} results, each size*(width+16+3) bits wide
//   acc_data - {3rd, 2nd, 1st} accumulators, each size*(width+6+1) bits wide
//
// Note that inp_data/amp_data/tau_data/del_data are indexed as word 3*j+k,
// while out_data/acc_data are concatenated per k; the two layouts coincide
// only for size == 1.
module deconv
    #(
        parameter size = 1, // number of channels
        parameter width = 16 // bit width of the input data
    )
    (
        input wire clock, frame, reset,
        input wire [3*size*6-1:0] del_data,
        input wire [3*size*8-1:0] amp_data,
        input wire [3*size*16-1:0] tau_data,
        input wire [3*size*width-1:0] inp_data,
        // The output widths are spelled out in terms of `width` because the
        // localparams widthr/width2 are only declared after the port list;
        // the expressions are identical to widthr and width2 below.
        output wire [3*size*(width+16+3)-1:0] out_data,
        output wire [3*size*(width+6+1)-1:0] acc_data
    );

    localparam width1 = width + 1;      // signed difference of two samples
    localparam width2 = width + 6 + 1;  // accumulator
    localparam widthr = width + 16 + 3; // products and final sum

    // state machine and RAM write address
    reg int_wren_reg, int_wren_next;
    reg [1:0] int_chan_reg, int_chan_next;
    reg [2:0] int_case_reg, int_case_next;
    reg [7:0] int_addr_reg, int_addr_next;

    // 6-bit write pointer inside a 64-word bank and the derived read pointer
    reg [5:0] del_addr_reg, del_addr_next;
    wire [5:0] del_addr_wire;
    wire [7:0] int_addr_wire;

    reg [size*widthr-1:0] out_data_reg [2:0], out_data_next [2:0];
    wire [size*widthr-1:0] out_data_wire;

    reg [size*widthr-1:0] mul_data_reg [7:0], mul_data_next [7:0];
    wire [size*widthr-1:0] mul_data_wire [1:0];

    reg [size*width2-1:0] acc_data_reg [3:0], acc_data_next [3:0];
    wire [size*width2-1:0] acc_data_wire;

    reg [size*width1-1:0] sub_data_reg [3:0], sub_data_next [3:0];
    wire [size*width1-1:0] sub_data_wire;

    reg [size*width-1:0] inp_data_reg [2:0], inp_data_next [2:0];
    wire [size*width-1:0] inp_data_wire [3:0]; // [0..2]: inputs, [3]: RAM read data

    reg [size*8-1:0] amp_data_reg, amp_data_next;
    wire [size*8-1:0] amp_data_wire [2:0];

    reg [size*16-1:0] tau_data_reg, tau_data_next;
    wire [size*16-1:0] tau_data_wire [2:0];

    integer i;
    genvar j;

    generate
        for (j = 0; j < size; j = j + 1)
        begin : INT_DATA
            // Regroup the port vectors: word 3*j+k of each port becomes
            // slice j of the k-th internal bus.
            assign inp_data_wire[0][j*width+width-1:j*width] = inp_data[(3*j+0)*width+width-1:(3*j+0)*width];
            assign inp_data_wire[1][j*width+width-1:j*width] = inp_data[(3*j+1)*width+width-1:(3*j+1)*width];
            assign inp_data_wire[2][j*width+width-1:j*width] = inp_data[(3*j+2)*width+width-1:(3*j+2)*width];
            assign amp_data_wire[0][j*8+8-1:j*8] = amp_data[(3*j+0)*8+8-1:(3*j+0)*8];
            assign amp_data_wire[1][j*8+8-1:j*8] = amp_data[(3*j+1)*8+8-1:(3*j+1)*8];
            assign amp_data_wire[2][j*8+8-1:j*8] = amp_data[(3*j+2)*8+8-1:(3*j+2)*8];
            assign tau_data_wire[0][j*16+16-1:j*16] = tau_data[(3*j+0)*16+16-1:(3*j+0)*16];
            assign tau_data_wire[1][j*16+16-1:j*16] = tau_data[(3*j+1)*16+16-1:(3*j+1)*16];
            assign tau_data_wire[2][j*16+16-1:j*16] = tau_data[(3*j+2)*16+16-1:(3*j+2)*16];

            // NOTE(review): mux_unit_1 and add_unit_1 are instantiated once
            // per j but drive the un-indexed nets int_addr_wire and
            // del_addr_wire, so these nets get several drivers when
            // size > 1. Confirm whether values of size other than 1 are
            // meant to be supported.

            // Selects {bank number k, delay of word k} for the word chosen
            // by int_chan_next.
            lpm_mux #(
                .lpm_size(3),
                .lpm_type("LPM_MUX"),
                .lpm_width(8),
                .lpm_widths(2)) mux_unit_1 (
                .sel(int_chan_next),
                .data({
                    2'd2, del_data[(3*j+2)*6+6-1:(3*j+2)*6],
                    2'd1, del_data[(3*j+1)*6+6-1:(3*j+1)*6],
                    2'd0, del_data[(3*j+0)*6+6-1:(3*j+0)*6]}),
                .result(int_addr_wire));

            // read pointer = write pointer - selected delay (6-bit wrap-around)
            lpm_add_sub #(
                .lpm_direction("SUB"),
                .lpm_hint("ONE_INPUT_IS_CONSTANT=NO,CIN_USED=NO"),
                .lpm_representation("UNSIGNED"),
                .lpm_type("LPM_ADD_SUB"),
                .lpm_width(6)) add_unit_1 (
                .dataa(del_addr_reg),
                .datab(int_addr_wire[5:0]),
                .result(del_addr_wire));

            // sub = current sample - delayed sample; both operands are
            // zero-extended by one bit so the signed result cannot overflow
            lpm_add_sub #(
                .lpm_direction("SUB"),
                .lpm_hint("ONE_INPUT_IS_CONSTANT=NO,CIN_USED=NO"),
                .lpm_representation("SIGNED"),
                .lpm_type("LPM_ADD_SUB"),
                .lpm_width(width1)) sub_unit_1 (
                .dataa({{(width1-width){1'b0}}, inp_data_reg[0][j*width+width-1:j*width]}),
                .datab({{(width1-width){1'b0}}, inp_data_wire[3][j*width+width-1:j*width]}),
                .result(sub_data_wire[j*width1+width1-1:j*width1]));

            // acc = acc + sub.
            // The width1-bit signed difference is sign-extended to width2
            // bits by replicating its sign bit. Placing zeros between the
            // sign bit and the remaining bits does not encode the same
            // two's-complement value for negative differences.
            lpm_add_sub #(
                .lpm_direction("ADD"),
                .lpm_hint("ONE_INPUT_IS_CONSTANT=NO,CIN_USED=NO"),
                .lpm_representation("SIGNED"),
                .lpm_type("LPM_ADD_SUB"),
                .lpm_width(width2)) acc_unit_1 (
                .dataa({{(width2-width1+1){sub_data_reg[0][j*width1+width1-1]}}, sub_data_reg[0][j*width1+width1-2:j*width1]}),
                // disabled alternative: .dataa({width2{1'b0}}),
                .datab(acc_data_reg[0][j*width2+width2-1:j*width2]),
                .result(acc_data_wire[j*width2+width2-1:j*width2]));

            // signed difference * tau (tau is zero-extended to 17 bits so it
            // is treated as a non-negative value by the signed multiplier)
            lpm_mult #(
                .lpm_hint("MAXIMIZE_SPEED=9"),
                .lpm_representation("SIGNED"),
                .lpm_type("LPM_MULT"),
                .lpm_pipeline(3),
                .lpm_widtha(width1),
                .lpm_widthb(17),
                .lpm_widthp(widthr)) mult_unit_1 (
                .clock(clock),
                .clken(int_wren_reg),
                .dataa(sub_data_reg[0][j*width1+width1-1:j*width1]),
                .datab({1'b0, tau_data_reg[j*16+16-1:j*16]}),
                .result(mul_data_wire[0][j*widthr+widthr-1:j*widthr]));

            // accumulator * amp, both operands taken as unsigned
            lpm_mult #(
                .lpm_hint("MAXIMIZE_SPEED=9"),
                .lpm_representation("UNSIGNED"),
                .lpm_type("LPM_MULT"),
                .lpm_pipeline(3),
                .lpm_widtha(width2),
                .lpm_widthb(8),
                .lpm_widthp(widthr)) mult_unit_2 (
                .clock(clock),
                .clken(int_wren_reg),
                .dataa(acc_data_reg[0][j*width2+width2-1:j*width2]),
                .datab(amp_data_reg[j*8+8-1:j*8]),
                .result(mul_data_wire[1][j*widthr+widthr-1:j*widthr]));

            // out = registered product 0 + registered product 1
            lpm_add_sub #(
                .lpm_direction("ADD"),
                .lpm_hint("ONE_INPUT_IS_CONSTANT=NO,CIN_USED=NO"),
                .lpm_representation("SIGNED"),
                .lpm_type("LPM_ADD_SUB"),
                .lpm_width(widthr)) add_unit_2 (
                .dataa(mul_data_reg[0][j*widthr+widthr-1:j*widthr]),
                .datab(mul_data_reg[1][j*widthr+widthr-1:j*widthr]),
                .result(out_data_wire[j*widthr+widthr-1:j*widthr]));

        end
    endgenerate

    // Delay memory: 256 words of size*width bits, used as three banks of
    // 64 words selected by address bits [7:6]. Port A writes the current
    // sample, port B reads the delayed sample; the read address and the
    // read data are both registered on clock0.
    altsyncram #(
        .address_aclr_b("NONE"),
        .address_reg_b("CLOCK0"),
        .clock_enable_input_a("BYPASS"),
        .clock_enable_input_b("BYPASS"),
        .clock_enable_output_b("BYPASS"),
        .intended_device_family("Cyclone III"),
        .lpm_type("altsyncram"),
        .numwords_a(256),
        .numwords_b(256),
        .operation_mode("DUAL_PORT"),
        .outdata_aclr_b("NONE"),
        .outdata_reg_b("CLOCK0"),
        .power_up_uninitialized("FALSE"),
        .read_during_write_mode_mixed_ports("DONT_CARE"),
        .widthad_a(8),
        .widthad_b(8),
        .width_a(size*width),
        .width_b(size*width),
        .width_byteena_a(1)) ram_unit_1 (
        .wren_a(int_wren_reg),
        .clock0(clock),
        .address_a(int_addr_reg),
        .address_b({int_addr_wire[7:6], del_addr_wire}),
        .data_a(inp_data_reg[0]),
        .q_b(inp_data_wire[3]),
        .aclr0(1'b0),
        .aclr1(1'b0),
        .addressstall_a(1'b0),
        .addressstall_b(1'b0),
        .byteena_a(1'b1),
        .byteena_b(1'b1),
        .clock1(1'b1),
        .clocken0(1'b1),
        .clocken1(1'b1),
        .clocken2(1'b1),
        .clocken3(1'b1),
        .data_b({(size*width){1'b1}}),
        .eccstatus(),
        .q_a(),
        .rden_a(1'b1),
        .rden_b(1'b1),
        .wren_b(1'b0));

    // State registers. Reset clears everything and leaves the write enable
    // asserted so that state 0/1 can fill the RAM with zeros.
    always @(posedge clock)
    begin
        if (reset)
        begin
            int_wren_reg <= 1'b1;
            int_chan_reg <= 2'd0;
            int_case_reg <= 3'd0;
            del_addr_reg <= 6'd0;
            int_addr_reg <= 8'd0;
            amp_data_reg <= {(size*8){1'b0}};
            tau_data_reg <= {(size*16){1'b0}};
            for(i = 0; i <= 2; i = i + 1)
            begin
                inp_data_reg[i] <= {(size*width){1'b0}};
                out_data_reg[i] <= {(size*widthr){1'b0}};
            end
            for(i = 0; i <= 3; i = i + 1)
            begin
                sub_data_reg[i] <= {(size*width1){1'b0}};
                acc_data_reg[i] <= {(size*width2){1'b0}};
            end
            for(i = 0; i <= 7; i = i + 1)
            begin
                mul_data_reg[i] <= {(size*widthr){1'b0}};
            end
        end
        else
        begin
            int_wren_reg <= int_wren_next;
            int_chan_reg <= int_chan_next;
            int_case_reg <= int_case_next;
            del_addr_reg <= del_addr_next;
            int_addr_reg <= int_addr_next;
            amp_data_reg <= amp_data_next;
            tau_data_reg <= tau_data_next;
            for(i = 0; i <= 2; i = i + 1)
            begin
                inp_data_reg[i] <= inp_data_next[i];
                out_data_reg[i] <= out_data_next[i];
            end
            for(i = 0; i <= 3; i = i + 1)
            begin
                sub_data_reg[i] <= sub_data_next[i];
                acc_data_reg[i] <= acc_data_next[i];
            end
            for(i = 0; i <= 7; i = i + 1)
            begin
                mul_data_reg[i] <= mul_data_next[i];
            end
        end
    end

    // Next-state logic. Every *_next value defaults to the current register
    // value; each state overrides only what it changes.
    always @*
    begin
        int_wren_next = int_wren_reg;
        int_chan_next = int_chan_reg;
        int_case_next = int_case_reg;
        del_addr_next = del_addr_reg;
        int_addr_next = int_addr_reg;
        amp_data_next = amp_data_reg;
        tau_data_next = tau_data_reg;
        for(i = 0; i <= 2; i = i + 1)
        begin
            inp_data_next[i] = inp_data_reg[i];
            out_data_next[i] = out_data_reg[i];
        end
        for(i = 0; i <= 3; i = i + 1)
        begin
            sub_data_next[i] = sub_data_reg[i];
            acc_data_next[i] = acc_data_reg[i];
        end
        for(i = 0; i <= 7; i = i + 1)
        begin
            mul_data_next[i] = mul_data_reg[i];
        end

        case (int_case_reg)
            0:
            begin
                // write zeros: clear all registers and start at address 0
                int_wren_next = 1'b1;
                del_addr_next = 6'd0;
                int_addr_next = 8'd0;
                amp_data_next = {(size*8){1'b0}};
                tau_data_next = {(size*16){1'b0}};
                for(i = 0; i <= 2; i = i + 1)
                begin
                    inp_data_next[i] = {(size*width){1'b0}};
                    out_data_next[i] = {(size*widthr){1'b0}};
                end
                for(i = 0; i <= 3; i = i + 1)
                begin
                    sub_data_next[i] = {(size*width1){1'b0}};
                    acc_data_next[i] = {(size*width2){1'b0}};
                end
                for(i = 0; i <= 7; i = i + 1)
                begin
                    mul_data_next[i] = {(size*widthr){1'b0}};
                end

                int_case_next = 3'd1;
            end
            1:
            begin
                // write zeros: step through all 256 RAM addresses, then idle
                int_addr_next = int_addr_reg + 8'd1;
                if (&int_addr_reg)
                begin
                    int_wren_next = 1'b0;
                    int_chan_next = 2'd0;
                    int_case_next = 3'd2;
                end
            end
            2: // frame
            begin
                if (frame)
                begin
                    int_wren_next = 1'b1;

                    // write into bank 0
                    int_addr_next[7:6] = 2'd0;

                    // set read addr for 2nd pipeline
                    int_chan_next = 2'd1;

                    // register input data for 2nd and 3rd sums
                    inp_data_next[1] = inp_data_wire[1];
                    inp_data_next[2] = inp_data_wire[2];

                    // prepare registers for 1st sum
                    inp_data_next[0] = inp_data_wire[0];

                    sub_data_next[0] = sub_data_reg[1];
                    acc_data_next[0] = acc_data_reg[1];

                    mul_data_next[0] = mul_data_reg[2];
                    mul_data_next[1] = mul_data_reg[3];

                    tau_data_next = tau_data_wire[0];
                    amp_data_next = amp_data_wire[0];

                    int_case_next = 3'd3;
                end

            end
            3: // 1st sum
            begin
                // write into bank 1
                int_addr_next[7:6] = 2'd1;

                // set read addr for 3rd pipeline
                int_chan_next = 2'd2;

                // prepare registers for 2nd sum
                inp_data_next[0] = inp_data_reg[1];

                sub_data_next[0] = sub_data_reg[2];
                acc_data_next[0] = acc_data_reg[2];

                mul_data_next[0] = mul_data_reg[4];
                mul_data_next[1] = mul_data_reg[5];

                tau_data_next = tau_data_wire[1];
                amp_data_next = amp_data_wire[1];

                // register 1st sum
                sub_data_next[1] = sub_data_wire;
                acc_data_next[1] = acc_data_wire;
                mul_data_next[2] = mul_data_wire[0];
                mul_data_next[3] = mul_data_wire[1];
                out_data_next[0] = out_data_wire;

                int_case_next = 3'd4;
            end
            4: // 2nd sum
            begin
                // write into bank 2
                int_addr_next[7:6] = 2'd2;

                // prepare registers for 3rd sum
                inp_data_next[0] = inp_data_reg[2];

                sub_data_next[0] = sub_data_reg[3];
                acc_data_next[0] = acc_data_reg[3];

                mul_data_next[0] = mul_data_reg[6];
                mul_data_next[1] = mul_data_reg[7];

                tau_data_next = tau_data_wire[2];
                amp_data_next = amp_data_wire[2];

                // register 2nd sum
                sub_data_next[2] = sub_data_wire;
                acc_data_next[2] = acc_data_wire;
                mul_data_next[4] = mul_data_wire[0];
                mul_data_next[5] = mul_data_wire[1];
                out_data_next[1] = out_data_wire;

                // advance the write pointer once per frame
                del_addr_next = del_addr_reg + 6'd1;

                int_case_next = 3'd5;
            end
            5: // 3rd sum
            begin
                int_wren_next = 1'b0;

                // set read addr for 1st pipeline
                int_chan_next = 2'd0;

                // register 3rd sum
                sub_data_next[3] = sub_data_wire;
                acc_data_next[3] = acc_data_wire;
                mul_data_next[6] = mul_data_wire[0];
                mul_data_next[7] = mul_data_wire[1];
                out_data_next[2] = out_data_wire;

                // load the advanced write pointer into the RAM write address
                int_addr_next[5:0] = del_addr_reg;

                int_case_next = 3'd2;
            end
            default:
            begin
                // unused encodings restart the zero-fill sequence
                int_case_next = 3'd0;
            end
        endcase
    end

    assign out_data = {out_data_reg[2], out_data_reg[1], out_data_reg[0]};
    assign acc_data = {acc_data_reg[3], acc_data_reg[2], acc_data_reg[1]};
    // disabled alternative: assign acc_data = {17'd0, del_addr_wire, 17'd0, del_addr_wire, 17'd0, del_addr_wire};

endmodule
// Note: See TracBrowser for help on using the repository browser.