1use num_complex::Complex;
2
3use crate::{common::FftNum, FftDirection};
4
5use crate::array_utils::{self, DoubleBuf, LoadStore};
6use crate::common::{fft_error_inplace, fft_error_outofplace};
7use crate::twiddles;
8use crate::{Direction, Fft, Length};
9
/// Generates the boilerplate shared by every fixed-size butterfly in this file.
///
/// Arguments:
/// - `$struct_name`: the butterfly type. It must provide an inherent
///   `unsafe fn perform_fft_contiguous(&self, buffer: impl LoadStore<T>)`.
/// - `$len`: expression returned from `Length::len`.
/// - `$direction_fn`: callable that receives `&$struct_name<T>` and yields its `FftDirection`.
///
/// The expansion adds a `perform_fft_butterfly` forwarding method plus `Length`, `Direction`
/// and `Fft` impls. Both `Fft` entry points ignore their scratch argument and report a
/// required scratch length of zero.
#[allow(unused)]
macro_rules! boilerplate_fft_butterfly {
    ($struct_name:ident, $len:expr, $direction_fn:expr) => {
        impl<T> Length for $struct_name<T> {
            #[inline(always)]
            fn len(&self) -> usize {
                $len
            }
        }
        impl<T> Direction for $struct_name<T> {
            #[inline(always)]
            fn fft_direction(&self) -> FftDirection {
                $direction_fn(self)
            }
        }
        impl<T: FftNum> $struct_name<T> {
            /// Runs a single butterfly over `buffer` by forwarding to `perform_fft_contiguous`.
            #[inline(always)]
            pub(crate) unsafe fn perform_fft_butterfly(&self, buffer: impl LoadStore<T>) {
                self.perform_fft_contiguous(buffer);
            }
        }
        impl<T: FftNum> Fft<T> for $struct_name<T> {
            #[inline(always)]
            fn get_inplace_scratch_len(&self) -> usize {
                0
            }
            #[inline(always)]
            fn get_outofplace_scratch_len(&self) -> usize {
                0
            }
            /// Transforms `buffer` in place, one `self.len()`-sized chunk at a time.
            fn process_with_scratch(&self, buffer: &mut [Complex<T>], _scratch: &mut [Complex<T>]) {
                let fft_len = self.len();
                if buffer.len() < fft_len {
                    // The error helper is defined elsewhere (presumably it panics); the
                    // explicit return guarantees we never continue with a short buffer.
                    fft_error_inplace(fft_len, buffer.len(), 0, 0);
                    return;
                }

                let outcome = array_utils::iter_chunks(buffer, fft_len, |chunk| unsafe {
                    self.perform_fft_butterfly(chunk)
                });

                // The chunk iterator reported a failure; hand the lengths to the error helper.
                if outcome.is_err() {
                    fft_error_inplace(fft_len, buffer.len(), 0, 0);
                }
            }
            /// Transforms `input` into `output`, one `self.len()`-sized chunk pair at a time.
            fn process_outofplace_with_scratch(
                &self,
                input: &mut [Complex<T>],
                output: &mut [Complex<T>],
                _scratch: &mut [Complex<T>],
            ) {
                let fft_len = self.len();
                if input.len() < fft_len || output.len() != input.len() {
                    // See `process_with_scratch`: report the sizes, then bail out explicitly.
                    fft_error_outofplace(fft_len, input.len(), output.len(), 0, 0);
                    return;
                }

                let outcome =
                    array_utils::iter_chunks_zipped(input, output, fft_len, |src, dst| {
                        unsafe {
                            self.perform_fft_butterfly(DoubleBuf {
                                input: src,
                                output: dst,
                            })
                        };
                    });

                if outcome.is_err() {
                    fft_error_outofplace(fft_len, input.len(), output.len(), 0, 0);
                }
            }
        }
    };
}
92
93pub struct Butterfly1<T> {
94 direction: FftDirection,
95 _phantom: std::marker::PhantomData<T>,
96}
97impl<T: FftNum> Butterfly1<T> {
98 #[inline(always)]
99 pub fn new(direction: FftDirection) -> Self {
100 Self {
101 direction,
102 _phantom: std::marker::PhantomData,
103 }
104 }
105}
106impl<T: FftNum> Fft<T> for Butterfly1<T> {
107 fn process_outofplace_with_scratch(
108 &self,
109 input: &mut [Complex<T>],
110 output: &mut [Complex<T>],
111 _scratch: &mut [Complex<T>],
112 ) {
113 output.copy_from_slice(&input);
114 }
115
116 fn process_with_scratch(&self, _buffer: &mut [Complex<T>], _scratch: &mut [Complex<T>]) {}
117
118 fn get_inplace_scratch_len(&self) -> usize {
119 0
120 }
121
122 fn get_outofplace_scratch_len(&self) -> usize {
123 0
124 }
125}
126impl<T> Length for Butterfly1<T> {
127 fn len(&self) -> usize {
128 1
129 }
130}
131impl<T> Direction for Butterfly1<T> {
132 fn fft_direction(&self) -> FftDirection {
133 self.direction
134 }
135}
136
137pub struct Butterfly2<T> {
138 direction: FftDirection,
139 _phantom: std::marker::PhantomData<T>,
140}
141boilerplate_fft_butterfly!(Butterfly2, 2, |this: &Butterfly2<_>| this.direction);
142impl<T: FftNum> Butterfly2<T> {
143 #[inline(always)]
144 pub fn new(direction: FftDirection) -> Self {
145 Self {
146 direction,
147 _phantom: std::marker::PhantomData,
148 }
149 }
150 #[inline(always)]
151 unsafe fn perform_fft_strided(left: &mut Complex<T>, right: &mut Complex<T>) {
152 let temp = *left + *right;
153
154 *right = *left - *right;
155 *left = temp;
156 }
157 #[inline(always)]
158 unsafe fn perform_fft_contiguous(&self, mut buffer: impl LoadStore<T>) {
159 let value0 = buffer.load(0);
160 let value1 = buffer.load(1);
161 buffer.store(value0 + value1, 0);
162 buffer.store(value0 - value1, 1);
163 }
164}
165
166pub struct Butterfly3<T> {
167 pub twiddle: Complex<T>,
168 direction: FftDirection,
169}
170boilerplate_fft_butterfly!(Butterfly3, 3, |this: &Butterfly3<_>| this.direction);
171impl<T: FftNum> Butterfly3<T> {
172 #[inline(always)]
173 pub fn new(direction: FftDirection) -> Self {
174 Self {
175 twiddle: twiddles::compute_twiddle(1, 3, direction),
176 direction,
177 }
178 }
179 #[inline(always)]
180 pub fn direction_of(fft: &Butterfly3<T>) -> Self {
181 Self {
182 twiddle: fft.twiddle.conj(),
183 direction: fft.direction.opposite_direction(),
184 }
185 }
186 #[inline(always)]
187 unsafe fn perform_fft_strided(
188 &self,
189 val0: &mut Complex<T>,
190 val1: &mut Complex<T>,
191 val2: &mut Complex<T>,
192 ) {
193 let xp = *val1 + *val2;
194 let xn = *val1 - *val2;
195 let sum = *val0 + xp;
196
197 let temp_a = *val0
198 + Complex {
199 re: self.twiddle.re * xp.re,
200 im: self.twiddle.re * xp.im,
201 };
202 let temp_b = Complex {
203 re: -self.twiddle.im * xn.im,
204 im: self.twiddle.im * xn.re,
205 };
206
207 *val0 = sum;
208 *val1 = temp_a + temp_b;
209 *val2 = temp_a - temp_b;
210 }
211
212 #[inline(always)]
213 unsafe fn perform_fft_contiguous(&self, mut buffer: impl LoadStore<T>) {
214 let xp = buffer.load(1) + buffer.load(2);
215 let xn = buffer.load(1) - buffer.load(2);
216 let sum = buffer.load(0) + xp;
217
218 let temp_a = buffer.load(0)
219 + Complex {
220 re: self.twiddle.re * xp.re,
221 im: self.twiddle.re * xp.im,
222 };
223 let temp_b = Complex {
224 re: -self.twiddle.im * xn.im,
225 im: self.twiddle.im * xn.re,
226 };
227
228 buffer.store(sum, 0);
229 buffer.store(temp_a + temp_b, 1);
230 buffer.store(temp_a - temp_b, 2);
231 }
232}
233
234pub struct Butterfly4<T> {
235 direction: FftDirection,
236 _phantom: std::marker::PhantomData<T>,
237}
238boilerplate_fft_butterfly!(Butterfly4, 4, |this: &Butterfly4<_>| this.direction);
239impl<T: FftNum> Butterfly4<T> {
240 #[inline(always)]
241 pub fn new(direction: FftDirection) -> Self {
242 Self {
243 direction,
244 _phantom: std::marker::PhantomData,
245 }
246 }
247 #[inline(always)]
248 unsafe fn perform_fft_contiguous(&self, mut buffer: impl LoadStore<T>) {
249 let mut value0 = buffer.load(0);
254 let mut value1 = buffer.load(1);
255 let mut value2 = buffer.load(2);
256 let mut value3 = buffer.load(3);
257
258 Butterfly2::perform_fft_strided(&mut value0, &mut value2);
260 Butterfly2::perform_fft_strided(&mut value1, &mut value3);
261
262 value3 = twiddles::rotate_90(value3, self.direction);
264
265 Butterfly2::perform_fft_strided(&mut value0, &mut value1);
269 Butterfly2::perform_fft_strided(&mut value2, &mut value3);
270
271 buffer.store(value0, 0);
273 buffer.store(value2, 1);
274 buffer.store(value1, 2);
275 buffer.store(value3, 3);
276 }
277
278 #[inline(always)]
279 unsafe fn perform_fft_strided(
280 &self,
281 value0: &mut Complex<T>,
282 value1: &mut Complex<T>,
283 value2: &mut Complex<T>,
284 value3: &mut Complex<T>,
285 ) {
286 Butterfly2::perform_fft_strided(value0, value2);
288 Butterfly2::perform_fft_strided(value1, value3);
289
290 *value3 = twiddles::rotate_90(*value3, self.direction);
292
293 Butterfly2::perform_fft_strided(value0, value1);
297 Butterfly2::perform_fft_strided(value2, value3);
298
299 let temp = *value1;
301 *value1 = *value2;
302 *value2 = temp;
303 }
304}
305
306pub struct Butterfly5<T> {
307 twiddle1: Complex<T>,
308 twiddle2: Complex<T>,
309 direction: FftDirection,
310}
311boilerplate_fft_butterfly!(Butterfly5, 5, |this: &Butterfly5<_>| this.direction);
312impl<T: FftNum> Butterfly5<T> {
313 pub fn new(direction: FftDirection) -> Self {
314 Self {
315 twiddle1: twiddles::compute_twiddle(1, 5, direction),
316 twiddle2: twiddles::compute_twiddle(2, 5, direction),
317 direction,
318 }
319 }
320
321 #[inline(never)] unsafe fn perform_fft_contiguous(&self, mut buffer: impl LoadStore<T>) {
323 let x14p = buffer.load(1) + buffer.load(4);
403 let x14n = buffer.load(1) - buffer.load(4);
404 let x23p = buffer.load(2) + buffer.load(3);
405 let x23n = buffer.load(2) - buffer.load(3);
406 let sum = buffer.load(0) + x14p + x23p;
407 let b14re_a = buffer.load(0).re + self.twiddle1.re * x14p.re + self.twiddle2.re * x23p.re;
408 let b14re_b = self.twiddle1.im * x14n.im + self.twiddle2.im * x23n.im;
409 let b23re_a = buffer.load(0).re + self.twiddle2.re * x14p.re + self.twiddle1.re * x23p.re;
410 let b23re_b = self.twiddle2.im * x14n.im + -self.twiddle1.im * x23n.im;
411
412 let b14im_a = buffer.load(0).im + self.twiddle1.re * x14p.im + self.twiddle2.re * x23p.im;
413 let b14im_b = self.twiddle1.im * x14n.re + self.twiddle2.im * x23n.re;
414 let b23im_a = buffer.load(0).im + self.twiddle2.re * x14p.im + self.twiddle1.re * x23p.im;
415 let b23im_b = self.twiddle2.im * x14n.re + -self.twiddle1.im * x23n.re;
416
417 let out1re = b14re_a - b14re_b;
418 let out1im = b14im_a + b14im_b;
419 let out2re = b23re_a - b23re_b;
420 let out2im = b23im_a + b23im_b;
421 let out3re = b23re_a + b23re_b;
422 let out3im = b23im_a - b23im_b;
423 let out4re = b14re_a + b14re_b;
424 let out4im = b14im_a - b14im_b;
425 buffer.store(sum, 0);
426 buffer.store(
427 Complex {
428 re: out1re,
429 im: out1im,
430 },
431 1,
432 );
433 buffer.store(
434 Complex {
435 re: out2re,
436 im: out2im,
437 },
438 2,
439 );
440 buffer.store(
441 Complex {
442 re: out3re,
443 im: out3im,
444 },
445 3,
446 );
447 buffer.store(
448 Complex {
449 re: out4re,
450 im: out4im,
451 },
452 4,
453 );
454 }
455}
456
457pub struct Butterfly6<T> {
458 butterfly3: Butterfly3<T>,
459}
460boilerplate_fft_butterfly!(Butterfly6, 6, |this: &Butterfly6<_>| this
461 .butterfly3
462 .fft_direction());
463impl<T: FftNum> Butterfly6<T> {
464 #[inline(always)]
465 pub fn new(direction: FftDirection) -> Self {
466 Self {
467 butterfly3: Butterfly3::new(direction),
468 }
469 }
470 #[inline(always)]
471 pub fn direction_of(fft: &Butterfly6<T>) -> Self {
472 Self {
473 butterfly3: Butterfly3::direction_of(&fft.butterfly3),
474 }
475 }
476 #[inline(always)]
477 unsafe fn perform_fft_contiguous(&self, mut buffer: impl LoadStore<T>) {
478 let mut scratch_a = [buffer.load(0), buffer.load(2), buffer.load(4)];
483
484 let mut scratch_b = [buffer.load(3), buffer.load(5), buffer.load(1)];
485
486 self.butterfly3.perform_fft_contiguous(&mut scratch_a);
488 self.butterfly3.perform_fft_contiguous(&mut scratch_b);
489
490 Butterfly2::perform_fft_strided(&mut scratch_a[0], &mut scratch_b[0]);
496 Butterfly2::perform_fft_strided(&mut scratch_a[1], &mut scratch_b[1]);
497 Butterfly2::perform_fft_strided(&mut scratch_a[2], &mut scratch_b[2]);
498
499 buffer.store(scratch_a[0], 0);
503 buffer.store(scratch_b[1], 1);
504 buffer.store(scratch_a[2], 2);
505 buffer.store(scratch_b[0], 3);
506 buffer.store(scratch_a[1], 4);
507 buffer.store(scratch_b[2], 5);
508 }
509}
510
/// Size-7 butterfly, fully unrolled around three precomputed twiddle factors.
pub struct Butterfly7<T> {
    // twiddleK holds `twiddles::compute_twiddle(K, 7, direction)` (see `new`).
    twiddle1: Complex<T>,
    twiddle2: Complex<T>,
    twiddle3: Complex<T>,
    direction: FftDirection,
}
boilerplate_fft_butterfly!(Butterfly7, 7, |this: &Butterfly7<_>| this.direction);
impl<T: FftNum> Butterfly7<T> {
    /// Creates a size-7 butterfly for `direction`, precomputing twiddles 1 through 3 of 7.
    pub fn new(direction: FftDirection) -> Self {
        Self {
            twiddle1: twiddles::compute_twiddle(1, 7, direction),
            twiddle2: twiddles::compute_twiddle(2, 7, direction),
            twiddle3: twiddles::compute_twiddle(3, 7, direction),
            direction,
        }
    }
    /// Size-7 butterfly over elements 0..7 of `buffer`. Every load happens before the
    /// first store.
    #[inline(never)]
    unsafe fn perform_fft_contiguous(&self, mut buffer: impl LoadStore<T>) {
        // Inputs are paired as (1, 6), (2, 5), (3, 4): `p` is the pair's sum, `n` its difference.
        let x16p = buffer.load(1) + buffer.load(6);
        let x16n = buffer.load(1) - buffer.load(6);
        let x25p = buffer.load(2) + buffer.load(5);
        let x25n = buffer.load(2) - buffer.load(5);
        let x34p = buffer.load(3) + buffer.load(4);
        let x34n = buffer.load(3) - buffer.load(4);
        // Output 0 is the plain sum of all seven inputs.
        let sum = buffer.load(0) + x16p + x25p + x34p;

        // Real parts. Each `_a` term combines input 0 with the pair sums weighted by twiddle
        // real parts; each `_b` term combines the pair differences weighted by twiddle
        // imaginary parts. The x16 terms feed outputs 1 and 6, x25 feed 2 and 5, x34 feed 3 and 4.
        let x16re_a = buffer.load(0).re
            + self.twiddle1.re * x16p.re
            + self.twiddle2.re * x25p.re
            + self.twiddle3.re * x34p.re;
        let x16re_b =
            self.twiddle1.im * x16n.im + self.twiddle2.im * x25n.im + self.twiddle3.im * x34n.im;
        let x25re_a = buffer.load(0).re
            + self.twiddle1.re * x34p.re
            + self.twiddle2.re * x16p.re
            + self.twiddle3.re * x25p.re;
        let x25re_b =
            -self.twiddle1.im * x34n.im + self.twiddle2.im * x16n.im - self.twiddle3.im * x25n.im;
        let x34re_a = buffer.load(0).re
            + self.twiddle1.re * x25p.re
            + self.twiddle2.re * x34p.re
            + self.twiddle3.re * x16p.re;
        let x34re_b =
            -self.twiddle1.im * x25n.im + self.twiddle2.im * x34n.im + self.twiddle3.im * x16n.im;
        // Imaginary parts, same layout but reading the opposite component of each difference.
        let x16im_a = buffer.load(0).im
            + self.twiddle1.re * x16p.im
            + self.twiddle2.re * x25p.im
            + self.twiddle3.re * x34p.im;
        let x16im_b =
            self.twiddle1.im * x16n.re + self.twiddle2.im * x25n.re + self.twiddle3.im * x34n.re;
        let x25im_a = buffer.load(0).im
            + self.twiddle1.re * x34p.im
            + self.twiddle2.re * x16p.im
            + self.twiddle3.re * x25p.im;
        let x25im_b =
            -self.twiddle1.im * x34n.re + self.twiddle2.im * x16n.re - self.twiddle3.im * x25n.re;
        let x34im_a = buffer.load(0).im
            + self.twiddle1.re * x25p.im
            + self.twiddle2.re * x34p.im
            + self.twiddle3.re * x16p.im;
        // NOTE: every sign here is the opposite of the matching term in `x34re_b`, unlike the
        // x16/x25 rows where the `re_b` and `im_b` signs agree. The flipped signs on
        // `out3im`/`out4im` below compensate, so do not "fix" one without the other.
        let x34im_b =
            self.twiddle1.im * x25n.re - self.twiddle2.im * x34n.re - self.twiddle3.im * x16n.re;

        // Outputs k and 7-k share their `_a` terms and take the `_b` terms with opposite signs.
        let out1re = x16re_a - x16re_b;
        let out1im = x16im_a + x16im_b;
        let out2re = x25re_a - x25re_b;
        let out2im = x25im_a + x25im_b;
        let out3re = x34re_a - x34re_b;
        let out3im = x34im_a - x34im_b;
        let out4re = x34re_a + x34re_b;
        let out4im = x34im_a + x34im_b;
        let out5re = x25re_a + x25re_b;
        let out5im = x25im_a - x25im_b;
        let out6re = x16re_a + x16re_b;
        let out6im = x16im_a - x16im_b;

        // Write the seven results back in index order.
        buffer.store(sum, 0);
        buffer.store(
            Complex {
                re: out1re,
                im: out1im,
            },
            1,
        );
        buffer.store(
            Complex {
                re: out2re,
                im: out2im,
            },
            2,
        );
        buffer.store(
            Complex {
                re: out3re,
                im: out3im,
            },
            3,
        );
        buffer.store(
            Complex {
                re: out4re,
                im: out4im,
            },
            4,
        );
        buffer.store(
            Complex {
                re: out5re,
                im: out5im,
            },
            5,
        );
        buffer.store(
            Complex {
                re: out6re,
                im: out6im,
            },
            6,
        );
    }
}
701
702pub struct Butterfly8<T> {
703 root2: T,
704 direction: FftDirection,
705}
706boilerplate_fft_butterfly!(Butterfly8, 8, |this: &Butterfly8<_>| this.direction);
707impl<T: FftNum> Butterfly8<T> {
708 #[inline(always)]
709 pub fn new(direction: FftDirection) -> Self {
710 Self {
711 root2: T::from_f64(0.5f64.sqrt()).unwrap(),
712 direction,
713 }
714 }
715
716 #[inline(always)]
717 unsafe fn perform_fft_contiguous(&self, mut buffer: impl LoadStore<T>) {
718 let butterfly4 = Butterfly4::new(self.direction);
719
720 let mut scratch0 = [
725 buffer.load(0),
726 buffer.load(2),
727 buffer.load(4),
728 buffer.load(6),
729 ];
730 let mut scratch1 = [
731 buffer.load(1),
732 buffer.load(3),
733 buffer.load(5),
734 buffer.load(7),
735 ];
736
737 butterfly4.perform_fft_contiguous(&mut scratch0);
739 butterfly4.perform_fft_contiguous(&mut scratch1);
740
741 scratch1[1] = (twiddles::rotate_90(scratch1[1], self.direction) + scratch1[1]) * self.root2;
743 scratch1[2] = twiddles::rotate_90(scratch1[2], self.direction);
744 scratch1[3] = (twiddles::rotate_90(scratch1[3], self.direction) - scratch1[3]) * self.root2;
745
746 for i in 0..4 {
750 Butterfly2::perform_fft_strided(&mut scratch0[i], &mut scratch1[i]);
751 }
752
753 for i in 0..4 {
755 buffer.store(scratch0[i], i);
756 }
757 for i in 0..4 {
758 buffer.store(scratch1[i], i + 4);
759 }
760 }
761}
762
763pub struct Butterfly9<T> {
764 butterfly3: Butterfly3<T>,
765 twiddle1: Complex<T>,
766 twiddle2: Complex<T>,
767 twiddle4: Complex<T>,
768}
769boilerplate_fft_butterfly!(Butterfly9, 9, |this: &Butterfly9<_>| this
770 .butterfly3
771 .fft_direction());
772impl<T: FftNum> Butterfly9<T> {
773 #[inline(always)]
774 pub fn new(direction: FftDirection) -> Self {
775 Self {
776 butterfly3: Butterfly3::new(direction),
777 twiddle1: twiddles::compute_twiddle(1, 9, direction),
778 twiddle2: twiddles::compute_twiddle(2, 9, direction),
779 twiddle4: twiddles::compute_twiddle(4, 9, direction),
780 }
781 }
782 #[inline(always)]
783 unsafe fn perform_fft_contiguous(&self, mut buffer: impl LoadStore<T>) {
784 let mut scratch0 = [buffer.load(0), buffer.load(3), buffer.load(6)];
788 let mut scratch1 = [buffer.load(1), buffer.load(4), buffer.load(7)];
789 let mut scratch2 = [buffer.load(2), buffer.load(5), buffer.load(8)];
790
791 self.butterfly3.perform_fft_contiguous(&mut scratch0);
793 self.butterfly3.perform_fft_contiguous(&mut scratch1);
794 self.butterfly3.perform_fft_contiguous(&mut scratch2);
795
796 scratch1[1] = scratch1[1] * self.twiddle1;
798 scratch1[2] = scratch1[2] * self.twiddle2;
799 scratch2[1] = scratch2[1] * self.twiddle2;
800 scratch2[2] = scratch2[2] * self.twiddle4;
801
802 self.butterfly3
806 .perform_fft_strided(&mut scratch0[0], &mut scratch1[0], &mut scratch2[0]);
807 self.butterfly3
808 .perform_fft_strided(&mut scratch0[1], &mut scratch1[1], &mut scratch2[1]);
809 self.butterfly3
810 .perform_fft_strided(&mut scratch0[2], &mut scratch1[2], &mut scratch2[2]);
811
812 buffer.store(scratch0[0], 0);
814 buffer.store(scratch0[1], 1);
815 buffer.store(scratch0[2], 2);
816 buffer.store(scratch1[0], 3);
817 buffer.store(scratch1[1], 4);
818 buffer.store(scratch1[2], 5);
819 buffer.store(scratch2[0], 6);
820 buffer.store(scratch2[1], 7);
821 buffer.store(scratch2[2], 8);
822 }
823}
824
/// Size-11 butterfly, fully unrolled around five precomputed twiddle factors.
pub struct Butterfly11<T> {
    // twiddleK holds `twiddles::compute_twiddle(K, 11, direction)` (see `new`).
    twiddle1: Complex<T>,
    twiddle2: Complex<T>,
    twiddle3: Complex<T>,
    twiddle4: Complex<T>,
    twiddle5: Complex<T>,
    direction: FftDirection,
}
boilerplate_fft_butterfly!(Butterfly11, 11, |this: &Butterfly11<_>| this.direction);
impl<T: FftNum> Butterfly11<T> {
    /// Creates a size-11 butterfly for `direction`, precomputing twiddles 1 through 5 of 11.
    pub fn new(direction: FftDirection) -> Self {
        let twiddle1: Complex<T> = twiddles::compute_twiddle(1, 11, direction);
        let twiddle2: Complex<T> = twiddles::compute_twiddle(2, 11, direction);
        let twiddle3: Complex<T> = twiddles::compute_twiddle(3, 11, direction);
        let twiddle4: Complex<T> = twiddles::compute_twiddle(4, 11, direction);
        let twiddle5: Complex<T> = twiddles::compute_twiddle(5, 11, direction);
        Self {
            twiddle1,
            twiddle2,
            twiddle3,
            twiddle4,
            twiddle5,
            direction,
        }
    }

    /// Size-11 butterfly over elements 0..11 of `buffer`. Every load happens before the
    /// first store.
    #[inline(never)]
    unsafe fn perform_fft_contiguous(&self, mut buffer: impl LoadStore<T>) {
        // Inputs are paired as (1, 10), (2, 9), (3, 8), (4, 7), (5, 6):
        // `p` is the pair's sum, `n` its difference.
        let x110p = buffer.load(1) + buffer.load(10);
        let x110n = buffer.load(1) - buffer.load(10);
        let x29p = buffer.load(2) + buffer.load(9);
        let x29n = buffer.load(2) - buffer.load(9);
        let x38p = buffer.load(3) + buffer.load(8);
        let x38n = buffer.load(3) - buffer.load(8);
        let x47p = buffer.load(4) + buffer.load(7);
        let x47n = buffer.load(4) - buffer.load(7);
        let x56p = buffer.load(5) + buffer.load(6);
        let x56n = buffer.load(5) - buffer.load(6);
        // Output 0 is the plain sum of all eleven inputs.
        let sum = buffer.load(0) + x110p + x29p + x38p + x47p + x56p;
        // Real parts. For the output pair (k, 11 - k), the `_a` term combines input 0 with the
        // pair sums weighted by twiddle real parts and the `_b` term combines the pair
        // differences weighted by twiddle imaginary parts. The twiddle used for input pair n
        // is index (k * n) mod 11, folded to 11 minus that index with a negated imaginary
        // part whenever it exceeds 5.
        let b110re_a = buffer.load(0).re
            + self.twiddle1.re * x110p.re
            + self.twiddle2.re * x29p.re
            + self.twiddle3.re * x38p.re
            + self.twiddle4.re * x47p.re
            + self.twiddle5.re * x56p.re;
        let b110re_b = self.twiddle1.im * x110n.im
            + self.twiddle2.im * x29n.im
            + self.twiddle3.im * x38n.im
            + self.twiddle4.im * x47n.im
            + self.twiddle5.im * x56n.im;
        let b29re_a = buffer.load(0).re
            + self.twiddle2.re * x110p.re
            + self.twiddle4.re * x29p.re
            + self.twiddle5.re * x38p.re
            + self.twiddle3.re * x47p.re
            + self.twiddle1.re * x56p.re;
        let b29re_b = self.twiddle2.im * x110n.im
            + self.twiddle4.im * x29n.im
            + -self.twiddle5.im * x38n.im
            + -self.twiddle3.im * x47n.im
            + -self.twiddle1.im * x56n.im;
        let b38re_a = buffer.load(0).re
            + self.twiddle3.re * x110p.re
            + self.twiddle5.re * x29p.re
            + self.twiddle2.re * x38p.re
            + self.twiddle1.re * x47p.re
            + self.twiddle4.re * x56p.re;
        let b38re_b = self.twiddle3.im * x110n.im
            + -self.twiddle5.im * x29n.im
            + -self.twiddle2.im * x38n.im
            + self.twiddle1.im * x47n.im
            + self.twiddle4.im * x56n.im;
        let b47re_a = buffer.load(0).re
            + self.twiddle4.re * x110p.re
            + self.twiddle3.re * x29p.re
            + self.twiddle1.re * x38p.re
            + self.twiddle5.re * x47p.re
            + self.twiddle2.re * x56p.re;
        let b47re_b = self.twiddle4.im * x110n.im
            + -self.twiddle3.im * x29n.im
            + self.twiddle1.im * x38n.im
            + self.twiddle5.im * x47n.im
            + -self.twiddle2.im * x56n.im;
        let b56re_a = buffer.load(0).re
            + self.twiddle5.re * x110p.re
            + self.twiddle1.re * x29p.re
            + self.twiddle4.re * x38p.re
            + self.twiddle2.re * x47p.re
            + self.twiddle3.re * x56p.re;
        let b56re_b = self.twiddle5.im * x110n.im
            + -self.twiddle1.im * x29n.im
            + self.twiddle4.im * x38n.im
            + -self.twiddle2.im * x47n.im
            + self.twiddle3.im * x56n.im;

        // Imaginary parts: same twiddle selection and signs as the real rows above, but the
        // `_a` terms read `.im` of the sums and the `_b` terms read `.re` of the differences.
        let b110im_a = buffer.load(0).im
            + self.twiddle1.re * x110p.im
            + self.twiddle2.re * x29p.im
            + self.twiddle3.re * x38p.im
            + self.twiddle4.re * x47p.im
            + self.twiddle5.re * x56p.im;
        let b110im_b = self.twiddle1.im * x110n.re
            + self.twiddle2.im * x29n.re
            + self.twiddle3.im * x38n.re
            + self.twiddle4.im * x47n.re
            + self.twiddle5.im * x56n.re;
        let b29im_a = buffer.load(0).im
            + self.twiddle2.re * x110p.im
            + self.twiddle4.re * x29p.im
            + self.twiddle5.re * x38p.im
            + self.twiddle3.re * x47p.im
            + self.twiddle1.re * x56p.im;
        let b29im_b = self.twiddle2.im * x110n.re
            + self.twiddle4.im * x29n.re
            + -self.twiddle5.im * x38n.re
            + -self.twiddle3.im * x47n.re
            + -self.twiddle1.im * x56n.re;
        let b38im_a = buffer.load(0).im
            + self.twiddle3.re * x110p.im
            + self.twiddle5.re * x29p.im
            + self.twiddle2.re * x38p.im
            + self.twiddle1.re * x47p.im
            + self.twiddle4.re * x56p.im;
        let b38im_b = self.twiddle3.im * x110n.re
            + -self.twiddle5.im * x29n.re
            + -self.twiddle2.im * x38n.re
            + self.twiddle1.im * x47n.re
            + self.twiddle4.im * x56n.re;
        let b47im_a = buffer.load(0).im
            + self.twiddle4.re * x110p.im
            + self.twiddle3.re * x29p.im
            + self.twiddle1.re * x38p.im
            + self.twiddle5.re * x47p.im
            + self.twiddle2.re * x56p.im;
        let b47im_b = self.twiddle4.im * x110n.re
            + -self.twiddle3.im * x29n.re
            + self.twiddle1.im * x38n.re
            + self.twiddle5.im * x47n.re
            + -self.twiddle2.im * x56n.re;
        let b56im_a = buffer.load(0).im
            + self.twiddle5.re * x110p.im
            + self.twiddle1.re * x29p.im
            + self.twiddle4.re * x38p.im
            + self.twiddle2.re * x47p.im
            + self.twiddle3.re * x56p.im;
        let b56im_b = self.twiddle5.im * x110n.re
            + -self.twiddle1.im * x29n.re
            + self.twiddle4.im * x38n.re
            + -self.twiddle2.im * x47n.re
            + self.twiddle3.im * x56n.re;

        // Outputs k and 11 - k share their `_a` terms and take the `_b` terms with opposite
        // signs: outputs 1..=5 use (re: a - b, im: a + b), outputs 6..=10 use (re: a + b, im: a - b).
        let out1re = b110re_a - b110re_b;
        let out1im = b110im_a + b110im_b;
        let out2re = b29re_a - b29re_b;
        let out2im = b29im_a + b29im_b;
        let out3re = b38re_a - b38re_b;
        let out3im = b38im_a + b38im_b;
        let out4re = b47re_a - b47re_b;
        let out4im = b47im_a + b47im_b;
        let out5re = b56re_a - b56re_b;
        let out5im = b56im_a + b56im_b;
        let out6re = b56re_a + b56re_b;
        let out6im = b56im_a - b56im_b;
        let out7re = b47re_a + b47re_b;
        let out7im = b47im_a - b47im_b;
        let out8re = b38re_a + b38re_b;
        let out8im = b38im_a - b38im_b;
        let out9re = b29re_a + b29re_b;
        let out9im = b29im_a - b29im_b;
        let out10re = b110re_a + b110re_b;
        let out10im = b110im_a - b110im_b;
        // Write the eleven results back in index order.
        buffer.store(sum, 0);
        buffer.store(
            Complex {
                re: out1re,
                im: out1im,
            },
            1,
        );
        buffer.store(
            Complex {
                re: out2re,
                im: out2im,
            },
            2,
        );
        buffer.store(
            Complex {
                re: out3re,
                im: out3im,
            },
            3,
        );
        buffer.store(
            Complex {
                re: out4re,
                im: out4im,
            },
            4,
        );
        buffer.store(
            Complex {
                re: out5re,
                im: out5im,
            },
            5,
        );
        buffer.store(
            Complex {
                re: out6re,
                im: out6im,
            },
            6,
        );
        buffer.store(
            Complex {
                re: out7re,
                im: out7im,
            },
            7,
        );
        buffer.store(
            Complex {
                re: out8re,
                im: out8im,
            },
            8,
        );
        buffer.store(
            Complex {
                re: out9re,
                im: out9im,
            },
            9,
        );
        buffer.store(
            Complex {
                re: out10re,
                im: out10im,
            },
            10,
        );
    }
}
1073
1074pub struct Butterfly12<T> {
1075 butterfly3: Butterfly3<T>,
1076 butterfly4: Butterfly4<T>,
1077}
1078boilerplate_fft_butterfly!(Butterfly12, 12, |this: &Butterfly12<_>| this
1079 .butterfly3
1080 .fft_direction());
1081impl<T: FftNum> Butterfly12<T> {
1082 #[inline(always)]
1083 pub fn new(direction: FftDirection) -> Self {
1084 Self {
1085 butterfly3: Butterfly3::new(direction),
1086 butterfly4: Butterfly4::new(direction),
1087 }
1088 }
1089 #[inline(always)]
1090 unsafe fn perform_fft_contiguous(&self, mut buffer: impl LoadStore<T>) {
1091 let mut scratch0 = [
1096 buffer.load(0),
1097 buffer.load(3),
1098 buffer.load(6),
1099 buffer.load(9),
1100 ];
1101 let mut scratch1 = [
1102 buffer.load(4),
1103 buffer.load(7),
1104 buffer.load(10),
1105 buffer.load(1),
1106 ];
1107 let mut scratch2 = [
1108 buffer.load(8),
1109 buffer.load(11),
1110 buffer.load(2),
1111 buffer.load(5),
1112 ];
1113
1114 self.butterfly4.perform_fft_contiguous(&mut scratch0);
1116 self.butterfly4.perform_fft_contiguous(&mut scratch1);
1117 self.butterfly4.perform_fft_contiguous(&mut scratch2);
1118
1119 self.butterfly3
1125 .perform_fft_strided(&mut scratch0[0], &mut scratch1[0], &mut scratch2[0]);
1126 self.butterfly3
1127 .perform_fft_strided(&mut scratch0[1], &mut scratch1[1], &mut scratch2[1]);
1128 self.butterfly3
1129 .perform_fft_strided(&mut scratch0[2], &mut scratch1[2], &mut scratch2[2]);
1130 self.butterfly3
1131 .perform_fft_strided(&mut scratch0[3], &mut scratch1[3], &mut scratch2[3]);
1132
1133 buffer.store(scratch0[0], 0);
1137 buffer.store(scratch1[1], 1);
1138 buffer.store(scratch2[2], 2);
1139 buffer.store(scratch0[3], 3);
1140 buffer.store(scratch1[0], 4);
1141 buffer.store(scratch2[1], 5);
1142 buffer.store(scratch0[2], 6);
1143 buffer.store(scratch1[3], 7);
1144 buffer.store(scratch2[0], 8);
1145 buffer.store(scratch0[1], 9);
1146 buffer.store(scratch1[2], 10);
1147 buffer.store(scratch2[3], 11);
1148 }
1149}
1150
1151pub struct Butterfly13<T> {
1152 twiddle1: Complex<T>,
1153 twiddle2: Complex<T>,
1154 twiddle3: Complex<T>,
1155 twiddle4: Complex<T>,
1156 twiddle5: Complex<T>,
1157 twiddle6: Complex<T>,
1158 direction: FftDirection,
1159}
1160boilerplate_fft_butterfly!(Butterfly13, 13, |this: &Butterfly13<_>| this.direction);
1161impl<T: FftNum> Butterfly13<T> {
1162 pub fn new(direction: FftDirection) -> Self {
1163 let twiddle1: Complex<T> = twiddles::compute_twiddle(1, 13, direction);
1164 let twiddle2: Complex<T> = twiddles::compute_twiddle(2, 13, direction);
1165 let twiddle3: Complex<T> = twiddles::compute_twiddle(3, 13, direction);
1166 let twiddle4: Complex<T> = twiddles::compute_twiddle(4, 13, direction);
1167 let twiddle5: Complex<T> = twiddles::compute_twiddle(5, 13, direction);
1168 let twiddle6: Complex<T> = twiddles::compute_twiddle(6, 13, direction);
1169 Self {
1170 twiddle1,
1171 twiddle2,
1172 twiddle3,
1173 twiddle4,
1174 twiddle5,
1175 twiddle6,
1176 direction,
1177 }
1178 }
1179
1180 #[inline(never)]
1181 unsafe fn perform_fft_contiguous(&self, mut buffer: impl LoadStore<T>) {
1182 let x112p = buffer.load(1) + buffer.load(12);
1186 let x112n = buffer.load(1) - buffer.load(12);
1187 let x211p = buffer.load(2) + buffer.load(11);
1188 let x211n = buffer.load(2) - buffer.load(11);
1189 let x310p = buffer.load(3) + buffer.load(10);
1190 let x310n = buffer.load(3) - buffer.load(10);
1191 let x49p = buffer.load(4) + buffer.load(9);
1192 let x49n = buffer.load(4) - buffer.load(9);
1193 let x58p = buffer.load(5) + buffer.load(8);
1194 let x58n = buffer.load(5) - buffer.load(8);
1195 let x67p = buffer.load(6) + buffer.load(7);
1196 let x67n = buffer.load(6) - buffer.load(7);
1197 let sum = buffer.load(0) + x112p + x211p + x310p + x49p + x58p + x67p;
1198 let b112re_a = buffer.load(0).re
1199 + self.twiddle1.re * x112p.re
1200 + self.twiddle2.re * x211p.re
1201 + self.twiddle3.re * x310p.re
1202 + self.twiddle4.re * x49p.re
1203 + self.twiddle5.re * x58p.re
1204 + self.twiddle6.re * x67p.re;
1205 let b112re_b = self.twiddle1.im * x112n.im
1206 + self.twiddle2.im * x211n.im
1207 + self.twiddle3.im * x310n.im
1208 + self.twiddle4.im * x49n.im
1209 + self.twiddle5.im * x58n.im
1210 + self.twiddle6.im * x67n.im;
1211 let b211re_a = buffer.load(0).re
1212 + self.twiddle2.re * x112p.re
1213 + self.twiddle4.re * x211p.re
1214 + self.twiddle6.re * x310p.re
1215 + self.twiddle5.re * x49p.re
1216 + self.twiddle3.re * x58p.re
1217 + self.twiddle1.re * x67p.re;
1218 let b211re_b = self.twiddle2.im * x112n.im
1219 + self.twiddle4.im * x211n.im
1220 + self.twiddle6.im * x310n.im
1221 + -self.twiddle5.im * x49n.im
1222 + -self.twiddle3.im * x58n.im
1223 + -self.twiddle1.im * x67n.im;
1224 let b310re_a = buffer.load(0).re
1225 + self.twiddle3.re * x112p.re
1226 + self.twiddle6.re * x211p.re
1227 + self.twiddle4.re * x310p.re
1228 + self.twiddle1.re * x49p.re
1229 + self.twiddle2.re * x58p.re
1230 + self.twiddle5.re * x67p.re;
1231 let b310re_b = self.twiddle3.im * x112n.im
1232 + self.twiddle6.im * x211n.im
1233 + -self.twiddle4.im * x310n.im
1234 + -self.twiddle1.im * x49n.im
1235 + self.twiddle2.im * x58n.im
1236 + self.twiddle5.im * x67n.im;
1237 let b49re_a = buffer.load(0).re
1238 + self.twiddle4.re * x112p.re
1239 + self.twiddle5.re * x211p.re
1240 + self.twiddle1.re * x310p.re
1241 + self.twiddle3.re * x49p.re
1242 + self.twiddle6.re * x58p.re
1243 + self.twiddle2.re * x67p.re;
1244 let b49re_b = self.twiddle4.im * x112n.im
1245 + -self.twiddle5.im * x211n.im
1246 + -self.twiddle1.im * x310n.im
1247 + self.twiddle3.im * x49n.im
1248 + -self.twiddle6.im * x58n.im
1249 + -self.twiddle2.im * x67n.im;
1250 let b58re_a = buffer.load(0).re
1251 + self.twiddle5.re * x112p.re
1252 + self.twiddle3.re * x211p.re
1253 + self.twiddle2.re * x310p.re
1254 + self.twiddle6.re * x49p.re
1255 + self.twiddle1.re * x58p.re
1256 + self.twiddle4.re * x67p.re;
1257 let b58re_b = self.twiddle5.im * x112n.im
1258 + -self.twiddle3.im * x211n.im
1259 + self.twiddle2.im * x310n.im
1260 + -self.twiddle6.im * x49n.im
1261 + -self.twiddle1.im * x58n.im
1262 + self.twiddle4.im * x67n.im;
1263 let b67re_a = buffer.load(0).re
1264 + self.twiddle6.re * x112p.re
1265 + self.twiddle1.re * x211p.re
1266 + self.twiddle5.re * x310p.re
1267 + self.twiddle2.re * x49p.re
1268 + self.twiddle4.re * x58p.re
1269 + self.twiddle3.re * x67p.re;
1270 let b67re_b = self.twiddle6.im * x112n.im
1271 + -self.twiddle1.im * x211n.im
1272 + self.twiddle5.im * x310n.im
1273 + -self.twiddle2.im * x49n.im
1274 + self.twiddle4.im * x58n.im
1275 + -self.twiddle3.im * x67n.im;
1276
1277 let b112im_a = buffer.load(0).im
1278 + self.twiddle1.re * x112p.im
1279 + self.twiddle2.re * x211p.im
1280 + self.twiddle3.re * x310p.im
1281 + self.twiddle4.re * x49p.im
1282 + self.twiddle5.re * x58p.im
1283 + self.twiddle6.re * x67p.im;
1284 let b112im_b = self.twiddle1.im * x112n.re
1285 + self.twiddle2.im * x211n.re
1286 + self.twiddle3.im * x310n.re
1287 + self.twiddle4.im * x49n.re
1288 + self.twiddle5.im * x58n.re
1289 + self.twiddle6.im * x67n.re;
1290 let b211im_a = buffer.load(0).im
1291 + self.twiddle2.re * x112p.im
1292 + self.twiddle4.re * x211p.im
1293 + self.twiddle6.re * x310p.im
1294 + self.twiddle5.re * x49p.im
1295 + self.twiddle3.re * x58p.im
1296 + self.twiddle1.re * x67p.im;
1297 let b211im_b = self.twiddle2.im * x112n.re
1298 + self.twiddle4.im * x211n.re
1299 + self.twiddle6.im * x310n.re
1300 + -self.twiddle5.im * x49n.re
1301 + -self.twiddle3.im * x58n.re
1302 + -self.twiddle1.im * x67n.re;
1303 let b310im_a = buffer.load(0).im
1304 + self.twiddle3.re * x112p.im
1305 + self.twiddle6.re * x211p.im
1306 + self.twiddle4.re * x310p.im
1307 + self.twiddle1.re * x49p.im
1308 + self.twiddle2.re * x58p.im
1309 + self.twiddle5.re * x67p.im;
1310 let b310im_b = self.twiddle3.im * x112n.re
1311 + self.twiddle6.im * x211n.re
1312 + -self.twiddle4.im * x310n.re
1313 + -self.twiddle1.im * x49n.re
1314 + self.twiddle2.im * x58n.re
1315 + self.twiddle5.im * x67n.re;
1316 let b49im_a = buffer.load(0).im
1317 + self.twiddle4.re * x112p.im
1318 + self.twiddle5.re * x211p.im
1319 + self.twiddle1.re * x310p.im
1320 + self.twiddle3.re * x49p.im
1321 + self.twiddle6.re * x58p.im
1322 + self.twiddle2.re * x67p.im;
1323 let b49im_b = self.twiddle4.im * x112n.re
1324 + -self.twiddle5.im * x211n.re
1325 + -self.twiddle1.im * x310n.re
1326 + self.twiddle3.im * x49n.re
1327 + -self.twiddle6.im * x58n.re
1328 + -self.twiddle2.im * x67n.re;
1329 let b58im_a = buffer.load(0).im
1330 + self.twiddle5.re * x112p.im
1331 + self.twiddle3.re * x211p.im
1332 + self.twiddle2.re * x310p.im
1333 + self.twiddle6.re * x49p.im
1334 + self.twiddle1.re * x58p.im
1335 + self.twiddle4.re * x67p.im;
1336 let b58im_b = self.twiddle5.im * x112n.re
1337 + -self.twiddle3.im * x211n.re
1338 + self.twiddle2.im * x310n.re
1339 + -self.twiddle6.im * x49n.re
1340 + -self.twiddle1.im * x58n.re
1341 + self.twiddle4.im * x67n.re;
1342 let b67im_a = buffer.load(0).im
1343 + self.twiddle6.re * x112p.im
1344 + self.twiddle1.re * x211p.im
1345 + self.twiddle5.re * x310p.im
1346 + self.twiddle2.re * x49p.im
1347 + self.twiddle4.re * x58p.im
1348 + self.twiddle3.re * x67p.im;
1349 let b67im_b = self.twiddle6.im * x112n.re
1350 + -self.twiddle1.im * x211n.re
1351 + self.twiddle5.im * x310n.re
1352 + -self.twiddle2.im * x49n.re
1353 + self.twiddle4.im * x58n.re
1354 + -self.twiddle3.im * x67n.re;
1355
1356 let out1re = b112re_a - b112re_b;
1357 let out1im = b112im_a + b112im_b;
1358 let out2re = b211re_a - b211re_b;
1359 let out2im = b211im_a + b211im_b;
1360 let out3re = b310re_a - b310re_b;
1361 let out3im = b310im_a + b310im_b;
1362 let out4re = b49re_a - b49re_b;
1363 let out4im = b49im_a + b49im_b;
1364 let out5re = b58re_a - b58re_b;
1365 let out5im = b58im_a + b58im_b;
1366 let out6re = b67re_a - b67re_b;
1367 let out6im = b67im_a + b67im_b;
1368 let out7re = b67re_a + b67re_b;
1369 let out7im = b67im_a - b67im_b;
1370 let out8re = b58re_a + b58re_b;
1371 let out8im = b58im_a - b58im_b;
1372 let out9re = b49re_a + b49re_b;
1373 let out9im = b49im_a - b49im_b;
1374 let out10re = b310re_a + b310re_b;
1375 let out10im = b310im_a - b310im_b;
1376 let out11re = b211re_a + b211re_b;
1377 let out11im = b211im_a - b211im_b;
1378 let out12re = b112re_a + b112re_b;
1379 let out12im = b112im_a - b112im_b;
1380 buffer.store(sum, 0);
1381 buffer.store(
1382 Complex {
1383 re: out1re,
1384 im: out1im,
1385 },
1386 1,
1387 );
1388 buffer.store(
1389 Complex {
1390 re: out2re,
1391 im: out2im,
1392 },
1393 2,
1394 );
1395 buffer.store(
1396 Complex {
1397 re: out3re,
1398 im: out3im,
1399 },
1400 3,
1401 );
1402 buffer.store(
1403 Complex {
1404 re: out4re,
1405 im: out4im,
1406 },
1407 4,
1408 );
1409 buffer.store(
1410 Complex {
1411 re: out5re,
1412 im: out5im,
1413 },
1414 5,
1415 );
1416 buffer.store(
1417 Complex {
1418 re: out6re,
1419 im: out6im,
1420 },
1421 6,
1422 );
1423 buffer.store(
1424 Complex {
1425 re: out7re,
1426 im: out7im,
1427 },
1428 7,
1429 );
1430 buffer.store(
1431 Complex {
1432 re: out8re,
1433 im: out8im,
1434 },
1435 8,
1436 );
1437 buffer.store(
1438 Complex {
1439 re: out9re,
1440 im: out9im,
1441 },
1442 9,
1443 );
1444 buffer.store(
1445 Complex {
1446 re: out10re,
1447 im: out10im,
1448 },
1449 10,
1450 );
1451 buffer.store(
1452 Complex {
1453 re: out11re,
1454 im: out11im,
1455 },
1456 11,
1457 );
1458 buffer.store(
1459 Complex {
1460 re: out12re,
1461 im: out12im,
1462 },
1463 12,
1464 );
1465 }
1466}
1467
1468pub struct Butterfly16<T> {
1469 butterfly8: Butterfly8<T>,
1470 twiddle1: Complex<T>,
1471 twiddle2: Complex<T>,
1472 twiddle3: Complex<T>,
1473}
1474boilerplate_fft_butterfly!(Butterfly16, 16, |this: &Butterfly16<_>| this
1475 .butterfly8
1476 .fft_direction());
1477impl<T: FftNum> Butterfly16<T> {
1478 #[inline(always)]
1479 pub fn new(direction: FftDirection) -> Self {
1480 Self {
1481 butterfly8: Butterfly8::new(direction),
1482 twiddle1: twiddles::compute_twiddle(1, 16, direction),
1483 twiddle2: twiddles::compute_twiddle(2, 16, direction),
1484 twiddle3: twiddles::compute_twiddle(3, 16, direction),
1485 }
1486 }
1487
1488 #[inline(never)]
1489 unsafe fn perform_fft_contiguous(&self, mut buffer: impl LoadStore<T>) {
1490 let butterfly4 = Butterfly4::new(self.fft_direction());
1491
1492 let mut scratch_evens = [
1495 buffer.load(0),
1496 buffer.load(2),
1497 buffer.load(4),
1498 buffer.load(6),
1499 buffer.load(8),
1500 buffer.load(10),
1501 buffer.load(12),
1502 buffer.load(14),
1503 ];
1504
1505 let mut scratch_odds_n1 = [
1506 buffer.load(1),
1507 buffer.load(5),
1508 buffer.load(9),
1509 buffer.load(13),
1510 ];
1511 let mut scratch_odds_n3 = [
1512 buffer.load(15),
1513 buffer.load(3),
1514 buffer.load(7),
1515 buffer.load(11),
1516 ];
1517
1518 self.butterfly8.perform_fft_contiguous(&mut scratch_evens);
1520 butterfly4.perform_fft_contiguous(&mut scratch_odds_n1);
1521 butterfly4.perform_fft_contiguous(&mut scratch_odds_n3);
1522
1523 scratch_odds_n1[1] = scratch_odds_n1[1] * self.twiddle1;
1525 scratch_odds_n3[1] = scratch_odds_n3[1] * self.twiddle1.conj();
1526
1527 scratch_odds_n1[2] = scratch_odds_n1[2] * self.twiddle2;
1528 scratch_odds_n3[2] = scratch_odds_n3[2] * self.twiddle2.conj();
1529
1530 scratch_odds_n1[3] = scratch_odds_n1[3] * self.twiddle3;
1531 scratch_odds_n3[3] = scratch_odds_n3[3] * self.twiddle3.conj();
1532
1533 Butterfly2::perform_fft_strided(&mut scratch_odds_n1[0], &mut scratch_odds_n3[0]);
1535 Butterfly2::perform_fft_strided(&mut scratch_odds_n1[1], &mut scratch_odds_n3[1]);
1536 Butterfly2::perform_fft_strided(&mut scratch_odds_n1[2], &mut scratch_odds_n3[2]);
1537 Butterfly2::perform_fft_strided(&mut scratch_odds_n1[3], &mut scratch_odds_n3[3]);
1538
1539 scratch_odds_n3[0] = twiddles::rotate_90(scratch_odds_n3[0], self.fft_direction());
1541 scratch_odds_n3[1] = twiddles::rotate_90(scratch_odds_n3[1], self.fft_direction());
1542 scratch_odds_n3[2] = twiddles::rotate_90(scratch_odds_n3[2], self.fft_direction());
1543 scratch_odds_n3[3] = twiddles::rotate_90(scratch_odds_n3[3], self.fft_direction());
1544
1545 buffer.store(scratch_evens[0] + scratch_odds_n1[0], 0);
1547 buffer.store(scratch_evens[1] + scratch_odds_n1[1], 1);
1548 buffer.store(scratch_evens[2] + scratch_odds_n1[2], 2);
1549 buffer.store(scratch_evens[3] + scratch_odds_n1[3], 3);
1550 buffer.store(scratch_evens[4] + scratch_odds_n3[0], 4);
1551 buffer.store(scratch_evens[5] + scratch_odds_n3[1], 5);
1552 buffer.store(scratch_evens[6] + scratch_odds_n3[2], 6);
1553 buffer.store(scratch_evens[7] + scratch_odds_n3[3], 7);
1554 buffer.store(scratch_evens[0] - scratch_odds_n1[0], 8);
1555 buffer.store(scratch_evens[1] - scratch_odds_n1[1], 9);
1556 buffer.store(scratch_evens[2] - scratch_odds_n1[2], 10);
1557 buffer.store(scratch_evens[3] - scratch_odds_n1[3], 11);
1558 buffer.store(scratch_evens[4] - scratch_odds_n3[0], 12);
1559 buffer.store(scratch_evens[5] - scratch_odds_n3[1], 13);
1560 buffer.store(scratch_evens[6] - scratch_odds_n3[2], 14);
1561 buffer.store(scratch_evens[7] - scratch_odds_n3[3], 15);
1562 }
1563}
1564
1565pub struct Butterfly17<T> {
1566 twiddle1: Complex<T>,
1567 twiddle2: Complex<T>,
1568 twiddle3: Complex<T>,
1569 twiddle4: Complex<T>,
1570 twiddle5: Complex<T>,
1571 twiddle6: Complex<T>,
1572 twiddle7: Complex<T>,
1573 twiddle8: Complex<T>,
1574 direction: FftDirection,
1575}
1576boilerplate_fft_butterfly!(Butterfly17, 17, |this: &Butterfly17<_>| this.direction);
1577impl<T: FftNum> Butterfly17<T> {
1578 pub fn new(direction: FftDirection) -> Self {
1579 let twiddle1: Complex<T> = twiddles::compute_twiddle(1, 17, direction);
1580 let twiddle2: Complex<T> = twiddles::compute_twiddle(2, 17, direction);
1581 let twiddle3: Complex<T> = twiddles::compute_twiddle(3, 17, direction);
1582 let twiddle4: Complex<T> = twiddles::compute_twiddle(4, 17, direction);
1583 let twiddle5: Complex<T> = twiddles::compute_twiddle(5, 17, direction);
1584 let twiddle6: Complex<T> = twiddles::compute_twiddle(6, 17, direction);
1585 let twiddle7: Complex<T> = twiddles::compute_twiddle(7, 17, direction);
1586 let twiddle8: Complex<T> = twiddles::compute_twiddle(8, 17, direction);
1587 Self {
1588 twiddle1,
1589 twiddle2,
1590 twiddle3,
1591 twiddle4,
1592 twiddle5,
1593 twiddle6,
1594 twiddle7,
1595 twiddle8,
1596 direction,
1597 }
1598 }
1599
1600 #[inline(never)]
1601 unsafe fn perform_fft_contiguous(&self, mut buffer: impl LoadStore<T>) {
1602 let x116p = buffer.load(1) + buffer.load(16);
1606 let x116n = buffer.load(1) - buffer.load(16);
1607 let x215p = buffer.load(2) + buffer.load(15);
1608 let x215n = buffer.load(2) - buffer.load(15);
1609 let x314p = buffer.load(3) + buffer.load(14);
1610 let x314n = buffer.load(3) - buffer.load(14);
1611 let x413p = buffer.load(4) + buffer.load(13);
1612 let x413n = buffer.load(4) - buffer.load(13);
1613 let x512p = buffer.load(5) + buffer.load(12);
1614 let x512n = buffer.load(5) - buffer.load(12);
1615 let x611p = buffer.load(6) + buffer.load(11);
1616 let x611n = buffer.load(6) - buffer.load(11);
1617 let x710p = buffer.load(7) + buffer.load(10);
1618 let x710n = buffer.load(7) - buffer.load(10);
1619 let x89p = buffer.load(8) + buffer.load(9);
1620 let x89n = buffer.load(8) - buffer.load(9);
1621 let sum = buffer.load(0) + x116p + x215p + x314p + x413p + x512p + x611p + x710p + x89p;
1622 let b116re_a = buffer.load(0).re
1623 + self.twiddle1.re * x116p.re
1624 + self.twiddle2.re * x215p.re
1625 + self.twiddle3.re * x314p.re
1626 + self.twiddle4.re * x413p.re
1627 + self.twiddle5.re * x512p.re
1628 + self.twiddle6.re * x611p.re
1629 + self.twiddle7.re * x710p.re
1630 + self.twiddle8.re * x89p.re;
1631 let b116re_b = self.twiddle1.im * x116n.im
1632 + self.twiddle2.im * x215n.im
1633 + self.twiddle3.im * x314n.im
1634 + self.twiddle4.im * x413n.im
1635 + self.twiddle5.im * x512n.im
1636 + self.twiddle6.im * x611n.im
1637 + self.twiddle7.im * x710n.im
1638 + self.twiddle8.im * x89n.im;
1639 let b215re_a = buffer.load(0).re
1640 + self.twiddle2.re * x116p.re
1641 + self.twiddle4.re * x215p.re
1642 + self.twiddle6.re * x314p.re
1643 + self.twiddle8.re * x413p.re
1644 + self.twiddle7.re * x512p.re
1645 + self.twiddle5.re * x611p.re
1646 + self.twiddle3.re * x710p.re
1647 + self.twiddle1.re * x89p.re;
1648 let b215re_b = self.twiddle2.im * x116n.im
1649 + self.twiddle4.im * x215n.im
1650 + self.twiddle6.im * x314n.im
1651 + self.twiddle8.im * x413n.im
1652 + -self.twiddle7.im * x512n.im
1653 + -self.twiddle5.im * x611n.im
1654 + -self.twiddle3.im * x710n.im
1655 + -self.twiddle1.im * x89n.im;
1656 let b314re_a = buffer.load(0).re
1657 + self.twiddle3.re * x116p.re
1658 + self.twiddle6.re * x215p.re
1659 + self.twiddle8.re * x314p.re
1660 + self.twiddle5.re * x413p.re
1661 + self.twiddle2.re * x512p.re
1662 + self.twiddle1.re * x611p.re
1663 + self.twiddle4.re * x710p.re
1664 + self.twiddle7.re * x89p.re;
1665 let b314re_b = self.twiddle3.im * x116n.im
1666 + self.twiddle6.im * x215n.im
1667 + -self.twiddle8.im * x314n.im
1668 + -self.twiddle5.im * x413n.im
1669 + -self.twiddle2.im * x512n.im
1670 + self.twiddle1.im * x611n.im
1671 + self.twiddle4.im * x710n.im
1672 + self.twiddle7.im * x89n.im;
1673 let b413re_a = buffer.load(0).re
1674 + self.twiddle4.re * x116p.re
1675 + self.twiddle8.re * x215p.re
1676 + self.twiddle5.re * x314p.re
1677 + self.twiddle1.re * x413p.re
1678 + self.twiddle3.re * x512p.re
1679 + self.twiddle7.re * x611p.re
1680 + self.twiddle6.re * x710p.re
1681 + self.twiddle2.re * x89p.re;
1682 let b413re_b = self.twiddle4.im * x116n.im
1683 + self.twiddle8.im * x215n.im
1684 + -self.twiddle5.im * x314n.im
1685 + -self.twiddle1.im * x413n.im
1686 + self.twiddle3.im * x512n.im
1687 + self.twiddle7.im * x611n.im
1688 + -self.twiddle6.im * x710n.im
1689 + -self.twiddle2.im * x89n.im;
1690 let b512re_a = buffer.load(0).re
1691 + self.twiddle5.re * x116p.re
1692 + self.twiddle7.re * x215p.re
1693 + self.twiddle2.re * x314p.re
1694 + self.twiddle3.re * x413p.re
1695 + self.twiddle8.re * x512p.re
1696 + self.twiddle4.re * x611p.re
1697 + self.twiddle1.re * x710p.re
1698 + self.twiddle6.re * x89p.re;
1699 let b512re_b = self.twiddle5.im * x116n.im
1700 + -self.twiddle7.im * x215n.im
1701 + -self.twiddle2.im * x314n.im
1702 + self.twiddle3.im * x413n.im
1703 + self.twiddle8.im * x512n.im
1704 + -self.twiddle4.im * x611n.im
1705 + self.twiddle1.im * x710n.im
1706 + self.twiddle6.im * x89n.im;
1707 let b611re_a = buffer.load(0).re
1708 + self.twiddle6.re * x116p.re
1709 + self.twiddle5.re * x215p.re
1710 + self.twiddle1.re * x314p.re
1711 + self.twiddle7.re * x413p.re
1712 + self.twiddle4.re * x512p.re
1713 + self.twiddle2.re * x611p.re
1714 + self.twiddle8.re * x710p.re
1715 + self.twiddle3.re * x89p.re;
1716 let b611re_b = self.twiddle6.im * x116n.im
1717 + -self.twiddle5.im * x215n.im
1718 + self.twiddle1.im * x314n.im
1719 + self.twiddle7.im * x413n.im
1720 + -self.twiddle4.im * x512n.im
1721 + self.twiddle2.im * x611n.im
1722 + self.twiddle8.im * x710n.im
1723 + -self.twiddle3.im * x89n.im;
1724 let b710re_a = buffer.load(0).re
1725 + self.twiddle7.re * x116p.re
1726 + self.twiddle3.re * x215p.re
1727 + self.twiddle4.re * x314p.re
1728 + self.twiddle6.re * x413p.re
1729 + self.twiddle1.re * x512p.re
1730 + self.twiddle8.re * x611p.re
1731 + self.twiddle2.re * x710p.re
1732 + self.twiddle5.re * x89p.re;
1733 let b710re_b = self.twiddle7.im * x116n.im
1734 + -self.twiddle3.im * x215n.im
1735 + self.twiddle4.im * x314n.im
1736 + -self.twiddle6.im * x413n.im
1737 + self.twiddle1.im * x512n.im
1738 + self.twiddle8.im * x611n.im
1739 + -self.twiddle2.im * x710n.im
1740 + self.twiddle5.im * x89n.im;
1741 let b89re_a = buffer.load(0).re
1742 + self.twiddle8.re * x116p.re
1743 + self.twiddle1.re * x215p.re
1744 + self.twiddle7.re * x314p.re
1745 + self.twiddle2.re * x413p.re
1746 + self.twiddle6.re * x512p.re
1747 + self.twiddle3.re * x611p.re
1748 + self.twiddle5.re * x710p.re
1749 + self.twiddle4.re * x89p.re;
1750 let b89re_b = self.twiddle8.im * x116n.im
1751 + -self.twiddle1.im * x215n.im
1752 + self.twiddle7.im * x314n.im
1753 + -self.twiddle2.im * x413n.im
1754 + self.twiddle6.im * x512n.im
1755 + -self.twiddle3.im * x611n.im
1756 + self.twiddle5.im * x710n.im
1757 + -self.twiddle4.im * x89n.im;
1758
1759 let b116im_a = buffer.load(0).im
1760 + self.twiddle1.re * x116p.im
1761 + self.twiddle2.re * x215p.im
1762 + self.twiddle3.re * x314p.im
1763 + self.twiddle4.re * x413p.im
1764 + self.twiddle5.re * x512p.im
1765 + self.twiddle6.re * x611p.im
1766 + self.twiddle7.re * x710p.im
1767 + self.twiddle8.re * x89p.im;
1768 let b116im_b = self.twiddle1.im * x116n.re
1769 + self.twiddle2.im * x215n.re
1770 + self.twiddle3.im * x314n.re
1771 + self.twiddle4.im * x413n.re
1772 + self.twiddle5.im * x512n.re
1773 + self.twiddle6.im * x611n.re
1774 + self.twiddle7.im * x710n.re
1775 + self.twiddle8.im * x89n.re;
1776 let b215im_a = buffer.load(0).im
1777 + self.twiddle2.re * x116p.im
1778 + self.twiddle4.re * x215p.im
1779 + self.twiddle6.re * x314p.im
1780 + self.twiddle8.re * x413p.im
1781 + self.twiddle7.re * x512p.im
1782 + self.twiddle5.re * x611p.im
1783 + self.twiddle3.re * x710p.im
1784 + self.twiddle1.re * x89p.im;
1785 let b215im_b = self.twiddle2.im * x116n.re
1786 + self.twiddle4.im * x215n.re
1787 + self.twiddle6.im * x314n.re
1788 + self.twiddle8.im * x413n.re
1789 + -self.twiddle7.im * x512n.re
1790 + -self.twiddle5.im * x611n.re
1791 + -self.twiddle3.im * x710n.re
1792 + -self.twiddle1.im * x89n.re;
1793 let b314im_a = buffer.load(0).im
1794 + self.twiddle3.re * x116p.im
1795 + self.twiddle6.re * x215p.im
1796 + self.twiddle8.re * x314p.im
1797 + self.twiddle5.re * x413p.im
1798 + self.twiddle2.re * x512p.im
1799 + self.twiddle1.re * x611p.im
1800 + self.twiddle4.re * x710p.im
1801 + self.twiddle7.re * x89p.im;
1802 let b314im_b = self.twiddle3.im * x116n.re
1803 + self.twiddle6.im * x215n.re
1804 + -self.twiddle8.im * x314n.re
1805 + -self.twiddle5.im * x413n.re
1806 + -self.twiddle2.im * x512n.re
1807 + self.twiddle1.im * x611n.re
1808 + self.twiddle4.im * x710n.re
1809 + self.twiddle7.im * x89n.re;
1810 let b413im_a = buffer.load(0).im
1811 + self.twiddle4.re * x116p.im
1812 + self.twiddle8.re * x215p.im
1813 + self.twiddle5.re * x314p.im
1814 + self.twiddle1.re * x413p.im
1815 + self.twiddle3.re * x512p.im
1816 + self.twiddle7.re * x611p.im
1817 + self.twiddle6.re * x710p.im
1818 + self.twiddle2.re * x89p.im;
1819 let b413im_b = self.twiddle4.im * x116n.re
1820 + self.twiddle8.im * x215n.re
1821 + -self.twiddle5.im * x314n.re
1822 + -self.twiddle1.im * x413n.re
1823 + self.twiddle3.im * x512n.re
1824 + self.twiddle7.im * x611n.re
1825 + -self.twiddle6.im * x710n.re
1826 + -self.twiddle2.im * x89n.re;
1827 let b512im_a = buffer.load(0).im
1828 + self.twiddle5.re * x116p.im
1829 + self.twiddle7.re * x215p.im
1830 + self.twiddle2.re * x314p.im
1831 + self.twiddle3.re * x413p.im
1832 + self.twiddle8.re * x512p.im
1833 + self.twiddle4.re * x611p.im
1834 + self.twiddle1.re * x710p.im
1835 + self.twiddle6.re * x89p.im;
1836 let b512im_b = self.twiddle5.im * x116n.re
1837 + -self.twiddle7.im * x215n.re
1838 + -self.twiddle2.im * x314n.re
1839 + self.twiddle3.im * x413n.re
1840 + self.twiddle8.im * x512n.re
1841 + -self.twiddle4.im * x611n.re
1842 + self.twiddle1.im * x710n.re
1843 + self.twiddle6.im * x89n.re;
1844 let b611im_a = buffer.load(0).im
1845 + self.twiddle6.re * x116p.im
1846 + self.twiddle5.re * x215p.im
1847 + self.twiddle1.re * x314p.im
1848 + self.twiddle7.re * x413p.im
1849 + self.twiddle4.re * x512p.im
1850 + self.twiddle2.re * x611p.im
1851 + self.twiddle8.re * x710p.im
1852 + self.twiddle3.re * x89p.im;
1853 let b611im_b = self.twiddle6.im * x116n.re
1854 + -self.twiddle5.im * x215n.re
1855 + self.twiddle1.im * x314n.re
1856 + self.twiddle7.im * x413n.re
1857 + -self.twiddle4.im * x512n.re
1858 + self.twiddle2.im * x611n.re
1859 + self.twiddle8.im * x710n.re
1860 + -self.twiddle3.im * x89n.re;
1861 let b710im_a = buffer.load(0).im
1862 + self.twiddle7.re * x116p.im
1863 + self.twiddle3.re * x215p.im
1864 + self.twiddle4.re * x314p.im
1865 + self.twiddle6.re * x413p.im
1866 + self.twiddle1.re * x512p.im
1867 + self.twiddle8.re * x611p.im
1868 + self.twiddle2.re * x710p.im
1869 + self.twiddle5.re * x89p.im;
1870 let b710im_b = self.twiddle7.im * x116n.re
1871 + -self.twiddle3.im * x215n.re
1872 + self.twiddle4.im * x314n.re
1873 + -self.twiddle6.im * x413n.re
1874 + self.twiddle1.im * x512n.re
1875 + self.twiddle8.im * x611n.re
1876 + -self.twiddle2.im * x710n.re
1877 + self.twiddle5.im * x89n.re;
1878 let b89im_a = buffer.load(0).im
1879 + self.twiddle8.re * x116p.im
1880 + self.twiddle1.re * x215p.im
1881 + self.twiddle7.re * x314p.im
1882 + self.twiddle2.re * x413p.im
1883 + self.twiddle6.re * x512p.im
1884 + self.twiddle3.re * x611p.im
1885 + self.twiddle5.re * x710p.im
1886 + self.twiddle4.re * x89p.im;
1887 let b89im_b = self.twiddle8.im * x116n.re
1888 + -self.twiddle1.im * x215n.re
1889 + self.twiddle7.im * x314n.re
1890 + -self.twiddle2.im * x413n.re
1891 + self.twiddle6.im * x512n.re
1892 + -self.twiddle3.im * x611n.re
1893 + self.twiddle5.im * x710n.re
1894 + -self.twiddle4.im * x89n.re;
1895
1896 let out1re = b116re_a - b116re_b;
1897 let out1im = b116im_a + b116im_b;
1898 let out2re = b215re_a - b215re_b;
1899 let out2im = b215im_a + b215im_b;
1900 let out3re = b314re_a - b314re_b;
1901 let out3im = b314im_a + b314im_b;
1902 let out4re = b413re_a - b413re_b;
1903 let out4im = b413im_a + b413im_b;
1904 let out5re = b512re_a - b512re_b;
1905 let out5im = b512im_a + b512im_b;
1906 let out6re = b611re_a - b611re_b;
1907 let out6im = b611im_a + b611im_b;
1908 let out7re = b710re_a - b710re_b;
1909 let out7im = b710im_a + b710im_b;
1910 let out8re = b89re_a - b89re_b;
1911 let out8im = b89im_a + b89im_b;
1912 let out9re = b89re_a + b89re_b;
1913 let out9im = b89im_a - b89im_b;
1914 let out10re = b710re_a + b710re_b;
1915 let out10im = b710im_a - b710im_b;
1916 let out11re = b611re_a + b611re_b;
1917 let out11im = b611im_a - b611im_b;
1918 let out12re = b512re_a + b512re_b;
1919 let out12im = b512im_a - b512im_b;
1920 let out13re = b413re_a + b413re_b;
1921 let out13im = b413im_a - b413im_b;
1922 let out14re = b314re_a + b314re_b;
1923 let out14im = b314im_a - b314im_b;
1924 let out15re = b215re_a + b215re_b;
1925 let out15im = b215im_a - b215im_b;
1926 let out16re = b116re_a + b116re_b;
1927 let out16im = b116im_a - b116im_b;
1928 buffer.store(sum, 0);
1929 buffer.store(
1930 Complex {
1931 re: out1re,
1932 im: out1im,
1933 },
1934 1,
1935 );
1936 buffer.store(
1937 Complex {
1938 re: out2re,
1939 im: out2im,
1940 },
1941 2,
1942 );
1943 buffer.store(
1944 Complex {
1945 re: out3re,
1946 im: out3im,
1947 },
1948 3,
1949 );
1950 buffer.store(
1951 Complex {
1952 re: out4re,
1953 im: out4im,
1954 },
1955 4,
1956 );
1957 buffer.store(
1958 Complex {
1959 re: out5re,
1960 im: out5im,
1961 },
1962 5,
1963 );
1964 buffer.store(
1965 Complex {
1966 re: out6re,
1967 im: out6im,
1968 },
1969 6,
1970 );
1971 buffer.store(
1972 Complex {
1973 re: out7re,
1974 im: out7im,
1975 },
1976 7,
1977 );
1978 buffer.store(
1979 Complex {
1980 re: out8re,
1981 im: out8im,
1982 },
1983 8,
1984 );
1985 buffer.store(
1986 Complex {
1987 re: out9re,
1988 im: out9im,
1989 },
1990 9,
1991 );
1992 buffer.store(
1993 Complex {
1994 re: out10re,
1995 im: out10im,
1996 },
1997 10,
1998 );
1999 buffer.store(
2000 Complex {
2001 re: out11re,
2002 im: out11im,
2003 },
2004 11,
2005 );
2006 buffer.store(
2007 Complex {
2008 re: out12re,
2009 im: out12im,
2010 },
2011 12,
2012 );
2013 buffer.store(
2014 Complex {
2015 re: out13re,
2016 im: out13im,
2017 },
2018 13,
2019 );
2020 buffer.store(
2021 Complex {
2022 re: out14re,
2023 im: out14im,
2024 },
2025 14,
2026 );
2027 buffer.store(
2028 Complex {
2029 re: out15re,
2030 im: out15im,
2031 },
2032 15,
2033 );
2034 buffer.store(
2035 Complex {
2036 re: out16re,
2037 im: out16im,
2038 },
2039 16,
2040 );
2041 }
2042}
2043
/// Hard-coded FFT kernel for buffers of exactly 19 elements.
///
/// Stores the twiddle factors `compute_twiddle(k, 19, direction)` for `k = 1..=9`, filled in by
/// `new`, together with the direction they were computed for.
pub struct Butterfly19<T> {
    twiddle1: Complex<T>,
    twiddle2: Complex<T>,
    twiddle3: Complex<T>,
    twiddle4: Complex<T>,
    twiddle5: Complex<T>,
    twiddle6: Complex<T>,
    twiddle7: Complex<T>,
    twiddle8: Complex<T>,
    twiddle9: Complex<T>,
    /// Direction passed to `new`; reported through the `Direction` trait.
    direction: FftDirection,
}
// Generates the `Fft`, `Length` (always 19) and `Direction` impls for this kernel.
boilerplate_fft_butterfly!(Butterfly19, 19, |this: &Butterfly19<_>| this.direction);
2057impl<T: FftNum> Butterfly19<T> {
2058 pub fn new(direction: FftDirection) -> Self {
2059 let twiddle1: Complex<T> = twiddles::compute_twiddle(1, 19, direction);
2060 let twiddle2: Complex<T> = twiddles::compute_twiddle(2, 19, direction);
2061 let twiddle3: Complex<T> = twiddles::compute_twiddle(3, 19, direction);
2062 let twiddle4: Complex<T> = twiddles::compute_twiddle(4, 19, direction);
2063 let twiddle5: Complex<T> = twiddles::compute_twiddle(5, 19, direction);
2064 let twiddle6: Complex<T> = twiddles::compute_twiddle(6, 19, direction);
2065 let twiddle7: Complex<T> = twiddles::compute_twiddle(7, 19, direction);
2066 let twiddle8: Complex<T> = twiddles::compute_twiddle(8, 19, direction);
2067 let twiddle9: Complex<T> = twiddles::compute_twiddle(9, 19, direction);
2068 Self {
2069 twiddle1,
2070 twiddle2,
2071 twiddle3,
2072 twiddle4,
2073 twiddle5,
2074 twiddle6,
2075 twiddle7,
2076 twiddle8,
2077 twiddle9,
2078 direction,
2079 }
2080 }
2081
2082 #[inline(never)]
2083 unsafe fn perform_fft_contiguous(&self, mut buffer: impl LoadStore<T>) {
2084 let x118p = buffer.load(1) + buffer.load(18);
2088 let x118n = buffer.load(1) - buffer.load(18);
2089 let x217p = buffer.load(2) + buffer.load(17);
2090 let x217n = buffer.load(2) - buffer.load(17);
2091 let x316p = buffer.load(3) + buffer.load(16);
2092 let x316n = buffer.load(3) - buffer.load(16);
2093 let x415p = buffer.load(4) + buffer.load(15);
2094 let x415n = buffer.load(4) - buffer.load(15);
2095 let x514p = buffer.load(5) + buffer.load(14);
2096 let x514n = buffer.load(5) - buffer.load(14);
2097 let x613p = buffer.load(6) + buffer.load(13);
2098 let x613n = buffer.load(6) - buffer.load(13);
2099 let x712p = buffer.load(7) + buffer.load(12);
2100 let x712n = buffer.load(7) - buffer.load(12);
2101 let x811p = buffer.load(8) + buffer.load(11);
2102 let x811n = buffer.load(8) - buffer.load(11);
2103 let x910p = buffer.load(9) + buffer.load(10);
2104 let x910n = buffer.load(9) - buffer.load(10);
2105 let sum =
2106 buffer.load(0) + x118p + x217p + x316p + x415p + x514p + x613p + x712p + x811p + x910p;
2107 let b118re_a = buffer.load(0).re
2108 + self.twiddle1.re * x118p.re
2109 + self.twiddle2.re * x217p.re
2110 + self.twiddle3.re * x316p.re
2111 + self.twiddle4.re * x415p.re
2112 + self.twiddle5.re * x514p.re
2113 + self.twiddle6.re * x613p.re
2114 + self.twiddle7.re * x712p.re
2115 + self.twiddle8.re * x811p.re
2116 + self.twiddle9.re * x910p.re;
2117 let b118re_b = self.twiddle1.im * x118n.im
2118 + self.twiddle2.im * x217n.im
2119 + self.twiddle3.im * x316n.im
2120 + self.twiddle4.im * x415n.im
2121 + self.twiddle5.im * x514n.im
2122 + self.twiddle6.im * x613n.im
2123 + self.twiddle7.im * x712n.im
2124 + self.twiddle8.im * x811n.im
2125 + self.twiddle9.im * x910n.im;
2126 let b217re_a = buffer.load(0).re
2127 + self.twiddle2.re * x118p.re
2128 + self.twiddle4.re * x217p.re
2129 + self.twiddle6.re * x316p.re
2130 + self.twiddle8.re * x415p.re
2131 + self.twiddle9.re * x514p.re
2132 + self.twiddle7.re * x613p.re
2133 + self.twiddle5.re * x712p.re
2134 + self.twiddle3.re * x811p.re
2135 + self.twiddle1.re * x910p.re;
2136 let b217re_b = self.twiddle2.im * x118n.im
2137 + self.twiddle4.im * x217n.im
2138 + self.twiddle6.im * x316n.im
2139 + self.twiddle8.im * x415n.im
2140 + -self.twiddle9.im * x514n.im
2141 + -self.twiddle7.im * x613n.im
2142 + -self.twiddle5.im * x712n.im
2143 + -self.twiddle3.im * x811n.im
2144 + -self.twiddle1.im * x910n.im;
2145 let b316re_a = buffer.load(0).re
2146 + self.twiddle3.re * x118p.re
2147 + self.twiddle6.re * x217p.re
2148 + self.twiddle9.re * x316p.re
2149 + self.twiddle7.re * x415p.re
2150 + self.twiddle4.re * x514p.re
2151 + self.twiddle1.re * x613p.re
2152 + self.twiddle2.re * x712p.re
2153 + self.twiddle5.re * x811p.re
2154 + self.twiddle8.re * x910p.re;
2155 let b316re_b = self.twiddle3.im * x118n.im
2156 + self.twiddle6.im * x217n.im
2157 + self.twiddle9.im * x316n.im
2158 + -self.twiddle7.im * x415n.im
2159 + -self.twiddle4.im * x514n.im
2160 + -self.twiddle1.im * x613n.im
2161 + self.twiddle2.im * x712n.im
2162 + self.twiddle5.im * x811n.im
2163 + self.twiddle8.im * x910n.im;
2164 let b415re_a = buffer.load(0).re
2165 + self.twiddle4.re * x118p.re
2166 + self.twiddle8.re * x217p.re
2167 + self.twiddle7.re * x316p.re
2168 + self.twiddle3.re * x415p.re
2169 + self.twiddle1.re * x514p.re
2170 + self.twiddle5.re * x613p.re
2171 + self.twiddle9.re * x712p.re
2172 + self.twiddle6.re * x811p.re
2173 + self.twiddle2.re * x910p.re;
2174 let b415re_b = self.twiddle4.im * x118n.im
2175 + self.twiddle8.im * x217n.im
2176 + -self.twiddle7.im * x316n.im
2177 + -self.twiddle3.im * x415n.im
2178 + self.twiddle1.im * x514n.im
2179 + self.twiddle5.im * x613n.im
2180 + self.twiddle9.im * x712n.im
2181 + -self.twiddle6.im * x811n.im
2182 + -self.twiddle2.im * x910n.im;
2183 let b514re_a = buffer.load(0).re
2184 + self.twiddle5.re * x118p.re
2185 + self.twiddle9.re * x217p.re
2186 + self.twiddle4.re * x316p.re
2187 + self.twiddle1.re * x415p.re
2188 + self.twiddle6.re * x514p.re
2189 + self.twiddle8.re * x613p.re
2190 + self.twiddle3.re * x712p.re
2191 + self.twiddle2.re * x811p.re
2192 + self.twiddle7.re * x910p.re;
2193 let b514re_b = self.twiddle5.im * x118n.im
2194 + -self.twiddle9.im * x217n.im
2195 + -self.twiddle4.im * x316n.im
2196 + self.twiddle1.im * x415n.im
2197 + self.twiddle6.im * x514n.im
2198 + -self.twiddle8.im * x613n.im
2199 + -self.twiddle3.im * x712n.im
2200 + self.twiddle2.im * x811n.im
2201 + self.twiddle7.im * x910n.im;
2202 let b613re_a = buffer.load(0).re
2203 + self.twiddle6.re * x118p.re
2204 + self.twiddle7.re * x217p.re
2205 + self.twiddle1.re * x316p.re
2206 + self.twiddle5.re * x415p.re
2207 + self.twiddle8.re * x514p.re
2208 + self.twiddle2.re * x613p.re
2209 + self.twiddle4.re * x712p.re
2210 + self.twiddle9.re * x811p.re
2211 + self.twiddle3.re * x910p.re;
2212 let b613re_b = self.twiddle6.im * x118n.im
2213 + -self.twiddle7.im * x217n.im
2214 + -self.twiddle1.im * x316n.im
2215 + self.twiddle5.im * x415n.im
2216 + -self.twiddle8.im * x514n.im
2217 + -self.twiddle2.im * x613n.im
2218 + self.twiddle4.im * x712n.im
2219 + -self.twiddle9.im * x811n.im
2220 + -self.twiddle3.im * x910n.im;
2221 let b712re_a = buffer.load(0).re
2222 + self.twiddle7.re * x118p.re
2223 + self.twiddle5.re * x217p.re
2224 + self.twiddle2.re * x316p.re
2225 + self.twiddle9.re * x415p.re
2226 + self.twiddle3.re * x514p.re
2227 + self.twiddle4.re * x613p.re
2228 + self.twiddle8.re * x712p.re
2229 + self.twiddle1.re * x811p.re
2230 + self.twiddle6.re * x910p.re;
2231 let b712re_b = self.twiddle7.im * x118n.im
2232 + -self.twiddle5.im * x217n.im
2233 + self.twiddle2.im * x316n.im
2234 + self.twiddle9.im * x415n.im
2235 + -self.twiddle3.im * x514n.im
2236 + self.twiddle4.im * x613n.im
2237 + -self.twiddle8.im * x712n.im
2238 + -self.twiddle1.im * x811n.im
2239 + self.twiddle6.im * x910n.im;
2240 let b811re_a = buffer.load(0).re
2241 + self.twiddle8.re * x118p.re
2242 + self.twiddle3.re * x217p.re
2243 + self.twiddle5.re * x316p.re
2244 + self.twiddle6.re * x415p.re
2245 + self.twiddle2.re * x514p.re
2246 + self.twiddle9.re * x613p.re
2247 + self.twiddle1.re * x712p.re
2248 + self.twiddle7.re * x811p.re
2249 + self.twiddle4.re * x910p.re;
2250 let b811re_b = self.twiddle8.im * x118n.im
2251 + -self.twiddle3.im * x217n.im
2252 + self.twiddle5.im * x316n.im
2253 + -self.twiddle6.im * x415n.im
2254 + self.twiddle2.im * x514n.im
2255 + -self.twiddle9.im * x613n.im
2256 + -self.twiddle1.im * x712n.im
2257 + self.twiddle7.im * x811n.im
2258 + -self.twiddle4.im * x910n.im;
2259 let b910re_a = buffer.load(0).re
2260 + self.twiddle9.re * x118p.re
2261 + self.twiddle1.re * x217p.re
2262 + self.twiddle8.re * x316p.re
2263 + self.twiddle2.re * x415p.re
2264 + self.twiddle7.re * x514p.re
2265 + self.twiddle3.re * x613p.re
2266 + self.twiddle6.re * x712p.re
2267 + self.twiddle4.re * x811p.re
2268 + self.twiddle5.re * x910p.re;
2269 let b910re_b = self.twiddle9.im * x118n.im
2270 + -self.twiddle1.im * x217n.im
2271 + self.twiddle8.im * x316n.im
2272 + -self.twiddle2.im * x415n.im
2273 + self.twiddle7.im * x514n.im
2274 + -self.twiddle3.im * x613n.im
2275 + self.twiddle6.im * x712n.im
2276 + -self.twiddle4.im * x811n.im
2277 + self.twiddle5.im * x910n.im;
2278
2279 let b118im_a = buffer.load(0).im
2280 + self.twiddle1.re * x118p.im
2281 + self.twiddle2.re * x217p.im
2282 + self.twiddle3.re * x316p.im
2283 + self.twiddle4.re * x415p.im
2284 + self.twiddle5.re * x514p.im
2285 + self.twiddle6.re * x613p.im
2286 + self.twiddle7.re * x712p.im
2287 + self.twiddle8.re * x811p.im
2288 + self.twiddle9.re * x910p.im;
2289 let b118im_b = self.twiddle1.im * x118n.re
2290 + self.twiddle2.im * x217n.re
2291 + self.twiddle3.im * x316n.re
2292 + self.twiddle4.im * x415n.re
2293 + self.twiddle5.im * x514n.re
2294 + self.twiddle6.im * x613n.re
2295 + self.twiddle7.im * x712n.re
2296 + self.twiddle8.im * x811n.re
2297 + self.twiddle9.im * x910n.re;
2298 let b217im_a = buffer.load(0).im
2299 + self.twiddle2.re * x118p.im
2300 + self.twiddle4.re * x217p.im
2301 + self.twiddle6.re * x316p.im
2302 + self.twiddle8.re * x415p.im
2303 + self.twiddle9.re * x514p.im
2304 + self.twiddle7.re * x613p.im
2305 + self.twiddle5.re * x712p.im
2306 + self.twiddle3.re * x811p.im
2307 + self.twiddle1.re * x910p.im;
2308 let b217im_b = self.twiddle2.im * x118n.re
2309 + self.twiddle4.im * x217n.re
2310 + self.twiddle6.im * x316n.re
2311 + self.twiddle8.im * x415n.re
2312 + -self.twiddle9.im * x514n.re
2313 + -self.twiddle7.im * x613n.re
2314 + -self.twiddle5.im * x712n.re
2315 + -self.twiddle3.im * x811n.re
2316 + -self.twiddle1.im * x910n.re;
2317 let b316im_a = buffer.load(0).im
2318 + self.twiddle3.re * x118p.im
2319 + self.twiddle6.re * x217p.im
2320 + self.twiddle9.re * x316p.im
2321 + self.twiddle7.re * x415p.im
2322 + self.twiddle4.re * x514p.im
2323 + self.twiddle1.re * x613p.im
2324 + self.twiddle2.re * x712p.im
2325 + self.twiddle5.re * x811p.im
2326 + self.twiddle8.re * x910p.im;
2327 let b316im_b = self.twiddle3.im * x118n.re
2328 + self.twiddle6.im * x217n.re
2329 + self.twiddle9.im * x316n.re
2330 + -self.twiddle7.im * x415n.re
2331 + -self.twiddle4.im * x514n.re
2332 + -self.twiddle1.im * x613n.re
2333 + self.twiddle2.im * x712n.re
2334 + self.twiddle5.im * x811n.re
2335 + self.twiddle8.im * x910n.re;
2336 let b415im_a = buffer.load(0).im
2337 + self.twiddle4.re * x118p.im
2338 + self.twiddle8.re * x217p.im
2339 + self.twiddle7.re * x316p.im
2340 + self.twiddle3.re * x415p.im
2341 + self.twiddle1.re * x514p.im
2342 + self.twiddle5.re * x613p.im
2343 + self.twiddle9.re * x712p.im
2344 + self.twiddle6.re * x811p.im
2345 + self.twiddle2.re * x910p.im;
2346 let b415im_b = self.twiddle4.im * x118n.re
2347 + self.twiddle8.im * x217n.re
2348 + -self.twiddle7.im * x316n.re
2349 + -self.twiddle3.im * x415n.re
2350 + self.twiddle1.im * x514n.re
2351 + self.twiddle5.im * x613n.re
2352 + self.twiddle9.im * x712n.re
2353 + -self.twiddle6.im * x811n.re
2354 + -self.twiddle2.im * x910n.re;
2355 let b514im_a = buffer.load(0).im
2356 + self.twiddle5.re * x118p.im
2357 + self.twiddle9.re * x217p.im
2358 + self.twiddle4.re * x316p.im
2359 + self.twiddle1.re * x415p.im
2360 + self.twiddle6.re * x514p.im
2361 + self.twiddle8.re * x613p.im
2362 + self.twiddle3.re * x712p.im
2363 + self.twiddle2.re * x811p.im
2364 + self.twiddle7.re * x910p.im;
2365 let b514im_b = self.twiddle5.im * x118n.re
2366 + -self.twiddle9.im * x217n.re
2367 + -self.twiddle4.im * x316n.re
2368 + self.twiddle1.im * x415n.re
2369 + self.twiddle6.im * x514n.re
2370 + -self.twiddle8.im * x613n.re
2371 + -self.twiddle3.im * x712n.re
2372 + self.twiddle2.im * x811n.re
2373 + self.twiddle7.im * x910n.re;
2374 let b613im_a = buffer.load(0).im
2375 + self.twiddle6.re * x118p.im
2376 + self.twiddle7.re * x217p.im
2377 + self.twiddle1.re * x316p.im
2378 + self.twiddle5.re * x415p.im
2379 + self.twiddle8.re * x514p.im
2380 + self.twiddle2.re * x613p.im
2381 + self.twiddle4.re * x712p.im
2382 + self.twiddle9.re * x811p.im
2383 + self.twiddle3.re * x910p.im;
2384 let b613im_b = self.twiddle6.im * x118n.re
2385 + -self.twiddle7.im * x217n.re
2386 + -self.twiddle1.im * x316n.re
2387 + self.twiddle5.im * x415n.re
2388 + -self.twiddle8.im * x514n.re
2389 + -self.twiddle2.im * x613n.re
2390 + self.twiddle4.im * x712n.re
2391 + -self.twiddle9.im * x811n.re
2392 + -self.twiddle3.im * x910n.re;
2393 let b712im_a = buffer.load(0).im
2394 + self.twiddle7.re * x118p.im
2395 + self.twiddle5.re * x217p.im
2396 + self.twiddle2.re * x316p.im
2397 + self.twiddle9.re * x415p.im
2398 + self.twiddle3.re * x514p.im
2399 + self.twiddle4.re * x613p.im
2400 + self.twiddle8.re * x712p.im
2401 + self.twiddle1.re * x811p.im
2402 + self.twiddle6.re * x910p.im;
2403 let b712im_b = self.twiddle7.im * x118n.re
2404 + -self.twiddle5.im * x217n.re
2405 + self.twiddle2.im * x316n.re
2406 + self.twiddle9.im * x415n.re
2407 + -self.twiddle3.im * x514n.re
2408 + self.twiddle4.im * x613n.re
2409 + -self.twiddle8.im * x712n.re
2410 + -self.twiddle1.im * x811n.re
2411 + self.twiddle6.im * x910n.re;
2412 let b811im_a = buffer.load(0).im
2413 + self.twiddle8.re * x118p.im
2414 + self.twiddle3.re * x217p.im
2415 + self.twiddle5.re * x316p.im
2416 + self.twiddle6.re * x415p.im
2417 + self.twiddle2.re * x514p.im
2418 + self.twiddle9.re * x613p.im
2419 + self.twiddle1.re * x712p.im
2420 + self.twiddle7.re * x811p.im
2421 + self.twiddle4.re * x910p.im;
2422 let b811im_b = self.twiddle8.im * x118n.re
2423 + -self.twiddle3.im * x217n.re
2424 + self.twiddle5.im * x316n.re
2425 + -self.twiddle6.im * x415n.re
2426 + self.twiddle2.im * x514n.re
2427 + -self.twiddle9.im * x613n.re
2428 + -self.twiddle1.im * x712n.re
2429 + self.twiddle7.im * x811n.re
2430 + -self.twiddle4.im * x910n.re;
2431 let b910im_a = buffer.load(0).im
2432 + self.twiddle9.re * x118p.im
2433 + self.twiddle1.re * x217p.im
2434 + self.twiddle8.re * x316p.im
2435 + self.twiddle2.re * x415p.im
2436 + self.twiddle7.re * x514p.im
2437 + self.twiddle3.re * x613p.im
2438 + self.twiddle6.re * x712p.im
2439 + self.twiddle4.re * x811p.im
2440 + self.twiddle5.re * x910p.im;
2441 let b910im_b = self.twiddle9.im * x118n.re
2442 + -self.twiddle1.im * x217n.re
2443 + self.twiddle8.im * x316n.re
2444 + -self.twiddle2.im * x415n.re
2445 + self.twiddle7.im * x514n.re
2446 + -self.twiddle3.im * x613n.re
2447 + self.twiddle6.im * x712n.re
2448 + -self.twiddle4.im * x811n.re
2449 + self.twiddle5.im * x910n.re;
2450
2451 let out1re = b118re_a - b118re_b;
2452 let out1im = b118im_a + b118im_b;
2453 let out2re = b217re_a - b217re_b;
2454 let out2im = b217im_a + b217im_b;
2455 let out3re = b316re_a - b316re_b;
2456 let out3im = b316im_a + b316im_b;
2457 let out4re = b415re_a - b415re_b;
2458 let out4im = b415im_a + b415im_b;
2459 let out5re = b514re_a - b514re_b;
2460 let out5im = b514im_a + b514im_b;
2461 let out6re = b613re_a - b613re_b;
2462 let out6im = b613im_a + b613im_b;
2463 let out7re = b712re_a - b712re_b;
2464 let out7im = b712im_a + b712im_b;
2465 let out8re = b811re_a - b811re_b;
2466 let out8im = b811im_a + b811im_b;
2467 let out9re = b910re_a - b910re_b;
2468 let out9im = b910im_a + b910im_b;
2469 let out10re = b910re_a + b910re_b;
2470 let out10im = b910im_a - b910im_b;
2471 let out11re = b811re_a + b811re_b;
2472 let out11im = b811im_a - b811im_b;
2473 let out12re = b712re_a + b712re_b;
2474 let out12im = b712im_a - b712im_b;
2475 let out13re = b613re_a + b613re_b;
2476 let out13im = b613im_a - b613im_b;
2477 let out14re = b514re_a + b514re_b;
2478 let out14im = b514im_a - b514im_b;
2479 let out15re = b415re_a + b415re_b;
2480 let out15im = b415im_a - b415im_b;
2481 let out16re = b316re_a + b316re_b;
2482 let out16im = b316im_a - b316im_b;
2483 let out17re = b217re_a + b217re_b;
2484 let out17im = b217im_a - b217im_b;
2485 let out18re = b118re_a + b118re_b;
2486 let out18im = b118im_a - b118im_b;
2487 buffer.store(sum, 0);
2488 buffer.store(
2489 Complex {
2490 re: out1re,
2491 im: out1im,
2492 },
2493 1,
2494 );
2495 buffer.store(
2496 Complex {
2497 re: out2re,
2498 im: out2im,
2499 },
2500 2,
2501 );
2502 buffer.store(
2503 Complex {
2504 re: out3re,
2505 im: out3im,
2506 },
2507 3,
2508 );
2509 buffer.store(
2510 Complex {
2511 re: out4re,
2512 im: out4im,
2513 },
2514 4,
2515 );
2516 buffer.store(
2517 Complex {
2518 re: out5re,
2519 im: out5im,
2520 },
2521 5,
2522 );
2523 buffer.store(
2524 Complex {
2525 re: out6re,
2526 im: out6im,
2527 },
2528 6,
2529 );
2530 buffer.store(
2531 Complex {
2532 re: out7re,
2533 im: out7im,
2534 },
2535 7,
2536 );
2537 buffer.store(
2538 Complex {
2539 re: out8re,
2540 im: out8im,
2541 },
2542 8,
2543 );
2544 buffer.store(
2545 Complex {
2546 re: out9re,
2547 im: out9im,
2548 },
2549 9,
2550 );
2551 buffer.store(
2552 Complex {
2553 re: out10re,
2554 im: out10im,
2555 },
2556 10,
2557 );
2558 buffer.store(
2559 Complex {
2560 re: out11re,
2561 im: out11im,
2562 },
2563 11,
2564 );
2565 buffer.store(
2566 Complex {
2567 re: out12re,
2568 im: out12im,
2569 },
2570 12,
2571 );
2572 buffer.store(
2573 Complex {
2574 re: out13re,
2575 im: out13im,
2576 },
2577 13,
2578 );
2579 buffer.store(
2580 Complex {
2581 re: out14re,
2582 im: out14im,
2583 },
2584 14,
2585 );
2586 buffer.store(
2587 Complex {
2588 re: out15re,
2589 im: out15im,
2590 },
2591 15,
2592 );
2593 buffer.store(
2594 Complex {
2595 re: out16re,
2596 im: out16im,
2597 },
2598 16,
2599 );
2600 buffer.store(
2601 Complex {
2602 re: out17re,
2603 im: out17im,
2604 },
2605 17,
2606 );
2607 buffer.store(
2608 Complex {
2609 re: out18re,
2610 im: out18im,
2611 },
2612 18,
2613 );
2614 }
2615}
2616
/// Hard-coded FFT butterfly for a fixed length of 23.
///
/// Stores the eleven twiddle factors that `Butterfly23::new` obtains from
/// `twiddles::compute_twiddle(k, 23, direction)` for `k = 1..=11`, together with the
/// direction they were computed for.
pub struct Butterfly23<T> {
    // `twiddleK` holds the value computed for index K (out of 23) in `new`.
    twiddle1: Complex<T>,
    twiddle2: Complex<T>,
    twiddle3: Complex<T>,
    twiddle4: Complex<T>,
    twiddle5: Complex<T>,
    twiddle6: Complex<T>,
    twiddle7: Complex<T>,
    twiddle8: Complex<T>,
    twiddle9: Complex<T>,
    twiddle10: Complex<T>,
    twiddle11: Complex<T>,
    // Direction handed to `new`; the closure passed to the macro below reads it back.
    direction: FftDirection,
}
// Generates the shared butterfly trait impls for this type: the reported length is 23 and
// the closure supplies the stored direction.
boilerplate_fft_butterfly!(Butterfly23, 23, |this: &Butterfly23<_>| this.direction);
2632impl<T: FftNum> Butterfly23<T> {
2633 pub fn new(direction: FftDirection) -> Self {
2634 let twiddle1: Complex<T> = twiddles::compute_twiddle(1, 23, direction);
2635 let twiddle2: Complex<T> = twiddles::compute_twiddle(2, 23, direction);
2636 let twiddle3: Complex<T> = twiddles::compute_twiddle(3, 23, direction);
2637 let twiddle4: Complex<T> = twiddles::compute_twiddle(4, 23, direction);
2638 let twiddle5: Complex<T> = twiddles::compute_twiddle(5, 23, direction);
2639 let twiddle6: Complex<T> = twiddles::compute_twiddle(6, 23, direction);
2640 let twiddle7: Complex<T> = twiddles::compute_twiddle(7, 23, direction);
2641 let twiddle8: Complex<T> = twiddles::compute_twiddle(8, 23, direction);
2642 let twiddle9: Complex<T> = twiddles::compute_twiddle(9, 23, direction);
2643 let twiddle10: Complex<T> = twiddles::compute_twiddle(10, 23, direction);
2644 let twiddle11: Complex<T> = twiddles::compute_twiddle(11, 23, direction);
2645 Self {
2646 twiddle1,
2647 twiddle2,
2648 twiddle3,
2649 twiddle4,
2650 twiddle5,
2651 twiddle6,
2652 twiddle7,
2653 twiddle8,
2654 twiddle9,
2655 twiddle10,
2656 twiddle11,
2657 direction,
2658 }
2659 }
2660
    /// Computes the 23-point transform of `buffer` and writes all 23 outputs back through it.
    ///
    /// Inputs `1..=22` are first folded into eleven symmetric pairs `(j, 23 - j)`: each
    /// `x…p` value is the sum of a pair and each `x…n` value is its difference. Every output
    /// is then a weighted sum of those pairs using the precomputed twiddles. In the row for
    /// output `k`, pair `j` uses the twiddle with index `m = (k * j) mod 23`; when `m > 11`
    /// the row uses twiddle `23 - m` instead and negates its imaginary coefficient.
    ///
    /// Outputs `k` and `23 - k` share the same `_a`/`_b` partial sums and differ only in
    /// the sign with which the `_b` part is combined.
    ///
    /// Every `load` in this function happens before the first `store`.
    ///
    /// # Safety
    ///
    /// Elements `0..=22` of `buffer` are loaded and stored.
    /// NOTE(review): presumably `buffer` must expose at least 23 valid elements — confirm
    /// against the `LoadStore` contract, which is not visible from this block.
    // NOTE(review): presumably kept out of line to limit code size — confirm before removing.
    #[inline(never)]
    unsafe fn perform_fft_contiguous(&self, mut buffer: impl LoadStore<T>) {
        // Pairwise sums (`p`) and differences (`n`) of inputs j and 23 - j.
        let x122p = buffer.load(1) + buffer.load(22);
        let x122n = buffer.load(1) - buffer.load(22);
        let x221p = buffer.load(2) + buffer.load(21);
        let x221n = buffer.load(2) - buffer.load(21);
        let x320p = buffer.load(3) + buffer.load(20);
        let x320n = buffer.load(3) - buffer.load(20);
        let x419p = buffer.load(4) + buffer.load(19);
        let x419n = buffer.load(4) - buffer.load(19);
        let x518p = buffer.load(5) + buffer.load(18);
        let x518n = buffer.load(5) - buffer.load(18);
        let x617p = buffer.load(6) + buffer.load(17);
        let x617n = buffer.load(6) - buffer.load(17);
        let x716p = buffer.load(7) + buffer.load(16);
        let x716n = buffer.load(7) - buffer.load(16);
        let x815p = buffer.load(8) + buffer.load(15);
        let x815n = buffer.load(8) - buffer.load(15);
        let x914p = buffer.load(9) + buffer.load(14);
        let x914n = buffer.load(9) - buffer.load(14);
        let x1013p = buffer.load(10) + buffer.load(13);
        let x1013n = buffer.load(10) - buffer.load(13);
        let x1112p = buffer.load(11) + buffer.load(12);
        let x1112n = buffer.load(11) - buffer.load(12);
        // Output 0: plain sum of all 23 inputs (input 0 plus the eleven pair sums).
        let sum = buffer.load(0)
            + x122p
            + x221p
            + x320p
            + x419p
            + x518p
            + x617p
            + x716p
            + x815p
            + x914p
            + x1013p
            + x1112p;
        // Real-part partial sums. `b{k}{23-k}re_a` combines input 0 with the pair sums via
        // the twiddles' real parts; `b{k}{23-k}re_b` combines the pair differences'
        // imaginary parts via the twiddles' imaginary parts.
        let b122re_a = buffer.load(0).re
            + self.twiddle1.re * x122p.re
            + self.twiddle2.re * x221p.re
            + self.twiddle3.re * x320p.re
            + self.twiddle4.re * x419p.re
            + self.twiddle5.re * x518p.re
            + self.twiddle6.re * x617p.re
            + self.twiddle7.re * x716p.re
            + self.twiddle8.re * x815p.re
            + self.twiddle9.re * x914p.re
            + self.twiddle10.re * x1013p.re
            + self.twiddle11.re * x1112p.re;
        let b122re_b = self.twiddle1.im * x122n.im
            + self.twiddle2.im * x221n.im
            + self.twiddle3.im * x320n.im
            + self.twiddle4.im * x419n.im
            + self.twiddle5.im * x518n.im
            + self.twiddle6.im * x617n.im
            + self.twiddle7.im * x716n.im
            + self.twiddle8.im * x815n.im
            + self.twiddle9.im * x914n.im
            + self.twiddle10.im * x1013n.im
            + self.twiddle11.im * x1112n.im;
        let b221re_a = buffer.load(0).re
            + self.twiddle2.re * x122p.re
            + self.twiddle4.re * x221p.re
            + self.twiddle6.re * x320p.re
            + self.twiddle8.re * x419p.re
            + self.twiddle10.re * x518p.re
            + self.twiddle11.re * x617p.re
            + self.twiddle9.re * x716p.re
            + self.twiddle7.re * x815p.re
            + self.twiddle5.re * x914p.re
            + self.twiddle3.re * x1013p.re
            + self.twiddle1.re * x1112p.re;
        let b221re_b = self.twiddle2.im * x122n.im
            + self.twiddle4.im * x221n.im
            + self.twiddle6.im * x320n.im
            + self.twiddle8.im * x419n.im
            + self.twiddle10.im * x518n.im
            + -self.twiddle11.im * x617n.im
            + -self.twiddle9.im * x716n.im
            + -self.twiddle7.im * x815n.im
            + -self.twiddle5.im * x914n.im
            + -self.twiddle3.im * x1013n.im
            + -self.twiddle1.im * x1112n.im;
        let b320re_a = buffer.load(0).re
            + self.twiddle3.re * x122p.re
            + self.twiddle6.re * x221p.re
            + self.twiddle9.re * x320p.re
            + self.twiddle11.re * x419p.re
            + self.twiddle8.re * x518p.re
            + self.twiddle5.re * x617p.re
            + self.twiddle2.re * x716p.re
            + self.twiddle1.re * x815p.re
            + self.twiddle4.re * x914p.re
            + self.twiddle7.re * x1013p.re
            + self.twiddle10.re * x1112p.re;
        let b320re_b = self.twiddle3.im * x122n.im
            + self.twiddle6.im * x221n.im
            + self.twiddle9.im * x320n.im
            + -self.twiddle11.im * x419n.im
            + -self.twiddle8.im * x518n.im
            + -self.twiddle5.im * x617n.im
            + -self.twiddle2.im * x716n.im
            + self.twiddle1.im * x815n.im
            + self.twiddle4.im * x914n.im
            + self.twiddle7.im * x1013n.im
            + self.twiddle10.im * x1112n.im;
        let b419re_a = buffer.load(0).re
            + self.twiddle4.re * x122p.re
            + self.twiddle8.re * x221p.re
            + self.twiddle11.re * x320p.re
            + self.twiddle7.re * x419p.re
            + self.twiddle3.re * x518p.re
            + self.twiddle1.re * x617p.re
            + self.twiddle5.re * x716p.re
            + self.twiddle9.re * x815p.re
            + self.twiddle10.re * x914p.re
            + self.twiddle6.re * x1013p.re
            + self.twiddle2.re * x1112p.re;
        let b419re_b = self.twiddle4.im * x122n.im
            + self.twiddle8.im * x221n.im
            + -self.twiddle11.im * x320n.im
            + -self.twiddle7.im * x419n.im
            + -self.twiddle3.im * x518n.im
            + self.twiddle1.im * x617n.im
            + self.twiddle5.im * x716n.im
            + self.twiddle9.im * x815n.im
            + -self.twiddle10.im * x914n.im
            + -self.twiddle6.im * x1013n.im
            + -self.twiddle2.im * x1112n.im;
        let b518re_a = buffer.load(0).re
            + self.twiddle5.re * x122p.re
            + self.twiddle10.re * x221p.re
            + self.twiddle8.re * x320p.re
            + self.twiddle3.re * x419p.re
            + self.twiddle2.re * x518p.re
            + self.twiddle7.re * x617p.re
            + self.twiddle11.re * x716p.re
            + self.twiddle6.re * x815p.re
            + self.twiddle1.re * x914p.re
            + self.twiddle4.re * x1013p.re
            + self.twiddle9.re * x1112p.re;
        let b518re_b = self.twiddle5.im * x122n.im
            + self.twiddle10.im * x221n.im
            + -self.twiddle8.im * x320n.im
            + -self.twiddle3.im * x419n.im
            + self.twiddle2.im * x518n.im
            + self.twiddle7.im * x617n.im
            + -self.twiddle11.im * x716n.im
            + -self.twiddle6.im * x815n.im
            + -self.twiddle1.im * x914n.im
            + self.twiddle4.im * x1013n.im
            + self.twiddle9.im * x1112n.im;
        let b617re_a = buffer.load(0).re
            + self.twiddle6.re * x122p.re
            + self.twiddle11.re * x221p.re
            + self.twiddle5.re * x320p.re
            + self.twiddle1.re * x419p.re
            + self.twiddle7.re * x518p.re
            + self.twiddle10.re * x617p.re
            + self.twiddle4.re * x716p.re
            + self.twiddle2.re * x815p.re
            + self.twiddle8.re * x914p.re
            + self.twiddle9.re * x1013p.re
            + self.twiddle3.re * x1112p.re;
        let b617re_b = self.twiddle6.im * x122n.im
            + -self.twiddle11.im * x221n.im
            + -self.twiddle5.im * x320n.im
            + self.twiddle1.im * x419n.im
            + self.twiddle7.im * x518n.im
            + -self.twiddle10.im * x617n.im
            + -self.twiddle4.im * x716n.im
            + self.twiddle2.im * x815n.im
            + self.twiddle8.im * x914n.im
            + -self.twiddle9.im * x1013n.im
            + -self.twiddle3.im * x1112n.im;
        let b716re_a = buffer.load(0).re
            + self.twiddle7.re * x122p.re
            + self.twiddle9.re * x221p.re
            + self.twiddle2.re * x320p.re
            + self.twiddle5.re * x419p.re
            + self.twiddle11.re * x518p.re
            + self.twiddle4.re * x617p.re
            + self.twiddle3.re * x716p.re
            + self.twiddle10.re * x815p.re
            + self.twiddle6.re * x914p.re
            + self.twiddle1.re * x1013p.re
            + self.twiddle8.re * x1112p.re;
        let b716re_b = self.twiddle7.im * x122n.im
            + -self.twiddle9.im * x221n.im
            + -self.twiddle2.im * x320n.im
            + self.twiddle5.im * x419n.im
            + -self.twiddle11.im * x518n.im
            + -self.twiddle4.im * x617n.im
            + self.twiddle3.im * x716n.im
            + self.twiddle10.im * x815n.im
            + -self.twiddle6.im * x914n.im
            + self.twiddle1.im * x1013n.im
            + self.twiddle8.im * x1112n.im;
        let b815re_a = buffer.load(0).re
            + self.twiddle8.re * x122p.re
            + self.twiddle7.re * x221p.re
            + self.twiddle1.re * x320p.re
            + self.twiddle9.re * x419p.re
            + self.twiddle6.re * x518p.re
            + self.twiddle2.re * x617p.re
            + self.twiddle10.re * x716p.re
            + self.twiddle5.re * x815p.re
            + self.twiddle3.re * x914p.re
            + self.twiddle11.re * x1013p.re
            + self.twiddle4.re * x1112p.re;
        let b815re_b = self.twiddle8.im * x122n.im
            + -self.twiddle7.im * x221n.im
            + self.twiddle1.im * x320n.im
            + self.twiddle9.im * x419n.im
            + -self.twiddle6.im * x518n.im
            + self.twiddle2.im * x617n.im
            + self.twiddle10.im * x716n.im
            + -self.twiddle5.im * x815n.im
            + self.twiddle3.im * x914n.im
            + self.twiddle11.im * x1013n.im
            + -self.twiddle4.im * x1112n.im;
        let b914re_a = buffer.load(0).re
            + self.twiddle9.re * x122p.re
            + self.twiddle5.re * x221p.re
            + self.twiddle4.re * x320p.re
            + self.twiddle10.re * x419p.re
            + self.twiddle1.re * x518p.re
            + self.twiddle8.re * x617p.re
            + self.twiddle6.re * x716p.re
            + self.twiddle3.re * x815p.re
            + self.twiddle11.re * x914p.re
            + self.twiddle2.re * x1013p.re
            + self.twiddle7.re * x1112p.re;
        let b914re_b = self.twiddle9.im * x122n.im
            + -self.twiddle5.im * x221n.im
            + self.twiddle4.im * x320n.im
            + -self.twiddle10.im * x419n.im
            + -self.twiddle1.im * x518n.im
            + self.twiddle8.im * x617n.im
            + -self.twiddle6.im * x716n.im
            + self.twiddle3.im * x815n.im
            + -self.twiddle11.im * x914n.im
            + -self.twiddle2.im * x1013n.im
            + self.twiddle7.im * x1112n.im;
        let b1013re_a = buffer.load(0).re
            + self.twiddle10.re * x122p.re
            + self.twiddle3.re * x221p.re
            + self.twiddle7.re * x320p.re
            + self.twiddle6.re * x419p.re
            + self.twiddle4.re * x518p.re
            + self.twiddle9.re * x617p.re
            + self.twiddle1.re * x716p.re
            + self.twiddle11.re * x815p.re
            + self.twiddle2.re * x914p.re
            + self.twiddle8.re * x1013p.re
            + self.twiddle5.re * x1112p.re;
        let b1013re_b = self.twiddle10.im * x122n.im
            + -self.twiddle3.im * x221n.im
            + self.twiddle7.im * x320n.im
            + -self.twiddle6.im * x419n.im
            + self.twiddle4.im * x518n.im
            + -self.twiddle9.im * x617n.im
            + self.twiddle1.im * x716n.im
            + self.twiddle11.im * x815n.im
            + -self.twiddle2.im * x914n.im
            + self.twiddle8.im * x1013n.im
            + -self.twiddle5.im * x1112n.im;
        let b1112re_a = buffer.load(0).re
            + self.twiddle11.re * x122p.re
            + self.twiddle1.re * x221p.re
            + self.twiddle10.re * x320p.re
            + self.twiddle2.re * x419p.re
            + self.twiddle9.re * x518p.re
            + self.twiddle3.re * x617p.re
            + self.twiddle8.re * x716p.re
            + self.twiddle4.re * x815p.re
            + self.twiddle7.re * x914p.re
            + self.twiddle5.re * x1013p.re
            + self.twiddle6.re * x1112p.re;
        let b1112re_b = self.twiddle11.im * x122n.im
            + -self.twiddle1.im * x221n.im
            + self.twiddle10.im * x320n.im
            + -self.twiddle2.im * x419n.im
            + self.twiddle9.im * x518n.im
            + -self.twiddle3.im * x617n.im
            + self.twiddle8.im * x716n.im
            + -self.twiddle4.im * x815n.im
            + self.twiddle7.im * x914n.im
            + -self.twiddle5.im * x1013n.im
            + self.twiddle6.im * x1112n.im;

        // Imaginary-part partial sums, using the same twiddle/sign rows as above:
        // `_a` combines the pair sums' imaginary parts with the twiddles' real parts,
        // `_b` combines the pair differences' real parts with the twiddles' imaginary parts.
        let b122im_a = buffer.load(0).im
            + self.twiddle1.re * x122p.im
            + self.twiddle2.re * x221p.im
            + self.twiddle3.re * x320p.im
            + self.twiddle4.re * x419p.im
            + self.twiddle5.re * x518p.im
            + self.twiddle6.re * x617p.im
            + self.twiddle7.re * x716p.im
            + self.twiddle8.re * x815p.im
            + self.twiddle9.re * x914p.im
            + self.twiddle10.re * x1013p.im
            + self.twiddle11.re * x1112p.im;
        let b122im_b = self.twiddle1.im * x122n.re
            + self.twiddle2.im * x221n.re
            + self.twiddle3.im * x320n.re
            + self.twiddle4.im * x419n.re
            + self.twiddle5.im * x518n.re
            + self.twiddle6.im * x617n.re
            + self.twiddle7.im * x716n.re
            + self.twiddle8.im * x815n.re
            + self.twiddle9.im * x914n.re
            + self.twiddle10.im * x1013n.re
            + self.twiddle11.im * x1112n.re;
        let b221im_a = buffer.load(0).im
            + self.twiddle2.re * x122p.im
            + self.twiddle4.re * x221p.im
            + self.twiddle6.re * x320p.im
            + self.twiddle8.re * x419p.im
            + self.twiddle10.re * x518p.im
            + self.twiddle11.re * x617p.im
            + self.twiddle9.re * x716p.im
            + self.twiddle7.re * x815p.im
            + self.twiddle5.re * x914p.im
            + self.twiddle3.re * x1013p.im
            + self.twiddle1.re * x1112p.im;
        let b221im_b = self.twiddle2.im * x122n.re
            + self.twiddle4.im * x221n.re
            + self.twiddle6.im * x320n.re
            + self.twiddle8.im * x419n.re
            + self.twiddle10.im * x518n.re
            + -self.twiddle11.im * x617n.re
            + -self.twiddle9.im * x716n.re
            + -self.twiddle7.im * x815n.re
            + -self.twiddle5.im * x914n.re
            + -self.twiddle3.im * x1013n.re
            + -self.twiddle1.im * x1112n.re;
        let b320im_a = buffer.load(0).im
            + self.twiddle3.re * x122p.im
            + self.twiddle6.re * x221p.im
            + self.twiddle9.re * x320p.im
            + self.twiddle11.re * x419p.im
            + self.twiddle8.re * x518p.im
            + self.twiddle5.re * x617p.im
            + self.twiddle2.re * x716p.im
            + self.twiddle1.re * x815p.im
            + self.twiddle4.re * x914p.im
            + self.twiddle7.re * x1013p.im
            + self.twiddle10.re * x1112p.im;
        let b320im_b = self.twiddle3.im * x122n.re
            + self.twiddle6.im * x221n.re
            + self.twiddle9.im * x320n.re
            + -self.twiddle11.im * x419n.re
            + -self.twiddle8.im * x518n.re
            + -self.twiddle5.im * x617n.re
            + -self.twiddle2.im * x716n.re
            + self.twiddle1.im * x815n.re
            + self.twiddle4.im * x914n.re
            + self.twiddle7.im * x1013n.re
            + self.twiddle10.im * x1112n.re;
        let b419im_a = buffer.load(0).im
            + self.twiddle4.re * x122p.im
            + self.twiddle8.re * x221p.im
            + self.twiddle11.re * x320p.im
            + self.twiddle7.re * x419p.im
            + self.twiddle3.re * x518p.im
            + self.twiddle1.re * x617p.im
            + self.twiddle5.re * x716p.im
            + self.twiddle9.re * x815p.im
            + self.twiddle10.re * x914p.im
            + self.twiddle6.re * x1013p.im
            + self.twiddle2.re * x1112p.im;
        let b419im_b = self.twiddle4.im * x122n.re
            + self.twiddle8.im * x221n.re
            + -self.twiddle11.im * x320n.re
            + -self.twiddle7.im * x419n.re
            + -self.twiddle3.im * x518n.re
            + self.twiddle1.im * x617n.re
            + self.twiddle5.im * x716n.re
            + self.twiddle9.im * x815n.re
            + -self.twiddle10.im * x914n.re
            + -self.twiddle6.im * x1013n.re
            + -self.twiddle2.im * x1112n.re;
        let b518im_a = buffer.load(0).im
            + self.twiddle5.re * x122p.im
            + self.twiddle10.re * x221p.im
            + self.twiddle8.re * x320p.im
            + self.twiddle3.re * x419p.im
            + self.twiddle2.re * x518p.im
            + self.twiddle7.re * x617p.im
            + self.twiddle11.re * x716p.im
            + self.twiddle6.re * x815p.im
            + self.twiddle1.re * x914p.im
            + self.twiddle4.re * x1013p.im
            + self.twiddle9.re * x1112p.im;
        let b518im_b = self.twiddle5.im * x122n.re
            + self.twiddle10.im * x221n.re
            + -self.twiddle8.im * x320n.re
            + -self.twiddle3.im * x419n.re
            + self.twiddle2.im * x518n.re
            + self.twiddle7.im * x617n.re
            + -self.twiddle11.im * x716n.re
            + -self.twiddle6.im * x815n.re
            + -self.twiddle1.im * x914n.re
            + self.twiddle4.im * x1013n.re
            + self.twiddle9.im * x1112n.re;
        let b617im_a = buffer.load(0).im
            + self.twiddle6.re * x122p.im
            + self.twiddle11.re * x221p.im
            + self.twiddle5.re * x320p.im
            + self.twiddle1.re * x419p.im
            + self.twiddle7.re * x518p.im
            + self.twiddle10.re * x617p.im
            + self.twiddle4.re * x716p.im
            + self.twiddle2.re * x815p.im
            + self.twiddle8.re * x914p.im
            + self.twiddle9.re * x1013p.im
            + self.twiddle3.re * x1112p.im;
        let b617im_b = self.twiddle6.im * x122n.re
            + -self.twiddle11.im * x221n.re
            + -self.twiddle5.im * x320n.re
            + self.twiddle1.im * x419n.re
            + self.twiddle7.im * x518n.re
            + -self.twiddle10.im * x617n.re
            + -self.twiddle4.im * x716n.re
            + self.twiddle2.im * x815n.re
            + self.twiddle8.im * x914n.re
            + -self.twiddle9.im * x1013n.re
            + -self.twiddle3.im * x1112n.re;
        let b716im_a = buffer.load(0).im
            + self.twiddle7.re * x122p.im
            + self.twiddle9.re * x221p.im
            + self.twiddle2.re * x320p.im
            + self.twiddle5.re * x419p.im
            + self.twiddle11.re * x518p.im
            + self.twiddle4.re * x617p.im
            + self.twiddle3.re * x716p.im
            + self.twiddle10.re * x815p.im
            + self.twiddle6.re * x914p.im
            + self.twiddle1.re * x1013p.im
            + self.twiddle8.re * x1112p.im;
        let b716im_b = self.twiddle7.im * x122n.re
            + -self.twiddle9.im * x221n.re
            + -self.twiddle2.im * x320n.re
            + self.twiddle5.im * x419n.re
            + -self.twiddle11.im * x518n.re
            + -self.twiddle4.im * x617n.re
            + self.twiddle3.im * x716n.re
            + self.twiddle10.im * x815n.re
            + -self.twiddle6.im * x914n.re
            + self.twiddle1.im * x1013n.re
            + self.twiddle8.im * x1112n.re;
        let b815im_a = buffer.load(0).im
            + self.twiddle8.re * x122p.im
            + self.twiddle7.re * x221p.im
            + self.twiddle1.re * x320p.im
            + self.twiddle9.re * x419p.im
            + self.twiddle6.re * x518p.im
            + self.twiddle2.re * x617p.im
            + self.twiddle10.re * x716p.im
            + self.twiddle5.re * x815p.im
            + self.twiddle3.re * x914p.im
            + self.twiddle11.re * x1013p.im
            + self.twiddle4.re * x1112p.im;
        let b815im_b = self.twiddle8.im * x122n.re
            + -self.twiddle7.im * x221n.re
            + self.twiddle1.im * x320n.re
            + self.twiddle9.im * x419n.re
            + -self.twiddle6.im * x518n.re
            + self.twiddle2.im * x617n.re
            + self.twiddle10.im * x716n.re
            + -self.twiddle5.im * x815n.re
            + self.twiddle3.im * x914n.re
            + self.twiddle11.im * x1013n.re
            + -self.twiddle4.im * x1112n.re;
        let b914im_a = buffer.load(0).im
            + self.twiddle9.re * x122p.im
            + self.twiddle5.re * x221p.im
            + self.twiddle4.re * x320p.im
            + self.twiddle10.re * x419p.im
            + self.twiddle1.re * x518p.im
            + self.twiddle8.re * x617p.im
            + self.twiddle6.re * x716p.im
            + self.twiddle3.re * x815p.im
            + self.twiddle11.re * x914p.im
            + self.twiddle2.re * x1013p.im
            + self.twiddle7.re * x1112p.im;
        let b914im_b = self.twiddle9.im * x122n.re
            + -self.twiddle5.im * x221n.re
            + self.twiddle4.im * x320n.re
            + -self.twiddle10.im * x419n.re
            + -self.twiddle1.im * x518n.re
            + self.twiddle8.im * x617n.re
            + -self.twiddle6.im * x716n.re
            + self.twiddle3.im * x815n.re
            + -self.twiddle11.im * x914n.re
            + -self.twiddle2.im * x1013n.re
            + self.twiddle7.im * x1112n.re;
        let b1013im_a = buffer.load(0).im
            + self.twiddle10.re * x122p.im
            + self.twiddle3.re * x221p.im
            + self.twiddle7.re * x320p.im
            + self.twiddle6.re * x419p.im
            + self.twiddle4.re * x518p.im
            + self.twiddle9.re * x617p.im
            + self.twiddle1.re * x716p.im
            + self.twiddle11.re * x815p.im
            + self.twiddle2.re * x914p.im
            + self.twiddle8.re * x1013p.im
            + self.twiddle5.re * x1112p.im;
        let b1013im_b = self.twiddle10.im * x122n.re
            + -self.twiddle3.im * x221n.re
            + self.twiddle7.im * x320n.re
            + -self.twiddle6.im * x419n.re
            + self.twiddle4.im * x518n.re
            + -self.twiddle9.im * x617n.re
            + self.twiddle1.im * x716n.re
            + self.twiddle11.im * x815n.re
            + -self.twiddle2.im * x914n.re
            + self.twiddle8.im * x1013n.re
            + -self.twiddle5.im * x1112n.re;
        let b1112im_a = buffer.load(0).im
            + self.twiddle11.re * x122p.im
            + self.twiddle1.re * x221p.im
            + self.twiddle10.re * x320p.im
            + self.twiddle2.re * x419p.im
            + self.twiddle9.re * x518p.im
            + self.twiddle3.re * x617p.im
            + self.twiddle8.re * x716p.im
            + self.twiddle4.re * x815p.im
            + self.twiddle7.re * x914p.im
            + self.twiddle5.re * x1013p.im
            + self.twiddle6.re * x1112p.im;
        let b1112im_b = self.twiddle11.im * x122n.re
            + -self.twiddle1.im * x221n.re
            + self.twiddle10.im * x320n.re
            + -self.twiddle2.im * x419n.re
            + self.twiddle9.im * x518n.re
            + -self.twiddle3.im * x617n.re
            + self.twiddle8.im * x716n.re
            + -self.twiddle4.im * x815n.re
            + self.twiddle7.im * x914n.re
            + -self.twiddle5.im * x1013n.re
            + self.twiddle6.im * x1112n.re;

        // Combine the partial sums. Outputs 1..=11 use (re_a - re_b, im_a + im_b);
        // the mirrored outputs 12..=22 reuse the same rows with the `_b` signs flipped.
        let out1re = b122re_a - b122re_b;
        let out1im = b122im_a + b122im_b;
        let out2re = b221re_a - b221re_b;
        let out2im = b221im_a + b221im_b;
        let out3re = b320re_a - b320re_b;
        let out3im = b320im_a + b320im_b;
        let out4re = b419re_a - b419re_b;
        let out4im = b419im_a + b419im_b;
        let out5re = b518re_a - b518re_b;
        let out5im = b518im_a + b518im_b;
        let out6re = b617re_a - b617re_b;
        let out6im = b617im_a + b617im_b;
        let out7re = b716re_a - b716re_b;
        let out7im = b716im_a + b716im_b;
        let out8re = b815re_a - b815re_b;
        let out8im = b815im_a + b815im_b;
        let out9re = b914re_a - b914re_b;
        let out9im = b914im_a + b914im_b;
        let out10re = b1013re_a - b1013re_b;
        let out10im = b1013im_a + b1013im_b;
        let out11re = b1112re_a - b1112re_b;
        let out11im = b1112im_a + b1112im_b;
        let out12re = b1112re_a + b1112re_b;
        let out12im = b1112im_a - b1112im_b;
        let out13re = b1013re_a + b1013re_b;
        let out13im = b1013im_a - b1013im_b;
        let out14re = b914re_a + b914re_b;
        let out14im = b914im_a - b914im_b;
        let out15re = b815re_a + b815re_b;
        let out15im = b815im_a - b815im_b;
        let out16re = b716re_a + b716re_b;
        let out16im = b716im_a - b716im_b;
        let out17re = b617re_a + b617re_b;
        let out17im = b617im_a - b617im_b;
        let out18re = b518re_a + b518re_b;
        let out18im = b518im_a - b518im_b;
        let out19re = b419re_a + b419re_b;
        let out19im = b419im_a - b419im_b;
        let out20re = b320re_a + b320re_b;
        let out20im = b320im_a - b320im_b;
        let out21re = b221re_a + b221re_b;
        let out21im = b221im_a - b221im_b;
        let out22re = b122re_a + b122re_b;
        let out22im = b122im_a - b122im_b;
        // Write the results back only now that every input has been read.
        buffer.store(sum, 0);
        buffer.store(
            Complex {
                re: out1re,
                im: out1im,
            },
            1,
        );
        buffer.store(
            Complex {
                re: out2re,
                im: out2im,
            },
            2,
        );
        buffer.store(
            Complex {
                re: out3re,
                im: out3im,
            },
            3,
        );
        buffer.store(
            Complex {
                re: out4re,
                im: out4im,
            },
            4,
        );
        buffer.store(
            Complex {
                re: out5re,
                im: out5im,
            },
            5,
        );
        buffer.store(
            Complex {
                re: out6re,
                im: out6im,
            },
            6,
        );
        buffer.store(
            Complex {
                re: out7re,
                im: out7im,
            },
            7,
        );
        buffer.store(
            Complex {
                re: out8re,
                im: out8im,
            },
            8,
        );
        buffer.store(
            Complex {
                re: out9re,
                im: out9im,
            },
            9,
        );
        buffer.store(
            Complex {
                re: out10re,
                im: out10im,
            },
            10,
        );
        buffer.store(
            Complex {
                re: out11re,
                im: out11im,
            },
            11,
        );
        buffer.store(
            Complex {
                re: out12re,
                im: out12im,
            },
            12,
        );
        buffer.store(
            Complex {
                re: out13re,
                im: out13im,
            },
            13,
        );
        buffer.store(
            Complex {
                re: out14re,
                im: out14im,
            },
            14,
        );
        buffer.store(
            Complex {
                re: out15re,
                im: out15im,
            },
            15,
        );
        buffer.store(
            Complex {
                re: out16re,
                im: out16im,
            },
            16,
        );
        buffer.store(
            Complex {
                re: out17re,
                im: out17im,
            },
            17,
        );
        buffer.store(
            Complex {
                re: out18re,
                im: out18im,
            },
            18,
        );
        buffer.store(
            Complex {
                re: out19re,
                im: out19im,
            },
            19,
        );
        buffer.store(
            Complex {
                re: out20re,
                im: out20im,
            },
            20,
        );
        buffer.store(
            Complex {
                re: out21re,
                im: out21im,
            },
            21,
        );
        buffer.store(
            Complex {
                re: out22re,
                im: out22im,
            },
            22,
        );
    }
3408}
3409
3410pub struct Butterfly24<T> {
3411 butterfly4: Butterfly4<T>,
3412 butterfly6: Butterfly6<T>,
3413 twiddle1: Complex<T>,
3414 twiddle2: Complex<T>,
3415 twiddle4: Complex<T>,
3416 twiddle5: Complex<T>,
3417 twiddle8: Complex<T>,
3418 twiddle10: Complex<T>,
3419 root2: T,
3420}
3421boilerplate_fft_butterfly!(Butterfly24, 24, |this: &Butterfly24<_>| this
3422 .butterfly4
3423 .fft_direction());
3424impl<T: FftNum> Butterfly24<T> {
3425 #[inline(always)]
3426 pub fn new(direction: FftDirection) -> Self {
3427 Self {
3428 butterfly4: Butterfly4::new(direction),
3429 butterfly6: Butterfly6::new(direction),
3430 twiddle1: twiddles::compute_twiddle(1, 24, direction),
3431 twiddle2: twiddles::compute_twiddle(2, 24, direction),
3432 twiddle4: twiddles::compute_twiddle(4, 24, direction),
3433 twiddle5: twiddles::compute_twiddle(5, 24, direction),
3434 twiddle8: twiddles::compute_twiddle(8, 24, direction),
3435 twiddle10: twiddles::compute_twiddle(10, 24, direction),
3436 root2: T::from_f64(0.5f64.sqrt()).unwrap(),
3437 }
3438 }
3439 #[inline(never)]
3440 unsafe fn perform_fft_contiguous(&self, mut buffer: impl LoadStore<T>) {
3441 let mut scratch0 = [
3444 buffer.load(0),
3445 buffer.load(4),
3446 buffer.load(8),
3447 buffer.load(12),
3448 buffer.load(16),
3449 buffer.load(20),
3450 ];
3451 let mut scratch1 = [
3452 buffer.load(1),
3453 buffer.load(5),
3454 buffer.load(9),
3455 buffer.load(13),
3456 buffer.load(17),
3457 buffer.load(21),
3458 ];
3459 let mut scratch2 = [
3460 buffer.load(2),
3461 buffer.load(6),
3462 buffer.load(10),
3463 buffer.load(14),
3464 buffer.load(18),
3465 buffer.load(22),
3466 ];
3467 let mut scratch3 = [
3468 buffer.load(3),
3469 buffer.load(7),
3470 buffer.load(11),
3471 buffer.load(15),
3472 buffer.load(19),
3473 buffer.load(23),
3474 ];
3475
3476 self.butterfly6.perform_fft_contiguous(&mut scratch0);
3478 self.butterfly6.perform_fft_contiguous(&mut scratch1);
3479 self.butterfly6.perform_fft_contiguous(&mut scratch2);
3480 self.butterfly6.perform_fft_contiguous(&mut scratch3);
3481
3482 scratch1[1] = scratch1[1] * self.twiddle1;
3484 scratch1[2] = scratch1[2] * self.twiddle2;
3485 scratch1[3] =
3486 (twiddles::rotate_90(scratch1[3], self.fft_direction()) + scratch1[3]) * self.root2;
3487 scratch1[4] = scratch1[4] * self.twiddle4;
3488 scratch1[5] = scratch1[5] * self.twiddle5;
3489 scratch2[1] = scratch2[1] * self.twiddle2;
3490 scratch2[2] = scratch2[2] * self.twiddle4;
3491 scratch2[3] = twiddles::rotate_90(scratch2[3], self.fft_direction());
3492 scratch2[4] = scratch2[4] * self.twiddle8;
3493 scratch2[5] = scratch2[5] * self.twiddle10;
3494 scratch3[1] =
3495 (twiddles::rotate_90(scratch3[1], self.fft_direction()) + scratch3[1]) * self.root2;
3496 scratch3[2] = twiddles::rotate_90(scratch3[2], self.fft_direction());
3497 scratch3[3] =
3498 (twiddles::rotate_90(scratch3[3], self.fft_direction()) - scratch3[3]) * self.root2;
3499 scratch3[4] = -scratch3[4];
3500 scratch3[5] =
3501 (twiddles::rotate_90(scratch3[5], self.fft_direction()) + scratch3[5]) * -self.root2;
3502
3503 self.butterfly4.perform_fft_strided(
3507 &mut scratch0[0],
3508 &mut scratch1[0],
3509 &mut scratch2[0],
3510 &mut scratch3[0],
3511 );
3512 self.butterfly4.perform_fft_strided(
3513 &mut scratch0[1],
3514 &mut scratch1[1],
3515 &mut scratch2[1],
3516 &mut scratch3[1],
3517 );
3518 self.butterfly4.perform_fft_strided(
3519 &mut scratch0[2],
3520 &mut scratch1[2],
3521 &mut scratch2[2],
3522 &mut scratch3[2],
3523 );
3524 self.butterfly4.perform_fft_strided(
3525 &mut scratch0[3],
3526 &mut scratch1[3],
3527 &mut scratch2[3],
3528 &mut scratch3[3],
3529 );
3530 self.butterfly4.perform_fft_strided(
3531 &mut scratch0[4],
3532 &mut scratch1[4],
3533 &mut scratch2[4],
3534 &mut scratch3[4],
3535 );
3536 self.butterfly4.perform_fft_strided(
3537 &mut scratch0[5],
3538 &mut scratch1[5],
3539 &mut scratch2[5],
3540 &mut scratch3[5],
3541 );
3542
3543 buffer.store(scratch0[0], 0);
3545 buffer.store(scratch0[1], 1);
3546 buffer.store(scratch0[2], 2);
3547 buffer.store(scratch0[3], 3);
3548 buffer.store(scratch0[4], 4);
3549 buffer.store(scratch0[5], 5);
3550 buffer.store(scratch1[0], 6);
3551 buffer.store(scratch1[1], 7);
3552 buffer.store(scratch1[2], 8);
3553 buffer.store(scratch1[3], 9);
3554 buffer.store(scratch1[4], 10);
3555 buffer.store(scratch1[5], 11);
3556 buffer.store(scratch2[0], 12);
3557 buffer.store(scratch2[1], 13);
3558 buffer.store(scratch2[2], 14);
3559 buffer.store(scratch2[3], 15);
3560 buffer.store(scratch2[4], 16);
3561 buffer.store(scratch2[5], 17);
3562 buffer.store(scratch3[0], 18);
3563 buffer.store(scratch3[1], 19);
3564 buffer.store(scratch3[2], 20);
3565 buffer.store(scratch3[3], 21);
3566 buffer.store(scratch3[4], 22);
3567 buffer.store(scratch3[5], 23);
3568 }
3569}
3570
3571pub struct Butterfly27<T> {
3572 butterfly9: Butterfly9<T>,
3573 twiddles: [Complex<T>; 12],
3574}
3575boilerplate_fft_butterfly!(Butterfly27, 27, |this: &Butterfly27<_>| this
3576 .butterfly9
3577 .fft_direction());
3578impl<T: FftNum> Butterfly27<T> {
3579 #[inline(always)]
3580 pub fn new(direction: FftDirection) -> Self {
3581 Self {
3582 butterfly9: Butterfly9::new(direction),
3583 twiddles: [
3584 twiddles::compute_twiddle(1, 27, direction),
3585 twiddles::compute_twiddle(2, 27, direction),
3586 twiddles::compute_twiddle(3, 27, direction),
3587 twiddles::compute_twiddle(4, 27, direction),
3588 twiddles::compute_twiddle(5, 27, direction),
3589 twiddles::compute_twiddle(6, 27, direction),
3590 twiddles::compute_twiddle(7, 27, direction),
3591 twiddles::compute_twiddle(8, 27, direction),
3592 twiddles::compute_twiddle(10, 27, direction),
3593 twiddles::compute_twiddle(12, 27, direction),
3594 twiddles::compute_twiddle(14, 27, direction),
3595 twiddles::compute_twiddle(16, 27, direction),
3596 ],
3597 }
3598 }
3599
3600 #[inline(always)]
3601 unsafe fn perform_fft_contiguous(&self, mut buffer: impl LoadStore<T>) {
3602 let mut scratch0 = [
3606 buffer.load(0),
3607 buffer.load(3),
3608 buffer.load(6),
3609 buffer.load(9),
3610 buffer.load(12),
3611 buffer.load(15),
3612 buffer.load(18),
3613 buffer.load(21),
3614 buffer.load(24),
3615 ];
3616 let mut scratch1 = [
3617 buffer.load(1 + 0),
3618 buffer.load(1 + 3),
3619 buffer.load(1 + 6),
3620 buffer.load(1 + 9),
3621 buffer.load(1 + 12),
3622 buffer.load(1 + 15),
3623 buffer.load(1 + 18),
3624 buffer.load(1 + 21),
3625 buffer.load(1 + 24),
3626 ];
3627 let mut scratch2 = [
3628 buffer.load(2 + 0),
3629 buffer.load(2 + 3),
3630 buffer.load(2 + 6),
3631 buffer.load(2 + 9),
3632 buffer.load(2 + 12),
3633 buffer.load(2 + 15),
3634 buffer.load(2 + 18),
3635 buffer.load(2 + 21),
3636 buffer.load(2 + 24),
3637 ];
3638
3639 self.butterfly9.perform_fft_contiguous(&mut scratch0);
3641 self.butterfly9.perform_fft_contiguous(&mut scratch1);
3642 self.butterfly9.perform_fft_contiguous(&mut scratch2);
3643
3644 scratch1[1] = scratch1[1] * self.twiddles[0];
3646 scratch1[2] = scratch1[2] * self.twiddles[1];
3647 scratch1[3] = scratch1[3] * self.twiddles[2];
3648 scratch1[4] = scratch1[4] * self.twiddles[3];
3649 scratch1[5] = scratch1[5] * self.twiddles[4];
3650 scratch1[6] = scratch1[6] * self.twiddles[5];
3651 scratch1[7] = scratch1[7] * self.twiddles[6];
3652 scratch1[8] = scratch1[8] * self.twiddles[7];
3653 scratch2[1] = scratch2[1] * self.twiddles[1];
3654 scratch2[2] = scratch2[2] * self.twiddles[3];
3655 scratch2[3] = scratch2[3] * self.twiddles[5];
3656 scratch2[4] = scratch2[4] * self.twiddles[7];
3657 scratch2[5] = scratch2[5] * self.twiddles[8];
3658 scratch2[6] = scratch2[6] * self.twiddles[9];
3659 scratch2[7] = scratch2[7] * self.twiddles[10];
3660 scratch2[8] = scratch2[8] * self.twiddles[11];
3661
3662 self.butterfly9.butterfly3.perform_fft_strided(
3666 &mut scratch0[0],
3667 &mut scratch1[0],
3668 &mut scratch2[0],
3669 );
3670 self.butterfly9.butterfly3.perform_fft_strided(
3671 &mut scratch0[1],
3672 &mut scratch1[1],
3673 &mut scratch2[1],
3674 );
3675 self.butterfly9.butterfly3.perform_fft_strided(
3676 &mut scratch0[2],
3677 &mut scratch1[2],
3678 &mut scratch2[2],
3679 );
3680 self.butterfly9.butterfly3.perform_fft_strided(
3681 &mut scratch0[3],
3682 &mut scratch1[3],
3683 &mut scratch2[3],
3684 );
3685 self.butterfly9.butterfly3.perform_fft_strided(
3686 &mut scratch0[4],
3687 &mut scratch1[4],
3688 &mut scratch2[4],
3689 );
3690 self.butterfly9.butterfly3.perform_fft_strided(
3691 &mut scratch0[5],
3692 &mut scratch1[5],
3693 &mut scratch2[5],
3694 );
3695 self.butterfly9.butterfly3.perform_fft_strided(
3696 &mut scratch0[6],
3697 &mut scratch1[6],
3698 &mut scratch2[6],
3699 );
3700 self.butterfly9.butterfly3.perform_fft_strided(
3701 &mut scratch0[7],
3702 &mut scratch1[7],
3703 &mut scratch2[7],
3704 );
3705 self.butterfly9.butterfly3.perform_fft_strided(
3706 &mut scratch0[8],
3707 &mut scratch1[8],
3708 &mut scratch2[8],
3709 );
3710
3711 buffer.store(scratch0[0], 0);
3713 buffer.store(scratch0[1], 1);
3714 buffer.store(scratch0[2], 2);
3715 buffer.store(scratch0[3], 3);
3716 buffer.store(scratch0[4], 4);
3717 buffer.store(scratch0[5], 5);
3718 buffer.store(scratch0[6], 6);
3719 buffer.store(scratch0[7], 7);
3720 buffer.store(scratch0[8], 8);
3721
3722 buffer.store(scratch1[0], 9 + 0);
3723 buffer.store(scratch1[1], 9 + 1);
3724 buffer.store(scratch1[2], 9 + 2);
3725 buffer.store(scratch1[3], 9 + 3);
3726 buffer.store(scratch1[4], 9 + 4);
3727 buffer.store(scratch1[5], 9 + 5);
3728 buffer.store(scratch1[6], 9 + 6);
3729 buffer.store(scratch1[7], 9 + 7);
3730 buffer.store(scratch1[8], 9 + 8);
3731
3732 buffer.store(scratch2[0], 18 + 0);
3733 buffer.store(scratch2[1], 18 + 1);
3734 buffer.store(scratch2[2], 18 + 2);
3735 buffer.store(scratch2[3], 18 + 3);
3736 buffer.store(scratch2[4], 18 + 4);
3737 buffer.store(scratch2[5], 18 + 5);
3738 buffer.store(scratch2[6], 18 + 6);
3739 buffer.store(scratch2[7], 18 + 7);
3740 buffer.store(scratch2[8], 18 + 8);
3741 }
3742}
3743
3744pub struct Butterfly29<T> {
3745 twiddle1: Complex<T>,
3746 twiddle2: Complex<T>,
3747 twiddle3: Complex<T>,
3748 twiddle4: Complex<T>,
3749 twiddle5: Complex<T>,
3750 twiddle6: Complex<T>,
3751 twiddle7: Complex<T>,
3752 twiddle8: Complex<T>,
3753 twiddle9: Complex<T>,
3754 twiddle10: Complex<T>,
3755 twiddle11: Complex<T>,
3756 twiddle12: Complex<T>,
3757 twiddle13: Complex<T>,
3758 twiddle14: Complex<T>,
3759 direction: FftDirection,
3760}
3761boilerplate_fft_butterfly!(Butterfly29, 29, |this: &Butterfly29<_>| this.direction);
3762impl<T: FftNum> Butterfly29<T> {
3763 pub fn new(direction: FftDirection) -> Self {
3764 let twiddle1: Complex<T> = twiddles::compute_twiddle(1, 29, direction);
3765 let twiddle2: Complex<T> = twiddles::compute_twiddle(2, 29, direction);
3766 let twiddle3: Complex<T> = twiddles::compute_twiddle(3, 29, direction);
3767 let twiddle4: Complex<T> = twiddles::compute_twiddle(4, 29, direction);
3768 let twiddle5: Complex<T> = twiddles::compute_twiddle(5, 29, direction);
3769 let twiddle6: Complex<T> = twiddles::compute_twiddle(6, 29, direction);
3770 let twiddle7: Complex<T> = twiddles::compute_twiddle(7, 29, direction);
3771 let twiddle8: Complex<T> = twiddles::compute_twiddle(8, 29, direction);
3772 let twiddle9: Complex<T> = twiddles::compute_twiddle(9, 29, direction);
3773 let twiddle10: Complex<T> = twiddles::compute_twiddle(10, 29, direction);
3774 let twiddle11: Complex<T> = twiddles::compute_twiddle(11, 29, direction);
3775 let twiddle12: Complex<T> = twiddles::compute_twiddle(12, 29, direction);
3776 let twiddle13: Complex<T> = twiddles::compute_twiddle(13, 29, direction);
3777 let twiddle14: Complex<T> = twiddles::compute_twiddle(14, 29, direction);
3778 Self {
3779 twiddle1,
3780 twiddle2,
3781 twiddle3,
3782 twiddle4,
3783 twiddle5,
3784 twiddle6,
3785 twiddle7,
3786 twiddle8,
3787 twiddle9,
3788 twiddle10,
3789 twiddle11,
3790 twiddle12,
3791 twiddle13,
3792 twiddle14,
3793 direction,
3794 }
3795 }
3796
3797 #[inline(never)]
3798 unsafe fn perform_fft_contiguous(&self, mut buffer: impl LoadStore<T>) {
3799 let x128p = buffer.load(1) + buffer.load(28);
3803 let x128n = buffer.load(1) - buffer.load(28);
3804 let x227p = buffer.load(2) + buffer.load(27);
3805 let x227n = buffer.load(2) - buffer.load(27);
3806 let x326p = buffer.load(3) + buffer.load(26);
3807 let x326n = buffer.load(3) - buffer.load(26);
3808 let x425p = buffer.load(4) + buffer.load(25);
3809 let x425n = buffer.load(4) - buffer.load(25);
3810 let x524p = buffer.load(5) + buffer.load(24);
3811 let x524n = buffer.load(5) - buffer.load(24);
3812 let x623p = buffer.load(6) + buffer.load(23);
3813 let x623n = buffer.load(6) - buffer.load(23);
3814 let x722p = buffer.load(7) + buffer.load(22);
3815 let x722n = buffer.load(7) - buffer.load(22);
3816 let x821p = buffer.load(8) + buffer.load(21);
3817 let x821n = buffer.load(8) - buffer.load(21);
3818 let x920p = buffer.load(9) + buffer.load(20);
3819 let x920n = buffer.load(9) - buffer.load(20);
3820 let x1019p = buffer.load(10) + buffer.load(19);
3821 let x1019n = buffer.load(10) - buffer.load(19);
3822 let x1118p = buffer.load(11) + buffer.load(18);
3823 let x1118n = buffer.load(11) - buffer.load(18);
3824 let x1217p = buffer.load(12) + buffer.load(17);
3825 let x1217n = buffer.load(12) - buffer.load(17);
3826 let x1316p = buffer.load(13) + buffer.load(16);
3827 let x1316n = buffer.load(13) - buffer.load(16);
3828 let x1415p = buffer.load(14) + buffer.load(15);
3829 let x1415n = buffer.load(14) - buffer.load(15);
3830 let sum = buffer.load(0)
3831 + x128p
3832 + x227p
3833 + x326p
3834 + x425p
3835 + x524p
3836 + x623p
3837 + x722p
3838 + x821p
3839 + x920p
3840 + x1019p
3841 + x1118p
3842 + x1217p
3843 + x1316p
3844 + x1415p;
3845 let b128re_a = buffer.load(0).re
3846 + self.twiddle1.re * x128p.re
3847 + self.twiddle2.re * x227p.re
3848 + self.twiddle3.re * x326p.re
3849 + self.twiddle4.re * x425p.re
3850 + self.twiddle5.re * x524p.re
3851 + self.twiddle6.re * x623p.re
3852 + self.twiddle7.re * x722p.re
3853 + self.twiddle8.re * x821p.re
3854 + self.twiddle9.re * x920p.re
3855 + self.twiddle10.re * x1019p.re
3856 + self.twiddle11.re * x1118p.re
3857 + self.twiddle12.re * x1217p.re
3858 + self.twiddle13.re * x1316p.re
3859 + self.twiddle14.re * x1415p.re;
3860 let b128re_b = self.twiddle1.im * x128n.im
3861 + self.twiddle2.im * x227n.im
3862 + self.twiddle3.im * x326n.im
3863 + self.twiddle4.im * x425n.im
3864 + self.twiddle5.im * x524n.im
3865 + self.twiddle6.im * x623n.im
3866 + self.twiddle7.im * x722n.im
3867 + self.twiddle8.im * x821n.im
3868 + self.twiddle9.im * x920n.im
3869 + self.twiddle10.im * x1019n.im
3870 + self.twiddle11.im * x1118n.im
3871 + self.twiddle12.im * x1217n.im
3872 + self.twiddle13.im * x1316n.im
3873 + self.twiddle14.im * x1415n.im;
3874 let b227re_a = buffer.load(0).re
3875 + self.twiddle2.re * x128p.re
3876 + self.twiddle4.re * x227p.re
3877 + self.twiddle6.re * x326p.re
3878 + self.twiddle8.re * x425p.re
3879 + self.twiddle10.re * x524p.re
3880 + self.twiddle12.re * x623p.re
3881 + self.twiddle14.re * x722p.re
3882 + self.twiddle13.re * x821p.re
3883 + self.twiddle11.re * x920p.re
3884 + self.twiddle9.re * x1019p.re
3885 + self.twiddle7.re * x1118p.re
3886 + self.twiddle5.re * x1217p.re
3887 + self.twiddle3.re * x1316p.re
3888 + self.twiddle1.re * x1415p.re;
3889 let b227re_b = self.twiddle2.im * x128n.im
3890 + self.twiddle4.im * x227n.im
3891 + self.twiddle6.im * x326n.im
3892 + self.twiddle8.im * x425n.im
3893 + self.twiddle10.im * x524n.im
3894 + self.twiddle12.im * x623n.im
3895 + self.twiddle14.im * x722n.im
3896 + -self.twiddle13.im * x821n.im
3897 + -self.twiddle11.im * x920n.im
3898 + -self.twiddle9.im * x1019n.im
3899 + -self.twiddle7.im * x1118n.im
3900 + -self.twiddle5.im * x1217n.im
3901 + -self.twiddle3.im * x1316n.im
3902 + -self.twiddle1.im * x1415n.im;
3903 let b326re_a = buffer.load(0).re
3904 + self.twiddle3.re * x128p.re
3905 + self.twiddle6.re * x227p.re
3906 + self.twiddle9.re * x326p.re
3907 + self.twiddle12.re * x425p.re
3908 + self.twiddle14.re * x524p.re
3909 + self.twiddle11.re * x623p.re
3910 + self.twiddle8.re * x722p.re
3911 + self.twiddle5.re * x821p.re
3912 + self.twiddle2.re * x920p.re
3913 + self.twiddle1.re * x1019p.re
3914 + self.twiddle4.re * x1118p.re
3915 + self.twiddle7.re * x1217p.re
3916 + self.twiddle10.re * x1316p.re
3917 + self.twiddle13.re * x1415p.re;
3918 let b326re_b = self.twiddle3.im * x128n.im
3919 + self.twiddle6.im * x227n.im
3920 + self.twiddle9.im * x326n.im
3921 + self.twiddle12.im * x425n.im
3922 + -self.twiddle14.im * x524n.im
3923 + -self.twiddle11.im * x623n.im
3924 + -self.twiddle8.im * x722n.im
3925 + -self.twiddle5.im * x821n.im
3926 + -self.twiddle2.im * x920n.im
3927 + self.twiddle1.im * x1019n.im
3928 + self.twiddle4.im * x1118n.im
3929 + self.twiddle7.im * x1217n.im
3930 + self.twiddle10.im * x1316n.im
3931 + self.twiddle13.im * x1415n.im;
3932 let b425re_a = buffer.load(0).re
3933 + self.twiddle4.re * x128p.re
3934 + self.twiddle8.re * x227p.re
3935 + self.twiddle12.re * x326p.re
3936 + self.twiddle13.re * x425p.re
3937 + self.twiddle9.re * x524p.re
3938 + self.twiddle5.re * x623p.re
3939 + self.twiddle1.re * x722p.re
3940 + self.twiddle3.re * x821p.re
3941 + self.twiddle7.re * x920p.re
3942 + self.twiddle11.re * x1019p.re
3943 + self.twiddle14.re * x1118p.re
3944 + self.twiddle10.re * x1217p.re
3945 + self.twiddle6.re * x1316p.re
3946 + self.twiddle2.re * x1415p.re;
3947 let b425re_b = self.twiddle4.im * x128n.im
3948 + self.twiddle8.im * x227n.im
3949 + self.twiddle12.im * x326n.im
3950 + -self.twiddle13.im * x425n.im
3951 + -self.twiddle9.im * x524n.im
3952 + -self.twiddle5.im * x623n.im
3953 + -self.twiddle1.im * x722n.im
3954 + self.twiddle3.im * x821n.im
3955 + self.twiddle7.im * x920n.im
3956 + self.twiddle11.im * x1019n.im
3957 + -self.twiddle14.im * x1118n.im
3958 + -self.twiddle10.im * x1217n.im
3959 + -self.twiddle6.im * x1316n.im
3960 + -self.twiddle2.im * x1415n.im;
3961 let b524re_a = buffer.load(0).re
3962 + self.twiddle5.re * x128p.re
3963 + self.twiddle10.re * x227p.re
3964 + self.twiddle14.re * x326p.re
3965 + self.twiddle9.re * x425p.re
3966 + self.twiddle4.re * x524p.re
3967 + self.twiddle1.re * x623p.re
3968 + self.twiddle6.re * x722p.re
3969 + self.twiddle11.re * x821p.re
3970 + self.twiddle13.re * x920p.re
3971 + self.twiddle8.re * x1019p.re
3972 + self.twiddle3.re * x1118p.re
3973 + self.twiddle2.re * x1217p.re
3974 + self.twiddle7.re * x1316p.re
3975 + self.twiddle12.re * x1415p.re;
3976 let b524re_b = self.twiddle5.im * x128n.im
3977 + self.twiddle10.im * x227n.im
3978 + -self.twiddle14.im * x326n.im
3979 + -self.twiddle9.im * x425n.im
3980 + -self.twiddle4.im * x524n.im
3981 + self.twiddle1.im * x623n.im
3982 + self.twiddle6.im * x722n.im
3983 + self.twiddle11.im * x821n.im
3984 + -self.twiddle13.im * x920n.im
3985 + -self.twiddle8.im * x1019n.im
3986 + -self.twiddle3.im * x1118n.im
3987 + self.twiddle2.im * x1217n.im
3988 + self.twiddle7.im * x1316n.im
3989 + self.twiddle12.im * x1415n.im;
3990 let b623re_a = buffer.load(0).re
3991 + self.twiddle6.re * x128p.re
3992 + self.twiddle12.re * x227p.re
3993 + self.twiddle11.re * x326p.re
3994 + self.twiddle5.re * x425p.re
3995 + self.twiddle1.re * x524p.re
3996 + self.twiddle7.re * x623p.re
3997 + self.twiddle13.re * x722p.re
3998 + self.twiddle10.re * x821p.re
3999 + self.twiddle4.re * x920p.re
4000 + self.twiddle2.re * x1019p.re
4001 + self.twiddle8.re * x1118p.re
4002 + self.twiddle14.re * x1217p.re
4003 + self.twiddle9.re * x1316p.re
4004 + self.twiddle3.re * x1415p.re;
4005 let b623re_b = self.twiddle6.im * x128n.im
4006 + self.twiddle12.im * x227n.im
4007 + -self.twiddle11.im * x326n.im
4008 + -self.twiddle5.im * x425n.im
4009 + self.twiddle1.im * x524n.im
4010 + self.twiddle7.im * x623n.im
4011 + self.twiddle13.im * x722n.im
4012 + -self.twiddle10.im * x821n.im
4013 + -self.twiddle4.im * x920n.im
4014 + self.twiddle2.im * x1019n.im
4015 + self.twiddle8.im * x1118n.im
4016 + self.twiddle14.im * x1217n.im
4017 + -self.twiddle9.im * x1316n.im
4018 + -self.twiddle3.im * x1415n.im;
4019 let b722re_a = buffer.load(0).re
4020 + self.twiddle7.re * x128p.re
4021 + self.twiddle14.re * x227p.re
4022 + self.twiddle8.re * x326p.re
4023 + self.twiddle1.re * x425p.re
4024 + self.twiddle6.re * x524p.re
4025 + self.twiddle13.re * x623p.re
4026 + self.twiddle9.re * x722p.re
4027 + self.twiddle2.re * x821p.re
4028 + self.twiddle5.re * x920p.re
4029 + self.twiddle12.re * x1019p.re
4030 + self.twiddle10.re * x1118p.re
4031 + self.twiddle3.re * x1217p.re
4032 + self.twiddle4.re * x1316p.re
4033 + self.twiddle11.re * x1415p.re;
4034 let b722re_b = self.twiddle7.im * x128n.im
4035 + self.twiddle14.im * x227n.im
4036 + -self.twiddle8.im * x326n.im
4037 + -self.twiddle1.im * x425n.im
4038 + self.twiddle6.im * x524n.im
4039 + self.twiddle13.im * x623n.im
4040 + -self.twiddle9.im * x722n.im
4041 + -self.twiddle2.im * x821n.im
4042 + self.twiddle5.im * x920n.im
4043 + self.twiddle12.im * x1019n.im
4044 + -self.twiddle10.im * x1118n.im
4045 + -self.twiddle3.im * x1217n.im
4046 + self.twiddle4.im * x1316n.im
4047 + self.twiddle11.im * x1415n.im;
4048 let b821re_a = buffer.load(0).re
4049 + self.twiddle8.re * x128p.re
4050 + self.twiddle13.re * x227p.re
4051 + self.twiddle5.re * x326p.re
4052 + self.twiddle3.re * x425p.re
4053 + self.twiddle11.re * x524p.re
4054 + self.twiddle10.re * x623p.re
4055 + self.twiddle2.re * x722p.re
4056 + self.twiddle6.re * x821p.re
4057 + self.twiddle14.re * x920p.re
4058 + self.twiddle7.re * x1019p.re
4059 + self.twiddle1.re * x1118p.re
4060 + self.twiddle9.re * x1217p.re
4061 + self.twiddle12.re * x1316p.re
4062 + self.twiddle4.re * x1415p.re;
4063 let b821re_b = self.twiddle8.im * x128n.im
4064 + -self.twiddle13.im * x227n.im
4065 + -self.twiddle5.im * x326n.im
4066 + self.twiddle3.im * x425n.im
4067 + self.twiddle11.im * x524n.im
4068 + -self.twiddle10.im * x623n.im
4069 + -self.twiddle2.im * x722n.im
4070 + self.twiddle6.im * x821n.im
4071 + self.twiddle14.im * x920n.im
4072 + -self.twiddle7.im * x1019n.im
4073 + self.twiddle1.im * x1118n.im
4074 + self.twiddle9.im * x1217n.im
4075 + -self.twiddle12.im * x1316n.im
4076 + -self.twiddle4.im * x1415n.im;
4077 let b920re_a = buffer.load(0).re
4078 + self.twiddle9.re * x128p.re
4079 + self.twiddle11.re * x227p.re
4080 + self.twiddle2.re * x326p.re
4081 + self.twiddle7.re * x425p.re
4082 + self.twiddle13.re * x524p.re
4083 + self.twiddle4.re * x623p.re
4084 + self.twiddle5.re * x722p.re
4085 + self.twiddle14.re * x821p.re
4086 + self.twiddle6.re * x920p.re
4087 + self.twiddle3.re * x1019p.re
4088 + self.twiddle12.re * x1118p.re
4089 + self.twiddle8.re * x1217p.re
4090 + self.twiddle1.re * x1316p.re
4091 + self.twiddle10.re * x1415p.re;
4092 let b920re_b = self.twiddle9.im * x128n.im
4093 + -self.twiddle11.im * x227n.im
4094 + -self.twiddle2.im * x326n.im
4095 + self.twiddle7.im * x425n.im
4096 + -self.twiddle13.im * x524n.im
4097 + -self.twiddle4.im * x623n.im
4098 + self.twiddle5.im * x722n.im
4099 + self.twiddle14.im * x821n.im
4100 + -self.twiddle6.im * x920n.im
4101 + self.twiddle3.im * x1019n.im
4102 + self.twiddle12.im * x1118n.im
4103 + -self.twiddle8.im * x1217n.im
4104 + self.twiddle1.im * x1316n.im
4105 + self.twiddle10.im * x1415n.im;
4106 let b1019re_a = buffer.load(0).re
4107 + self.twiddle10.re * x128p.re
4108 + self.twiddle9.re * x227p.re
4109 + self.twiddle1.re * x326p.re
4110 + self.twiddle11.re * x425p.re
4111 + self.twiddle8.re * x524p.re
4112 + self.twiddle2.re * x623p.re
4113 + self.twiddle12.re * x722p.re
4114 + self.twiddle7.re * x821p.re
4115 + self.twiddle3.re * x920p.re
4116 + self.twiddle13.re * x1019p.re
4117 + self.twiddle6.re * x1118p.re
4118 + self.twiddle4.re * x1217p.re
4119 + self.twiddle14.re * x1316p.re
4120 + self.twiddle5.re * x1415p.re;
4121 let b1019re_b = self.twiddle10.im * x128n.im
4122 + -self.twiddle9.im * x227n.im
4123 + self.twiddle1.im * x326n.im
4124 + self.twiddle11.im * x425n.im
4125 + -self.twiddle8.im * x524n.im
4126 + self.twiddle2.im * x623n.im
4127 + self.twiddle12.im * x722n.im
4128 + -self.twiddle7.im * x821n.im
4129 + self.twiddle3.im * x920n.im
4130 + self.twiddle13.im * x1019n.im
4131 + -self.twiddle6.im * x1118n.im
4132 + self.twiddle4.im * x1217n.im
4133 + self.twiddle14.im * x1316n.im
4134 + -self.twiddle5.im * x1415n.im;
4135 let b1118re_a = buffer.load(0).re
4136 + self.twiddle11.re * x128p.re
4137 + self.twiddle7.re * x227p.re
4138 + self.twiddle4.re * x326p.re
4139 + self.twiddle14.re * x425p.re
4140 + self.twiddle3.re * x524p.re
4141 + self.twiddle8.re * x623p.re
4142 + self.twiddle10.re * x722p.re
4143 + self.twiddle1.re * x821p.re
4144 + self.twiddle12.re * x920p.re
4145 + self.twiddle6.re * x1019p.re
4146 + self.twiddle5.re * x1118p.re
4147 + self.twiddle13.re * x1217p.re
4148 + self.twiddle2.re * x1316p.re
4149 + self.twiddle9.re * x1415p.re;
4150 let b1118re_b = self.twiddle11.im * x128n.im
4151 + -self.twiddle7.im * x227n.im
4152 + self.twiddle4.im * x326n.im
4153 + -self.twiddle14.im * x425n.im
4154 + -self.twiddle3.im * x524n.im
4155 + self.twiddle8.im * x623n.im
4156 + -self.twiddle10.im * x722n.im
4157 + self.twiddle1.im * x821n.im
4158 + self.twiddle12.im * x920n.im
4159 + -self.twiddle6.im * x1019n.im
4160 + self.twiddle5.im * x1118n.im
4161 + -self.twiddle13.im * x1217n.im
4162 + -self.twiddle2.im * x1316n.im
4163 + self.twiddle9.im * x1415n.im;
4164 let b1217re_a = buffer.load(0).re
4165 + self.twiddle12.re * x128p.re
4166 + self.twiddle5.re * x227p.re
4167 + self.twiddle7.re * x326p.re
4168 + self.twiddle10.re * x425p.re
4169 + self.twiddle2.re * x524p.re
4170 + self.twiddle14.re * x623p.re
4171 + self.twiddle3.re * x722p.re
4172 + self.twiddle9.re * x821p.re
4173 + self.twiddle8.re * x920p.re
4174 + self.twiddle4.re * x1019p.re
4175 + self.twiddle13.re * x1118p.re
4176 + self.twiddle1.re * x1217p.re
4177 + self.twiddle11.re * x1316p.re
4178 + self.twiddle6.re * x1415p.re;
4179 let b1217re_b = self.twiddle12.im * x128n.im
4180 + -self.twiddle5.im * x227n.im
4181 + self.twiddle7.im * x326n.im
4182 + -self.twiddle10.im * x425n.im
4183 + self.twiddle2.im * x524n.im
4184 + self.twiddle14.im * x623n.im
4185 + -self.twiddle3.im * x722n.im
4186 + self.twiddle9.im * x821n.im
4187 + -self.twiddle8.im * x920n.im
4188 + self.twiddle4.im * x1019n.im
4189 + -self.twiddle13.im * x1118n.im
4190 + -self.twiddle1.im * x1217n.im
4191 + self.twiddle11.im * x1316n.im
4192 + -self.twiddle6.im * x1415n.im;
4193 let b1316re_a = buffer.load(0).re
4194 + self.twiddle13.re * x128p.re
4195 + self.twiddle3.re * x227p.re
4196 + self.twiddle10.re * x326p.re
4197 + self.twiddle6.re * x425p.re
4198 + self.twiddle7.re * x524p.re
4199 + self.twiddle9.re * x623p.re
4200 + self.twiddle4.re * x722p.re
4201 + self.twiddle12.re * x821p.re
4202 + self.twiddle1.re * x920p.re
4203 + self.twiddle14.re * x1019p.re
4204 + self.twiddle2.re * x1118p.re
4205 + self.twiddle11.re * x1217p.re
4206 + self.twiddle5.re * x1316p.re
4207 + self.twiddle8.re * x1415p.re;
4208 let b1316re_b = self.twiddle13.im * x128n.im
4209 + -self.twiddle3.im * x227n.im
4210 + self.twiddle10.im * x326n.im
4211 + -self.twiddle6.im * x425n.im
4212 + self.twiddle7.im * x524n.im
4213 + -self.twiddle9.im * x623n.im
4214 + self.twiddle4.im * x722n.im
4215 + -self.twiddle12.im * x821n.im
4216 + self.twiddle1.im * x920n.im
4217 + self.twiddle14.im * x1019n.im
4218 + -self.twiddle2.im * x1118n.im
4219 + self.twiddle11.im * x1217n.im
4220 + -self.twiddle5.im * x1316n.im
4221 + self.twiddle8.im * x1415n.im;
4222 let b1415re_a = buffer.load(0).re
4223 + self.twiddle14.re * x128p.re
4224 + self.twiddle1.re * x227p.re
4225 + self.twiddle13.re * x326p.re
4226 + self.twiddle2.re * x425p.re
4227 + self.twiddle12.re * x524p.re
4228 + self.twiddle3.re * x623p.re
4229 + self.twiddle11.re * x722p.re
4230 + self.twiddle4.re * x821p.re
4231 + self.twiddle10.re * x920p.re
4232 + self.twiddle5.re * x1019p.re
4233 + self.twiddle9.re * x1118p.re
4234 + self.twiddle6.re * x1217p.re
4235 + self.twiddle8.re * x1316p.re
4236 + self.twiddle7.re * x1415p.re;
4237 let b1415re_b = self.twiddle14.im * x128n.im
4238 + -self.twiddle1.im * x227n.im
4239 + self.twiddle13.im * x326n.im
4240 + -self.twiddle2.im * x425n.im
4241 + self.twiddle12.im * x524n.im
4242 + -self.twiddle3.im * x623n.im
4243 + self.twiddle11.im * x722n.im
4244 + -self.twiddle4.im * x821n.im
4245 + self.twiddle10.im * x920n.im
4246 + -self.twiddle5.im * x1019n.im
4247 + self.twiddle9.im * x1118n.im
4248 + -self.twiddle6.im * x1217n.im
4249 + self.twiddle8.im * x1316n.im
4250 + -self.twiddle7.im * x1415n.im;
4251
4252 let b128im_a = buffer.load(0).im
4253 + self.twiddle1.re * x128p.im
4254 + self.twiddle2.re * x227p.im
4255 + self.twiddle3.re * x326p.im
4256 + self.twiddle4.re * x425p.im
4257 + self.twiddle5.re * x524p.im
4258 + self.twiddle6.re * x623p.im
4259 + self.twiddle7.re * x722p.im
4260 + self.twiddle8.re * x821p.im
4261 + self.twiddle9.re * x920p.im
4262 + self.twiddle10.re * x1019p.im
4263 + self.twiddle11.re * x1118p.im
4264 + self.twiddle12.re * x1217p.im
4265 + self.twiddle13.re * x1316p.im
4266 + self.twiddle14.re * x1415p.im;
4267 let b128im_b = self.twiddle1.im * x128n.re
4268 + self.twiddle2.im * x227n.re
4269 + self.twiddle3.im * x326n.re
4270 + self.twiddle4.im * x425n.re
4271 + self.twiddle5.im * x524n.re
4272 + self.twiddle6.im * x623n.re
4273 + self.twiddle7.im * x722n.re
4274 + self.twiddle8.im * x821n.re
4275 + self.twiddle9.im * x920n.re
4276 + self.twiddle10.im * x1019n.re
4277 + self.twiddle11.im * x1118n.re
4278 + self.twiddle12.im * x1217n.re
4279 + self.twiddle13.im * x1316n.re
4280 + self.twiddle14.im * x1415n.re;
4281 let b227im_a = buffer.load(0).im
4282 + self.twiddle2.re * x128p.im
4283 + self.twiddle4.re * x227p.im
4284 + self.twiddle6.re * x326p.im
4285 + self.twiddle8.re * x425p.im
4286 + self.twiddle10.re * x524p.im
4287 + self.twiddle12.re * x623p.im
4288 + self.twiddle14.re * x722p.im
4289 + self.twiddle13.re * x821p.im
4290 + self.twiddle11.re * x920p.im
4291 + self.twiddle9.re * x1019p.im
4292 + self.twiddle7.re * x1118p.im
4293 + self.twiddle5.re * x1217p.im
4294 + self.twiddle3.re * x1316p.im
4295 + self.twiddle1.re * x1415p.im;
4296 let b227im_b = self.twiddle2.im * x128n.re
4297 + self.twiddle4.im * x227n.re
4298 + self.twiddle6.im * x326n.re
4299 + self.twiddle8.im * x425n.re
4300 + self.twiddle10.im * x524n.re
4301 + self.twiddle12.im * x623n.re
4302 + self.twiddle14.im * x722n.re
4303 + -self.twiddle13.im * x821n.re
4304 + -self.twiddle11.im * x920n.re
4305 + -self.twiddle9.im * x1019n.re
4306 + -self.twiddle7.im * x1118n.re
4307 + -self.twiddle5.im * x1217n.re
4308 + -self.twiddle3.im * x1316n.re
4309 + -self.twiddle1.im * x1415n.re;
4310 let b326im_a = buffer.load(0).im
4311 + self.twiddle3.re * x128p.im
4312 + self.twiddle6.re * x227p.im
4313 + self.twiddle9.re * x326p.im
4314 + self.twiddle12.re * x425p.im
4315 + self.twiddle14.re * x524p.im
4316 + self.twiddle11.re * x623p.im
4317 + self.twiddle8.re * x722p.im
4318 + self.twiddle5.re * x821p.im
4319 + self.twiddle2.re * x920p.im
4320 + self.twiddle1.re * x1019p.im
4321 + self.twiddle4.re * x1118p.im
4322 + self.twiddle7.re * x1217p.im
4323 + self.twiddle10.re * x1316p.im
4324 + self.twiddle13.re * x1415p.im;
4325 let b326im_b = self.twiddle3.im * x128n.re
4326 + self.twiddle6.im * x227n.re
4327 + self.twiddle9.im * x326n.re
4328 + self.twiddle12.im * x425n.re
4329 + -self.twiddle14.im * x524n.re
4330 + -self.twiddle11.im * x623n.re
4331 + -self.twiddle8.im * x722n.re
4332 + -self.twiddle5.im * x821n.re
4333 + -self.twiddle2.im * x920n.re
4334 + self.twiddle1.im * x1019n.re
4335 + self.twiddle4.im * x1118n.re
4336 + self.twiddle7.im * x1217n.re
4337 + self.twiddle10.im * x1316n.re
4338 + self.twiddle13.im * x1415n.re;
4339 let b425im_a = buffer.load(0).im
4340 + self.twiddle4.re * x128p.im
4341 + self.twiddle8.re * x227p.im
4342 + self.twiddle12.re * x326p.im
4343 + self.twiddle13.re * x425p.im
4344 + self.twiddle9.re * x524p.im
4345 + self.twiddle5.re * x623p.im
4346 + self.twiddle1.re * x722p.im
4347 + self.twiddle3.re * x821p.im
4348 + self.twiddle7.re * x920p.im
4349 + self.twiddle11.re * x1019p.im
4350 + self.twiddle14.re * x1118p.im
4351 + self.twiddle10.re * x1217p.im
4352 + self.twiddle6.re * x1316p.im
4353 + self.twiddle2.re * x1415p.im;
4354 let b425im_b = self.twiddle4.im * x128n.re
4355 + self.twiddle8.im * x227n.re
4356 + self.twiddle12.im * x326n.re
4357 + -self.twiddle13.im * x425n.re
4358 + -self.twiddle9.im * x524n.re
4359 + -self.twiddle5.im * x623n.re
4360 + -self.twiddle1.im * x722n.re
4361 + self.twiddle3.im * x821n.re
4362 + self.twiddle7.im * x920n.re
4363 + self.twiddle11.im * x1019n.re
4364 + -self.twiddle14.im * x1118n.re
4365 + -self.twiddle10.im * x1217n.re
4366 + -self.twiddle6.im * x1316n.re
4367 + -self.twiddle2.im * x1415n.re;
4368 let b524im_a = buffer.load(0).im
4369 + self.twiddle5.re * x128p.im
4370 + self.twiddle10.re * x227p.im
4371 + self.twiddle14.re * x326p.im
4372 + self.twiddle9.re * x425p.im
4373 + self.twiddle4.re * x524p.im
4374 + self.twiddle1.re * x623p.im
4375 + self.twiddle6.re * x722p.im
4376 + self.twiddle11.re * x821p.im
4377 + self.twiddle13.re * x920p.im
4378 + self.twiddle8.re * x1019p.im
4379 + self.twiddle3.re * x1118p.im
4380 + self.twiddle2.re * x1217p.im
4381 + self.twiddle7.re * x1316p.im
4382 + self.twiddle12.re * x1415p.im;
4383 let b524im_b = self.twiddle5.im * x128n.re
4384 + self.twiddle10.im * x227n.re
4385 + -self.twiddle14.im * x326n.re
4386 + -self.twiddle9.im * x425n.re
4387 + -self.twiddle4.im * x524n.re
4388 + self.twiddle1.im * x623n.re
4389 + self.twiddle6.im * x722n.re
4390 + self.twiddle11.im * x821n.re
4391 + -self.twiddle13.im * x920n.re
4392 + -self.twiddle8.im * x1019n.re
4393 + -self.twiddle3.im * x1118n.re
4394 + self.twiddle2.im * x1217n.re
4395 + self.twiddle7.im * x1316n.re
4396 + self.twiddle12.im * x1415n.re;
4397 let b623im_a = buffer.load(0).im
4398 + self.twiddle6.re * x128p.im
4399 + self.twiddle12.re * x227p.im
4400 + self.twiddle11.re * x326p.im
4401 + self.twiddle5.re * x425p.im
4402 + self.twiddle1.re * x524p.im
4403 + self.twiddle7.re * x623p.im
4404 + self.twiddle13.re * x722p.im
4405 + self.twiddle10.re * x821p.im
4406 + self.twiddle4.re * x920p.im
4407 + self.twiddle2.re * x1019p.im
4408 + self.twiddle8.re * x1118p.im
4409 + self.twiddle14.re * x1217p.im
4410 + self.twiddle9.re * x1316p.im
4411 + self.twiddle3.re * x1415p.im;
4412 let b623im_b = self.twiddle6.im * x128n.re
4413 + self.twiddle12.im * x227n.re
4414 + -self.twiddle11.im * x326n.re
4415 + -self.twiddle5.im * x425n.re
4416 + self.twiddle1.im * x524n.re
4417 + self.twiddle7.im * x623n.re
4418 + self.twiddle13.im * x722n.re
4419 + -self.twiddle10.im * x821n.re
4420 + -self.twiddle4.im * x920n.re
4421 + self.twiddle2.im * x1019n.re
4422 + self.twiddle8.im * x1118n.re
4423 + self.twiddle14.im * x1217n.re
4424 + -self.twiddle9.im * x1316n.re
4425 + -self.twiddle3.im * x1415n.re;
4426 let b722im_a = buffer.load(0).im
4427 + self.twiddle7.re * x128p.im
4428 + self.twiddle14.re * x227p.im
4429 + self.twiddle8.re * x326p.im
4430 + self.twiddle1.re * x425p.im
4431 + self.twiddle6.re * x524p.im
4432 + self.twiddle13.re * x623p.im
4433 + self.twiddle9.re * x722p.im
4434 + self.twiddle2.re * x821p.im
4435 + self.twiddle5.re * x920p.im
4436 + self.twiddle12.re * x1019p.im
4437 + self.twiddle10.re * x1118p.im
4438 + self.twiddle3.re * x1217p.im
4439 + self.twiddle4.re * x1316p.im
4440 + self.twiddle11.re * x1415p.im;
4441 let b722im_b = self.twiddle7.im * x128n.re
4442 + self.twiddle14.im * x227n.re
4443 + -self.twiddle8.im * x326n.re
4444 + -self.twiddle1.im * x425n.re
4445 + self.twiddle6.im * x524n.re
4446 + self.twiddle13.im * x623n.re
4447 + -self.twiddle9.im * x722n.re
4448 + -self.twiddle2.im * x821n.re
4449 + self.twiddle5.im * x920n.re
4450 + self.twiddle12.im * x1019n.re
4451 + -self.twiddle10.im * x1118n.re
4452 + -self.twiddle3.im * x1217n.re
4453 + self.twiddle4.im * x1316n.re
4454 + self.twiddle11.im * x1415n.re;
4455 let b821im_a = buffer.load(0).im
4456 + self.twiddle8.re * x128p.im
4457 + self.twiddle13.re * x227p.im
4458 + self.twiddle5.re * x326p.im
4459 + self.twiddle3.re * x425p.im
4460 + self.twiddle11.re * x524p.im
4461 + self.twiddle10.re * x623p.im
4462 + self.twiddle2.re * x722p.im
4463 + self.twiddle6.re * x821p.im
4464 + self.twiddle14.re * x920p.im
4465 + self.twiddle7.re * x1019p.im
4466 + self.twiddle1.re * x1118p.im
4467 + self.twiddle9.re * x1217p.im
4468 + self.twiddle12.re * x1316p.im
4469 + self.twiddle4.re * x1415p.im;
4470 let b821im_b = self.twiddle8.im * x128n.re
4471 + -self.twiddle13.im * x227n.re
4472 + -self.twiddle5.im * x326n.re
4473 + self.twiddle3.im * x425n.re
4474 + self.twiddle11.im * x524n.re
4475 + -self.twiddle10.im * x623n.re
4476 + -self.twiddle2.im * x722n.re
4477 + self.twiddle6.im * x821n.re
4478 + self.twiddle14.im * x920n.re
4479 + -self.twiddle7.im * x1019n.re
4480 + self.twiddle1.im * x1118n.re
4481 + self.twiddle9.im * x1217n.re
4482 + -self.twiddle12.im * x1316n.re
4483 + -self.twiddle4.im * x1415n.re;
4484 let b920im_a = buffer.load(0).im
4485 + self.twiddle9.re * x128p.im
4486 + self.twiddle11.re * x227p.im
4487 + self.twiddle2.re * x326p.im
4488 + self.twiddle7.re * x425p.im
4489 + self.twiddle13.re * x524p.im
4490 + self.twiddle4.re * x623p.im
4491 + self.twiddle5.re * x722p.im
4492 + self.twiddle14.re * x821p.im
4493 + self.twiddle6.re * x920p.im
4494 + self.twiddle3.re * x1019p.im
4495 + self.twiddle12.re * x1118p.im
4496 + self.twiddle8.re * x1217p.im
4497 + self.twiddle1.re * x1316p.im
4498 + self.twiddle10.re * x1415p.im;
4499 let b920im_b = self.twiddle9.im * x128n.re
4500 + -self.twiddle11.im * x227n.re
4501 + -self.twiddle2.im * x326n.re
4502 + self.twiddle7.im * x425n.re
4503 + -self.twiddle13.im * x524n.re
4504 + -self.twiddle4.im * x623n.re
4505 + self.twiddle5.im * x722n.re
4506 + self.twiddle14.im * x821n.re
4507 + -self.twiddle6.im * x920n.re
4508 + self.twiddle3.im * x1019n.re
4509 + self.twiddle12.im * x1118n.re
4510 + -self.twiddle8.im * x1217n.re
4511 + self.twiddle1.im * x1316n.re
4512 + self.twiddle10.im * x1415n.re;
4513 let b1019im_a = buffer.load(0).im
4514 + self.twiddle10.re * x128p.im
4515 + self.twiddle9.re * x227p.im
4516 + self.twiddle1.re * x326p.im
4517 + self.twiddle11.re * x425p.im
4518 + self.twiddle8.re * x524p.im
4519 + self.twiddle2.re * x623p.im
4520 + self.twiddle12.re * x722p.im
4521 + self.twiddle7.re * x821p.im
4522 + self.twiddle3.re * x920p.im
4523 + self.twiddle13.re * x1019p.im
4524 + self.twiddle6.re * x1118p.im
4525 + self.twiddle4.re * x1217p.im
4526 + self.twiddle14.re * x1316p.im
4527 + self.twiddle5.re * x1415p.im;
4528 let b1019im_b = self.twiddle10.im * x128n.re
4529 + -self.twiddle9.im * x227n.re
4530 + self.twiddle1.im * x326n.re
4531 + self.twiddle11.im * x425n.re
4532 + -self.twiddle8.im * x524n.re
4533 + self.twiddle2.im * x623n.re
4534 + self.twiddle12.im * x722n.re
4535 + -self.twiddle7.im * x821n.re
4536 + self.twiddle3.im * x920n.re
4537 + self.twiddle13.im * x1019n.re
4538 + -self.twiddle6.im * x1118n.re
4539 + self.twiddle4.im * x1217n.re
4540 + self.twiddle14.im * x1316n.re
4541 + -self.twiddle5.im * x1415n.re;
4542 let b1118im_a = buffer.load(0).im
4543 + self.twiddle11.re * x128p.im
4544 + self.twiddle7.re * x227p.im
4545 + self.twiddle4.re * x326p.im
4546 + self.twiddle14.re * x425p.im
4547 + self.twiddle3.re * x524p.im
4548 + self.twiddle8.re * x623p.im
4549 + self.twiddle10.re * x722p.im
4550 + self.twiddle1.re * x821p.im
4551 + self.twiddle12.re * x920p.im
4552 + self.twiddle6.re * x1019p.im
4553 + self.twiddle5.re * x1118p.im
4554 + self.twiddle13.re * x1217p.im
4555 + self.twiddle2.re * x1316p.im
4556 + self.twiddle9.re * x1415p.im;
4557 let b1118im_b = self.twiddle11.im * x128n.re
4558 + -self.twiddle7.im * x227n.re
4559 + self.twiddle4.im * x326n.re
4560 + -self.twiddle14.im * x425n.re
4561 + -self.twiddle3.im * x524n.re
4562 + self.twiddle8.im * x623n.re
4563 + -self.twiddle10.im * x722n.re
4564 + self.twiddle1.im * x821n.re
4565 + self.twiddle12.im * x920n.re
4566 + -self.twiddle6.im * x1019n.re
4567 + self.twiddle5.im * x1118n.re
4568 + -self.twiddle13.im * x1217n.re
4569 + -self.twiddle2.im * x1316n.re
4570 + self.twiddle9.im * x1415n.re;
4571 let b1217im_a = buffer.load(0).im
4572 + self.twiddle12.re * x128p.im
4573 + self.twiddle5.re * x227p.im
4574 + self.twiddle7.re * x326p.im
4575 + self.twiddle10.re * x425p.im
4576 + self.twiddle2.re * x524p.im
4577 + self.twiddle14.re * x623p.im
4578 + self.twiddle3.re * x722p.im
4579 + self.twiddle9.re * x821p.im
4580 + self.twiddle8.re * x920p.im
4581 + self.twiddle4.re * x1019p.im
4582 + self.twiddle13.re * x1118p.im
4583 + self.twiddle1.re * x1217p.im
4584 + self.twiddle11.re * x1316p.im
4585 + self.twiddle6.re * x1415p.im;
4586 let b1217im_b = self.twiddle12.im * x128n.re
4587 + -self.twiddle5.im * x227n.re
4588 + self.twiddle7.im * x326n.re
4589 + -self.twiddle10.im * x425n.re
4590 + self.twiddle2.im * x524n.re
4591 + self.twiddle14.im * x623n.re
4592 + -self.twiddle3.im * x722n.re
4593 + self.twiddle9.im * x821n.re
4594 + -self.twiddle8.im * x920n.re
4595 + self.twiddle4.im * x1019n.re
4596 + -self.twiddle13.im * x1118n.re
4597 + -self.twiddle1.im * x1217n.re
4598 + self.twiddle11.im * x1316n.re
4599 + -self.twiddle6.im * x1415n.re;
4600 let b1316im_a = buffer.load(0).im
4601 + self.twiddle13.re * x128p.im
4602 + self.twiddle3.re * x227p.im
4603 + self.twiddle10.re * x326p.im
4604 + self.twiddle6.re * x425p.im
4605 + self.twiddle7.re * x524p.im
4606 + self.twiddle9.re * x623p.im
4607 + self.twiddle4.re * x722p.im
4608 + self.twiddle12.re * x821p.im
4609 + self.twiddle1.re * x920p.im
4610 + self.twiddle14.re * x1019p.im
4611 + self.twiddle2.re * x1118p.im
4612 + self.twiddle11.re * x1217p.im
4613 + self.twiddle5.re * x1316p.im
4614 + self.twiddle8.re * x1415p.im;
4615 let b1316im_b = self.twiddle13.im * x128n.re
4616 + -self.twiddle3.im * x227n.re
4617 + self.twiddle10.im * x326n.re
4618 + -self.twiddle6.im * x425n.re
4619 + self.twiddle7.im * x524n.re
4620 + -self.twiddle9.im * x623n.re
4621 + self.twiddle4.im * x722n.re
4622 + -self.twiddle12.im * x821n.re
4623 + self.twiddle1.im * x920n.re
4624 + self.twiddle14.im * x1019n.re
4625 + -self.twiddle2.im * x1118n.re
4626 + self.twiddle11.im * x1217n.re
4627 + -self.twiddle5.im * x1316n.re
4628 + self.twiddle8.im * x1415n.re;
4629 let b1415im_a = buffer.load(0).im
4630 + self.twiddle14.re * x128p.im
4631 + self.twiddle1.re * x227p.im
4632 + self.twiddle13.re * x326p.im
4633 + self.twiddle2.re * x425p.im
4634 + self.twiddle12.re * x524p.im
4635 + self.twiddle3.re * x623p.im
4636 + self.twiddle11.re * x722p.im
4637 + self.twiddle4.re * x821p.im
4638 + self.twiddle10.re * x920p.im
4639 + self.twiddle5.re * x1019p.im
4640 + self.twiddle9.re * x1118p.im
4641 + self.twiddle6.re * x1217p.im
4642 + self.twiddle8.re * x1316p.im
4643 + self.twiddle7.re * x1415p.im;
4644 let b1415im_b = self.twiddle14.im * x128n.re
4645 + -self.twiddle1.im * x227n.re
4646 + self.twiddle13.im * x326n.re
4647 + -self.twiddle2.im * x425n.re
4648 + self.twiddle12.im * x524n.re
4649 + -self.twiddle3.im * x623n.re
4650 + self.twiddle11.im * x722n.re
4651 + -self.twiddle4.im * x821n.re
4652 + self.twiddle10.im * x920n.re
4653 + -self.twiddle5.im * x1019n.re
4654 + self.twiddle9.im * x1118n.re
4655 + -self.twiddle6.im * x1217n.re
4656 + self.twiddle8.im * x1316n.re
4657 + -self.twiddle7.im * x1415n.re;
4658
4659 let out1re = b128re_a - b128re_b;
4660 let out1im = b128im_a + b128im_b;
4661 let out2re = b227re_a - b227re_b;
4662 let out2im = b227im_a + b227im_b;
4663 let out3re = b326re_a - b326re_b;
4664 let out3im = b326im_a + b326im_b;
4665 let out4re = b425re_a - b425re_b;
4666 let out4im = b425im_a + b425im_b;
4667 let out5re = b524re_a - b524re_b;
4668 let out5im = b524im_a + b524im_b;
4669 let out6re = b623re_a - b623re_b;
4670 let out6im = b623im_a + b623im_b;
4671 let out7re = b722re_a - b722re_b;
4672 let out7im = b722im_a + b722im_b;
4673 let out8re = b821re_a - b821re_b;
4674 let out8im = b821im_a + b821im_b;
4675 let out9re = b920re_a - b920re_b;
4676 let out9im = b920im_a + b920im_b;
4677 let out10re = b1019re_a - b1019re_b;
4678 let out10im = b1019im_a + b1019im_b;
4679 let out11re = b1118re_a - b1118re_b;
4680 let out11im = b1118im_a + b1118im_b;
4681 let out12re = b1217re_a - b1217re_b;
4682 let out12im = b1217im_a + b1217im_b;
4683 let out13re = b1316re_a - b1316re_b;
4684 let out13im = b1316im_a + b1316im_b;
4685 let out14re = b1415re_a - b1415re_b;
4686 let out14im = b1415im_a + b1415im_b;
4687 let out15re = b1415re_a + b1415re_b;
4688 let out15im = b1415im_a - b1415im_b;
4689 let out16re = b1316re_a + b1316re_b;
4690 let out16im = b1316im_a - b1316im_b;
4691 let out17re = b1217re_a + b1217re_b;
4692 let out17im = b1217im_a - b1217im_b;
4693 let out18re = b1118re_a + b1118re_b;
4694 let out18im = b1118im_a - b1118im_b;
4695 let out19re = b1019re_a + b1019re_b;
4696 let out19im = b1019im_a - b1019im_b;
4697 let out20re = b920re_a + b920re_b;
4698 let out20im = b920im_a - b920im_b;
4699 let out21re = b821re_a + b821re_b;
4700 let out21im = b821im_a - b821im_b;
4701 let out22re = b722re_a + b722re_b;
4702 let out22im = b722im_a - b722im_b;
4703 let out23re = b623re_a + b623re_b;
4704 let out23im = b623im_a - b623im_b;
4705 let out24re = b524re_a + b524re_b;
4706 let out24im = b524im_a - b524im_b;
4707 let out25re = b425re_a + b425re_b;
4708 let out25im = b425im_a - b425im_b;
4709 let out26re = b326re_a + b326re_b;
4710 let out26im = b326im_a - b326im_b;
4711 let out27re = b227re_a + b227re_b;
4712 let out27im = b227im_a - b227im_b;
4713 let out28re = b128re_a + b128re_b;
4714 let out28im = b128im_a - b128im_b;
4715 buffer.store(sum, 0);
4716 buffer.store(
4717 Complex {
4718 re: out1re,
4719 im: out1im,
4720 },
4721 1,
4722 );
4723 buffer.store(
4724 Complex {
4725 re: out2re,
4726 im: out2im,
4727 },
4728 2,
4729 );
4730 buffer.store(
4731 Complex {
4732 re: out3re,
4733 im: out3im,
4734 },
4735 3,
4736 );
4737 buffer.store(
4738 Complex {
4739 re: out4re,
4740 im: out4im,
4741 },
4742 4,
4743 );
4744 buffer.store(
4745 Complex {
4746 re: out5re,
4747 im: out5im,
4748 },
4749 5,
4750 );
4751 buffer.store(
4752 Complex {
4753 re: out6re,
4754 im: out6im,
4755 },
4756 6,
4757 );
4758 buffer.store(
4759 Complex {
4760 re: out7re,
4761 im: out7im,
4762 },
4763 7,
4764 );
4765 buffer.store(
4766 Complex {
4767 re: out8re,
4768 im: out8im,
4769 },
4770 8,
4771 );
4772 buffer.store(
4773 Complex {
4774 re: out9re,
4775 im: out9im,
4776 },
4777 9,
4778 );
4779 buffer.store(
4780 Complex {
4781 re: out10re,
4782 im: out10im,
4783 },
4784 10,
4785 );
4786 buffer.store(
4787 Complex {
4788 re: out11re,
4789 im: out11im,
4790 },
4791 11,
4792 );
4793 buffer.store(
4794 Complex {
4795 re: out12re,
4796 im: out12im,
4797 },
4798 12,
4799 );
4800 buffer.store(
4801 Complex {
4802 re: out13re,
4803 im: out13im,
4804 },
4805 13,
4806 );
4807 buffer.store(
4808 Complex {
4809 re: out14re,
4810 im: out14im,
4811 },
4812 14,
4813 );
4814 buffer.store(
4815 Complex {
4816 re: out15re,
4817 im: out15im,
4818 },
4819 15,
4820 );
4821 buffer.store(
4822 Complex {
4823 re: out16re,
4824 im: out16im,
4825 },
4826 16,
4827 );
4828 buffer.store(
4829 Complex {
4830 re: out17re,
4831 im: out17im,
4832 },
4833 17,
4834 );
4835 buffer.store(
4836 Complex {
4837 re: out18re,
4838 im: out18im,
4839 },
4840 18,
4841 );
4842 buffer.store(
4843 Complex {
4844 re: out19re,
4845 im: out19im,
4846 },
4847 19,
4848 );
4849 buffer.store(
4850 Complex {
4851 re: out20re,
4852 im: out20im,
4853 },
4854 20,
4855 );
4856 buffer.store(
4857 Complex {
4858 re: out21re,
4859 im: out21im,
4860 },
4861 21,
4862 );
4863 buffer.store(
4864 Complex {
4865 re: out22re,
4866 im: out22im,
4867 },
4868 22,
4869 );
4870 buffer.store(
4871 Complex {
4872 re: out23re,
4873 im: out23im,
4874 },
4875 23,
4876 );
4877 buffer.store(
4878 Complex {
4879 re: out24re,
4880 im: out24im,
4881 },
4882 24,
4883 );
4884 buffer.store(
4885 Complex {
4886 re: out25re,
4887 im: out25im,
4888 },
4889 25,
4890 );
4891 buffer.store(
4892 Complex {
4893 re: out26re,
4894 im: out26im,
4895 },
4896 26,
4897 );
4898 buffer.store(
4899 Complex {
4900 re: out27re,
4901 im: out27im,
4902 },
4903 27,
4904 );
4905 buffer.store(
4906 Complex {
4907 re: out28re,
4908 im: out28im,
4909 },
4910 28,
4911 );
4912 }
4913}
/// Size-31 FFT butterfly: a fixed-length kernel whose only state is a set of
/// precomputed twiddle factors and the direction they were computed for.
///
/// `new` fills `twiddleK` with `twiddles::compute_twiddle(K, 31, direction)` for
/// `K` in `1..=15`. `perform_fft_contiguous` pairs input `k` with input `31 - k`
/// (for example `buffer.load(1)` with `buffer.load(30)`), which yields 15
/// sum/difference pairs, matching the 15 stored factors.
pub struct Butterfly31<T> {
    // Twiddle factors for indices 1 through 15 of a length-31 transform.
    twiddle1: Complex<T>,
    twiddle2: Complex<T>,
    twiddle3: Complex<T>,
    twiddle4: Complex<T>,
    twiddle5: Complex<T>,
    twiddle6: Complex<T>,
    twiddle7: Complex<T>,
    twiddle8: Complex<T>,
    twiddle9: Complex<T>,
    twiddle10: Complex<T>,
    twiddle11: Complex<T>,
    twiddle12: Complex<T>,
    twiddle13: Complex<T>,
    twiddle14: Complex<T>,
    twiddle15: Complex<T>,
    // Direction passed to `new`; the same value was used to compute every twiddle above.
    direction: FftDirection,
}
// Expands the shared butterfly plumbing for this type: `perform_fft_butterfly`,
// the `Fft` impl (chunked in-place and out-of-place processing, zero scratch),
// and `Length` reporting 31. The trailing closure is the macro's `$direction_fn`
// argument; it reads the stored `direction` field.
boilerplate_fft_butterfly!(Butterfly31, 31, |this: &Butterfly31<_>| this.direction);
4933impl<T: FftNum> Butterfly31<T> {
4934 pub fn new(direction: FftDirection) -> Self {
4935 let twiddle1: Complex<T> = twiddles::compute_twiddle(1, 31, direction);
4936 let twiddle2: Complex<T> = twiddles::compute_twiddle(2, 31, direction);
4937 let twiddle3: Complex<T> = twiddles::compute_twiddle(3, 31, direction);
4938 let twiddle4: Complex<T> = twiddles::compute_twiddle(4, 31, direction);
4939 let twiddle5: Complex<T> = twiddles::compute_twiddle(5, 31, direction);
4940 let twiddle6: Complex<T> = twiddles::compute_twiddle(6, 31, direction);
4941 let twiddle7: Complex<T> = twiddles::compute_twiddle(7, 31, direction);
4942 let twiddle8: Complex<T> = twiddles::compute_twiddle(8, 31, direction);
4943 let twiddle9: Complex<T> = twiddles::compute_twiddle(9, 31, direction);
4944 let twiddle10: Complex<T> = twiddles::compute_twiddle(10, 31, direction);
4945 let twiddle11: Complex<T> = twiddles::compute_twiddle(11, 31, direction);
4946 let twiddle12: Complex<T> = twiddles::compute_twiddle(12, 31, direction);
4947 let twiddle13: Complex<T> = twiddles::compute_twiddle(13, 31, direction);
4948 let twiddle14: Complex<T> = twiddles::compute_twiddle(14, 31, direction);
4949 let twiddle15: Complex<T> = twiddles::compute_twiddle(15, 31, direction);
4950 Self {
4951 twiddle1,
4952 twiddle2,
4953 twiddle3,
4954 twiddle4,
4955 twiddle5,
4956 twiddle6,
4957 twiddle7,
4958 twiddle8,
4959 twiddle9,
4960 twiddle10,
4961 twiddle11,
4962 twiddle12,
4963 twiddle13,
4964 twiddle14,
4965 twiddle15,
4966 direction,
4967 }
4968 }
4969
4970 #[inline(never)]
4971 unsafe fn perform_fft_contiguous(&self, mut buffer: impl LoadStore<T>) {
4972 let x130p = buffer.load(1) + buffer.load(30);
4976 let x130n = buffer.load(1) - buffer.load(30);
4977 let x229p = buffer.load(2) + buffer.load(29);
4978 let x229n = buffer.load(2) - buffer.load(29);
4979 let x328p = buffer.load(3) + buffer.load(28);
4980 let x328n = buffer.load(3) - buffer.load(28);
4981 let x427p = buffer.load(4) + buffer.load(27);
4982 let x427n = buffer.load(4) - buffer.load(27);
4983 let x526p = buffer.load(5) + buffer.load(26);
4984 let x526n = buffer.load(5) - buffer.load(26);
4985 let x625p = buffer.load(6) + buffer.load(25);
4986 let x625n = buffer.load(6) - buffer.load(25);
4987 let x724p = buffer.load(7) + buffer.load(24);
4988 let x724n = buffer.load(7) - buffer.load(24);
4989 let x823p = buffer.load(8) + buffer.load(23);
4990 let x823n = buffer.load(8) - buffer.load(23);
4991 let x922p = buffer.load(9) + buffer.load(22);
4992 let x922n = buffer.load(9) - buffer.load(22);
4993 let x1021p = buffer.load(10) + buffer.load(21);
4994 let x1021n = buffer.load(10) - buffer.load(21);
4995 let x1120p = buffer.load(11) + buffer.load(20);
4996 let x1120n = buffer.load(11) - buffer.load(20);
4997 let x1219p = buffer.load(12) + buffer.load(19);
4998 let x1219n = buffer.load(12) - buffer.load(19);
4999 let x1318p = buffer.load(13) + buffer.load(18);
5000 let x1318n = buffer.load(13) - buffer.load(18);
5001 let x1417p = buffer.load(14) + buffer.load(17);
5002 let x1417n = buffer.load(14) - buffer.load(17);
5003 let x1516p = buffer.load(15) + buffer.load(16);
5004 let x1516n = buffer.load(15) - buffer.load(16);
5005 let sum = buffer.load(0)
5006 + x130p
5007 + x229p
5008 + x328p
5009 + x427p
5010 + x526p
5011 + x625p
5012 + x724p
5013 + x823p
5014 + x922p
5015 + x1021p
5016 + x1120p
5017 + x1219p
5018 + x1318p
5019 + x1417p
5020 + x1516p;
5021 let b130re_a = buffer.load(0).re
5022 + self.twiddle1.re * x130p.re
5023 + self.twiddle2.re * x229p.re
5024 + self.twiddle3.re * x328p.re
5025 + self.twiddle4.re * x427p.re
5026 + self.twiddle5.re * x526p.re
5027 + self.twiddle6.re * x625p.re
5028 + self.twiddle7.re * x724p.re
5029 + self.twiddle8.re * x823p.re
5030 + self.twiddle9.re * x922p.re
5031 + self.twiddle10.re * x1021p.re
5032 + self.twiddle11.re * x1120p.re
5033 + self.twiddle12.re * x1219p.re
5034 + self.twiddle13.re * x1318p.re
5035 + self.twiddle14.re * x1417p.re
5036 + self.twiddle15.re * x1516p.re;
5037 let b130re_b = self.twiddle1.im * x130n.im
5038 + self.twiddle2.im * x229n.im
5039 + self.twiddle3.im * x328n.im
5040 + self.twiddle4.im * x427n.im
5041 + self.twiddle5.im * x526n.im
5042 + self.twiddle6.im * x625n.im
5043 + self.twiddle7.im * x724n.im
5044 + self.twiddle8.im * x823n.im
5045 + self.twiddle9.im * x922n.im
5046 + self.twiddle10.im * x1021n.im
5047 + self.twiddle11.im * x1120n.im
5048 + self.twiddle12.im * x1219n.im
5049 + self.twiddle13.im * x1318n.im
5050 + self.twiddle14.im * x1417n.im
5051 + self.twiddle15.im * x1516n.im;
5052 let b229re_a = buffer.load(0).re
5053 + self.twiddle2.re * x130p.re
5054 + self.twiddle4.re * x229p.re
5055 + self.twiddle6.re * x328p.re
5056 + self.twiddle8.re * x427p.re
5057 + self.twiddle10.re * x526p.re
5058 + self.twiddle12.re * x625p.re
5059 + self.twiddle14.re * x724p.re
5060 + self.twiddle15.re * x823p.re
5061 + self.twiddle13.re * x922p.re
5062 + self.twiddle11.re * x1021p.re
5063 + self.twiddle9.re * x1120p.re
5064 + self.twiddle7.re * x1219p.re
5065 + self.twiddle5.re * x1318p.re
5066 + self.twiddle3.re * x1417p.re
5067 + self.twiddle1.re * x1516p.re;
5068 let b229re_b = self.twiddle2.im * x130n.im
5069 + self.twiddle4.im * x229n.im
5070 + self.twiddle6.im * x328n.im
5071 + self.twiddle8.im * x427n.im
5072 + self.twiddle10.im * x526n.im
5073 + self.twiddle12.im * x625n.im
5074 + self.twiddle14.im * x724n.im
5075 + -self.twiddle15.im * x823n.im
5076 + -self.twiddle13.im * x922n.im
5077 + -self.twiddle11.im * x1021n.im
5078 + -self.twiddle9.im * x1120n.im
5079 + -self.twiddle7.im * x1219n.im
5080 + -self.twiddle5.im * x1318n.im
5081 + -self.twiddle3.im * x1417n.im
5082 + -self.twiddle1.im * x1516n.im;
5083 let b328re_a = buffer.load(0).re
5084 + self.twiddle3.re * x130p.re
5085 + self.twiddle6.re * x229p.re
5086 + self.twiddle9.re * x328p.re
5087 + self.twiddle12.re * x427p.re
5088 + self.twiddle15.re * x526p.re
5089 + self.twiddle13.re * x625p.re
5090 + self.twiddle10.re * x724p.re
5091 + self.twiddle7.re * x823p.re
5092 + self.twiddle4.re * x922p.re
5093 + self.twiddle1.re * x1021p.re
5094 + self.twiddle2.re * x1120p.re
5095 + self.twiddle5.re * x1219p.re
5096 + self.twiddle8.re * x1318p.re
5097 + self.twiddle11.re * x1417p.re
5098 + self.twiddle14.re * x1516p.re;
5099 let b328re_b = self.twiddle3.im * x130n.im
5100 + self.twiddle6.im * x229n.im
5101 + self.twiddle9.im * x328n.im
5102 + self.twiddle12.im * x427n.im
5103 + self.twiddle15.im * x526n.im
5104 + -self.twiddle13.im * x625n.im
5105 + -self.twiddle10.im * x724n.im
5106 + -self.twiddle7.im * x823n.im
5107 + -self.twiddle4.im * x922n.im
5108 + -self.twiddle1.im * x1021n.im
5109 + self.twiddle2.im * x1120n.im
5110 + self.twiddle5.im * x1219n.im
5111 + self.twiddle8.im * x1318n.im
5112 + self.twiddle11.im * x1417n.im
5113 + self.twiddle14.im * x1516n.im;
5114 let b427re_a = buffer.load(0).re
5115 + self.twiddle4.re * x130p.re
5116 + self.twiddle8.re * x229p.re
5117 + self.twiddle12.re * x328p.re
5118 + self.twiddle15.re * x427p.re
5119 + self.twiddle11.re * x526p.re
5120 + self.twiddle7.re * x625p.re
5121 + self.twiddle3.re * x724p.re
5122 + self.twiddle1.re * x823p.re
5123 + self.twiddle5.re * x922p.re
5124 + self.twiddle9.re * x1021p.re
5125 + self.twiddle13.re * x1120p.re
5126 + self.twiddle14.re * x1219p.re
5127 + self.twiddle10.re * x1318p.re
5128 + self.twiddle6.re * x1417p.re
5129 + self.twiddle2.re * x1516p.re;
5130 let b427re_b = self.twiddle4.im * x130n.im
5131 + self.twiddle8.im * x229n.im
5132 + self.twiddle12.im * x328n.im
5133 + -self.twiddle15.im * x427n.im
5134 + -self.twiddle11.im * x526n.im
5135 + -self.twiddle7.im * x625n.im
5136 + -self.twiddle3.im * x724n.im
5137 + self.twiddle1.im * x823n.im
5138 + self.twiddle5.im * x922n.im
5139 + self.twiddle9.im * x1021n.im
5140 + self.twiddle13.im * x1120n.im
5141 + -self.twiddle14.im * x1219n.im
5142 + -self.twiddle10.im * x1318n.im
5143 + -self.twiddle6.im * x1417n.im
5144 + -self.twiddle2.im * x1516n.im;
5145 let b526re_a = buffer.load(0).re
5146 + self.twiddle5.re * x130p.re
5147 + self.twiddle10.re * x229p.re
5148 + self.twiddle15.re * x328p.re
5149 + self.twiddle11.re * x427p.re
5150 + self.twiddle6.re * x526p.re
5151 + self.twiddle1.re * x625p.re
5152 + self.twiddle4.re * x724p.re
5153 + self.twiddle9.re * x823p.re
5154 + self.twiddle14.re * x922p.re
5155 + self.twiddle12.re * x1021p.re
5156 + self.twiddle7.re * x1120p.re
5157 + self.twiddle2.re * x1219p.re
5158 + self.twiddle3.re * x1318p.re
5159 + self.twiddle8.re * x1417p.re
5160 + self.twiddle13.re * x1516p.re;
5161 let b526re_b = self.twiddle5.im * x130n.im
5162 + self.twiddle10.im * x229n.im
5163 + self.twiddle15.im * x328n.im
5164 + -self.twiddle11.im * x427n.im
5165 + -self.twiddle6.im * x526n.im
5166 + -self.twiddle1.im * x625n.im
5167 + self.twiddle4.im * x724n.im
5168 + self.twiddle9.im * x823n.im
5169 + self.twiddle14.im * x922n.im
5170 + -self.twiddle12.im * x1021n.im
5171 + -self.twiddle7.im * x1120n.im
5172 + -self.twiddle2.im * x1219n.im
5173 + self.twiddle3.im * x1318n.im
5174 + self.twiddle8.im * x1417n.im
5175 + self.twiddle13.im * x1516n.im;
5176 let b625re_a = buffer.load(0).re
5177 + self.twiddle6.re * x130p.re
5178 + self.twiddle12.re * x229p.re
5179 + self.twiddle13.re * x328p.re
5180 + self.twiddle7.re * x427p.re
5181 + self.twiddle1.re * x526p.re
5182 + self.twiddle5.re * x625p.re
5183 + self.twiddle11.re * x724p.re
5184 + self.twiddle14.re * x823p.re
5185 + self.twiddle8.re * x922p.re
5186 + self.twiddle2.re * x1021p.re
5187 + self.twiddle4.re * x1120p.re
5188 + self.twiddle10.re * x1219p.re
5189 + self.twiddle15.re * x1318p.re
5190 + self.twiddle9.re * x1417p.re
5191 + self.twiddle3.re * x1516p.re;
5192 let b625re_b = self.twiddle6.im * x130n.im
5193 + self.twiddle12.im * x229n.im
5194 + -self.twiddle13.im * x328n.im
5195 + -self.twiddle7.im * x427n.im
5196 + -self.twiddle1.im * x526n.im
5197 + self.twiddle5.im * x625n.im
5198 + self.twiddle11.im * x724n.im
5199 + -self.twiddle14.im * x823n.im
5200 + -self.twiddle8.im * x922n.im
5201 + -self.twiddle2.im * x1021n.im
5202 + self.twiddle4.im * x1120n.im
5203 + self.twiddle10.im * x1219n.im
5204 + -self.twiddle15.im * x1318n.im
5205 + -self.twiddle9.im * x1417n.im
5206 + -self.twiddle3.im * x1516n.im;
5207 let b724re_a = buffer.load(0).re
5208 + self.twiddle7.re * x130p.re
5209 + self.twiddle14.re * x229p.re
5210 + self.twiddle10.re * x328p.re
5211 + self.twiddle3.re * x427p.re
5212 + self.twiddle4.re * x526p.re
5213 + self.twiddle11.re * x625p.re
5214 + self.twiddle13.re * x724p.re
5215 + self.twiddle6.re * x823p.re
5216 + self.twiddle1.re * x922p.re
5217 + self.twiddle8.re * x1021p.re
5218 + self.twiddle15.re * x1120p.re
5219 + self.twiddle9.re * x1219p.re
5220 + self.twiddle2.re * x1318p.re
5221 + self.twiddle5.re * x1417p.re
5222 + self.twiddle12.re * x1516p.re;
5223 let b724re_b = self.twiddle7.im * x130n.im
5224 + self.twiddle14.im * x229n.im
5225 + -self.twiddle10.im * x328n.im
5226 + -self.twiddle3.im * x427n.im
5227 + self.twiddle4.im * x526n.im
5228 + self.twiddle11.im * x625n.im
5229 + -self.twiddle13.im * x724n.im
5230 + -self.twiddle6.im * x823n.im
5231 + self.twiddle1.im * x922n.im
5232 + self.twiddle8.im * x1021n.im
5233 + self.twiddle15.im * x1120n.im
5234 + -self.twiddle9.im * x1219n.im
5235 + -self.twiddle2.im * x1318n.im
5236 + self.twiddle5.im * x1417n.im
5237 + self.twiddle12.im * x1516n.im;
5238 let b823re_a = buffer.load(0).re
5239 + self.twiddle8.re * x130p.re
5240 + self.twiddle15.re * x229p.re
5241 + self.twiddle7.re * x328p.re
5242 + self.twiddle1.re * x427p.re
5243 + self.twiddle9.re * x526p.re
5244 + self.twiddle14.re * x625p.re
5245 + self.twiddle6.re * x724p.re
5246 + self.twiddle2.re * x823p.re
5247 + self.twiddle10.re * x922p.re
5248 + self.twiddle13.re * x1021p.re
5249 + self.twiddle5.re * x1120p.re
5250 + self.twiddle3.re * x1219p.re
5251 + self.twiddle11.re * x1318p.re
5252 + self.twiddle12.re * x1417p.re
5253 + self.twiddle4.re * x1516p.re;
5254 let b823re_b = self.twiddle8.im * x130n.im
5255 + -self.twiddle15.im * x229n.im
5256 + -self.twiddle7.im * x328n.im
5257 + self.twiddle1.im * x427n.im
5258 + self.twiddle9.im * x526n.im
5259 + -self.twiddle14.im * x625n.im
5260 + -self.twiddle6.im * x724n.im
5261 + self.twiddle2.im * x823n.im
5262 + self.twiddle10.im * x922n.im
5263 + -self.twiddle13.im * x1021n.im
5264 + -self.twiddle5.im * x1120n.im
5265 + self.twiddle3.im * x1219n.im
5266 + self.twiddle11.im * x1318n.im
5267 + -self.twiddle12.im * x1417n.im
5268 + -self.twiddle4.im * x1516n.im;
5269 let b922re_a = buffer.load(0).re
5270 + self.twiddle9.re * x130p.re
5271 + self.twiddle13.re * x229p.re
5272 + self.twiddle4.re * x328p.re
5273 + self.twiddle5.re * x427p.re
5274 + self.twiddle14.re * x526p.re
5275 + self.twiddle8.re * x625p.re
5276 + self.twiddle1.re * x724p.re
5277 + self.twiddle10.re * x823p.re
5278 + self.twiddle12.re * x922p.re
5279 + self.twiddle3.re * x1021p.re
5280 + self.twiddle6.re * x1120p.re
5281 + self.twiddle15.re * x1219p.re
5282 + self.twiddle7.re * x1318p.re
5283 + self.twiddle2.re * x1417p.re
5284 + self.twiddle11.re * x1516p.re;
5285 let b922re_b = self.twiddle9.im * x130n.im
5286 + -self.twiddle13.im * x229n.im
5287 + -self.twiddle4.im * x328n.im
5288 + self.twiddle5.im * x427n.im
5289 + self.twiddle14.im * x526n.im
5290 + -self.twiddle8.im * x625n.im
5291 + self.twiddle1.im * x724n.im
5292 + self.twiddle10.im * x823n.im
5293 + -self.twiddle12.im * x922n.im
5294 + -self.twiddle3.im * x1021n.im
5295 + self.twiddle6.im * x1120n.im
5296 + self.twiddle15.im * x1219n.im
5297 + -self.twiddle7.im * x1318n.im
5298 + self.twiddle2.im * x1417n.im
5299 + self.twiddle11.im * x1516n.im;
5300 let b1021re_a = buffer.load(0).re
5301 + self.twiddle10.re * x130p.re
5302 + self.twiddle11.re * x229p.re
5303 + self.twiddle1.re * x328p.re
5304 + self.twiddle9.re * x427p.re
5305 + self.twiddle12.re * x526p.re
5306 + self.twiddle2.re * x625p.re
5307 + self.twiddle8.re * x724p.re
5308 + self.twiddle13.re * x823p.re
5309 + self.twiddle3.re * x922p.re
5310 + self.twiddle7.re * x1021p.re
5311 + self.twiddle14.re * x1120p.re
5312 + self.twiddle4.re * x1219p.re
5313 + self.twiddle6.re * x1318p.re
5314 + self.twiddle15.re * x1417p.re
5315 + self.twiddle5.re * x1516p.re;
5316 let b1021re_b = self.twiddle10.im * x130n.im
5317 + -self.twiddle11.im * x229n.im
5318 + -self.twiddle1.im * x328n.im
5319 + self.twiddle9.im * x427n.im
5320 + -self.twiddle12.im * x526n.im
5321 + -self.twiddle2.im * x625n.im
5322 + self.twiddle8.im * x724n.im
5323 + -self.twiddle13.im * x823n.im
5324 + -self.twiddle3.im * x922n.im
5325 + self.twiddle7.im * x1021n.im
5326 + -self.twiddle14.im * x1120n.im
5327 + -self.twiddle4.im * x1219n.im
5328 + self.twiddle6.im * x1318n.im
5329 + -self.twiddle15.im * x1417n.im
5330 + -self.twiddle5.im * x1516n.im;
5331 let b1120re_a = buffer.load(0).re
5332 + self.twiddle11.re * x130p.re
5333 + self.twiddle9.re * x229p.re
5334 + self.twiddle2.re * x328p.re
5335 + self.twiddle13.re * x427p.re
5336 + self.twiddle7.re * x526p.re
5337 + self.twiddle4.re * x625p.re
5338 + self.twiddle15.re * x724p.re
5339 + self.twiddle5.re * x823p.re
5340 + self.twiddle6.re * x922p.re
5341 + self.twiddle14.re * x1021p.re
5342 + self.twiddle3.re * x1120p.re
5343 + self.twiddle8.re * x1219p.re
5344 + self.twiddle12.re * x1318p.re
5345 + self.twiddle1.re * x1417p.re
5346 + self.twiddle10.re * x1516p.re;
5347 let b1120re_b = self.twiddle11.im * x130n.im
5348 + -self.twiddle9.im * x229n.im
5349 + self.twiddle2.im * x328n.im
5350 + self.twiddle13.im * x427n.im
5351 + -self.twiddle7.im * x526n.im
5352 + self.twiddle4.im * x625n.im
5353 + self.twiddle15.im * x724n.im
5354 + -self.twiddle5.im * x823n.im
5355 + self.twiddle6.im * x922n.im
5356 + -self.twiddle14.im * x1021n.im
5357 + -self.twiddle3.im * x1120n.im
5358 + self.twiddle8.im * x1219n.im
5359 + -self.twiddle12.im * x1318n.im
5360 + -self.twiddle1.im * x1417n.im
5361 + self.twiddle10.im * x1516n.im;
5362 let b1219re_a = buffer.load(0).re
5363 + self.twiddle12.re * x130p.re
5364 + self.twiddle7.re * x229p.re
5365 + self.twiddle5.re * x328p.re
5366 + self.twiddle14.re * x427p.re
5367 + self.twiddle2.re * x526p.re
5368 + self.twiddle10.re * x625p.re
5369 + self.twiddle9.re * x724p.re
5370 + self.twiddle3.re * x823p.re
5371 + self.twiddle15.re * x922p.re
5372 + self.twiddle4.re * x1021p.re
5373 + self.twiddle8.re * x1120p.re
5374 + self.twiddle11.re * x1219p.re
5375 + self.twiddle1.re * x1318p.re
5376 + self.twiddle13.re * x1417p.re
5377 + self.twiddle6.re * x1516p.re;
5378 let b1219re_b = self.twiddle12.im * x130n.im
5379 + -self.twiddle7.im * x229n.im
5380 + self.twiddle5.im * x328n.im
5381 + -self.twiddle14.im * x427n.im
5382 + -self.twiddle2.im * x526n.im
5383 + self.twiddle10.im * x625n.im
5384 + -self.twiddle9.im * x724n.im
5385 + self.twiddle3.im * x823n.im
5386 + self.twiddle15.im * x922n.im
5387 + -self.twiddle4.im * x1021n.im
5388 + self.twiddle8.im * x1120n.im
5389 + -self.twiddle11.im * x1219n.im
5390 + self.twiddle1.im * x1318n.im
5391 + self.twiddle13.im * x1417n.im
5392 + -self.twiddle6.im * x1516n.im;
5393 let b1318re_a = buffer.load(0).re
5394 + self.twiddle13.re * x130p.re
5395 + self.twiddle5.re * x229p.re
5396 + self.twiddle8.re * x328p.re
5397 + self.twiddle10.re * x427p.re
5398 + self.twiddle3.re * x526p.re
5399 + self.twiddle15.re * x625p.re
5400 + self.twiddle2.re * x724p.re
5401 + self.twiddle11.re * x823p.re
5402 + self.twiddle7.re * x922p.re
5403 + self.twiddle6.re * x1021p.re
5404 + self.twiddle12.re * x1120p.re
5405 + self.twiddle1.re * x1219p.re
5406 + self.twiddle14.re * x1318p.re
5407 + self.twiddle4.re * x1417p.re
5408 + self.twiddle9.re * x1516p.re;
5409 let b1318re_b = self.twiddle13.im * x130n.im
5410 + -self.twiddle5.im * x229n.im
5411 + self.twiddle8.im * x328n.im
5412 + -self.twiddle10.im * x427n.im
5413 + self.twiddle3.im * x526n.im
5414 + -self.twiddle15.im * x625n.im
5415 + -self.twiddle2.im * x724n.im
5416 + self.twiddle11.im * x823n.im
5417 + -self.twiddle7.im * x922n.im
5418 + self.twiddle6.im * x1021n.im
5419 + -self.twiddle12.im * x1120n.im
5420 + self.twiddle1.im * x1219n.im
5421 + self.twiddle14.im * x1318n.im
5422 + -self.twiddle4.im * x1417n.im
5423 + self.twiddle9.im * x1516n.im;
5424 let b1417re_a = buffer.load(0).re
5425 + self.twiddle14.re * x130p.re
5426 + self.twiddle3.re * x229p.re
5427 + self.twiddle11.re * x328p.re
5428 + self.twiddle6.re * x427p.re
5429 + self.twiddle8.re * x526p.re
5430 + self.twiddle9.re * x625p.re
5431 + self.twiddle5.re * x724p.re
5432 + self.twiddle12.re * x823p.re
5433 + self.twiddle2.re * x922p.re
5434 + self.twiddle15.re * x1021p.re
5435 + self.twiddle1.re * x1120p.re
5436 + self.twiddle13.re * x1219p.re
5437 + self.twiddle4.re * x1318p.re
5438 + self.twiddle10.re * x1417p.re
5439 + self.twiddle7.re * x1516p.re;
5440 let b1417re_b = self.twiddle14.im * x130n.im
5441 + -self.twiddle3.im * x229n.im
5442 + self.twiddle11.im * x328n.im
5443 + -self.twiddle6.im * x427n.im
5444 + self.twiddle8.im * x526n.im
5445 + -self.twiddle9.im * x625n.im
5446 + self.twiddle5.im * x724n.im
5447 + -self.twiddle12.im * x823n.im
5448 + self.twiddle2.im * x922n.im
5449 + -self.twiddle15.im * x1021n.im
5450 + -self.twiddle1.im * x1120n.im
5451 + self.twiddle13.im * x1219n.im
5452 + -self.twiddle4.im * x1318n.im
5453 + self.twiddle10.im * x1417n.im
5454 + -self.twiddle7.im * x1516n.im;
5455 let b1516re_a = buffer.load(0).re
5456 + self.twiddle15.re * x130p.re
5457 + self.twiddle1.re * x229p.re
5458 + self.twiddle14.re * x328p.re
5459 + self.twiddle2.re * x427p.re
5460 + self.twiddle13.re * x526p.re
5461 + self.twiddle3.re * x625p.re
5462 + self.twiddle12.re * x724p.re
5463 + self.twiddle4.re * x823p.re
5464 + self.twiddle11.re * x922p.re
5465 + self.twiddle5.re * x1021p.re
5466 + self.twiddle10.re * x1120p.re
5467 + self.twiddle6.re * x1219p.re
5468 + self.twiddle9.re * x1318p.re
5469 + self.twiddle7.re * x1417p.re
5470 + self.twiddle8.re * x1516p.re;
5471 let b1516re_b = self.twiddle15.im * x130n.im
5472 + -self.twiddle1.im * x229n.im
5473 + self.twiddle14.im * x328n.im
5474 + -self.twiddle2.im * x427n.im
5475 + self.twiddle13.im * x526n.im
5476 + -self.twiddle3.im * x625n.im
5477 + self.twiddle12.im * x724n.im
5478 + -self.twiddle4.im * x823n.im
5479 + self.twiddle11.im * x922n.im
5480 + -self.twiddle5.im * x1021n.im
5481 + self.twiddle10.im * x1120n.im
5482 + -self.twiddle6.im * x1219n.im
5483 + self.twiddle9.im * x1318n.im
5484 + -self.twiddle7.im * x1417n.im
5485 + self.twiddle8.im * x1516n.im;
5486
5487 let b130im_a = buffer.load(0).im
5488 + self.twiddle1.re * x130p.im
5489 + self.twiddle2.re * x229p.im
5490 + self.twiddle3.re * x328p.im
5491 + self.twiddle4.re * x427p.im
5492 + self.twiddle5.re * x526p.im
5493 + self.twiddle6.re * x625p.im
5494 + self.twiddle7.re * x724p.im
5495 + self.twiddle8.re * x823p.im
5496 + self.twiddle9.re * x922p.im
5497 + self.twiddle10.re * x1021p.im
5498 + self.twiddle11.re * x1120p.im
5499 + self.twiddle12.re * x1219p.im
5500 + self.twiddle13.re * x1318p.im
5501 + self.twiddle14.re * x1417p.im
5502 + self.twiddle15.re * x1516p.im;
5503 let b130im_b = self.twiddle1.im * x130n.re
5504 + self.twiddle2.im * x229n.re
5505 + self.twiddle3.im * x328n.re
5506 + self.twiddle4.im * x427n.re
5507 + self.twiddle5.im * x526n.re
5508 + self.twiddle6.im * x625n.re
5509 + self.twiddle7.im * x724n.re
5510 + self.twiddle8.im * x823n.re
5511 + self.twiddle9.im * x922n.re
5512 + self.twiddle10.im * x1021n.re
5513 + self.twiddle11.im * x1120n.re
5514 + self.twiddle12.im * x1219n.re
5515 + self.twiddle13.im * x1318n.re
5516 + self.twiddle14.im * x1417n.re
5517 + self.twiddle15.im * x1516n.re;
5518 let b229im_a = buffer.load(0).im
5519 + self.twiddle2.re * x130p.im
5520 + self.twiddle4.re * x229p.im
5521 + self.twiddle6.re * x328p.im
5522 + self.twiddle8.re * x427p.im
5523 + self.twiddle10.re * x526p.im
5524 + self.twiddle12.re * x625p.im
5525 + self.twiddle14.re * x724p.im
5526 + self.twiddle15.re * x823p.im
5527 + self.twiddle13.re * x922p.im
5528 + self.twiddle11.re * x1021p.im
5529 + self.twiddle9.re * x1120p.im
5530 + self.twiddle7.re * x1219p.im
5531 + self.twiddle5.re * x1318p.im
5532 + self.twiddle3.re * x1417p.im
5533 + self.twiddle1.re * x1516p.im;
5534 let b229im_b = self.twiddle2.im * x130n.re
5535 + self.twiddle4.im * x229n.re
5536 + self.twiddle6.im * x328n.re
5537 + self.twiddle8.im * x427n.re
5538 + self.twiddle10.im * x526n.re
5539 + self.twiddle12.im * x625n.re
5540 + self.twiddle14.im * x724n.re
5541 + -self.twiddle15.im * x823n.re
5542 + -self.twiddle13.im * x922n.re
5543 + -self.twiddle11.im * x1021n.re
5544 + -self.twiddle9.im * x1120n.re
5545 + -self.twiddle7.im * x1219n.re
5546 + -self.twiddle5.im * x1318n.re
5547 + -self.twiddle3.im * x1417n.re
5548 + -self.twiddle1.im * x1516n.re;
5549 let b328im_a = buffer.load(0).im
5550 + self.twiddle3.re * x130p.im
5551 + self.twiddle6.re * x229p.im
5552 + self.twiddle9.re * x328p.im
5553 + self.twiddle12.re * x427p.im
5554 + self.twiddle15.re * x526p.im
5555 + self.twiddle13.re * x625p.im
5556 + self.twiddle10.re * x724p.im
5557 + self.twiddle7.re * x823p.im
5558 + self.twiddle4.re * x922p.im
5559 + self.twiddle1.re * x1021p.im
5560 + self.twiddle2.re * x1120p.im
5561 + self.twiddle5.re * x1219p.im
5562 + self.twiddle8.re * x1318p.im
5563 + self.twiddle11.re * x1417p.im
5564 + self.twiddle14.re * x1516p.im;
5565 let b328im_b = self.twiddle3.im * x130n.re
5566 + self.twiddle6.im * x229n.re
5567 + self.twiddle9.im * x328n.re
5568 + self.twiddle12.im * x427n.re
5569 + self.twiddle15.im * x526n.re
5570 + -self.twiddle13.im * x625n.re
5571 + -self.twiddle10.im * x724n.re
5572 + -self.twiddle7.im * x823n.re
5573 + -self.twiddle4.im * x922n.re
5574 + -self.twiddle1.im * x1021n.re
5575 + self.twiddle2.im * x1120n.re
5576 + self.twiddle5.im * x1219n.re
5577 + self.twiddle8.im * x1318n.re
5578 + self.twiddle11.im * x1417n.re
5579 + self.twiddle14.im * x1516n.re;
5580 let b427im_a = buffer.load(0).im
5581 + self.twiddle4.re * x130p.im
5582 + self.twiddle8.re * x229p.im
5583 + self.twiddle12.re * x328p.im
5584 + self.twiddle15.re * x427p.im
5585 + self.twiddle11.re * x526p.im
5586 + self.twiddle7.re * x625p.im
5587 + self.twiddle3.re * x724p.im
5588 + self.twiddle1.re * x823p.im
5589 + self.twiddle5.re * x922p.im
5590 + self.twiddle9.re * x1021p.im
5591 + self.twiddle13.re * x1120p.im
5592 + self.twiddle14.re * x1219p.im
5593 + self.twiddle10.re * x1318p.im
5594 + self.twiddle6.re * x1417p.im
5595 + self.twiddle2.re * x1516p.im;
5596 let b427im_b = self.twiddle4.im * x130n.re
5597 + self.twiddle8.im * x229n.re
5598 + self.twiddle12.im * x328n.re
5599 + -self.twiddle15.im * x427n.re
5600 + -self.twiddle11.im * x526n.re
5601 + -self.twiddle7.im * x625n.re
5602 + -self.twiddle3.im * x724n.re
5603 + self.twiddle1.im * x823n.re
5604 + self.twiddle5.im * x922n.re
5605 + self.twiddle9.im * x1021n.re
5606 + self.twiddle13.im * x1120n.re
5607 + -self.twiddle14.im * x1219n.re
5608 + -self.twiddle10.im * x1318n.re
5609 + -self.twiddle6.im * x1417n.re
5610 + -self.twiddle2.im * x1516n.re;
5611 let b526im_a = buffer.load(0).im
5612 + self.twiddle5.re * x130p.im
5613 + self.twiddle10.re * x229p.im
5614 + self.twiddle15.re * x328p.im
5615 + self.twiddle11.re * x427p.im
5616 + self.twiddle6.re * x526p.im
5617 + self.twiddle1.re * x625p.im
5618 + self.twiddle4.re * x724p.im
5619 + self.twiddle9.re * x823p.im
5620 + self.twiddle14.re * x922p.im
5621 + self.twiddle12.re * x1021p.im
5622 + self.twiddle7.re * x1120p.im
5623 + self.twiddle2.re * x1219p.im
5624 + self.twiddle3.re * x1318p.im
5625 + self.twiddle8.re * x1417p.im
5626 + self.twiddle13.re * x1516p.im;
5627 let b526im_b = self.twiddle5.im * x130n.re
5628 + self.twiddle10.im * x229n.re
5629 + self.twiddle15.im * x328n.re
5630 + -self.twiddle11.im * x427n.re
5631 + -self.twiddle6.im * x526n.re
5632 + -self.twiddle1.im * x625n.re
5633 + self.twiddle4.im * x724n.re
5634 + self.twiddle9.im * x823n.re
5635 + self.twiddle14.im * x922n.re
5636 + -self.twiddle12.im * x1021n.re
5637 + -self.twiddle7.im * x1120n.re
5638 + -self.twiddle2.im * x1219n.re
5639 + self.twiddle3.im * x1318n.re
5640 + self.twiddle8.im * x1417n.re
5641 + self.twiddle13.im * x1516n.re;
5642 let b625im_a = buffer.load(0).im
5643 + self.twiddle6.re * x130p.im
5644 + self.twiddle12.re * x229p.im
5645 + self.twiddle13.re * x328p.im
5646 + self.twiddle7.re * x427p.im
5647 + self.twiddle1.re * x526p.im
5648 + self.twiddle5.re * x625p.im
5649 + self.twiddle11.re * x724p.im
5650 + self.twiddle14.re * x823p.im
5651 + self.twiddle8.re * x922p.im
5652 + self.twiddle2.re * x1021p.im
5653 + self.twiddle4.re * x1120p.im
5654 + self.twiddle10.re * x1219p.im
5655 + self.twiddle15.re * x1318p.im
5656 + self.twiddle9.re * x1417p.im
5657 + self.twiddle3.re * x1516p.im;
5658 let b625im_b = self.twiddle6.im * x130n.re
5659 + self.twiddle12.im * x229n.re
5660 + -self.twiddle13.im * x328n.re
5661 + -self.twiddle7.im * x427n.re
5662 + -self.twiddle1.im * x526n.re
5663 + self.twiddle5.im * x625n.re
5664 + self.twiddle11.im * x724n.re
5665 + -self.twiddle14.im * x823n.re
5666 + -self.twiddle8.im * x922n.re
5667 + -self.twiddle2.im * x1021n.re
5668 + self.twiddle4.im * x1120n.re
5669 + self.twiddle10.im * x1219n.re
5670 + -self.twiddle15.im * x1318n.re
5671 + -self.twiddle9.im * x1417n.re
5672 + -self.twiddle3.im * x1516n.re;
5673 let b724im_a = buffer.load(0).im
5674 + self.twiddle7.re * x130p.im
5675 + self.twiddle14.re * x229p.im
5676 + self.twiddle10.re * x328p.im
5677 + self.twiddle3.re * x427p.im
5678 + self.twiddle4.re * x526p.im
5679 + self.twiddle11.re * x625p.im
5680 + self.twiddle13.re * x724p.im
5681 + self.twiddle6.re * x823p.im
5682 + self.twiddle1.re * x922p.im
5683 + self.twiddle8.re * x1021p.im
5684 + self.twiddle15.re * x1120p.im
5685 + self.twiddle9.re * x1219p.im
5686 + self.twiddle2.re * x1318p.im
5687 + self.twiddle5.re * x1417p.im
5688 + self.twiddle12.re * x1516p.im;
5689 let b724im_b = self.twiddle7.im * x130n.re
5690 + self.twiddle14.im * x229n.re
5691 + -self.twiddle10.im * x328n.re
5692 + -self.twiddle3.im * x427n.re
5693 + self.twiddle4.im * x526n.re
5694 + self.twiddle11.im * x625n.re
5695 + -self.twiddle13.im * x724n.re
5696 + -self.twiddle6.im * x823n.re
5697 + self.twiddle1.im * x922n.re
5698 + self.twiddle8.im * x1021n.re
5699 + self.twiddle15.im * x1120n.re
5700 + -self.twiddle9.im * x1219n.re
5701 + -self.twiddle2.im * x1318n.re
5702 + self.twiddle5.im * x1417n.re
5703 + self.twiddle12.im * x1516n.re;
5704 let b823im_a = buffer.load(0).im
5705 + self.twiddle8.re * x130p.im
5706 + self.twiddle15.re * x229p.im
5707 + self.twiddle7.re * x328p.im
5708 + self.twiddle1.re * x427p.im
5709 + self.twiddle9.re * x526p.im
5710 + self.twiddle14.re * x625p.im
5711 + self.twiddle6.re * x724p.im
5712 + self.twiddle2.re * x823p.im
5713 + self.twiddle10.re * x922p.im
5714 + self.twiddle13.re * x1021p.im
5715 + self.twiddle5.re * x1120p.im
5716 + self.twiddle3.re * x1219p.im
5717 + self.twiddle11.re * x1318p.im
5718 + self.twiddle12.re * x1417p.im
5719 + self.twiddle4.re * x1516p.im;
5720 let b823im_b = self.twiddle8.im * x130n.re
5721 + -self.twiddle15.im * x229n.re
5722 + -self.twiddle7.im * x328n.re
5723 + self.twiddle1.im * x427n.re
5724 + self.twiddle9.im * x526n.re
5725 + -self.twiddle14.im * x625n.re
5726 + -self.twiddle6.im * x724n.re
5727 + self.twiddle2.im * x823n.re
5728 + self.twiddle10.im * x922n.re
5729 + -self.twiddle13.im * x1021n.re
5730 + -self.twiddle5.im * x1120n.re
5731 + self.twiddle3.im * x1219n.re
5732 + self.twiddle11.im * x1318n.re
5733 + -self.twiddle12.im * x1417n.re
5734 + -self.twiddle4.im * x1516n.re;
5735 let b922im_a = buffer.load(0).im
5736 + self.twiddle9.re * x130p.im
5737 + self.twiddle13.re * x229p.im
5738 + self.twiddle4.re * x328p.im
5739 + self.twiddle5.re * x427p.im
5740 + self.twiddle14.re * x526p.im
5741 + self.twiddle8.re * x625p.im
5742 + self.twiddle1.re * x724p.im
5743 + self.twiddle10.re * x823p.im
5744 + self.twiddle12.re * x922p.im
5745 + self.twiddle3.re * x1021p.im
5746 + self.twiddle6.re * x1120p.im
5747 + self.twiddle15.re * x1219p.im
5748 + self.twiddle7.re * x1318p.im
5749 + self.twiddle2.re * x1417p.im
5750 + self.twiddle11.re * x1516p.im;
5751 let b922im_b = self.twiddle9.im * x130n.re
5752 + -self.twiddle13.im * x229n.re
5753 + -self.twiddle4.im * x328n.re
5754 + self.twiddle5.im * x427n.re
5755 + self.twiddle14.im * x526n.re
5756 + -self.twiddle8.im * x625n.re
5757 + self.twiddle1.im * x724n.re
5758 + self.twiddle10.im * x823n.re
5759 + -self.twiddle12.im * x922n.re
5760 + -self.twiddle3.im * x1021n.re
5761 + self.twiddle6.im * x1120n.re
5762 + self.twiddle15.im * x1219n.re
5763 + -self.twiddle7.im * x1318n.re
5764 + self.twiddle2.im * x1417n.re
5765 + self.twiddle11.im * x1516n.re;
5766 let b1021im_a = buffer.load(0).im
5767 + self.twiddle10.re * x130p.im
5768 + self.twiddle11.re * x229p.im
5769 + self.twiddle1.re * x328p.im
5770 + self.twiddle9.re * x427p.im
5771 + self.twiddle12.re * x526p.im
5772 + self.twiddle2.re * x625p.im
5773 + self.twiddle8.re * x724p.im
5774 + self.twiddle13.re * x823p.im
5775 + self.twiddle3.re * x922p.im
5776 + self.twiddle7.re * x1021p.im
5777 + self.twiddle14.re * x1120p.im
5778 + self.twiddle4.re * x1219p.im
5779 + self.twiddle6.re * x1318p.im
5780 + self.twiddle15.re * x1417p.im
5781 + self.twiddle5.re * x1516p.im;
5782 let b1021im_b = self.twiddle10.im * x130n.re
5783 + -self.twiddle11.im * x229n.re
5784 + -self.twiddle1.im * x328n.re
5785 + self.twiddle9.im * x427n.re
5786 + -self.twiddle12.im * x526n.re
5787 + -self.twiddle2.im * x625n.re
5788 + self.twiddle8.im * x724n.re
5789 + -self.twiddle13.im * x823n.re
5790 + -self.twiddle3.im * x922n.re
5791 + self.twiddle7.im * x1021n.re
5792 + -self.twiddle14.im * x1120n.re
5793 + -self.twiddle4.im * x1219n.re
5794 + self.twiddle6.im * x1318n.re
5795 + -self.twiddle15.im * x1417n.re
5796 + -self.twiddle5.im * x1516n.re;
5797 let b1120im_a = buffer.load(0).im
5798 + self.twiddle11.re * x130p.im
5799 + self.twiddle9.re * x229p.im
5800 + self.twiddle2.re * x328p.im
5801 + self.twiddle13.re * x427p.im
5802 + self.twiddle7.re * x526p.im
5803 + self.twiddle4.re * x625p.im
5804 + self.twiddle15.re * x724p.im
5805 + self.twiddle5.re * x823p.im
5806 + self.twiddle6.re * x922p.im
5807 + self.twiddle14.re * x1021p.im
5808 + self.twiddle3.re * x1120p.im
5809 + self.twiddle8.re * x1219p.im
5810 + self.twiddle12.re * x1318p.im
5811 + self.twiddle1.re * x1417p.im
5812 + self.twiddle10.re * x1516p.im;
5813 let b1120im_b = self.twiddle11.im * x130n.re
5814 + -self.twiddle9.im * x229n.re
5815 + self.twiddle2.im * x328n.re
5816 + self.twiddle13.im * x427n.re
5817 + -self.twiddle7.im * x526n.re
5818 + self.twiddle4.im * x625n.re
5819 + self.twiddle15.im * x724n.re
5820 + -self.twiddle5.im * x823n.re
5821 + self.twiddle6.im * x922n.re
5822 + -self.twiddle14.im * x1021n.re
5823 + -self.twiddle3.im * x1120n.re
5824 + self.twiddle8.im * x1219n.re
5825 + -self.twiddle12.im * x1318n.re
5826 + -self.twiddle1.im * x1417n.re
5827 + self.twiddle10.im * x1516n.re;
5828 let b1219im_a = buffer.load(0).im
5829 + self.twiddle12.re * x130p.im
5830 + self.twiddle7.re * x229p.im
5831 + self.twiddle5.re * x328p.im
5832 + self.twiddle14.re * x427p.im
5833 + self.twiddle2.re * x526p.im
5834 + self.twiddle10.re * x625p.im
5835 + self.twiddle9.re * x724p.im
5836 + self.twiddle3.re * x823p.im
5837 + self.twiddle15.re * x922p.im
5838 + self.twiddle4.re * x1021p.im
5839 + self.twiddle8.re * x1120p.im
5840 + self.twiddle11.re * x1219p.im
5841 + self.twiddle1.re * x1318p.im
5842 + self.twiddle13.re * x1417p.im
5843 + self.twiddle6.re * x1516p.im;
5844 let b1219im_b = self.twiddle12.im * x130n.re
5845 + -self.twiddle7.im * x229n.re
5846 + self.twiddle5.im * x328n.re
5847 + -self.twiddle14.im * x427n.re
5848 + -self.twiddle2.im * x526n.re
5849 + self.twiddle10.im * x625n.re
5850 + -self.twiddle9.im * x724n.re
5851 + self.twiddle3.im * x823n.re
5852 + self.twiddle15.im * x922n.re
5853 + -self.twiddle4.im * x1021n.re
5854 + self.twiddle8.im * x1120n.re
5855 + -self.twiddle11.im * x1219n.re
5856 + self.twiddle1.im * x1318n.re
5857 + self.twiddle13.im * x1417n.re
5858 + -self.twiddle6.im * x1516n.re;
5859 let b1318im_a = buffer.load(0).im
5860 + self.twiddle13.re * x130p.im
5861 + self.twiddle5.re * x229p.im
5862 + self.twiddle8.re * x328p.im
5863 + self.twiddle10.re * x427p.im
5864 + self.twiddle3.re * x526p.im
5865 + self.twiddle15.re * x625p.im
5866 + self.twiddle2.re * x724p.im
5867 + self.twiddle11.re * x823p.im
5868 + self.twiddle7.re * x922p.im
5869 + self.twiddle6.re * x1021p.im
5870 + self.twiddle12.re * x1120p.im
5871 + self.twiddle1.re * x1219p.im
5872 + self.twiddle14.re * x1318p.im
5873 + self.twiddle4.re * x1417p.im
5874 + self.twiddle9.re * x1516p.im;
5875 let b1318im_b = self.twiddle13.im * x130n.re
5876 + -self.twiddle5.im * x229n.re
5877 + self.twiddle8.im * x328n.re
5878 + -self.twiddle10.im * x427n.re
5879 + self.twiddle3.im * x526n.re
5880 + -self.twiddle15.im * x625n.re
5881 + -self.twiddle2.im * x724n.re
5882 + self.twiddle11.im * x823n.re
5883 + -self.twiddle7.im * x922n.re
5884 + self.twiddle6.im * x1021n.re
5885 + -self.twiddle12.im * x1120n.re
5886 + self.twiddle1.im * x1219n.re
5887 + self.twiddle14.im * x1318n.re
5888 + -self.twiddle4.im * x1417n.re
5889 + self.twiddle9.im * x1516n.re;
5890 let b1417im_a = buffer.load(0).im
5891 + self.twiddle14.re * x130p.im
5892 + self.twiddle3.re * x229p.im
5893 + self.twiddle11.re * x328p.im
5894 + self.twiddle6.re * x427p.im
5895 + self.twiddle8.re * x526p.im
5896 + self.twiddle9.re * x625p.im
5897 + self.twiddle5.re * x724p.im
5898 + self.twiddle12.re * x823p.im
5899 + self.twiddle2.re * x922p.im
5900 + self.twiddle15.re * x1021p.im
5901 + self.twiddle1.re * x1120p.im
5902 + self.twiddle13.re * x1219p.im
5903 + self.twiddle4.re * x1318p.im
5904 + self.twiddle10.re * x1417p.im
5905 + self.twiddle7.re * x1516p.im;
5906 let b1417im_b = self.twiddle14.im * x130n.re
5907 + -self.twiddle3.im * x229n.re
5908 + self.twiddle11.im * x328n.re
5909 + -self.twiddle6.im * x427n.re
5910 + self.twiddle8.im * x526n.re
5911 + -self.twiddle9.im * x625n.re
5912 + self.twiddle5.im * x724n.re
5913 + -self.twiddle12.im * x823n.re
5914 + self.twiddle2.im * x922n.re
5915 + -self.twiddle15.im * x1021n.re
5916 + -self.twiddle1.im * x1120n.re
5917 + self.twiddle13.im * x1219n.re
5918 + -self.twiddle4.im * x1318n.re
5919 + self.twiddle10.im * x1417n.re
5920 + -self.twiddle7.im * x1516n.re;
5921 let b1516im_a = buffer.load(0).im
5922 + self.twiddle15.re * x130p.im
5923 + self.twiddle1.re * x229p.im
5924 + self.twiddle14.re * x328p.im
5925 + self.twiddle2.re * x427p.im
5926 + self.twiddle13.re * x526p.im
5927 + self.twiddle3.re * x625p.im
5928 + self.twiddle12.re * x724p.im
5929 + self.twiddle4.re * x823p.im
5930 + self.twiddle11.re * x922p.im
5931 + self.twiddle5.re * x1021p.im
5932 + self.twiddle10.re * x1120p.im
5933 + self.twiddle6.re * x1219p.im
5934 + self.twiddle9.re * x1318p.im
5935 + self.twiddle7.re * x1417p.im
5936 + self.twiddle8.re * x1516p.im;
5937 let b1516im_b = self.twiddle15.im * x130n.re
5938 + -self.twiddle1.im * x229n.re
5939 + self.twiddle14.im * x328n.re
5940 + -self.twiddle2.im * x427n.re
5941 + self.twiddle13.im * x526n.re
5942 + -self.twiddle3.im * x625n.re
5943 + self.twiddle12.im * x724n.re
5944 + -self.twiddle4.im * x823n.re
5945 + self.twiddle11.im * x922n.re
5946 + -self.twiddle5.im * x1021n.re
5947 + self.twiddle10.im * x1120n.re
5948 + -self.twiddle6.im * x1219n.re
5949 + self.twiddle9.im * x1318n.re
5950 + -self.twiddle7.im * x1417n.re
5951 + self.twiddle8.im * x1516n.re;
5952
5953 let out1re = b130re_a - b130re_b;
5954 let out1im = b130im_a + b130im_b;
5955 let out2re = b229re_a - b229re_b;
5956 let out2im = b229im_a + b229im_b;
5957 let out3re = b328re_a - b328re_b;
5958 let out3im = b328im_a + b328im_b;
5959 let out4re = b427re_a - b427re_b;
5960 let out4im = b427im_a + b427im_b;
5961 let out5re = b526re_a - b526re_b;
5962 let out5im = b526im_a + b526im_b;
5963 let out6re = b625re_a - b625re_b;
5964 let out6im = b625im_a + b625im_b;
5965 let out7re = b724re_a - b724re_b;
5966 let out7im = b724im_a + b724im_b;
5967 let out8re = b823re_a - b823re_b;
5968 let out8im = b823im_a + b823im_b;
5969 let out9re = b922re_a - b922re_b;
5970 let out9im = b922im_a + b922im_b;
5971 let out10re = b1021re_a - b1021re_b;
5972 let out10im = b1021im_a + b1021im_b;
5973 let out11re = b1120re_a - b1120re_b;
5974 let out11im = b1120im_a + b1120im_b;
5975 let out12re = b1219re_a - b1219re_b;
5976 let out12im = b1219im_a + b1219im_b;
5977 let out13re = b1318re_a - b1318re_b;
5978 let out13im = b1318im_a + b1318im_b;
5979 let out14re = b1417re_a - b1417re_b;
5980 let out14im = b1417im_a + b1417im_b;
5981 let out15re = b1516re_a - b1516re_b;
5982 let out15im = b1516im_a + b1516im_b;
5983 let out16re = b1516re_a + b1516re_b;
5984 let out16im = b1516im_a - b1516im_b;
5985 let out17re = b1417re_a + b1417re_b;
5986 let out17im = b1417im_a - b1417im_b;
5987 let out18re = b1318re_a + b1318re_b;
5988 let out18im = b1318im_a - b1318im_b;
5989 let out19re = b1219re_a + b1219re_b;
5990 let out19im = b1219im_a - b1219im_b;
5991 let out20re = b1120re_a + b1120re_b;
5992 let out20im = b1120im_a - b1120im_b;
5993 let out21re = b1021re_a + b1021re_b;
5994 let out21im = b1021im_a - b1021im_b;
5995 let out22re = b922re_a + b922re_b;
5996 let out22im = b922im_a - b922im_b;
5997 let out23re = b823re_a + b823re_b;
5998 let out23im = b823im_a - b823im_b;
5999 let out24re = b724re_a + b724re_b;
6000 let out24im = b724im_a - b724im_b;
6001 let out25re = b625re_a + b625re_b;
6002 let out25im = b625im_a - b625im_b;
6003 let out26re = b526re_a + b526re_b;
6004 let out26im = b526im_a - b526im_b;
6005 let out27re = b427re_a + b427re_b;
6006 let out27im = b427im_a - b427im_b;
6007 let out28re = b328re_a + b328re_b;
6008 let out28im = b328im_a - b328im_b;
6009 let out29re = b229re_a + b229re_b;
6010 let out29im = b229im_a - b229im_b;
6011 let out30re = b130re_a + b130re_b;
6012 let out30im = b130im_a - b130im_b;
6013 buffer.store(sum, 0);
6014 buffer.store(
6015 Complex {
6016 re: out1re,
6017 im: out1im,
6018 },
6019 1,
6020 );
6021 buffer.store(
6022 Complex {
6023 re: out2re,
6024 im: out2im,
6025 },
6026 2,
6027 );
6028 buffer.store(
6029 Complex {
6030 re: out3re,
6031 im: out3im,
6032 },
6033 3,
6034 );
6035 buffer.store(
6036 Complex {
6037 re: out4re,
6038 im: out4im,
6039 },
6040 4,
6041 );
6042 buffer.store(
6043 Complex {
6044 re: out5re,
6045 im: out5im,
6046 },
6047 5,
6048 );
6049 buffer.store(
6050 Complex {
6051 re: out6re,
6052 im: out6im,
6053 },
6054 6,
6055 );
6056 buffer.store(
6057 Complex {
6058 re: out7re,
6059 im: out7im,
6060 },
6061 7,
6062 );
6063 buffer.store(
6064 Complex {
6065 re: out8re,
6066 im: out8im,
6067 },
6068 8,
6069 );
6070 buffer.store(
6071 Complex {
6072 re: out9re,
6073 im: out9im,
6074 },
6075 9,
6076 );
6077 buffer.store(
6078 Complex {
6079 re: out10re,
6080 im: out10im,
6081 },
6082 10,
6083 );
6084 buffer.store(
6085 Complex {
6086 re: out11re,
6087 im: out11im,
6088 },
6089 11,
6090 );
6091 buffer.store(
6092 Complex {
6093 re: out12re,
6094 im: out12im,
6095 },
6096 12,
6097 );
6098 buffer.store(
6099 Complex {
6100 re: out13re,
6101 im: out13im,
6102 },
6103 13,
6104 );
6105 buffer.store(
6106 Complex {
6107 re: out14re,
6108 im: out14im,
6109 },
6110 14,
6111 );
6112 buffer.store(
6113 Complex {
6114 re: out15re,
6115 im: out15im,
6116 },
6117 15,
6118 );
6119 buffer.store(
6120 Complex {
6121 re: out16re,
6122 im: out16im,
6123 },
6124 16,
6125 );
6126 buffer.store(
6127 Complex {
6128 re: out17re,
6129 im: out17im,
6130 },
6131 17,
6132 );
6133 buffer.store(
6134 Complex {
6135 re: out18re,
6136 im: out18im,
6137 },
6138 18,
6139 );
6140 buffer.store(
6141 Complex {
6142 re: out19re,
6143 im: out19im,
6144 },
6145 19,
6146 );
6147 buffer.store(
6148 Complex {
6149 re: out20re,
6150 im: out20im,
6151 },
6152 20,
6153 );
6154 buffer.store(
6155 Complex {
6156 re: out21re,
6157 im: out21im,
6158 },
6159 21,
6160 );
6161 buffer.store(
6162 Complex {
6163 re: out22re,
6164 im: out22im,
6165 },
6166 22,
6167 );
6168 buffer.store(
6169 Complex {
6170 re: out23re,
6171 im: out23im,
6172 },
6173 23,
6174 );
6175 buffer.store(
6176 Complex {
6177 re: out24re,
6178 im: out24im,
6179 },
6180 24,
6181 );
6182 buffer.store(
6183 Complex {
6184 re: out25re,
6185 im: out25im,
6186 },
6187 25,
6188 );
6189 buffer.store(
6190 Complex {
6191 re: out26re,
6192 im: out26im,
6193 },
6194 26,
6195 );
6196 buffer.store(
6197 Complex {
6198 re: out27re,
6199 im: out27im,
6200 },
6201 27,
6202 );
6203 buffer.store(
6204 Complex {
6205 re: out28re,
6206 im: out28im,
6207 },
6208 28,
6209 );
6210 buffer.store(
6211 Complex {
6212 re: out29re,
6213 im: out29im,
6214 },
6215 29,
6216 );
6217 buffer.store(
6218 Complex {
6219 re: out30re,
6220 im: out30im,
6221 },
6222 30,
6223 );
6224 }
6225}
6226pub struct Butterfly32<T> {
6227 butterfly16: Butterfly16<T>,
6228 butterfly8: Butterfly8<T>,
6229 twiddles: [Complex<T>; 7],
6230}
6231boilerplate_fft_butterfly!(Butterfly32, 32, |this: &Butterfly32<_>| this
6232 .butterfly8
6233 .fft_direction());
6234impl<T: FftNum> Butterfly32<T> {
6235 pub fn new(direction: FftDirection) -> Self {
6236 Self {
6237 butterfly16: Butterfly16::new(direction),
6238 butterfly8: Butterfly8::new(direction),
6239 twiddles: [
6240 twiddles::compute_twiddle(1, 32, direction),
6241 twiddles::compute_twiddle(2, 32, direction),
6242 twiddles::compute_twiddle(3, 32, direction),
6243 twiddles::compute_twiddle(4, 32, direction),
6244 twiddles::compute_twiddle(5, 32, direction),
6245 twiddles::compute_twiddle(6, 32, direction),
6246 twiddles::compute_twiddle(7, 32, direction),
6247 ],
6248 }
6249 }
6250
6251 #[inline(never)]
6252 unsafe fn perform_fft_contiguous(&self, mut buffer: impl LoadStore<T>) {
6253 let mut scratch_evens = [
6256 buffer.load(0),
6257 buffer.load(2),
6258 buffer.load(4),
6259 buffer.load(6),
6260 buffer.load(8),
6261 buffer.load(10),
6262 buffer.load(12),
6263 buffer.load(14),
6264 buffer.load(16),
6265 buffer.load(18),
6266 buffer.load(20),
6267 buffer.load(22),
6268 buffer.load(24),
6269 buffer.load(26),
6270 buffer.load(28),
6271 buffer.load(30),
6272 ];
6273
6274 let mut scratch_odds_n1 = [
6275 buffer.load(1),
6276 buffer.load(5),
6277 buffer.load(9),
6278 buffer.load(13),
6279 buffer.load(17),
6280 buffer.load(21),
6281 buffer.load(25),
6282 buffer.load(29),
6283 ];
6284 let mut scratch_odds_n3 = [
6285 buffer.load(31),
6286 buffer.load(3),
6287 buffer.load(7),
6288 buffer.load(11),
6289 buffer.load(15),
6290 buffer.load(19),
6291 buffer.load(23),
6292 buffer.load(27),
6293 ];
6294
6295 self.butterfly16.perform_fft_contiguous(&mut scratch_evens);
6297 self.butterfly8.perform_fft_contiguous(&mut scratch_odds_n1);
6298 self.butterfly8.perform_fft_contiguous(&mut scratch_odds_n3);
6299
6300 scratch_odds_n1[1] = scratch_odds_n1[1] * self.twiddles[0];
6302 scratch_odds_n3[1] = scratch_odds_n3[1] * self.twiddles[0].conj();
6303
6304 scratch_odds_n1[2] = scratch_odds_n1[2] * self.twiddles[1];
6305 scratch_odds_n3[2] = scratch_odds_n3[2] * self.twiddles[1].conj();
6306
6307 scratch_odds_n1[3] = scratch_odds_n1[3] * self.twiddles[2];
6308 scratch_odds_n3[3] = scratch_odds_n3[3] * self.twiddles[2].conj();
6309
6310 scratch_odds_n1[4] = scratch_odds_n1[4] * self.twiddles[3];
6311 scratch_odds_n3[4] = scratch_odds_n3[4] * self.twiddles[3].conj();
6312
6313 scratch_odds_n1[5] = scratch_odds_n1[5] * self.twiddles[4];
6314 scratch_odds_n3[5] = scratch_odds_n3[5] * self.twiddles[4].conj();
6315
6316 scratch_odds_n1[6] = scratch_odds_n1[6] * self.twiddles[5];
6317 scratch_odds_n3[6] = scratch_odds_n3[6] * self.twiddles[5].conj();
6318
6319 scratch_odds_n1[7] = scratch_odds_n1[7] * self.twiddles[6];
6320 scratch_odds_n3[7] = scratch_odds_n3[7] * self.twiddles[6].conj();
6321
6322 Butterfly2::perform_fft_strided(&mut scratch_odds_n1[0], &mut scratch_odds_n3[0]);
6324 Butterfly2::perform_fft_strided(&mut scratch_odds_n1[1], &mut scratch_odds_n3[1]);
6325 Butterfly2::perform_fft_strided(&mut scratch_odds_n1[2], &mut scratch_odds_n3[2]);
6326 Butterfly2::perform_fft_strided(&mut scratch_odds_n1[3], &mut scratch_odds_n3[3]);
6327 Butterfly2::perform_fft_strided(&mut scratch_odds_n1[4], &mut scratch_odds_n3[4]);
6328 Butterfly2::perform_fft_strided(&mut scratch_odds_n1[5], &mut scratch_odds_n3[5]);
6329 Butterfly2::perform_fft_strided(&mut scratch_odds_n1[6], &mut scratch_odds_n3[6]);
6330 Butterfly2::perform_fft_strided(&mut scratch_odds_n1[7], &mut scratch_odds_n3[7]);
6331
6332 scratch_odds_n3[0] = twiddles::rotate_90(scratch_odds_n3[0], self.fft_direction());
6334 scratch_odds_n3[1] = twiddles::rotate_90(scratch_odds_n3[1], self.fft_direction());
6335 scratch_odds_n3[2] = twiddles::rotate_90(scratch_odds_n3[2], self.fft_direction());
6336 scratch_odds_n3[3] = twiddles::rotate_90(scratch_odds_n3[3], self.fft_direction());
6337 scratch_odds_n3[4] = twiddles::rotate_90(scratch_odds_n3[4], self.fft_direction());
6338 scratch_odds_n3[5] = twiddles::rotate_90(scratch_odds_n3[5], self.fft_direction());
6339 scratch_odds_n3[6] = twiddles::rotate_90(scratch_odds_n3[6], self.fft_direction());
6340 scratch_odds_n3[7] = twiddles::rotate_90(scratch_odds_n3[7], self.fft_direction());
6341
6342 buffer.store(scratch_evens[0] + scratch_odds_n1[0], 0);
6344 buffer.store(scratch_evens[1] + scratch_odds_n1[1], 1);
6345 buffer.store(scratch_evens[2] + scratch_odds_n1[2], 2);
6346 buffer.store(scratch_evens[3] + scratch_odds_n1[3], 3);
6347 buffer.store(scratch_evens[4] + scratch_odds_n1[4], 4);
6348 buffer.store(scratch_evens[5] + scratch_odds_n1[5], 5);
6349 buffer.store(scratch_evens[6] + scratch_odds_n1[6], 6);
6350 buffer.store(scratch_evens[7] + scratch_odds_n1[7], 7);
6351 buffer.store(scratch_evens[8] + scratch_odds_n3[0], 8);
6352 buffer.store(scratch_evens[9] + scratch_odds_n3[1], 9);
6353 buffer.store(scratch_evens[10] + scratch_odds_n3[2], 10);
6354 buffer.store(scratch_evens[11] + scratch_odds_n3[3], 11);
6355 buffer.store(scratch_evens[12] + scratch_odds_n3[4], 12);
6356 buffer.store(scratch_evens[13] + scratch_odds_n3[5], 13);
6357 buffer.store(scratch_evens[14] + scratch_odds_n3[6], 14);
6358 buffer.store(scratch_evens[15] + scratch_odds_n3[7], 15);
6359 buffer.store(scratch_evens[0] - scratch_odds_n1[0], 16);
6360 buffer.store(scratch_evens[1] - scratch_odds_n1[1], 17);
6361 buffer.store(scratch_evens[2] - scratch_odds_n1[2], 18);
6362 buffer.store(scratch_evens[3] - scratch_odds_n1[3], 19);
6363 buffer.store(scratch_evens[4] - scratch_odds_n1[4], 20);
6364 buffer.store(scratch_evens[5] - scratch_odds_n1[5], 21);
6365 buffer.store(scratch_evens[6] - scratch_odds_n1[6], 22);
6366 buffer.store(scratch_evens[7] - scratch_odds_n1[7], 23);
6367 buffer.store(scratch_evens[8] - scratch_odds_n3[0], 24);
6368 buffer.store(scratch_evens[9] - scratch_odds_n3[1], 25);
6369 buffer.store(scratch_evens[10] - scratch_odds_n3[2], 26);
6370 buffer.store(scratch_evens[11] - scratch_odds_n3[3], 27);
6371 buffer.store(scratch_evens[12] - scratch_odds_n3[4], 28);
6372 buffer.store(scratch_evens[13] - scratch_odds_n3[5], 29);
6373 buffer.store(scratch_evens[14] - scratch_odds_n3[6], 30);
6374 buffer.store(scratch_evens[15] - scratch_odds_n3[7], 31);
6375 }
6376}
6377
#[cfg(test)]
mod unit_tests {
    use super::*;
    use crate::test_utils::check_fft_algorithm;

    // Every butterfly gets the same test body; only the test name, the struct
    // under test and the FFT length differ. The macro stamps out one `#[test]`
    // function per `name: Struct, len;` entry, running `check_fft_algorithm`
    // (instantiated at `f32`) once for each direction.
    macro_rules! test_butterfly_func {
        ($($test_name:ident: $struct_name:ident, $size:expr;)+) => {
            $(
                #[test]
                fn $test_name() {
                    let forward = $struct_name::new(FftDirection::Forward);
                    check_fft_algorithm::<f32>(&forward, $size, FftDirection::Forward);

                    let inverse = $struct_name::new(FftDirection::Inverse);
                    check_fft_algorithm::<f32>(&inverse, $size, FftDirection::Inverse);
                }
            )+
        };
    }

    test_butterfly_func! {
        test_butterfly2: Butterfly2, 2;
        test_butterfly3: Butterfly3, 3;
        test_butterfly4: Butterfly4, 4;
        test_butterfly5: Butterfly5, 5;
        test_butterfly6: Butterfly6, 6;
        test_butterfly7: Butterfly7, 7;
        test_butterfly8: Butterfly8, 8;
        test_butterfly9: Butterfly9, 9;
        test_butterfly11: Butterfly11, 11;
        test_butterfly12: Butterfly12, 12;
        test_butterfly13: Butterfly13, 13;
        test_butterfly16: Butterfly16, 16;
        test_butterfly17: Butterfly17, 17;
        test_butterfly19: Butterfly19, 19;
        test_butterfly23: Butterfly23, 23;
        test_butterfly24: Butterfly24, 24;
        test_butterfly27: Butterfly27, 27;
        test_butterfly29: Butterfly29, 29;
        test_butterfly31: Butterfly31, 31;
        test_butterfly32: Butterfly32, 32;
    }
}