rustfft/algorithm/
butterflies.rs

1use num_complex::Complex;
2
3use crate::{common::FftNum, FftDirection};
4
5use crate::array_utils::{self, DoubleBuf, LoadStore};
6use crate::common::{fft_error_inplace, fft_error_outofplace};
7use crate::twiddles;
8use crate::{Direction, Fft, Length};
9
/// Generates the shared `Fft`, `Length` and `Direction` plumbing for a fixed-size butterfly.
///
/// * `$struct_name` - the butterfly type. It must provide an
///   `unsafe fn perform_fft_contiguous(&self, impl LoadStore<T>)` method.
/// * `$len` - expression giving the number of elements one butterfly invocation transforms.
/// * `$direction_fn` - callable that receives `&$struct_name<T>` and returns its `FftDirection`.
///
/// The generated `Fft` impl needs no scratch space: both scratch-length getters return 0 and
/// the scratch slices passed to the processing methods are ignored.
#[allow(unused)]
macro_rules! boilerplate_fft_butterfly {
    ($struct_name:ident, $len:expr, $direction_fn:expr) => {
        impl<T> Length for $struct_name<T> {
            #[inline(always)]
            fn len(&self) -> usize {
                $len
            }
        }
        impl<T> Direction for $struct_name<T> {
            #[inline(always)]
            fn fft_direction(&self) -> FftDirection {
                $direction_fn(self)
            }
        }
        impl<T: FftNum> $struct_name<T> {
            /// Runs a single butterfly over `buffer` by forwarding to `perform_fft_contiguous`.
            #[inline(always)]
            pub(crate) unsafe fn perform_fft_butterfly(&self, buffer: impl LoadStore<T>) {
                self.perform_fft_contiguous(buffer);
            }
        }
        impl<T: FftNum> Fft<T> for $struct_name<T> {
            #[inline(always)]
            fn get_inplace_scratch_len(&self) -> usize {
                0
            }
            #[inline(always)]
            fn get_outofplace_scratch_len(&self) -> usize {
                0
            }
            fn process_with_scratch(&self, buffer: &mut [Complex<T>], _scratch: &mut [Complex<T>]) {
                let fft_len = self.len();

                if buffer.len() < fft_len {
                    // Panicking is delegated to a helper that is marked cold and inline(never),
                    // which keeps this function's code size small.
                    fft_error_inplace(fft_len, buffer.len(), 0, 0);
                    return; // Unreachable, because fft_error_inplace asserts, but it helps codegen to put it here
                }

                let chunk_result = array_utils::iter_chunks(buffer, fft_len, |chunk| unsafe {
                    self.perform_fft_butterfly(chunk)
                });

                if chunk_result.is_err() {
                    // The buffer length wasn't cleanly divisible by the FFT size; again let the
                    // cold helper do the panicking.
                    fft_error_inplace(fft_len, buffer.len(), 0, 0);
                }
            }
            fn process_outofplace_with_scratch(
                &self,
                input: &mut [Complex<T>],
                output: &mut [Complex<T>],
                _scratch: &mut [Complex<T>],
            ) {
                let fft_len = self.len();
                let lengths_valid = input.len() >= fft_len && output.len() == input.len();

                if !lengths_valid {
                    // Panicking is delegated to a helper that is marked cold and inline(never),
                    // which keeps this function's code size small.
                    fft_error_outofplace(fft_len, input.len(), output.len(), 0, 0);
                    return; // Unreachable, because fft_error_outofplace asserts, but it helps codegen to put it here
                }

                let chunk_result =
                    array_utils::iter_chunks_zipped(input, output, fft_len, |src, dst| {
                        let paired = DoubleBuf {
                            input: src,
                            output: dst,
                        };
                        unsafe { self.perform_fft_butterfly(paired) };
                    });

                if chunk_result.is_err() {
                    // The buffer lengths weren't cleanly divisible by the FFT size; again let the
                    // cold helper do the panicking.
                    fft_error_outofplace(fft_len, input.len(), output.len(), 0, 0);
                }
            }
        }
    };
}
92
93pub struct Butterfly1<T> {
94    direction: FftDirection,
95    _phantom: std::marker::PhantomData<T>,
96}
97impl<T: FftNum> Butterfly1<T> {
98    #[inline(always)]
99    pub fn new(direction: FftDirection) -> Self {
100        Self {
101            direction,
102            _phantom: std::marker::PhantomData,
103        }
104    }
105}
106impl<T: FftNum> Fft<T> for Butterfly1<T> {
107    fn process_outofplace_with_scratch(
108        &self,
109        input: &mut [Complex<T>],
110        output: &mut [Complex<T>],
111        _scratch: &mut [Complex<T>],
112    ) {
113        output.copy_from_slice(&input);
114    }
115
116    fn process_with_scratch(&self, _buffer: &mut [Complex<T>], _scratch: &mut [Complex<T>]) {}
117
118    fn get_inplace_scratch_len(&self) -> usize {
119        0
120    }
121
122    fn get_outofplace_scratch_len(&self) -> usize {
123        0
124    }
125}
126impl<T> Length for Butterfly1<T> {
127    fn len(&self) -> usize {
128        1
129    }
130}
131impl<T> Direction for Butterfly1<T> {
132    fn fft_direction(&self) -> FftDirection {
133        self.direction
134    }
135}
136
137pub struct Butterfly2<T> {
138    direction: FftDirection,
139    _phantom: std::marker::PhantomData<T>,
140}
141boilerplate_fft_butterfly!(Butterfly2, 2, |this: &Butterfly2<_>| this.direction);
142impl<T: FftNum> Butterfly2<T> {
143    #[inline(always)]
144    pub fn new(direction: FftDirection) -> Self {
145        Self {
146            direction,
147            _phantom: std::marker::PhantomData,
148        }
149    }
150    #[inline(always)]
151    unsafe fn perform_fft_strided(left: &mut Complex<T>, right: &mut Complex<T>) {
152        let temp = *left + *right;
153
154        *right = *left - *right;
155        *left = temp;
156    }
157    #[inline(always)]
158    unsafe fn perform_fft_contiguous(&self, mut buffer: impl LoadStore<T>) {
159        let value0 = buffer.load(0);
160        let value1 = buffer.load(1);
161        buffer.store(value0 + value1, 0);
162        buffer.store(value0 - value1, 1);
163    }
164}
165
166pub struct Butterfly3<T> {
167    pub twiddle: Complex<T>,
168    direction: FftDirection,
169}
170boilerplate_fft_butterfly!(Butterfly3, 3, |this: &Butterfly3<_>| this.direction);
171impl<T: FftNum> Butterfly3<T> {
172    #[inline(always)]
173    pub fn new(direction: FftDirection) -> Self {
174        Self {
175            twiddle: twiddles::compute_twiddle(1, 3, direction),
176            direction,
177        }
178    }
179    #[inline(always)]
180    pub fn direction_of(fft: &Butterfly3<T>) -> Self {
181        Self {
182            twiddle: fft.twiddle.conj(),
183            direction: fft.direction.opposite_direction(),
184        }
185    }
186    #[inline(always)]
187    unsafe fn perform_fft_strided(
188        &self,
189        val0: &mut Complex<T>,
190        val1: &mut Complex<T>,
191        val2: &mut Complex<T>,
192    ) {
193        let xp = *val1 + *val2;
194        let xn = *val1 - *val2;
195        let sum = *val0 + xp;
196
197        let temp_a = *val0
198            + Complex {
199                re: self.twiddle.re * xp.re,
200                im: self.twiddle.re * xp.im,
201            };
202        let temp_b = Complex {
203            re: -self.twiddle.im * xn.im,
204            im: self.twiddle.im * xn.re,
205        };
206
207        *val0 = sum;
208        *val1 = temp_a + temp_b;
209        *val2 = temp_a - temp_b;
210    }
211
212    #[inline(always)]
213    unsafe fn perform_fft_contiguous(&self, mut buffer: impl LoadStore<T>) {
214        let xp = buffer.load(1) + buffer.load(2);
215        let xn = buffer.load(1) - buffer.load(2);
216        let sum = buffer.load(0) + xp;
217
218        let temp_a = buffer.load(0)
219            + Complex {
220                re: self.twiddle.re * xp.re,
221                im: self.twiddle.re * xp.im,
222            };
223        let temp_b = Complex {
224            re: -self.twiddle.im * xn.im,
225            im: self.twiddle.im * xn.re,
226        };
227
228        buffer.store(sum, 0);
229        buffer.store(temp_a + temp_b, 1);
230        buffer.store(temp_a - temp_b, 2);
231    }
232}
233
234pub struct Butterfly4<T> {
235    direction: FftDirection,
236    _phantom: std::marker::PhantomData<T>,
237}
238boilerplate_fft_butterfly!(Butterfly4, 4, |this: &Butterfly4<_>| this.direction);
239impl<T: FftNum> Butterfly4<T> {
240    #[inline(always)]
241    pub fn new(direction: FftDirection) -> Self {
242        Self {
243            direction,
244            _phantom: std::marker::PhantomData,
245        }
246    }
247    #[inline(always)]
248    unsafe fn perform_fft_contiguous(&self, mut buffer: impl LoadStore<T>) {
249        //we're going to hardcode a step of mixed radix
250        //aka we're going to do the six step algorithm
251
252        // step 1: transpose, which we're skipping because we're just going to perform non-contiguous FFTs
253        let mut value0 = buffer.load(0);
254        let mut value1 = buffer.load(1);
255        let mut value2 = buffer.load(2);
256        let mut value3 = buffer.load(3);
257
258        // step 2: column FFTs
259        Butterfly2::perform_fft_strided(&mut value0, &mut value2);
260        Butterfly2::perform_fft_strided(&mut value1, &mut value3);
261
262        // step 3: apply twiddle factors (only one in this case, and it's either 0 + i or 0 - i)
263        value3 = twiddles::rotate_90(value3, self.direction);
264
265        // step 4: transpose, which we're skipping because we're the previous FFTs were non-contiguous
266
267        // step 5: row FFTs
268        Butterfly2::perform_fft_strided(&mut value0, &mut value1);
269        Butterfly2::perform_fft_strided(&mut value2, &mut value3);
270
271        // step 6: transpose by swapping index 1 and 2
272        buffer.store(value0, 0);
273        buffer.store(value2, 1);
274        buffer.store(value1, 2);
275        buffer.store(value3, 3);
276    }
277
278    #[inline(always)]
279    unsafe fn perform_fft_strided(
280        &self,
281        value0: &mut Complex<T>,
282        value1: &mut Complex<T>,
283        value2: &mut Complex<T>,
284        value3: &mut Complex<T>,
285    ) {
286        // step 2: column FFTs
287        Butterfly2::perform_fft_strided(value0, value2);
288        Butterfly2::perform_fft_strided(value1, value3);
289
290        // step 3: apply twiddle factors (only one in this case, and it's either 0 + i or 0 - i)
291        *value3 = twiddles::rotate_90(*value3, self.direction);
292
293        // step 4: transpose, which we're skipping because we're the previous FFTs were non-contiguous
294
295        // step 5: row FFTs
296        Butterfly2::perform_fft_strided(value0, value1);
297        Butterfly2::perform_fft_strided(value2, value3);
298
299        // step 6: transpose
300        let temp = *value1;
301        *value1 = *value2;
302        *value2 = temp;
303    }
304}
305
306pub struct Butterfly5<T> {
307    twiddle1: Complex<T>,
308    twiddle2: Complex<T>,
309    direction: FftDirection,
310}
311boilerplate_fft_butterfly!(Butterfly5, 5, |this: &Butterfly5<_>| this.direction);
312impl<T: FftNum> Butterfly5<T> {
313    pub fn new(direction: FftDirection) -> Self {
314        Self {
315            twiddle1: twiddles::compute_twiddle(1, 5, direction),
316            twiddle2: twiddles::compute_twiddle(2, 5, direction),
317            direction,
318        }
319    }
320
321    #[inline(never)] // refusing to inline this code reduces code size, and doesn't hurt performance
322    unsafe fn perform_fft_contiguous(&self, mut buffer: impl LoadStore<T>) {
323        // let mut outer = Butterfly2::perform_fft_array([buffer.load(1), buffer.load(4)]);
324        // let mut inner = Butterfly2::perform_fft_array([buffer.load(2), buffer.load(3)]);
325        // let input0 = buffer.load(0);
326
327        // buffer.store(input0 + outer[0] + inner[0], 0);
328
329        // inner[1] = twiddles::rotate_90(inner[1], true);
330        // outer[1] = twiddles::rotate_90(outer[1], true);
331
332        // {
333        //     let twiddled1 = outer[0] * self.twiddles[0].re;
334        //     let twiddled2 = inner[0] * self.twiddles[1].re;
335        //     let twiddled3 = inner[1] * self.twiddles[1].im;
336        //     let twiddled4 = outer[1] * self.twiddles[0].im;
337
338        //     let sum12 = twiddled1 + twiddled2;
339        //     let sum34 = twiddled4 + twiddled3;
340
341        //     let output1 = sum12 + sum34;
342        //     let output4 = sum12 - sum34;
343
344        //     buffer.store(input0 + output1, 1);
345        //     buffer.store(input0 + output4, 4);
346        // }
347
348        // {
349        //     let twiddled1 = outer[0] * self.twiddles[1].re;
350        //     let twiddled2 = inner[0] * self.twiddles[0].re;
351        //     let twiddled3 = inner[1] * self.twiddles[0].im;
352        //     let twiddled4 = outer[1] * self.twiddles[1].im;
353        // }
354
355        // Let's do a plain 5-point Dft
356        // |X0|   | W0 W0  W0  W0  W0  |   |x0|
357        // |X1|   | W0 W1  W2  W3  W4  |   |x1|
358        // |X2| = | W0 W2  W4  W6  W8  | * |x2|
359        // |X3|   | W0 W3  W6  W9  W12 |   |x3|
360        // |X4|   | W0 W4  W8  W12 W16 |   |x4|
361        //
362        // where Wn = exp(-2*pi*n/5) for a forward transform, and exp(+2*pi*n/5) for an direction.
363        //
364        // This can be simplified a bit since exp(-2*pi*n/5) = exp(-2*pi*n/5 + m*2*pi)
365        // |X0|   | W0 W0  W0  W0  W0 |   |x0|
366        // |X1|   | W0 W1  W2  W3  W4 |   |x1|
367        // |X2| = | W0 W2  W4  W1  W3 | * |x2|
368        // |X3|   | W0 W3  W1  W4  W2 |   |x3|
369        // |X4|   | W0 W4  W3  W2  W1 |   |x4|
370        //
371        // Next we can use the symmetry that W3 = W2* and W4 = W1* (where * means complex conjugate), and W0 = 1
372        // |X0|   | 1  1   1   1   1   |   |x0|
373        // |X1|   | 1  W1  W2  W2* W1* |   |x1|
374        // |X2| = | 1  W2  W1* W1  W2* | * |x2|
375        // |X3|   | 1  W2* W1  W1* W2  |   |x3|
376        // |X4|   | 1  W1* W2* W2  W1  |   |x4|
377        //
378        // Next, we write out the whole expression with real and imaginary parts.
379        // X0 = x0 + x1 + x2 + x3 + x4
380        // X1 = x0 + (W1.re + j*W1.im)*x1 + (W2.re + j*W2.im)*x2 + (W2.re - j*W2.im)*x3 + (W1.re - j*W1.im)*x4
381        // X2 = x0 + (W2.re + j*W2.im)*x1 + (W1.re - j*W1.im)*x2 + (W1.re + j*W1.im)*x3 + (W2.re - j*W2.im)*x4
382        // X3 = x0 + (W2.re - j*W2.im)*x1 + (W1.re + j*W1.im)*x2 + (W1.re - j*W1.im)*x3 + (W2.re + j*W2.im)*x4
383        // X4 = x0 + (W1.re - j*W1.im)*x1 + (W2.re - j*W2.im)*x2 + (W2.re + j*W2.im)*x3 + (W1.re + j*W1.im)*x4
384        //
385        // Then we rearrange and sort terms.
386        // X0 = x0 + x1 + x2 + x3 + x4
387        // X1 = x0 + W1.re*(x1+x4) + W2.re*(x2+x3) + j*(W1.im*(x1-x4) + W2.im*(x2-x3))
388        // X2 = x0 + W1.re*(x2+x3) + W2.re*(x1+x4) - j*(W1.im*(x2-x3) - W2.im*(x1-x4))
389        // X3 = x0 + W1.re*(x2+x3) + W2.re*(x1+x4) + j*(W1.im*(x2-x3) - W2.im*(x1-x4))
390        // X4 = x0 + W1.re*(x1+x4) + W2.re*(x2+x3) - j*(W1.im*(x1-x4) + W2.im*(x2-x3))
391        //
392        // Now we define x14p=x1+x4 x14n=x1-x4, x23p=x2+x3, x23n=x2-x3
393        // X0 = x0 + x1 + x2 + x3 + x4
394        // X1 = x0 + W1.re*(x14p) + W2.re*(x23p) + j*(W1.im*(x14n) + W2.im*(x23n))
395        // X2 = x0 + W1.re*(x23p) + W2.re*(x14p) - j*(W1.im*(x23n) - W2.im*(x14n))
396        // X3 = x0 + W1.re*(x23p) + W2.re*(x14p) + j*(W1.im*(x23n) - W2.im*(x14n))
397        // X4 = x0 + W1.re*(x14p) + W2.re*(x23p) - j*(W1.im*(x14n) + W2.im*(x23n))
398        //
399        // The final step is to write out real and imaginary parts of x14n etc, and replacing using j*j=-1
400        // After this it's easy to remove any repeated calculation of the same values.
401
402        let x14p = buffer.load(1) + buffer.load(4);
403        let x14n = buffer.load(1) - buffer.load(4);
404        let x23p = buffer.load(2) + buffer.load(3);
405        let x23n = buffer.load(2) - buffer.load(3);
406        let sum = buffer.load(0) + x14p + x23p;
407        let b14re_a = buffer.load(0).re + self.twiddle1.re * x14p.re + self.twiddle2.re * x23p.re;
408        let b14re_b = self.twiddle1.im * x14n.im + self.twiddle2.im * x23n.im;
409        let b23re_a = buffer.load(0).re + self.twiddle2.re * x14p.re + self.twiddle1.re * x23p.re;
410        let b23re_b = self.twiddle2.im * x14n.im + -self.twiddle1.im * x23n.im;
411
412        let b14im_a = buffer.load(0).im + self.twiddle1.re * x14p.im + self.twiddle2.re * x23p.im;
413        let b14im_b = self.twiddle1.im * x14n.re + self.twiddle2.im * x23n.re;
414        let b23im_a = buffer.load(0).im + self.twiddle2.re * x14p.im + self.twiddle1.re * x23p.im;
415        let b23im_b = self.twiddle2.im * x14n.re + -self.twiddle1.im * x23n.re;
416
417        let out1re = b14re_a - b14re_b;
418        let out1im = b14im_a + b14im_b;
419        let out2re = b23re_a - b23re_b;
420        let out2im = b23im_a + b23im_b;
421        let out3re = b23re_a + b23re_b;
422        let out3im = b23im_a - b23im_b;
423        let out4re = b14re_a + b14re_b;
424        let out4im = b14im_a - b14im_b;
425        buffer.store(sum, 0);
426        buffer.store(
427            Complex {
428                re: out1re,
429                im: out1im,
430            },
431            1,
432        );
433        buffer.store(
434            Complex {
435                re: out2re,
436                im: out2im,
437            },
438            2,
439        );
440        buffer.store(
441            Complex {
442                re: out3re,
443                im: out3im,
444            },
445            3,
446        );
447        buffer.store(
448            Complex {
449                re: out4re,
450                im: out4im,
451            },
452            4,
453        );
454    }
455}
456
457pub struct Butterfly6<T> {
458    butterfly3: Butterfly3<T>,
459}
460boilerplate_fft_butterfly!(Butterfly6, 6, |this: &Butterfly6<_>| this
461    .butterfly3
462    .fft_direction());
463impl<T: FftNum> Butterfly6<T> {
464    #[inline(always)]
465    pub fn new(direction: FftDirection) -> Self {
466        Self {
467            butterfly3: Butterfly3::new(direction),
468        }
469    }
470    #[inline(always)]
471    pub fn direction_of(fft: &Butterfly6<T>) -> Self {
472        Self {
473            butterfly3: Butterfly3::direction_of(&fft.butterfly3),
474        }
475    }
476    #[inline(always)]
477    unsafe fn perform_fft_contiguous(&self, mut buffer: impl LoadStore<T>) {
478        //since GCD(2,3) == 1 we're going to hardcode a step of the Good-Thomas algorithm to avoid twiddle factors
479
480        // step 1: reorder the input directly into the scratch. normally there's a whole thing to compute this ordering
481        //but thankfully we can just precompute it and hardcode it
482        let mut scratch_a = [buffer.load(0), buffer.load(2), buffer.load(4)];
483
484        let mut scratch_b = [buffer.load(3), buffer.load(5), buffer.load(1)];
485
486        // step 2: column FFTs
487        self.butterfly3.perform_fft_contiguous(&mut scratch_a);
488        self.butterfly3.perform_fft_contiguous(&mut scratch_b);
489
490        // step 3: apply twiddle factors -- SKIPPED because good-thomas doesn't have twiddle factors :)
491
492        // step 4: SKIPPED because the next FFTs will be non-contiguous
493
494        // step 5: row FFTs
495        Butterfly2::perform_fft_strided(&mut scratch_a[0], &mut scratch_b[0]);
496        Butterfly2::perform_fft_strided(&mut scratch_a[1], &mut scratch_b[1]);
497        Butterfly2::perform_fft_strided(&mut scratch_a[2], &mut scratch_b[2]);
498
499        // step 6: reorder the result back into the buffer. again we would normally have to do an expensive computation
500        // but instead we can precompute and hardcode the ordering
501        // note that we're also rolling a transpose step into this reorder
502        buffer.store(scratch_a[0], 0);
503        buffer.store(scratch_b[1], 1);
504        buffer.store(scratch_a[2], 2);
505        buffer.store(scratch_b[0], 3);
506        buffer.store(scratch_a[1], 4);
507        buffer.store(scratch_b[2], 5);
508    }
509}
510
511pub struct Butterfly7<T> {
512    twiddle1: Complex<T>,
513    twiddle2: Complex<T>,
514    twiddle3: Complex<T>,
515    direction: FftDirection,
516}
517boilerplate_fft_butterfly!(Butterfly7, 7, |this: &Butterfly7<_>| this.direction);
518impl<T: FftNum> Butterfly7<T> {
519    pub fn new(direction: FftDirection) -> Self {
520        Self {
521            twiddle1: twiddles::compute_twiddle(1, 7, direction),
522            twiddle2: twiddles::compute_twiddle(2, 7, direction),
523            twiddle3: twiddles::compute_twiddle(3, 7, direction),
524            direction,
525        }
526    }
527    #[inline(never)]
528    unsafe fn perform_fft_contiguous(&self, mut buffer: impl LoadStore<T>) {
529        // let mut outer = Butterfly2::perform_fft_array([buffer.load(1), buffer.load(6)]);
530        // let mut mid   = Butterfly2::perform_fft_array([buffer.load(2), buffer.load(5)]);
531        // let mut inner = Butterfly2::perform_fft_array([buffer.load(3), buffer.load(4)]);
532        // let input0 = buffer.load(0);
533
534        // buffer.store(input0 + outer[0] + mid[0] + inner[0], 0);
535
536        // inner[1] = twiddles::rotate_90(inner[1], true);
537        // mid[1]   = twiddles::rotate_90(mid[1],   true);
538        // outer[1] = twiddles::rotate_90(outer[1], true);
539
540        // {
541        //     let twiddled1 = outer[0] * self.twiddles[0].re;
542        //     let twiddled2 =   mid[0] * self.twiddles[1].re;
543        //     let twiddled3 = inner[0] * self.twiddles[2].re;
544        //     let twiddled4 = inner[1] * self.twiddles[2].im;
545        //     let twiddled5 =   mid[1] * self.twiddles[1].im;
546        //     let twiddled6 = outer[1] * self.twiddles[0].im;
547
548        //     let sum123 = twiddled1 + twiddled2 + twiddled3;
549        //     let sum456 = twiddled4 + twiddled5 + twiddled6;
550
551        //     let output1 = sum123 + sum456;
552        //     let output6 = sum123 - sum456;
553
554        //     buffer.store(input0 + output1, 1);
555        //     buffer.store(input0 + output6, 6);
556        // }
557
558        // {
559        //     let twiddled1 = outer[0] * self.twiddles[1].re;
560        //     let twiddled2 =   mid[0] * self.twiddles[2].re;
561        //     let twiddled3 = inner[0] * self.twiddles[0].re;
562        //     let twiddled4 = inner[1] * self.twiddles[0].im;
563        //     let twiddled5 =   mid[1] * self.twiddles[2].im;
564        //     let twiddled6 = outer[1] * self.twiddles[1].im;
565
566        //     let sum123 = twiddled1 + twiddled2 + twiddled3;
567        //     let sum456 = twiddled6 - twiddled4 - twiddled5;
568
569        //     let output2 = sum123 + sum456;
570        //     let output5 = sum123 - sum456;
571
572        //     buffer.store(input0 + output2, 2);
573        //     buffer.store(input0 + output5, 5);
574        // }
575
576        // Let's do a plain 7-point Dft
577        // |X0|   | W0 W0  W0  W0  W0  W0  W0  |   |x0|
578        // |X1|   | W0 W1  W2  W3  W4  W5  W6  |   |x1|
579        // |X2|   | W0 W2  W4  W6  W8  W10 W12 |   |x2|
580        // |X3| = | W0 W3  W6  W9  W12 W15 W18 | * |x3|
581        // |X4|   | W0 W4  W8  W12 W16 W20 W24 |   |x4|
582        // |X5|   | W0 W5  W10 W15 W20 W25 W30 |   |x4|
583        // |X6|   | W0 W6  W12 W18 W24 W30 W36 |   |x4|
584        //
585        // where Wn = exp(-2*pi*n/7) for a forward transform, and exp(+2*pi*n/7) for an direction.
586        //
587        // Using the same logic as for the 5-point butterfly, this can be simplified to:
588        // |X0|   | 1  1   1   1   1   1   1   |   |x0|
589        // |X1|   | 1  W1  W2  W3  W3* W2* W1* |   |x1|
590        // |X2|   | 1  W2  W3* W1* W1  W3  W2* |   |x2|
591        // |X3| = | 1  W3  W1* W2  W2* W1  W3* | * |x3|
592        // |X4|   | 1  W3* W1  W2* W2  W1* W3  |   |x4|
593        // |X5|   | 1  W2* W3  W1  W1* W3* W2  |   |x5|
594        // |X6|   | 1  W1* W2* W3* W3  W2  W1  |   |x6|
595        //
596        // From here it's just about eliminating repeated calculations, following the same procedure as for the 5-point butterfly.
597
598        let x16p = buffer.load(1) + buffer.load(6);
599        let x16n = buffer.load(1) - buffer.load(6);
600        let x25p = buffer.load(2) + buffer.load(5);
601        let x25n = buffer.load(2) - buffer.load(5);
602        let x34p = buffer.load(3) + buffer.load(4);
603        let x34n = buffer.load(3) - buffer.load(4);
604        let sum = buffer.load(0) + x16p + x25p + x34p;
605
606        let x16re_a = buffer.load(0).re
607            + self.twiddle1.re * x16p.re
608            + self.twiddle2.re * x25p.re
609            + self.twiddle3.re * x34p.re;
610        let x16re_b =
611            self.twiddle1.im * x16n.im + self.twiddle2.im * x25n.im + self.twiddle3.im * x34n.im;
612        let x25re_a = buffer.load(0).re
613            + self.twiddle1.re * x34p.re
614            + self.twiddle2.re * x16p.re
615            + self.twiddle3.re * x25p.re;
616        let x25re_b =
617            -self.twiddle1.im * x34n.im + self.twiddle2.im * x16n.im - self.twiddle3.im * x25n.im;
618        let x34re_a = buffer.load(0).re
619            + self.twiddle1.re * x25p.re
620            + self.twiddle2.re * x34p.re
621            + self.twiddle3.re * x16p.re;
622        let x34re_b =
623            -self.twiddle1.im * x25n.im + self.twiddle2.im * x34n.im + self.twiddle3.im * x16n.im;
624        let x16im_a = buffer.load(0).im
625            + self.twiddle1.re * x16p.im
626            + self.twiddle2.re * x25p.im
627            + self.twiddle3.re * x34p.im;
628        let x16im_b =
629            self.twiddle1.im * x16n.re + self.twiddle2.im * x25n.re + self.twiddle3.im * x34n.re;
630        let x25im_a = buffer.load(0).im
631            + self.twiddle1.re * x34p.im
632            + self.twiddle2.re * x16p.im
633            + self.twiddle3.re * x25p.im;
634        let x25im_b =
635            -self.twiddle1.im * x34n.re + self.twiddle2.im * x16n.re - self.twiddle3.im * x25n.re;
636        let x34im_a = buffer.load(0).im
637            + self.twiddle1.re * x25p.im
638            + self.twiddle2.re * x34p.im
639            + self.twiddle3.re * x16p.im;
640        let x34im_b =
641            self.twiddle1.im * x25n.re - self.twiddle2.im * x34n.re - self.twiddle3.im * x16n.re;
642
643        let out1re = x16re_a - x16re_b;
644        let out1im = x16im_a + x16im_b;
645        let out2re = x25re_a - x25re_b;
646        let out2im = x25im_a + x25im_b;
647        let out3re = x34re_a - x34re_b;
648        let out3im = x34im_a - x34im_b;
649        let out4re = x34re_a + x34re_b;
650        let out4im = x34im_a + x34im_b;
651        let out5re = x25re_a + x25re_b;
652        let out5im = x25im_a - x25im_b;
653        let out6re = x16re_a + x16re_b;
654        let out6im = x16im_a - x16im_b;
655
656        buffer.store(sum, 0);
657        buffer.store(
658            Complex {
659                re: out1re,
660                im: out1im,
661            },
662            1,
663        );
664        buffer.store(
665            Complex {
666                re: out2re,
667                im: out2im,
668            },
669            2,
670        );
671        buffer.store(
672            Complex {
673                re: out3re,
674                im: out3im,
675            },
676            3,
677        );
678        buffer.store(
679            Complex {
680                re: out4re,
681                im: out4im,
682            },
683            4,
684        );
685        buffer.store(
686            Complex {
687                re: out5re,
688                im: out5im,
689            },
690            5,
691        );
692        buffer.store(
693            Complex {
694                re: out6re,
695                im: out6im,
696            },
697            6,
698        );
699    }
700}
701
702pub struct Butterfly8<T> {
703    root2: T,
704    direction: FftDirection,
705}
706boilerplate_fft_butterfly!(Butterfly8, 8, |this: &Butterfly8<_>| this.direction);
707impl<T: FftNum> Butterfly8<T> {
708    #[inline(always)]
709    pub fn new(direction: FftDirection) -> Self {
710        Self {
711            root2: T::from_f64(0.5f64.sqrt()).unwrap(),
712            direction,
713        }
714    }
715
716    #[inline(always)]
717    unsafe fn perform_fft_contiguous(&self, mut buffer: impl LoadStore<T>) {
718        let butterfly4 = Butterfly4::new(self.direction);
719
720        //we're going to hardcode a step of mixed radix
721        //aka we're going to do the six step algorithm
722
723        // step 1: transpose the input into the scratch
724        let mut scratch0 = [
725            buffer.load(0),
726            buffer.load(2),
727            buffer.load(4),
728            buffer.load(6),
729        ];
730        let mut scratch1 = [
731            buffer.load(1),
732            buffer.load(3),
733            buffer.load(5),
734            buffer.load(7),
735        ];
736
737        // step 2: column FFTs
738        butterfly4.perform_fft_contiguous(&mut scratch0);
739        butterfly4.perform_fft_contiguous(&mut scratch1);
740
741        // step 3: apply twiddle factors
742        scratch1[1] = (twiddles::rotate_90(scratch1[1], self.direction) + scratch1[1]) * self.root2;
743        scratch1[2] = twiddles::rotate_90(scratch1[2], self.direction);
744        scratch1[3] = (twiddles::rotate_90(scratch1[3], self.direction) - scratch1[3]) * self.root2;
745
746        // step 4: transpose -- skipped because we're going to do the next FFTs non-contiguously
747
748        // step 5: row FFTs
749        for i in 0..4 {
750            Butterfly2::perform_fft_strided(&mut scratch0[i], &mut scratch1[i]);
751        }
752
753        // step 6: copy data to the output. we don't need to transpose, because we skipped the step 4 transpose
754        for i in 0..4 {
755            buffer.store(scratch0[i], i);
756        }
757        for i in 0..4 {
758            buffer.store(scratch1[i], i + 4);
759        }
760    }
761}
762
763pub struct Butterfly9<T> {
764    butterfly3: Butterfly3<T>,
765    twiddle1: Complex<T>,
766    twiddle2: Complex<T>,
767    twiddle4: Complex<T>,
768}
769boilerplate_fft_butterfly!(Butterfly9, 9, |this: &Butterfly9<_>| this
770    .butterfly3
771    .fft_direction());
772impl<T: FftNum> Butterfly9<T> {
773    #[inline(always)]
774    pub fn new(direction: FftDirection) -> Self {
775        Self {
776            butterfly3: Butterfly3::new(direction),
777            twiddle1: twiddles::compute_twiddle(1, 9, direction),
778            twiddle2: twiddles::compute_twiddle(2, 9, direction),
779            twiddle4: twiddles::compute_twiddle(4, 9, direction),
780        }
781    }
782    #[inline(always)]
783    unsafe fn perform_fft_contiguous(&self, mut buffer: impl LoadStore<T>) {
784        // algorithm: mixed radix with width=3 and height=3
785
786        // step 1: transpose the input into the scratch
787        let mut scratch0 = [buffer.load(0), buffer.load(3), buffer.load(6)];
788        let mut scratch1 = [buffer.load(1), buffer.load(4), buffer.load(7)];
789        let mut scratch2 = [buffer.load(2), buffer.load(5), buffer.load(8)];
790
791        // step 2: column FFTs
792        self.butterfly3.perform_fft_contiguous(&mut scratch0);
793        self.butterfly3.perform_fft_contiguous(&mut scratch1);
794        self.butterfly3.perform_fft_contiguous(&mut scratch2);
795
796        // step 3: apply twiddle factors
797        scratch1[1] = scratch1[1] * self.twiddle1;
798        scratch1[2] = scratch1[2] * self.twiddle2;
799        scratch2[1] = scratch2[1] * self.twiddle2;
800        scratch2[2] = scratch2[2] * self.twiddle4;
801
802        // step 4: SKIPPED because the next FFTs will be non-contiguous
803
804        // step 5: row FFTs
805        self.butterfly3
806            .perform_fft_strided(&mut scratch0[0], &mut scratch1[0], &mut scratch2[0]);
807        self.butterfly3
808            .perform_fft_strided(&mut scratch0[1], &mut scratch1[1], &mut scratch2[1]);
809        self.butterfly3
810            .perform_fft_strided(&mut scratch0[2], &mut scratch1[2], &mut scratch2[2]);
811
812        // step 6: copy the result into the output. normally we'd need to do a transpose here, but we can skip it because we skipped the transpose in step 4
813        buffer.store(scratch0[0], 0);
814        buffer.store(scratch0[1], 1);
815        buffer.store(scratch0[2], 2);
816        buffer.store(scratch1[0], 3);
817        buffer.store(scratch1[1], 4);
818        buffer.store(scratch1[2], 5);
819        buffer.store(scratch2[0], 6);
820        buffer.store(scratch2[1], 7);
821        buffer.store(scratch2[2], 8);
822    }
823}
824
825pub struct Butterfly11<T> {
826    twiddle1: Complex<T>,
827    twiddle2: Complex<T>,
828    twiddle3: Complex<T>,
829    twiddle4: Complex<T>,
830    twiddle5: Complex<T>,
831    direction: FftDirection,
832}
833boilerplate_fft_butterfly!(Butterfly11, 11, |this: &Butterfly11<_>| this.direction);
834impl<T: FftNum> Butterfly11<T> {
835    pub fn new(direction: FftDirection) -> Self {
836        let twiddle1: Complex<T> = twiddles::compute_twiddle(1, 11, direction);
837        let twiddle2: Complex<T> = twiddles::compute_twiddle(2, 11, direction);
838        let twiddle3: Complex<T> = twiddles::compute_twiddle(3, 11, direction);
839        let twiddle4: Complex<T> = twiddles::compute_twiddle(4, 11, direction);
840        let twiddle5: Complex<T> = twiddles::compute_twiddle(5, 11, direction);
841        Self {
842            twiddle1,
843            twiddle2,
844            twiddle3,
845            twiddle4,
846            twiddle5,
847            direction,
848        }
849    }
850
851    #[inline(never)]
852    unsafe fn perform_fft_contiguous(&self, mut buffer: impl LoadStore<T>) {
853        // This function was derived in the same manner as the butterflies for length 3, 5 and 7.
854        // However, instead of doing it by hand the actual code is autogenerated
855        // with the `genbutterflies.py` script in the `tools` directory.
856
857        let x110p = buffer.load(1) + buffer.load(10);
858        let x110n = buffer.load(1) - buffer.load(10);
859        let x29p = buffer.load(2) + buffer.load(9);
860        let x29n = buffer.load(2) - buffer.load(9);
861        let x38p = buffer.load(3) + buffer.load(8);
862        let x38n = buffer.load(3) - buffer.load(8);
863        let x47p = buffer.load(4) + buffer.load(7);
864        let x47n = buffer.load(4) - buffer.load(7);
865        let x56p = buffer.load(5) + buffer.load(6);
866        let x56n = buffer.load(5) - buffer.load(6);
867        let sum = buffer.load(0) + x110p + x29p + x38p + x47p + x56p;
868        let b110re_a = buffer.load(0).re
869            + self.twiddle1.re * x110p.re
870            + self.twiddle2.re * x29p.re
871            + self.twiddle3.re * x38p.re
872            + self.twiddle4.re * x47p.re
873            + self.twiddle5.re * x56p.re;
874        let b110re_b = self.twiddle1.im * x110n.im
875            + self.twiddle2.im * x29n.im
876            + self.twiddle3.im * x38n.im
877            + self.twiddle4.im * x47n.im
878            + self.twiddle5.im * x56n.im;
879        let b29re_a = buffer.load(0).re
880            + self.twiddle2.re * x110p.re
881            + self.twiddle4.re * x29p.re
882            + self.twiddle5.re * x38p.re
883            + self.twiddle3.re * x47p.re
884            + self.twiddle1.re * x56p.re;
885        let b29re_b = self.twiddle2.im * x110n.im
886            + self.twiddle4.im * x29n.im
887            + -self.twiddle5.im * x38n.im
888            + -self.twiddle3.im * x47n.im
889            + -self.twiddle1.im * x56n.im;
890        let b38re_a = buffer.load(0).re
891            + self.twiddle3.re * x110p.re
892            + self.twiddle5.re * x29p.re
893            + self.twiddle2.re * x38p.re
894            + self.twiddle1.re * x47p.re
895            + self.twiddle4.re * x56p.re;
896        let b38re_b = self.twiddle3.im * x110n.im
897            + -self.twiddle5.im * x29n.im
898            + -self.twiddle2.im * x38n.im
899            + self.twiddle1.im * x47n.im
900            + self.twiddle4.im * x56n.im;
901        let b47re_a = buffer.load(0).re
902            + self.twiddle4.re * x110p.re
903            + self.twiddle3.re * x29p.re
904            + self.twiddle1.re * x38p.re
905            + self.twiddle5.re * x47p.re
906            + self.twiddle2.re * x56p.re;
907        let b47re_b = self.twiddle4.im * x110n.im
908            + -self.twiddle3.im * x29n.im
909            + self.twiddle1.im * x38n.im
910            + self.twiddle5.im * x47n.im
911            + -self.twiddle2.im * x56n.im;
912        let b56re_a = buffer.load(0).re
913            + self.twiddle5.re * x110p.re
914            + self.twiddle1.re * x29p.re
915            + self.twiddle4.re * x38p.re
916            + self.twiddle2.re * x47p.re
917            + self.twiddle3.re * x56p.re;
918        let b56re_b = self.twiddle5.im * x110n.im
919            + -self.twiddle1.im * x29n.im
920            + self.twiddle4.im * x38n.im
921            + -self.twiddle2.im * x47n.im
922            + self.twiddle3.im * x56n.im;
923
924        let b110im_a = buffer.load(0).im
925            + self.twiddle1.re * x110p.im
926            + self.twiddle2.re * x29p.im
927            + self.twiddle3.re * x38p.im
928            + self.twiddle4.re * x47p.im
929            + self.twiddle5.re * x56p.im;
930        let b110im_b = self.twiddle1.im * x110n.re
931            + self.twiddle2.im * x29n.re
932            + self.twiddle3.im * x38n.re
933            + self.twiddle4.im * x47n.re
934            + self.twiddle5.im * x56n.re;
935        let b29im_a = buffer.load(0).im
936            + self.twiddle2.re * x110p.im
937            + self.twiddle4.re * x29p.im
938            + self.twiddle5.re * x38p.im
939            + self.twiddle3.re * x47p.im
940            + self.twiddle1.re * x56p.im;
941        let b29im_b = self.twiddle2.im * x110n.re
942            + self.twiddle4.im * x29n.re
943            + -self.twiddle5.im * x38n.re
944            + -self.twiddle3.im * x47n.re
945            + -self.twiddle1.im * x56n.re;
946        let b38im_a = buffer.load(0).im
947            + self.twiddle3.re * x110p.im
948            + self.twiddle5.re * x29p.im
949            + self.twiddle2.re * x38p.im
950            + self.twiddle1.re * x47p.im
951            + self.twiddle4.re * x56p.im;
952        let b38im_b = self.twiddle3.im * x110n.re
953            + -self.twiddle5.im * x29n.re
954            + -self.twiddle2.im * x38n.re
955            + self.twiddle1.im * x47n.re
956            + self.twiddle4.im * x56n.re;
957        let b47im_a = buffer.load(0).im
958            + self.twiddle4.re * x110p.im
959            + self.twiddle3.re * x29p.im
960            + self.twiddle1.re * x38p.im
961            + self.twiddle5.re * x47p.im
962            + self.twiddle2.re * x56p.im;
963        let b47im_b = self.twiddle4.im * x110n.re
964            + -self.twiddle3.im * x29n.re
965            + self.twiddle1.im * x38n.re
966            + self.twiddle5.im * x47n.re
967            + -self.twiddle2.im * x56n.re;
968        let b56im_a = buffer.load(0).im
969            + self.twiddle5.re * x110p.im
970            + self.twiddle1.re * x29p.im
971            + self.twiddle4.re * x38p.im
972            + self.twiddle2.re * x47p.im
973            + self.twiddle3.re * x56p.im;
974        let b56im_b = self.twiddle5.im * x110n.re
975            + -self.twiddle1.im * x29n.re
976            + self.twiddle4.im * x38n.re
977            + -self.twiddle2.im * x47n.re
978            + self.twiddle3.im * x56n.re;
979
980        let out1re = b110re_a - b110re_b;
981        let out1im = b110im_a + b110im_b;
982        let out2re = b29re_a - b29re_b;
983        let out2im = b29im_a + b29im_b;
984        let out3re = b38re_a - b38re_b;
985        let out3im = b38im_a + b38im_b;
986        let out4re = b47re_a - b47re_b;
987        let out4im = b47im_a + b47im_b;
988        let out5re = b56re_a - b56re_b;
989        let out5im = b56im_a + b56im_b;
990        let out6re = b56re_a + b56re_b;
991        let out6im = b56im_a - b56im_b;
992        let out7re = b47re_a + b47re_b;
993        let out7im = b47im_a - b47im_b;
994        let out8re = b38re_a + b38re_b;
995        let out8im = b38im_a - b38im_b;
996        let out9re = b29re_a + b29re_b;
997        let out9im = b29im_a - b29im_b;
998        let out10re = b110re_a + b110re_b;
999        let out10im = b110im_a - b110im_b;
1000        buffer.store(sum, 0);
1001        buffer.store(
1002            Complex {
1003                re: out1re,
1004                im: out1im,
1005            },
1006            1,
1007        );
1008        buffer.store(
1009            Complex {
1010                re: out2re,
1011                im: out2im,
1012            },
1013            2,
1014        );
1015        buffer.store(
1016            Complex {
1017                re: out3re,
1018                im: out3im,
1019            },
1020            3,
1021        );
1022        buffer.store(
1023            Complex {
1024                re: out4re,
1025                im: out4im,
1026            },
1027            4,
1028        );
1029        buffer.store(
1030            Complex {
1031                re: out5re,
1032                im: out5im,
1033            },
1034            5,
1035        );
1036        buffer.store(
1037            Complex {
1038                re: out6re,
1039                im: out6im,
1040            },
1041            6,
1042        );
1043        buffer.store(
1044            Complex {
1045                re: out7re,
1046                im: out7im,
1047            },
1048            7,
1049        );
1050        buffer.store(
1051            Complex {
1052                re: out8re,
1053                im: out8im,
1054            },
1055            8,
1056        );
1057        buffer.store(
1058            Complex {
1059                re: out9re,
1060                im: out9im,
1061            },
1062            9,
1063        );
1064        buffer.store(
1065            Complex {
1066                re: out10re,
1067                im: out10im,
1068            },
1069            10,
1070        );
1071    }
1072}
1073
1074pub struct Butterfly12<T> {
1075    butterfly3: Butterfly3<T>,
1076    butterfly4: Butterfly4<T>,
1077}
1078boilerplate_fft_butterfly!(Butterfly12, 12, |this: &Butterfly12<_>| this
1079    .butterfly3
1080    .fft_direction());
1081impl<T: FftNum> Butterfly12<T> {
1082    #[inline(always)]
1083    pub fn new(direction: FftDirection) -> Self {
1084        Self {
1085            butterfly3: Butterfly3::new(direction),
1086            butterfly4: Butterfly4::new(direction),
1087        }
1088    }
1089    #[inline(always)]
1090    unsafe fn perform_fft_contiguous(&self, mut buffer: impl LoadStore<T>) {
1091        //since GCD(4,3) == 1 we're going to hardcode a step of the Good-Thomas algorithm to avoid twiddle factors
1092
1093        // step 1: reorder the input directly into the scratch. normally there's a whole thing to compute this ordering
1094        //but thankfully we can just precompute it and hardcode it
1095        let mut scratch0 = [
1096            buffer.load(0),
1097            buffer.load(3),
1098            buffer.load(6),
1099            buffer.load(9),
1100        ];
1101        let mut scratch1 = [
1102            buffer.load(4),
1103            buffer.load(7),
1104            buffer.load(10),
1105            buffer.load(1),
1106        ];
1107        let mut scratch2 = [
1108            buffer.load(8),
1109            buffer.load(11),
1110            buffer.load(2),
1111            buffer.load(5),
1112        ];
1113
1114        // step 2: column FFTs
1115        self.butterfly4.perform_fft_contiguous(&mut scratch0);
1116        self.butterfly4.perform_fft_contiguous(&mut scratch1);
1117        self.butterfly4.perform_fft_contiguous(&mut scratch2);
1118
1119        // step 3: apply twiddle factors -- SKIPPED because good-thomas doesn't have twiddle factors :)
1120
1121        // step 4: SKIPPED because the next FFTs will be non-contiguous
1122
1123        // step 5: row FFTs
1124        self.butterfly3
1125            .perform_fft_strided(&mut scratch0[0], &mut scratch1[0], &mut scratch2[0]);
1126        self.butterfly3
1127            .perform_fft_strided(&mut scratch0[1], &mut scratch1[1], &mut scratch2[1]);
1128        self.butterfly3
1129            .perform_fft_strided(&mut scratch0[2], &mut scratch1[2], &mut scratch2[2]);
1130        self.butterfly3
1131            .perform_fft_strided(&mut scratch0[3], &mut scratch1[3], &mut scratch2[3]);
1132
1133        // step 6: reorder the result back into the buffer. again we would normally have to do an expensive computation
1134        // but instead we can precompute and hardcode the ordering
1135        // note that we're also rolling a transpose step into this reorder
1136        buffer.store(scratch0[0], 0);
1137        buffer.store(scratch1[1], 1);
1138        buffer.store(scratch2[2], 2);
1139        buffer.store(scratch0[3], 3);
1140        buffer.store(scratch1[0], 4);
1141        buffer.store(scratch2[1], 5);
1142        buffer.store(scratch0[2], 6);
1143        buffer.store(scratch1[3], 7);
1144        buffer.store(scratch2[0], 8);
1145        buffer.store(scratch0[1], 9);
1146        buffer.store(scratch1[2], 10);
1147        buffer.store(scratch2[3], 11);
1148    }
1149}
1150
1151pub struct Butterfly13<T> {
1152    twiddle1: Complex<T>,
1153    twiddle2: Complex<T>,
1154    twiddle3: Complex<T>,
1155    twiddle4: Complex<T>,
1156    twiddle5: Complex<T>,
1157    twiddle6: Complex<T>,
1158    direction: FftDirection,
1159}
1160boilerplate_fft_butterfly!(Butterfly13, 13, |this: &Butterfly13<_>| this.direction);
1161impl<T: FftNum> Butterfly13<T> {
1162    pub fn new(direction: FftDirection) -> Self {
1163        let twiddle1: Complex<T> = twiddles::compute_twiddle(1, 13, direction);
1164        let twiddle2: Complex<T> = twiddles::compute_twiddle(2, 13, direction);
1165        let twiddle3: Complex<T> = twiddles::compute_twiddle(3, 13, direction);
1166        let twiddle4: Complex<T> = twiddles::compute_twiddle(4, 13, direction);
1167        let twiddle5: Complex<T> = twiddles::compute_twiddle(5, 13, direction);
1168        let twiddle6: Complex<T> = twiddles::compute_twiddle(6, 13, direction);
1169        Self {
1170            twiddle1,
1171            twiddle2,
1172            twiddle3,
1173            twiddle4,
1174            twiddle5,
1175            twiddle6,
1176            direction,
1177        }
1178    }
1179
1180    #[inline(never)]
1181    unsafe fn perform_fft_contiguous(&self, mut buffer: impl LoadStore<T>) {
1182        // This function was derived in the same manner as the butterflies for length 3, 5 and 7.
1183        // However, instead of doing it by hand the actual code is autogenerated
1184        // with the `genbutterflies.py` script in the `tools` directory.
1185        let x112p = buffer.load(1) + buffer.load(12);
1186        let x112n = buffer.load(1) - buffer.load(12);
1187        let x211p = buffer.load(2) + buffer.load(11);
1188        let x211n = buffer.load(2) - buffer.load(11);
1189        let x310p = buffer.load(3) + buffer.load(10);
1190        let x310n = buffer.load(3) - buffer.load(10);
1191        let x49p = buffer.load(4) + buffer.load(9);
1192        let x49n = buffer.load(4) - buffer.load(9);
1193        let x58p = buffer.load(5) + buffer.load(8);
1194        let x58n = buffer.load(5) - buffer.load(8);
1195        let x67p = buffer.load(6) + buffer.load(7);
1196        let x67n = buffer.load(6) - buffer.load(7);
1197        let sum = buffer.load(0) + x112p + x211p + x310p + x49p + x58p + x67p;
1198        let b112re_a = buffer.load(0).re
1199            + self.twiddle1.re * x112p.re
1200            + self.twiddle2.re * x211p.re
1201            + self.twiddle3.re * x310p.re
1202            + self.twiddle4.re * x49p.re
1203            + self.twiddle5.re * x58p.re
1204            + self.twiddle6.re * x67p.re;
1205        let b112re_b = self.twiddle1.im * x112n.im
1206            + self.twiddle2.im * x211n.im
1207            + self.twiddle3.im * x310n.im
1208            + self.twiddle4.im * x49n.im
1209            + self.twiddle5.im * x58n.im
1210            + self.twiddle6.im * x67n.im;
1211        let b211re_a = buffer.load(0).re
1212            + self.twiddle2.re * x112p.re
1213            + self.twiddle4.re * x211p.re
1214            + self.twiddle6.re * x310p.re
1215            + self.twiddle5.re * x49p.re
1216            + self.twiddle3.re * x58p.re
1217            + self.twiddle1.re * x67p.re;
1218        let b211re_b = self.twiddle2.im * x112n.im
1219            + self.twiddle4.im * x211n.im
1220            + self.twiddle6.im * x310n.im
1221            + -self.twiddle5.im * x49n.im
1222            + -self.twiddle3.im * x58n.im
1223            + -self.twiddle1.im * x67n.im;
1224        let b310re_a = buffer.load(0).re
1225            + self.twiddle3.re * x112p.re
1226            + self.twiddle6.re * x211p.re
1227            + self.twiddle4.re * x310p.re
1228            + self.twiddle1.re * x49p.re
1229            + self.twiddle2.re * x58p.re
1230            + self.twiddle5.re * x67p.re;
1231        let b310re_b = self.twiddle3.im * x112n.im
1232            + self.twiddle6.im * x211n.im
1233            + -self.twiddle4.im * x310n.im
1234            + -self.twiddle1.im * x49n.im
1235            + self.twiddle2.im * x58n.im
1236            + self.twiddle5.im * x67n.im;
1237        let b49re_a = buffer.load(0).re
1238            + self.twiddle4.re * x112p.re
1239            + self.twiddle5.re * x211p.re
1240            + self.twiddle1.re * x310p.re
1241            + self.twiddle3.re * x49p.re
1242            + self.twiddle6.re * x58p.re
1243            + self.twiddle2.re * x67p.re;
1244        let b49re_b = self.twiddle4.im * x112n.im
1245            + -self.twiddle5.im * x211n.im
1246            + -self.twiddle1.im * x310n.im
1247            + self.twiddle3.im * x49n.im
1248            + -self.twiddle6.im * x58n.im
1249            + -self.twiddle2.im * x67n.im;
1250        let b58re_a = buffer.load(0).re
1251            + self.twiddle5.re * x112p.re
1252            + self.twiddle3.re * x211p.re
1253            + self.twiddle2.re * x310p.re
1254            + self.twiddle6.re * x49p.re
1255            + self.twiddle1.re * x58p.re
1256            + self.twiddle4.re * x67p.re;
1257        let b58re_b = self.twiddle5.im * x112n.im
1258            + -self.twiddle3.im * x211n.im
1259            + self.twiddle2.im * x310n.im
1260            + -self.twiddle6.im * x49n.im
1261            + -self.twiddle1.im * x58n.im
1262            + self.twiddle4.im * x67n.im;
1263        let b67re_a = buffer.load(0).re
1264            + self.twiddle6.re * x112p.re
1265            + self.twiddle1.re * x211p.re
1266            + self.twiddle5.re * x310p.re
1267            + self.twiddle2.re * x49p.re
1268            + self.twiddle4.re * x58p.re
1269            + self.twiddle3.re * x67p.re;
1270        let b67re_b = self.twiddle6.im * x112n.im
1271            + -self.twiddle1.im * x211n.im
1272            + self.twiddle5.im * x310n.im
1273            + -self.twiddle2.im * x49n.im
1274            + self.twiddle4.im * x58n.im
1275            + -self.twiddle3.im * x67n.im;
1276
1277        let b112im_a = buffer.load(0).im
1278            + self.twiddle1.re * x112p.im
1279            + self.twiddle2.re * x211p.im
1280            + self.twiddle3.re * x310p.im
1281            + self.twiddle4.re * x49p.im
1282            + self.twiddle5.re * x58p.im
1283            + self.twiddle6.re * x67p.im;
1284        let b112im_b = self.twiddle1.im * x112n.re
1285            + self.twiddle2.im * x211n.re
1286            + self.twiddle3.im * x310n.re
1287            + self.twiddle4.im * x49n.re
1288            + self.twiddle5.im * x58n.re
1289            + self.twiddle6.im * x67n.re;
1290        let b211im_a = buffer.load(0).im
1291            + self.twiddle2.re * x112p.im
1292            + self.twiddle4.re * x211p.im
1293            + self.twiddle6.re * x310p.im
1294            + self.twiddle5.re * x49p.im
1295            + self.twiddle3.re * x58p.im
1296            + self.twiddle1.re * x67p.im;
1297        let b211im_b = self.twiddle2.im * x112n.re
1298            + self.twiddle4.im * x211n.re
1299            + self.twiddle6.im * x310n.re
1300            + -self.twiddle5.im * x49n.re
1301            + -self.twiddle3.im * x58n.re
1302            + -self.twiddle1.im * x67n.re;
1303        let b310im_a = buffer.load(0).im
1304            + self.twiddle3.re * x112p.im
1305            + self.twiddle6.re * x211p.im
1306            + self.twiddle4.re * x310p.im
1307            + self.twiddle1.re * x49p.im
1308            + self.twiddle2.re * x58p.im
1309            + self.twiddle5.re * x67p.im;
1310        let b310im_b = self.twiddle3.im * x112n.re
1311            + self.twiddle6.im * x211n.re
1312            + -self.twiddle4.im * x310n.re
1313            + -self.twiddle1.im * x49n.re
1314            + self.twiddle2.im * x58n.re
1315            + self.twiddle5.im * x67n.re;
1316        let b49im_a = buffer.load(0).im
1317            + self.twiddle4.re * x112p.im
1318            + self.twiddle5.re * x211p.im
1319            + self.twiddle1.re * x310p.im
1320            + self.twiddle3.re * x49p.im
1321            + self.twiddle6.re * x58p.im
1322            + self.twiddle2.re * x67p.im;
1323        let b49im_b = self.twiddle4.im * x112n.re
1324            + -self.twiddle5.im * x211n.re
1325            + -self.twiddle1.im * x310n.re
1326            + self.twiddle3.im * x49n.re
1327            + -self.twiddle6.im * x58n.re
1328            + -self.twiddle2.im * x67n.re;
1329        let b58im_a = buffer.load(0).im
1330            + self.twiddle5.re * x112p.im
1331            + self.twiddle3.re * x211p.im
1332            + self.twiddle2.re * x310p.im
1333            + self.twiddle6.re * x49p.im
1334            + self.twiddle1.re * x58p.im
1335            + self.twiddle4.re * x67p.im;
1336        let b58im_b = self.twiddle5.im * x112n.re
1337            + -self.twiddle3.im * x211n.re
1338            + self.twiddle2.im * x310n.re
1339            + -self.twiddle6.im * x49n.re
1340            + -self.twiddle1.im * x58n.re
1341            + self.twiddle4.im * x67n.re;
1342        let b67im_a = buffer.load(0).im
1343            + self.twiddle6.re * x112p.im
1344            + self.twiddle1.re * x211p.im
1345            + self.twiddle5.re * x310p.im
1346            + self.twiddle2.re * x49p.im
1347            + self.twiddle4.re * x58p.im
1348            + self.twiddle3.re * x67p.im;
1349        let b67im_b = self.twiddle6.im * x112n.re
1350            + -self.twiddle1.im * x211n.re
1351            + self.twiddle5.im * x310n.re
1352            + -self.twiddle2.im * x49n.re
1353            + self.twiddle4.im * x58n.re
1354            + -self.twiddle3.im * x67n.re;
1355
1356        let out1re = b112re_a - b112re_b;
1357        let out1im = b112im_a + b112im_b;
1358        let out2re = b211re_a - b211re_b;
1359        let out2im = b211im_a + b211im_b;
1360        let out3re = b310re_a - b310re_b;
1361        let out3im = b310im_a + b310im_b;
1362        let out4re = b49re_a - b49re_b;
1363        let out4im = b49im_a + b49im_b;
1364        let out5re = b58re_a - b58re_b;
1365        let out5im = b58im_a + b58im_b;
1366        let out6re = b67re_a - b67re_b;
1367        let out6im = b67im_a + b67im_b;
1368        let out7re = b67re_a + b67re_b;
1369        let out7im = b67im_a - b67im_b;
1370        let out8re = b58re_a + b58re_b;
1371        let out8im = b58im_a - b58im_b;
1372        let out9re = b49re_a + b49re_b;
1373        let out9im = b49im_a - b49im_b;
1374        let out10re = b310re_a + b310re_b;
1375        let out10im = b310im_a - b310im_b;
1376        let out11re = b211re_a + b211re_b;
1377        let out11im = b211im_a - b211im_b;
1378        let out12re = b112re_a + b112re_b;
1379        let out12im = b112im_a - b112im_b;
1380        buffer.store(sum, 0);
1381        buffer.store(
1382            Complex {
1383                re: out1re,
1384                im: out1im,
1385            },
1386            1,
1387        );
1388        buffer.store(
1389            Complex {
1390                re: out2re,
1391                im: out2im,
1392            },
1393            2,
1394        );
1395        buffer.store(
1396            Complex {
1397                re: out3re,
1398                im: out3im,
1399            },
1400            3,
1401        );
1402        buffer.store(
1403            Complex {
1404                re: out4re,
1405                im: out4im,
1406            },
1407            4,
1408        );
1409        buffer.store(
1410            Complex {
1411                re: out5re,
1412                im: out5im,
1413            },
1414            5,
1415        );
1416        buffer.store(
1417            Complex {
1418                re: out6re,
1419                im: out6im,
1420            },
1421            6,
1422        );
1423        buffer.store(
1424            Complex {
1425                re: out7re,
1426                im: out7im,
1427            },
1428            7,
1429        );
1430        buffer.store(
1431            Complex {
1432                re: out8re,
1433                im: out8im,
1434            },
1435            8,
1436        );
1437        buffer.store(
1438            Complex {
1439                re: out9re,
1440                im: out9im,
1441            },
1442            9,
1443        );
1444        buffer.store(
1445            Complex {
1446                re: out10re,
1447                im: out10im,
1448            },
1449            10,
1450        );
1451        buffer.store(
1452            Complex {
1453                re: out11re,
1454                im: out11im,
1455            },
1456            11,
1457        );
1458        buffer.store(
1459            Complex {
1460                re: out12re,
1461                im: out12im,
1462            },
1463            12,
1464        );
1465    }
1466}
1467
1468pub struct Butterfly16<T> {
1469    butterfly8: Butterfly8<T>,
1470    twiddle1: Complex<T>,
1471    twiddle2: Complex<T>,
1472    twiddle3: Complex<T>,
1473}
1474boilerplate_fft_butterfly!(Butterfly16, 16, |this: &Butterfly16<_>| this
1475    .butterfly8
1476    .fft_direction());
1477impl<T: FftNum> Butterfly16<T> {
1478    #[inline(always)]
1479    pub fn new(direction: FftDirection) -> Self {
1480        Self {
1481            butterfly8: Butterfly8::new(direction),
1482            twiddle1: twiddles::compute_twiddle(1, 16, direction),
1483            twiddle2: twiddles::compute_twiddle(2, 16, direction),
1484            twiddle3: twiddles::compute_twiddle(3, 16, direction),
1485        }
1486    }
1487
1488    #[inline(never)]
1489    unsafe fn perform_fft_contiguous(&self, mut buffer: impl LoadStore<T>) {
1490        let butterfly4 = Butterfly4::new(self.fft_direction());
1491
1492        // we're going to hardcode a step of split radix
1493        // step 1: copy and reorder the  input into the scratch
1494        let mut scratch_evens = [
1495            buffer.load(0),
1496            buffer.load(2),
1497            buffer.load(4),
1498            buffer.load(6),
1499            buffer.load(8),
1500            buffer.load(10),
1501            buffer.load(12),
1502            buffer.load(14),
1503        ];
1504
1505        let mut scratch_odds_n1 = [
1506            buffer.load(1),
1507            buffer.load(5),
1508            buffer.load(9),
1509            buffer.load(13),
1510        ];
1511        let mut scratch_odds_n3 = [
1512            buffer.load(15),
1513            buffer.load(3),
1514            buffer.load(7),
1515            buffer.load(11),
1516        ];
1517
1518        // step 2: column FFTs
1519        self.butterfly8.perform_fft_contiguous(&mut scratch_evens);
1520        butterfly4.perform_fft_contiguous(&mut scratch_odds_n1);
1521        butterfly4.perform_fft_contiguous(&mut scratch_odds_n3);
1522
1523        // step 3: apply twiddle factors
1524        scratch_odds_n1[1] = scratch_odds_n1[1] * self.twiddle1;
1525        scratch_odds_n3[1] = scratch_odds_n3[1] * self.twiddle1.conj();
1526
1527        scratch_odds_n1[2] = scratch_odds_n1[2] * self.twiddle2;
1528        scratch_odds_n3[2] = scratch_odds_n3[2] * self.twiddle2.conj();
1529
1530        scratch_odds_n1[3] = scratch_odds_n1[3] * self.twiddle3;
1531        scratch_odds_n3[3] = scratch_odds_n3[3] * self.twiddle3.conj();
1532
1533        // step 4: cross FFTs
1534        Butterfly2::perform_fft_strided(&mut scratch_odds_n1[0], &mut scratch_odds_n3[0]);
1535        Butterfly2::perform_fft_strided(&mut scratch_odds_n1[1], &mut scratch_odds_n3[1]);
1536        Butterfly2::perform_fft_strided(&mut scratch_odds_n1[2], &mut scratch_odds_n3[2]);
1537        Butterfly2::perform_fft_strided(&mut scratch_odds_n1[3], &mut scratch_odds_n3[3]);
1538
1539        // apply the butterfly 4 twiddle factor, which is just a rotation
1540        scratch_odds_n3[0] = twiddles::rotate_90(scratch_odds_n3[0], self.fft_direction());
1541        scratch_odds_n3[1] = twiddles::rotate_90(scratch_odds_n3[1], self.fft_direction());
1542        scratch_odds_n3[2] = twiddles::rotate_90(scratch_odds_n3[2], self.fft_direction());
1543        scratch_odds_n3[3] = twiddles::rotate_90(scratch_odds_n3[3], self.fft_direction());
1544
1545        //step 5: copy/add/subtract data back to buffer
1546        buffer.store(scratch_evens[0] + scratch_odds_n1[0], 0);
1547        buffer.store(scratch_evens[1] + scratch_odds_n1[1], 1);
1548        buffer.store(scratch_evens[2] + scratch_odds_n1[2], 2);
1549        buffer.store(scratch_evens[3] + scratch_odds_n1[3], 3);
1550        buffer.store(scratch_evens[4] + scratch_odds_n3[0], 4);
1551        buffer.store(scratch_evens[5] + scratch_odds_n3[1], 5);
1552        buffer.store(scratch_evens[6] + scratch_odds_n3[2], 6);
1553        buffer.store(scratch_evens[7] + scratch_odds_n3[3], 7);
1554        buffer.store(scratch_evens[0] - scratch_odds_n1[0], 8);
1555        buffer.store(scratch_evens[1] - scratch_odds_n1[1], 9);
1556        buffer.store(scratch_evens[2] - scratch_odds_n1[2], 10);
1557        buffer.store(scratch_evens[3] - scratch_odds_n1[3], 11);
1558        buffer.store(scratch_evens[4] - scratch_odds_n3[0], 12);
1559        buffer.store(scratch_evens[5] - scratch_odds_n3[1], 13);
1560        buffer.store(scratch_evens[6] - scratch_odds_n3[2], 14);
1561        buffer.store(scratch_evens[7] - scratch_odds_n3[3], 15);
1562    }
1563}
1564
1565pub struct Butterfly17<T> {
1566    twiddle1: Complex<T>,
1567    twiddle2: Complex<T>,
1568    twiddle3: Complex<T>,
1569    twiddle4: Complex<T>,
1570    twiddle5: Complex<T>,
1571    twiddle6: Complex<T>,
1572    twiddle7: Complex<T>,
1573    twiddle8: Complex<T>,
1574    direction: FftDirection,
1575}
1576boilerplate_fft_butterfly!(Butterfly17, 17, |this: &Butterfly17<_>| this.direction);
1577impl<T: FftNum> Butterfly17<T> {
1578    pub fn new(direction: FftDirection) -> Self {
1579        let twiddle1: Complex<T> = twiddles::compute_twiddle(1, 17, direction);
1580        let twiddle2: Complex<T> = twiddles::compute_twiddle(2, 17, direction);
1581        let twiddle3: Complex<T> = twiddles::compute_twiddle(3, 17, direction);
1582        let twiddle4: Complex<T> = twiddles::compute_twiddle(4, 17, direction);
1583        let twiddle5: Complex<T> = twiddles::compute_twiddle(5, 17, direction);
1584        let twiddle6: Complex<T> = twiddles::compute_twiddle(6, 17, direction);
1585        let twiddle7: Complex<T> = twiddles::compute_twiddle(7, 17, direction);
1586        let twiddle8: Complex<T> = twiddles::compute_twiddle(8, 17, direction);
1587        Self {
1588            twiddle1,
1589            twiddle2,
1590            twiddle3,
1591            twiddle4,
1592            twiddle5,
1593            twiddle6,
1594            twiddle7,
1595            twiddle8,
1596            direction,
1597        }
1598    }
1599
    /// Runs the length-17 butterfly on `buffer`.
    ///
    /// Loads elements 0 through 16 via `buffer.load`, combines them with the eight
    /// stored twiddle factors, and writes the 17 results back via `buffer.store`
    /// at indices 0 through 16. All loads happen before the first store.
    ///
    /// # Safety
    ///
    /// NOTE(review): presumably `buffer` must provide at least 17 valid elements for
    /// `load`/`store` — confirm against the `LoadStore` contract.
    #[inline(never)]
    unsafe fn perform_fft_contiguous(&self, mut buffer: impl LoadStore<T>) {
        // This function was derived in the same manner as the butterflies for length 3, 5 and 7.
        // However, instead of doing it by hand the actual code is autogenerated
        // with the `genbutterflies.py` script in the `tools` directory.

        // Pair input k with input 17 - k: the `p` value is their sum, the `n` value their difference.
        let x116p = buffer.load(1) + buffer.load(16);
        let x116n = buffer.load(1) - buffer.load(16);
        let x215p = buffer.load(2) + buffer.load(15);
        let x215n = buffer.load(2) - buffer.load(15);
        let x314p = buffer.load(3) + buffer.load(14);
        let x314n = buffer.load(3) - buffer.load(14);
        let x413p = buffer.load(4) + buffer.load(13);
        let x413n = buffer.load(4) - buffer.load(13);
        let x512p = buffer.load(5) + buffer.load(12);
        let x512n = buffer.load(5) - buffer.load(12);
        let x611p = buffer.load(6) + buffer.load(11);
        let x611n = buffer.load(6) - buffer.load(11);
        let x710p = buffer.load(7) + buffer.load(10);
        let x710n = buffer.load(7) - buffer.load(10);
        let x89p = buffer.load(8) + buffer.load(9);
        let x89n = buffer.load(8) - buffer.load(9);
        // Output 0 is the plain sum of all 17 inputs.
        let sum = buffer.load(0) + x116p + x215p + x314p + x413p + x512p + x611p + x710p + x89p;

        // Real-part terms. Row `b{n}{17-n}` feeds outputs n and 17 - n.
        // In row n, the pair built from inputs k and 17 - k is multiplied by the twiddle
        // whose index is n * k mod 17 folded into 1..=8 (m becomes 17 - m when m > 8).
        // `_a` sums twiddle real parts times the real parts of the pair sums, plus input 0.
        // `_b` sums twiddle imaginary parts times the imaginary parts of the pair differences,
        // negating the twiddle whenever n * k mod 17 is greater than 8.
        let b116re_a = buffer.load(0).re
            + self.twiddle1.re * x116p.re
            + self.twiddle2.re * x215p.re
            + self.twiddle3.re * x314p.re
            + self.twiddle4.re * x413p.re
            + self.twiddle5.re * x512p.re
            + self.twiddle6.re * x611p.re
            + self.twiddle7.re * x710p.re
            + self.twiddle8.re * x89p.re;
        let b116re_b = self.twiddle1.im * x116n.im
            + self.twiddle2.im * x215n.im
            + self.twiddle3.im * x314n.im
            + self.twiddle4.im * x413n.im
            + self.twiddle5.im * x512n.im
            + self.twiddle6.im * x611n.im
            + self.twiddle7.im * x710n.im
            + self.twiddle8.im * x89n.im;
        let b215re_a = buffer.load(0).re
            + self.twiddle2.re * x116p.re
            + self.twiddle4.re * x215p.re
            + self.twiddle6.re * x314p.re
            + self.twiddle8.re * x413p.re
            + self.twiddle7.re * x512p.re
            + self.twiddle5.re * x611p.re
            + self.twiddle3.re * x710p.re
            + self.twiddle1.re * x89p.re;
        let b215re_b = self.twiddle2.im * x116n.im
            + self.twiddle4.im * x215n.im
            + self.twiddle6.im * x314n.im
            + self.twiddle8.im * x413n.im
            + -self.twiddle7.im * x512n.im
            + -self.twiddle5.im * x611n.im
            + -self.twiddle3.im * x710n.im
            + -self.twiddle1.im * x89n.im;
        let b314re_a = buffer.load(0).re
            + self.twiddle3.re * x116p.re
            + self.twiddle6.re * x215p.re
            + self.twiddle8.re * x314p.re
            + self.twiddle5.re * x413p.re
            + self.twiddle2.re * x512p.re
            + self.twiddle1.re * x611p.re
            + self.twiddle4.re * x710p.re
            + self.twiddle7.re * x89p.re;
        let b314re_b = self.twiddle3.im * x116n.im
            + self.twiddle6.im * x215n.im
            + -self.twiddle8.im * x314n.im
            + -self.twiddle5.im * x413n.im
            + -self.twiddle2.im * x512n.im
            + self.twiddle1.im * x611n.im
            + self.twiddle4.im * x710n.im
            + self.twiddle7.im * x89n.im;
        let b413re_a = buffer.load(0).re
            + self.twiddle4.re * x116p.re
            + self.twiddle8.re * x215p.re
            + self.twiddle5.re * x314p.re
            + self.twiddle1.re * x413p.re
            + self.twiddle3.re * x512p.re
            + self.twiddle7.re * x611p.re
            + self.twiddle6.re * x710p.re
            + self.twiddle2.re * x89p.re;
        let b413re_b = self.twiddle4.im * x116n.im
            + self.twiddle8.im * x215n.im
            + -self.twiddle5.im * x314n.im
            + -self.twiddle1.im * x413n.im
            + self.twiddle3.im * x512n.im
            + self.twiddle7.im * x611n.im
            + -self.twiddle6.im * x710n.im
            + -self.twiddle2.im * x89n.im;
        let b512re_a = buffer.load(0).re
            + self.twiddle5.re * x116p.re
            + self.twiddle7.re * x215p.re
            + self.twiddle2.re * x314p.re
            + self.twiddle3.re * x413p.re
            + self.twiddle8.re * x512p.re
            + self.twiddle4.re * x611p.re
            + self.twiddle1.re * x710p.re
            + self.twiddle6.re * x89p.re;
        let b512re_b = self.twiddle5.im * x116n.im
            + -self.twiddle7.im * x215n.im
            + -self.twiddle2.im * x314n.im
            + self.twiddle3.im * x413n.im
            + self.twiddle8.im * x512n.im
            + -self.twiddle4.im * x611n.im
            + self.twiddle1.im * x710n.im
            + self.twiddle6.im * x89n.im;
        let b611re_a = buffer.load(0).re
            + self.twiddle6.re * x116p.re
            + self.twiddle5.re * x215p.re
            + self.twiddle1.re * x314p.re
            + self.twiddle7.re * x413p.re
            + self.twiddle4.re * x512p.re
            + self.twiddle2.re * x611p.re
            + self.twiddle8.re * x710p.re
            + self.twiddle3.re * x89p.re;
        let b611re_b = self.twiddle6.im * x116n.im
            + -self.twiddle5.im * x215n.im
            + self.twiddle1.im * x314n.im
            + self.twiddle7.im * x413n.im
            + -self.twiddle4.im * x512n.im
            + self.twiddle2.im * x611n.im
            + self.twiddle8.im * x710n.im
            + -self.twiddle3.im * x89n.im;
        let b710re_a = buffer.load(0).re
            + self.twiddle7.re * x116p.re
            + self.twiddle3.re * x215p.re
            + self.twiddle4.re * x314p.re
            + self.twiddle6.re * x413p.re
            + self.twiddle1.re * x512p.re
            + self.twiddle8.re * x611p.re
            + self.twiddle2.re * x710p.re
            + self.twiddle5.re * x89p.re;
        let b710re_b = self.twiddle7.im * x116n.im
            + -self.twiddle3.im * x215n.im
            + self.twiddle4.im * x314n.im
            + -self.twiddle6.im * x413n.im
            + self.twiddle1.im * x512n.im
            + self.twiddle8.im * x611n.im
            + -self.twiddle2.im * x710n.im
            + self.twiddle5.im * x89n.im;
        let b89re_a = buffer.load(0).re
            + self.twiddle8.re * x116p.re
            + self.twiddle1.re * x215p.re
            + self.twiddle7.re * x314p.re
            + self.twiddle2.re * x413p.re
            + self.twiddle6.re * x512p.re
            + self.twiddle3.re * x611p.re
            + self.twiddle5.re * x710p.re
            + self.twiddle4.re * x89p.re;
        let b89re_b = self.twiddle8.im * x116n.im
            + -self.twiddle1.im * x215n.im
            + self.twiddle7.im * x314n.im
            + -self.twiddle2.im * x413n.im
            + self.twiddle6.im * x512n.im
            + -self.twiddle3.im * x611n.im
            + self.twiddle5.im * x710n.im
            + -self.twiddle4.im * x89n.im;

        // Imaginary-part terms: same twiddle layout and signs as above, but `_a` uses the
        // imaginary parts of the pair sums and `_b` uses the real parts of the pair differences.
        let b116im_a = buffer.load(0).im
            + self.twiddle1.re * x116p.im
            + self.twiddle2.re * x215p.im
            + self.twiddle3.re * x314p.im
            + self.twiddle4.re * x413p.im
            + self.twiddle5.re * x512p.im
            + self.twiddle6.re * x611p.im
            + self.twiddle7.re * x710p.im
            + self.twiddle8.re * x89p.im;
        let b116im_b = self.twiddle1.im * x116n.re
            + self.twiddle2.im * x215n.re
            + self.twiddle3.im * x314n.re
            + self.twiddle4.im * x413n.re
            + self.twiddle5.im * x512n.re
            + self.twiddle6.im * x611n.re
            + self.twiddle7.im * x710n.re
            + self.twiddle8.im * x89n.re;
        let b215im_a = buffer.load(0).im
            + self.twiddle2.re * x116p.im
            + self.twiddle4.re * x215p.im
            + self.twiddle6.re * x314p.im
            + self.twiddle8.re * x413p.im
            + self.twiddle7.re * x512p.im
            + self.twiddle5.re * x611p.im
            + self.twiddle3.re * x710p.im
            + self.twiddle1.re * x89p.im;
        let b215im_b = self.twiddle2.im * x116n.re
            + self.twiddle4.im * x215n.re
            + self.twiddle6.im * x314n.re
            + self.twiddle8.im * x413n.re
            + -self.twiddle7.im * x512n.re
            + -self.twiddle5.im * x611n.re
            + -self.twiddle3.im * x710n.re
            + -self.twiddle1.im * x89n.re;
        let b314im_a = buffer.load(0).im
            + self.twiddle3.re * x116p.im
            + self.twiddle6.re * x215p.im
            + self.twiddle8.re * x314p.im
            + self.twiddle5.re * x413p.im
            + self.twiddle2.re * x512p.im
            + self.twiddle1.re * x611p.im
            + self.twiddle4.re * x710p.im
            + self.twiddle7.re * x89p.im;
        let b314im_b = self.twiddle3.im * x116n.re
            + self.twiddle6.im * x215n.re
            + -self.twiddle8.im * x314n.re
            + -self.twiddle5.im * x413n.re
            + -self.twiddle2.im * x512n.re
            + self.twiddle1.im * x611n.re
            + self.twiddle4.im * x710n.re
            + self.twiddle7.im * x89n.re;
        let b413im_a = buffer.load(0).im
            + self.twiddle4.re * x116p.im
            + self.twiddle8.re * x215p.im
            + self.twiddle5.re * x314p.im
            + self.twiddle1.re * x413p.im
            + self.twiddle3.re * x512p.im
            + self.twiddle7.re * x611p.im
            + self.twiddle6.re * x710p.im
            + self.twiddle2.re * x89p.im;
        let b413im_b = self.twiddle4.im * x116n.re
            + self.twiddle8.im * x215n.re
            + -self.twiddle5.im * x314n.re
            + -self.twiddle1.im * x413n.re
            + self.twiddle3.im * x512n.re
            + self.twiddle7.im * x611n.re
            + -self.twiddle6.im * x710n.re
            + -self.twiddle2.im * x89n.re;
        let b512im_a = buffer.load(0).im
            + self.twiddle5.re * x116p.im
            + self.twiddle7.re * x215p.im
            + self.twiddle2.re * x314p.im
            + self.twiddle3.re * x413p.im
            + self.twiddle8.re * x512p.im
            + self.twiddle4.re * x611p.im
            + self.twiddle1.re * x710p.im
            + self.twiddle6.re * x89p.im;
        let b512im_b = self.twiddle5.im * x116n.re
            + -self.twiddle7.im * x215n.re
            + -self.twiddle2.im * x314n.re
            + self.twiddle3.im * x413n.re
            + self.twiddle8.im * x512n.re
            + -self.twiddle4.im * x611n.re
            + self.twiddle1.im * x710n.re
            + self.twiddle6.im * x89n.re;
        let b611im_a = buffer.load(0).im
            + self.twiddle6.re * x116p.im
            + self.twiddle5.re * x215p.im
            + self.twiddle1.re * x314p.im
            + self.twiddle7.re * x413p.im
            + self.twiddle4.re * x512p.im
            + self.twiddle2.re * x611p.im
            + self.twiddle8.re * x710p.im
            + self.twiddle3.re * x89p.im;
        let b611im_b = self.twiddle6.im * x116n.re
            + -self.twiddle5.im * x215n.re
            + self.twiddle1.im * x314n.re
            + self.twiddle7.im * x413n.re
            + -self.twiddle4.im * x512n.re
            + self.twiddle2.im * x611n.re
            + self.twiddle8.im * x710n.re
            + -self.twiddle3.im * x89n.re;
        let b710im_a = buffer.load(0).im
            + self.twiddle7.re * x116p.im
            + self.twiddle3.re * x215p.im
            + self.twiddle4.re * x314p.im
            + self.twiddle6.re * x413p.im
            + self.twiddle1.re * x512p.im
            + self.twiddle8.re * x611p.im
            + self.twiddle2.re * x710p.im
            + self.twiddle5.re * x89p.im;
        let b710im_b = self.twiddle7.im * x116n.re
            + -self.twiddle3.im * x215n.re
            + self.twiddle4.im * x314n.re
            + -self.twiddle6.im * x413n.re
            + self.twiddle1.im * x512n.re
            + self.twiddle8.im * x611n.re
            + -self.twiddle2.im * x710n.re
            + self.twiddle5.im * x89n.re;
        let b89im_a = buffer.load(0).im
            + self.twiddle8.re * x116p.im
            + self.twiddle1.re * x215p.im
            + self.twiddle7.re * x314p.im
            + self.twiddle2.re * x413p.im
            + self.twiddle6.re * x512p.im
            + self.twiddle3.re * x611p.im
            + self.twiddle5.re * x710p.im
            + self.twiddle4.re * x89p.im;
        let b89im_b = self.twiddle8.im * x116n.re
            + -self.twiddle1.im * x215n.re
            + self.twiddle7.im * x314n.re
            + -self.twiddle2.im * x413n.re
            + self.twiddle6.im * x512n.re
            + -self.twiddle3.im * x611n.re
            + self.twiddle5.im * x710n.re
            + -self.twiddle4.im * x89n.re;

        // Outputs n and 17 - n share the same `_a` and `_b` terms; only the sign applied
        // to `_b` flips between the two (and between the real and imaginary parts).
        let out1re = b116re_a - b116re_b;
        let out1im = b116im_a + b116im_b;
        let out2re = b215re_a - b215re_b;
        let out2im = b215im_a + b215im_b;
        let out3re = b314re_a - b314re_b;
        let out3im = b314im_a + b314im_b;
        let out4re = b413re_a - b413re_b;
        let out4im = b413im_a + b413im_b;
        let out5re = b512re_a - b512re_b;
        let out5im = b512im_a + b512im_b;
        let out6re = b611re_a - b611re_b;
        let out6im = b611im_a + b611im_b;
        let out7re = b710re_a - b710re_b;
        let out7im = b710im_a + b710im_b;
        let out8re = b89re_a - b89re_b;
        let out8im = b89im_a + b89im_b;
        let out9re = b89re_a + b89re_b;
        let out9im = b89im_a - b89im_b;
        let out10re = b710re_a + b710re_b;
        let out10im = b710im_a - b710im_b;
        let out11re = b611re_a + b611re_b;
        let out11im = b611im_a - b611im_b;
        let out12re = b512re_a + b512re_b;
        let out12im = b512im_a - b512im_b;
        let out13re = b413re_a + b413re_b;
        let out13im = b413im_a - b413im_b;
        let out14re = b314re_a + b314re_b;
        let out14im = b314im_a - b314im_b;
        let out15re = b215re_a + b215re_b;
        let out15im = b215im_a - b215im_b;
        let out16re = b116re_a + b116re_b;
        let out16im = b116im_a - b116im_b;

        // Write the results back: output n goes to index n of `buffer`.
        buffer.store(sum, 0);
        buffer.store(
            Complex {
                re: out1re,
                im: out1im,
            },
            1,
        );
        buffer.store(
            Complex {
                re: out2re,
                im: out2im,
            },
            2,
        );
        buffer.store(
            Complex {
                re: out3re,
                im: out3im,
            },
            3,
        );
        buffer.store(
            Complex {
                re: out4re,
                im: out4im,
            },
            4,
        );
        buffer.store(
            Complex {
                re: out5re,
                im: out5im,
            },
            5,
        );
        buffer.store(
            Complex {
                re: out6re,
                im: out6im,
            },
            6,
        );
        buffer.store(
            Complex {
                re: out7re,
                im: out7im,
            },
            7,
        );
        buffer.store(
            Complex {
                re: out8re,
                im: out8im,
            },
            8,
        );
        buffer.store(
            Complex {
                re: out9re,
                im: out9im,
            },
            9,
        );
        buffer.store(
            Complex {
                re: out10re,
                im: out10im,
            },
            10,
        );
        buffer.store(
            Complex {
                re: out11re,
                im: out11im,
            },
            11,
        );
        buffer.store(
            Complex {
                re: out12re,
                im: out12im,
            },
            12,
        );
        buffer.store(
            Complex {
                re: out13re,
                im: out13im,
            },
            13,
        );
        buffer.store(
            Complex {
                re: out14re,
                im: out14im,
            },
            14,
        );
        buffer.store(
            Complex {
                re: out15re,
                im: out15im,
            },
            15,
        );
        buffer.store(
            Complex {
                re: out16re,
                im: out16im,
            },
            16,
        );
    }
2042}
2043
/// Hard-coded butterfly for FFTs of length 19.
///
/// Holds the nine twiddle factors that `new` obtains from
/// `twiddles::compute_twiddle(k, 19, direction)` for `k = 1..=9`, plus the
/// direction those twiddles were computed for.
pub struct Butterfly19<T> {
    // `twiddleK` is the value of `twiddles::compute_twiddle(K, 19, direction)`.
    twiddle1: Complex<T>,
    twiddle2: Complex<T>,
    twiddle3: Complex<T>,
    twiddle4: Complex<T>,
    twiddle5: Complex<T>,
    twiddle6: Complex<T>,
    twiddle7: Complex<T>,
    twiddle8: Complex<T>,
    twiddle9: Complex<T>,
    // The direction handed to `new`; read back by the closure passed to the macro below.
    direction: FftDirection,
}
// Generates `perform_fft_butterfly` and the `Fft<T>` impl for this type.
// The arguments are the struct name, the FFT length, and the macro's `$direction_fn` closure.
boilerplate_fft_butterfly!(Butterfly19, 19, |this: &Butterfly19<_>| this.direction);
2057impl<T: FftNum> Butterfly19<T> {
2058    pub fn new(direction: FftDirection) -> Self {
2059        let twiddle1: Complex<T> = twiddles::compute_twiddle(1, 19, direction);
2060        let twiddle2: Complex<T> = twiddles::compute_twiddle(2, 19, direction);
2061        let twiddle3: Complex<T> = twiddles::compute_twiddle(3, 19, direction);
2062        let twiddle4: Complex<T> = twiddles::compute_twiddle(4, 19, direction);
2063        let twiddle5: Complex<T> = twiddles::compute_twiddle(5, 19, direction);
2064        let twiddle6: Complex<T> = twiddles::compute_twiddle(6, 19, direction);
2065        let twiddle7: Complex<T> = twiddles::compute_twiddle(7, 19, direction);
2066        let twiddle8: Complex<T> = twiddles::compute_twiddle(8, 19, direction);
2067        let twiddle9: Complex<T> = twiddles::compute_twiddle(9, 19, direction);
2068        Self {
2069            twiddle1,
2070            twiddle2,
2071            twiddle3,
2072            twiddle4,
2073            twiddle5,
2074            twiddle6,
2075            twiddle7,
2076            twiddle8,
2077            twiddle9,
2078            direction,
2079        }
2080    }
2081
2082    #[inline(never)]
2083    unsafe fn perform_fft_contiguous(&self, mut buffer: impl LoadStore<T>) {
2084        // This function was derived in the same manner as the butterflies for length 3, 5 and 7.
2085        // However, instead of doing it by hand the actual code is autogenerated
2086        // with the `genbutterflies.py` script in the `tools` directory.
2087        let x118p = buffer.load(1) + buffer.load(18);
2088        let x118n = buffer.load(1) - buffer.load(18);
2089        let x217p = buffer.load(2) + buffer.load(17);
2090        let x217n = buffer.load(2) - buffer.load(17);
2091        let x316p = buffer.load(3) + buffer.load(16);
2092        let x316n = buffer.load(3) - buffer.load(16);
2093        let x415p = buffer.load(4) + buffer.load(15);
2094        let x415n = buffer.load(4) - buffer.load(15);
2095        let x514p = buffer.load(5) + buffer.load(14);
2096        let x514n = buffer.load(5) - buffer.load(14);
2097        let x613p = buffer.load(6) + buffer.load(13);
2098        let x613n = buffer.load(6) - buffer.load(13);
2099        let x712p = buffer.load(7) + buffer.load(12);
2100        let x712n = buffer.load(7) - buffer.load(12);
2101        let x811p = buffer.load(8) + buffer.load(11);
2102        let x811n = buffer.load(8) - buffer.load(11);
2103        let x910p = buffer.load(9) + buffer.load(10);
2104        let x910n = buffer.load(9) - buffer.load(10);
2105        let sum =
2106            buffer.load(0) + x118p + x217p + x316p + x415p + x514p + x613p + x712p + x811p + x910p;
2107        let b118re_a = buffer.load(0).re
2108            + self.twiddle1.re * x118p.re
2109            + self.twiddle2.re * x217p.re
2110            + self.twiddle3.re * x316p.re
2111            + self.twiddle4.re * x415p.re
2112            + self.twiddle5.re * x514p.re
2113            + self.twiddle6.re * x613p.re
2114            + self.twiddle7.re * x712p.re
2115            + self.twiddle8.re * x811p.re
2116            + self.twiddle9.re * x910p.re;
2117        let b118re_b = self.twiddle1.im * x118n.im
2118            + self.twiddle2.im * x217n.im
2119            + self.twiddle3.im * x316n.im
2120            + self.twiddle4.im * x415n.im
2121            + self.twiddle5.im * x514n.im
2122            + self.twiddle6.im * x613n.im
2123            + self.twiddle7.im * x712n.im
2124            + self.twiddle8.im * x811n.im
2125            + self.twiddle9.im * x910n.im;
2126        let b217re_a = buffer.load(0).re
2127            + self.twiddle2.re * x118p.re
2128            + self.twiddle4.re * x217p.re
2129            + self.twiddle6.re * x316p.re
2130            + self.twiddle8.re * x415p.re
2131            + self.twiddle9.re * x514p.re
2132            + self.twiddle7.re * x613p.re
2133            + self.twiddle5.re * x712p.re
2134            + self.twiddle3.re * x811p.re
2135            + self.twiddle1.re * x910p.re;
2136        let b217re_b = self.twiddle2.im * x118n.im
2137            + self.twiddle4.im * x217n.im
2138            + self.twiddle6.im * x316n.im
2139            + self.twiddle8.im * x415n.im
2140            + -self.twiddle9.im * x514n.im
2141            + -self.twiddle7.im * x613n.im
2142            + -self.twiddle5.im * x712n.im
2143            + -self.twiddle3.im * x811n.im
2144            + -self.twiddle1.im * x910n.im;
2145        let b316re_a = buffer.load(0).re
2146            + self.twiddle3.re * x118p.re
2147            + self.twiddle6.re * x217p.re
2148            + self.twiddle9.re * x316p.re
2149            + self.twiddle7.re * x415p.re
2150            + self.twiddle4.re * x514p.re
2151            + self.twiddle1.re * x613p.re
2152            + self.twiddle2.re * x712p.re
2153            + self.twiddle5.re * x811p.re
2154            + self.twiddle8.re * x910p.re;
2155        let b316re_b = self.twiddle3.im * x118n.im
2156            + self.twiddle6.im * x217n.im
2157            + self.twiddle9.im * x316n.im
2158            + -self.twiddle7.im * x415n.im
2159            + -self.twiddle4.im * x514n.im
2160            + -self.twiddle1.im * x613n.im
2161            + self.twiddle2.im * x712n.im
2162            + self.twiddle5.im * x811n.im
2163            + self.twiddle8.im * x910n.im;
2164        let b415re_a = buffer.load(0).re
2165            + self.twiddle4.re * x118p.re
2166            + self.twiddle8.re * x217p.re
2167            + self.twiddle7.re * x316p.re
2168            + self.twiddle3.re * x415p.re
2169            + self.twiddle1.re * x514p.re
2170            + self.twiddle5.re * x613p.re
2171            + self.twiddle9.re * x712p.re
2172            + self.twiddle6.re * x811p.re
2173            + self.twiddle2.re * x910p.re;
2174        let b415re_b = self.twiddle4.im * x118n.im
2175            + self.twiddle8.im * x217n.im
2176            + -self.twiddle7.im * x316n.im
2177            + -self.twiddle3.im * x415n.im
2178            + self.twiddle1.im * x514n.im
2179            + self.twiddle5.im * x613n.im
2180            + self.twiddle9.im * x712n.im
2181            + -self.twiddle6.im * x811n.im
2182            + -self.twiddle2.im * x910n.im;
2183        let b514re_a = buffer.load(0).re
2184            + self.twiddle5.re * x118p.re
2185            + self.twiddle9.re * x217p.re
2186            + self.twiddle4.re * x316p.re
2187            + self.twiddle1.re * x415p.re
2188            + self.twiddle6.re * x514p.re
2189            + self.twiddle8.re * x613p.re
2190            + self.twiddle3.re * x712p.re
2191            + self.twiddle2.re * x811p.re
2192            + self.twiddle7.re * x910p.re;
2193        let b514re_b = self.twiddle5.im * x118n.im
2194            + -self.twiddle9.im * x217n.im
2195            + -self.twiddle4.im * x316n.im
2196            + self.twiddle1.im * x415n.im
2197            + self.twiddle6.im * x514n.im
2198            + -self.twiddle8.im * x613n.im
2199            + -self.twiddle3.im * x712n.im
2200            + self.twiddle2.im * x811n.im
2201            + self.twiddle7.im * x910n.im;
2202        let b613re_a = buffer.load(0).re
2203            + self.twiddle6.re * x118p.re
2204            + self.twiddle7.re * x217p.re
2205            + self.twiddle1.re * x316p.re
2206            + self.twiddle5.re * x415p.re
2207            + self.twiddle8.re * x514p.re
2208            + self.twiddle2.re * x613p.re
2209            + self.twiddle4.re * x712p.re
2210            + self.twiddle9.re * x811p.re
2211            + self.twiddle3.re * x910p.re;
2212        let b613re_b = self.twiddle6.im * x118n.im
2213            + -self.twiddle7.im * x217n.im
2214            + -self.twiddle1.im * x316n.im
2215            + self.twiddle5.im * x415n.im
2216            + -self.twiddle8.im * x514n.im
2217            + -self.twiddle2.im * x613n.im
2218            + self.twiddle4.im * x712n.im
2219            + -self.twiddle9.im * x811n.im
2220            + -self.twiddle3.im * x910n.im;
2221        let b712re_a = buffer.load(0).re
2222            + self.twiddle7.re * x118p.re
2223            + self.twiddle5.re * x217p.re
2224            + self.twiddle2.re * x316p.re
2225            + self.twiddle9.re * x415p.re
2226            + self.twiddle3.re * x514p.re
2227            + self.twiddle4.re * x613p.re
2228            + self.twiddle8.re * x712p.re
2229            + self.twiddle1.re * x811p.re
2230            + self.twiddle6.re * x910p.re;
2231        let b712re_b = self.twiddle7.im * x118n.im
2232            + -self.twiddle5.im * x217n.im
2233            + self.twiddle2.im * x316n.im
2234            + self.twiddle9.im * x415n.im
2235            + -self.twiddle3.im * x514n.im
2236            + self.twiddle4.im * x613n.im
2237            + -self.twiddle8.im * x712n.im
2238            + -self.twiddle1.im * x811n.im
2239            + self.twiddle6.im * x910n.im;
2240        let b811re_a = buffer.load(0).re
2241            + self.twiddle8.re * x118p.re
2242            + self.twiddle3.re * x217p.re
2243            + self.twiddle5.re * x316p.re
2244            + self.twiddle6.re * x415p.re
2245            + self.twiddle2.re * x514p.re
2246            + self.twiddle9.re * x613p.re
2247            + self.twiddle1.re * x712p.re
2248            + self.twiddle7.re * x811p.re
2249            + self.twiddle4.re * x910p.re;
2250        let b811re_b = self.twiddle8.im * x118n.im
2251            + -self.twiddle3.im * x217n.im
2252            + self.twiddle5.im * x316n.im
2253            + -self.twiddle6.im * x415n.im
2254            + self.twiddle2.im * x514n.im
2255            + -self.twiddle9.im * x613n.im
2256            + -self.twiddle1.im * x712n.im
2257            + self.twiddle7.im * x811n.im
2258            + -self.twiddle4.im * x910n.im;
2259        let b910re_a = buffer.load(0).re
2260            + self.twiddle9.re * x118p.re
2261            + self.twiddle1.re * x217p.re
2262            + self.twiddle8.re * x316p.re
2263            + self.twiddle2.re * x415p.re
2264            + self.twiddle7.re * x514p.re
2265            + self.twiddle3.re * x613p.re
2266            + self.twiddle6.re * x712p.re
2267            + self.twiddle4.re * x811p.re
2268            + self.twiddle5.re * x910p.re;
2269        let b910re_b = self.twiddle9.im * x118n.im
2270            + -self.twiddle1.im * x217n.im
2271            + self.twiddle8.im * x316n.im
2272            + -self.twiddle2.im * x415n.im
2273            + self.twiddle7.im * x514n.im
2274            + -self.twiddle3.im * x613n.im
2275            + self.twiddle6.im * x712n.im
2276            + -self.twiddle4.im * x811n.im
2277            + self.twiddle5.im * x910n.im;
2278
2279        let b118im_a = buffer.load(0).im
2280            + self.twiddle1.re * x118p.im
2281            + self.twiddle2.re * x217p.im
2282            + self.twiddle3.re * x316p.im
2283            + self.twiddle4.re * x415p.im
2284            + self.twiddle5.re * x514p.im
2285            + self.twiddle6.re * x613p.im
2286            + self.twiddle7.re * x712p.im
2287            + self.twiddle8.re * x811p.im
2288            + self.twiddle9.re * x910p.im;
2289        let b118im_b = self.twiddle1.im * x118n.re
2290            + self.twiddle2.im * x217n.re
2291            + self.twiddle3.im * x316n.re
2292            + self.twiddle4.im * x415n.re
2293            + self.twiddle5.im * x514n.re
2294            + self.twiddle6.im * x613n.re
2295            + self.twiddle7.im * x712n.re
2296            + self.twiddle8.im * x811n.re
2297            + self.twiddle9.im * x910n.re;
2298        let b217im_a = buffer.load(0).im
2299            + self.twiddle2.re * x118p.im
2300            + self.twiddle4.re * x217p.im
2301            + self.twiddle6.re * x316p.im
2302            + self.twiddle8.re * x415p.im
2303            + self.twiddle9.re * x514p.im
2304            + self.twiddle7.re * x613p.im
2305            + self.twiddle5.re * x712p.im
2306            + self.twiddle3.re * x811p.im
2307            + self.twiddle1.re * x910p.im;
2308        let b217im_b = self.twiddle2.im * x118n.re
2309            + self.twiddle4.im * x217n.re
2310            + self.twiddle6.im * x316n.re
2311            + self.twiddle8.im * x415n.re
2312            + -self.twiddle9.im * x514n.re
2313            + -self.twiddle7.im * x613n.re
2314            + -self.twiddle5.im * x712n.re
2315            + -self.twiddle3.im * x811n.re
2316            + -self.twiddle1.im * x910n.re;
2317        let b316im_a = buffer.load(0).im
2318            + self.twiddle3.re * x118p.im
2319            + self.twiddle6.re * x217p.im
2320            + self.twiddle9.re * x316p.im
2321            + self.twiddle7.re * x415p.im
2322            + self.twiddle4.re * x514p.im
2323            + self.twiddle1.re * x613p.im
2324            + self.twiddle2.re * x712p.im
2325            + self.twiddle5.re * x811p.im
2326            + self.twiddle8.re * x910p.im;
2327        let b316im_b = self.twiddle3.im * x118n.re
2328            + self.twiddle6.im * x217n.re
2329            + self.twiddle9.im * x316n.re
2330            + -self.twiddle7.im * x415n.re
2331            + -self.twiddle4.im * x514n.re
2332            + -self.twiddle1.im * x613n.re
2333            + self.twiddle2.im * x712n.re
2334            + self.twiddle5.im * x811n.re
2335            + self.twiddle8.im * x910n.re;
2336        let b415im_a = buffer.load(0).im
2337            + self.twiddle4.re * x118p.im
2338            + self.twiddle8.re * x217p.im
2339            + self.twiddle7.re * x316p.im
2340            + self.twiddle3.re * x415p.im
2341            + self.twiddle1.re * x514p.im
2342            + self.twiddle5.re * x613p.im
2343            + self.twiddle9.re * x712p.im
2344            + self.twiddle6.re * x811p.im
2345            + self.twiddle2.re * x910p.im;
2346        let b415im_b = self.twiddle4.im * x118n.re
2347            + self.twiddle8.im * x217n.re
2348            + -self.twiddle7.im * x316n.re
2349            + -self.twiddle3.im * x415n.re
2350            + self.twiddle1.im * x514n.re
2351            + self.twiddle5.im * x613n.re
2352            + self.twiddle9.im * x712n.re
2353            + -self.twiddle6.im * x811n.re
2354            + -self.twiddle2.im * x910n.re;
2355        let b514im_a = buffer.load(0).im
2356            + self.twiddle5.re * x118p.im
2357            + self.twiddle9.re * x217p.im
2358            + self.twiddle4.re * x316p.im
2359            + self.twiddle1.re * x415p.im
2360            + self.twiddle6.re * x514p.im
2361            + self.twiddle8.re * x613p.im
2362            + self.twiddle3.re * x712p.im
2363            + self.twiddle2.re * x811p.im
2364            + self.twiddle7.re * x910p.im;
2365        let b514im_b = self.twiddle5.im * x118n.re
2366            + -self.twiddle9.im * x217n.re
2367            + -self.twiddle4.im * x316n.re
2368            + self.twiddle1.im * x415n.re
2369            + self.twiddle6.im * x514n.re
2370            + -self.twiddle8.im * x613n.re
2371            + -self.twiddle3.im * x712n.re
2372            + self.twiddle2.im * x811n.re
2373            + self.twiddle7.im * x910n.re;
2374        let b613im_a = buffer.load(0).im
2375            + self.twiddle6.re * x118p.im
2376            + self.twiddle7.re * x217p.im
2377            + self.twiddle1.re * x316p.im
2378            + self.twiddle5.re * x415p.im
2379            + self.twiddle8.re * x514p.im
2380            + self.twiddle2.re * x613p.im
2381            + self.twiddle4.re * x712p.im
2382            + self.twiddle9.re * x811p.im
2383            + self.twiddle3.re * x910p.im;
2384        let b613im_b = self.twiddle6.im * x118n.re
2385            + -self.twiddle7.im * x217n.re
2386            + -self.twiddle1.im * x316n.re
2387            + self.twiddle5.im * x415n.re
2388            + -self.twiddle8.im * x514n.re
2389            + -self.twiddle2.im * x613n.re
2390            + self.twiddle4.im * x712n.re
2391            + -self.twiddle9.im * x811n.re
2392            + -self.twiddle3.im * x910n.re;
2393        let b712im_a = buffer.load(0).im
2394            + self.twiddle7.re * x118p.im
2395            + self.twiddle5.re * x217p.im
2396            + self.twiddle2.re * x316p.im
2397            + self.twiddle9.re * x415p.im
2398            + self.twiddle3.re * x514p.im
2399            + self.twiddle4.re * x613p.im
2400            + self.twiddle8.re * x712p.im
2401            + self.twiddle1.re * x811p.im
2402            + self.twiddle6.re * x910p.im;
2403        let b712im_b = self.twiddle7.im * x118n.re
2404            + -self.twiddle5.im * x217n.re
2405            + self.twiddle2.im * x316n.re
2406            + self.twiddle9.im * x415n.re
2407            + -self.twiddle3.im * x514n.re
2408            + self.twiddle4.im * x613n.re
2409            + -self.twiddle8.im * x712n.re
2410            + -self.twiddle1.im * x811n.re
2411            + self.twiddle6.im * x910n.re;
2412        let b811im_a = buffer.load(0).im
2413            + self.twiddle8.re * x118p.im
2414            + self.twiddle3.re * x217p.im
2415            + self.twiddle5.re * x316p.im
2416            + self.twiddle6.re * x415p.im
2417            + self.twiddle2.re * x514p.im
2418            + self.twiddle9.re * x613p.im
2419            + self.twiddle1.re * x712p.im
2420            + self.twiddle7.re * x811p.im
2421            + self.twiddle4.re * x910p.im;
2422        let b811im_b = self.twiddle8.im * x118n.re
2423            + -self.twiddle3.im * x217n.re
2424            + self.twiddle5.im * x316n.re
2425            + -self.twiddle6.im * x415n.re
2426            + self.twiddle2.im * x514n.re
2427            + -self.twiddle9.im * x613n.re
2428            + -self.twiddle1.im * x712n.re
2429            + self.twiddle7.im * x811n.re
2430            + -self.twiddle4.im * x910n.re;
2431        let b910im_a = buffer.load(0).im
2432            + self.twiddle9.re * x118p.im
2433            + self.twiddle1.re * x217p.im
2434            + self.twiddle8.re * x316p.im
2435            + self.twiddle2.re * x415p.im
2436            + self.twiddle7.re * x514p.im
2437            + self.twiddle3.re * x613p.im
2438            + self.twiddle6.re * x712p.im
2439            + self.twiddle4.re * x811p.im
2440            + self.twiddle5.re * x910p.im;
2441        let b910im_b = self.twiddle9.im * x118n.re
2442            + -self.twiddle1.im * x217n.re
2443            + self.twiddle8.im * x316n.re
2444            + -self.twiddle2.im * x415n.re
2445            + self.twiddle7.im * x514n.re
2446            + -self.twiddle3.im * x613n.re
2447            + self.twiddle6.im * x712n.re
2448            + -self.twiddle4.im * x811n.re
2449            + self.twiddle5.im * x910n.re;
2450
2451        let out1re = b118re_a - b118re_b;
2452        let out1im = b118im_a + b118im_b;
2453        let out2re = b217re_a - b217re_b;
2454        let out2im = b217im_a + b217im_b;
2455        let out3re = b316re_a - b316re_b;
2456        let out3im = b316im_a + b316im_b;
2457        let out4re = b415re_a - b415re_b;
2458        let out4im = b415im_a + b415im_b;
2459        let out5re = b514re_a - b514re_b;
2460        let out5im = b514im_a + b514im_b;
2461        let out6re = b613re_a - b613re_b;
2462        let out6im = b613im_a + b613im_b;
2463        let out7re = b712re_a - b712re_b;
2464        let out7im = b712im_a + b712im_b;
2465        let out8re = b811re_a - b811re_b;
2466        let out8im = b811im_a + b811im_b;
2467        let out9re = b910re_a - b910re_b;
2468        let out9im = b910im_a + b910im_b;
2469        let out10re = b910re_a + b910re_b;
2470        let out10im = b910im_a - b910im_b;
2471        let out11re = b811re_a + b811re_b;
2472        let out11im = b811im_a - b811im_b;
2473        let out12re = b712re_a + b712re_b;
2474        let out12im = b712im_a - b712im_b;
2475        let out13re = b613re_a + b613re_b;
2476        let out13im = b613im_a - b613im_b;
2477        let out14re = b514re_a + b514re_b;
2478        let out14im = b514im_a - b514im_b;
2479        let out15re = b415re_a + b415re_b;
2480        let out15im = b415im_a - b415im_b;
2481        let out16re = b316re_a + b316re_b;
2482        let out16im = b316im_a - b316im_b;
2483        let out17re = b217re_a + b217re_b;
2484        let out17im = b217im_a - b217im_b;
2485        let out18re = b118re_a + b118re_b;
2486        let out18im = b118im_a - b118im_b;
2487        buffer.store(sum, 0);
2488        buffer.store(
2489            Complex {
2490                re: out1re,
2491                im: out1im,
2492            },
2493            1,
2494        );
2495        buffer.store(
2496            Complex {
2497                re: out2re,
2498                im: out2im,
2499            },
2500            2,
2501        );
2502        buffer.store(
2503            Complex {
2504                re: out3re,
2505                im: out3im,
2506            },
2507            3,
2508        );
2509        buffer.store(
2510            Complex {
2511                re: out4re,
2512                im: out4im,
2513            },
2514            4,
2515        );
2516        buffer.store(
2517            Complex {
2518                re: out5re,
2519                im: out5im,
2520            },
2521            5,
2522        );
2523        buffer.store(
2524            Complex {
2525                re: out6re,
2526                im: out6im,
2527            },
2528            6,
2529        );
2530        buffer.store(
2531            Complex {
2532                re: out7re,
2533                im: out7im,
2534            },
2535            7,
2536        );
2537        buffer.store(
2538            Complex {
2539                re: out8re,
2540                im: out8im,
2541            },
2542            8,
2543        );
2544        buffer.store(
2545            Complex {
2546                re: out9re,
2547                im: out9im,
2548            },
2549            9,
2550        );
2551        buffer.store(
2552            Complex {
2553                re: out10re,
2554                im: out10im,
2555            },
2556            10,
2557        );
2558        buffer.store(
2559            Complex {
2560                re: out11re,
2561                im: out11im,
2562            },
2563            11,
2564        );
2565        buffer.store(
2566            Complex {
2567                re: out12re,
2568                im: out12im,
2569            },
2570            12,
2571        );
2572        buffer.store(
2573            Complex {
2574                re: out13re,
2575                im: out13im,
2576            },
2577            13,
2578        );
2579        buffer.store(
2580            Complex {
2581                re: out14re,
2582                im: out14im,
2583            },
2584            14,
2585        );
2586        buffer.store(
2587            Complex {
2588                re: out15re,
2589                im: out15im,
2590            },
2591            15,
2592        );
2593        buffer.store(
2594            Complex {
2595                re: out16re,
2596                im: out16im,
2597            },
2598            16,
2599        );
2600        buffer.store(
2601            Complex {
2602                re: out17re,
2603                im: out17im,
2604            },
2605            17,
2606        );
2607        buffer.store(
2608            Complex {
2609                re: out18re,
2610                im: out18im,
2611            },
2612            18,
2613        );
2614    }
2615}
2616
2617pub struct Butterfly23<T> {
2618    twiddle1: Complex<T>,
2619    twiddle2: Complex<T>,
2620    twiddle3: Complex<T>,
2621    twiddle4: Complex<T>,
2622    twiddle5: Complex<T>,
2623    twiddle6: Complex<T>,
2624    twiddle7: Complex<T>,
2625    twiddle8: Complex<T>,
2626    twiddle9: Complex<T>,
2627    twiddle10: Complex<T>,
2628    twiddle11: Complex<T>,
2629    direction: FftDirection,
2630}
2631boilerplate_fft_butterfly!(Butterfly23, 23, |this: &Butterfly23<_>| this.direction);
2632impl<T: FftNum> Butterfly23<T> {
2633    pub fn new(direction: FftDirection) -> Self {
2634        let twiddle1: Complex<T> = twiddles::compute_twiddle(1, 23, direction);
2635        let twiddle2: Complex<T> = twiddles::compute_twiddle(2, 23, direction);
2636        let twiddle3: Complex<T> = twiddles::compute_twiddle(3, 23, direction);
2637        let twiddle4: Complex<T> = twiddles::compute_twiddle(4, 23, direction);
2638        let twiddle5: Complex<T> = twiddles::compute_twiddle(5, 23, direction);
2639        let twiddle6: Complex<T> = twiddles::compute_twiddle(6, 23, direction);
2640        let twiddle7: Complex<T> = twiddles::compute_twiddle(7, 23, direction);
2641        let twiddle8: Complex<T> = twiddles::compute_twiddle(8, 23, direction);
2642        let twiddle9: Complex<T> = twiddles::compute_twiddle(9, 23, direction);
2643        let twiddle10: Complex<T> = twiddles::compute_twiddle(10, 23, direction);
2644        let twiddle11: Complex<T> = twiddles::compute_twiddle(11, 23, direction);
2645        Self {
2646            twiddle1,
2647            twiddle2,
2648            twiddle3,
2649            twiddle4,
2650            twiddle5,
2651            twiddle6,
2652            twiddle7,
2653            twiddle8,
2654            twiddle9,
2655            twiddle10,
2656            twiddle11,
2657            direction,
2658        }
2659    }
2660
2661    #[inline(never)]
2662    unsafe fn perform_fft_contiguous(&self, mut buffer: impl LoadStore<T>) {
2663        // This function was derived in the same manner as the butterflies for length 3, 5 and 7.
2664        // However, instead of doing it by hand the actual code is autogenerated
2665        // with the `genbutterflies.py` script in the `tools` directory.
2666        let x122p = buffer.load(1) + buffer.load(22);
2667        let x122n = buffer.load(1) - buffer.load(22);
2668        let x221p = buffer.load(2) + buffer.load(21);
2669        let x221n = buffer.load(2) - buffer.load(21);
2670        let x320p = buffer.load(3) + buffer.load(20);
2671        let x320n = buffer.load(3) - buffer.load(20);
2672        let x419p = buffer.load(4) + buffer.load(19);
2673        let x419n = buffer.load(4) - buffer.load(19);
2674        let x518p = buffer.load(5) + buffer.load(18);
2675        let x518n = buffer.load(5) - buffer.load(18);
2676        let x617p = buffer.load(6) + buffer.load(17);
2677        let x617n = buffer.load(6) - buffer.load(17);
2678        let x716p = buffer.load(7) + buffer.load(16);
2679        let x716n = buffer.load(7) - buffer.load(16);
2680        let x815p = buffer.load(8) + buffer.load(15);
2681        let x815n = buffer.load(8) - buffer.load(15);
2682        let x914p = buffer.load(9) + buffer.load(14);
2683        let x914n = buffer.load(9) - buffer.load(14);
2684        let x1013p = buffer.load(10) + buffer.load(13);
2685        let x1013n = buffer.load(10) - buffer.load(13);
2686        let x1112p = buffer.load(11) + buffer.load(12);
2687        let x1112n = buffer.load(11) - buffer.load(12);
2688        let sum = buffer.load(0)
2689            + x122p
2690            + x221p
2691            + x320p
2692            + x419p
2693            + x518p
2694            + x617p
2695            + x716p
2696            + x815p
2697            + x914p
2698            + x1013p
2699            + x1112p;
2700        let b122re_a = buffer.load(0).re
2701            + self.twiddle1.re * x122p.re
2702            + self.twiddle2.re * x221p.re
2703            + self.twiddle3.re * x320p.re
2704            + self.twiddle4.re * x419p.re
2705            + self.twiddle5.re * x518p.re
2706            + self.twiddle6.re * x617p.re
2707            + self.twiddle7.re * x716p.re
2708            + self.twiddle8.re * x815p.re
2709            + self.twiddle9.re * x914p.re
2710            + self.twiddle10.re * x1013p.re
2711            + self.twiddle11.re * x1112p.re;
2712        let b122re_b = self.twiddle1.im * x122n.im
2713            + self.twiddle2.im * x221n.im
2714            + self.twiddle3.im * x320n.im
2715            + self.twiddle4.im * x419n.im
2716            + self.twiddle5.im * x518n.im
2717            + self.twiddle6.im * x617n.im
2718            + self.twiddle7.im * x716n.im
2719            + self.twiddle8.im * x815n.im
2720            + self.twiddle9.im * x914n.im
2721            + self.twiddle10.im * x1013n.im
2722            + self.twiddle11.im * x1112n.im;
2723        let b221re_a = buffer.load(0).re
2724            + self.twiddle2.re * x122p.re
2725            + self.twiddle4.re * x221p.re
2726            + self.twiddle6.re * x320p.re
2727            + self.twiddle8.re * x419p.re
2728            + self.twiddle10.re * x518p.re
2729            + self.twiddle11.re * x617p.re
2730            + self.twiddle9.re * x716p.re
2731            + self.twiddle7.re * x815p.re
2732            + self.twiddle5.re * x914p.re
2733            + self.twiddle3.re * x1013p.re
2734            + self.twiddle1.re * x1112p.re;
2735        let b221re_b = self.twiddle2.im * x122n.im
2736            + self.twiddle4.im * x221n.im
2737            + self.twiddle6.im * x320n.im
2738            + self.twiddle8.im * x419n.im
2739            + self.twiddle10.im * x518n.im
2740            + -self.twiddle11.im * x617n.im
2741            + -self.twiddle9.im * x716n.im
2742            + -self.twiddle7.im * x815n.im
2743            + -self.twiddle5.im * x914n.im
2744            + -self.twiddle3.im * x1013n.im
2745            + -self.twiddle1.im * x1112n.im;
2746        let b320re_a = buffer.load(0).re
2747            + self.twiddle3.re * x122p.re
2748            + self.twiddle6.re * x221p.re
2749            + self.twiddle9.re * x320p.re
2750            + self.twiddle11.re * x419p.re
2751            + self.twiddle8.re * x518p.re
2752            + self.twiddle5.re * x617p.re
2753            + self.twiddle2.re * x716p.re
2754            + self.twiddle1.re * x815p.re
2755            + self.twiddle4.re * x914p.re
2756            + self.twiddle7.re * x1013p.re
2757            + self.twiddle10.re * x1112p.re;
2758        let b320re_b = self.twiddle3.im * x122n.im
2759            + self.twiddle6.im * x221n.im
2760            + self.twiddle9.im * x320n.im
2761            + -self.twiddle11.im * x419n.im
2762            + -self.twiddle8.im * x518n.im
2763            + -self.twiddle5.im * x617n.im
2764            + -self.twiddle2.im * x716n.im
2765            + self.twiddle1.im * x815n.im
2766            + self.twiddle4.im * x914n.im
2767            + self.twiddle7.im * x1013n.im
2768            + self.twiddle10.im * x1112n.im;
2769        let b419re_a = buffer.load(0).re
2770            + self.twiddle4.re * x122p.re
2771            + self.twiddle8.re * x221p.re
2772            + self.twiddle11.re * x320p.re
2773            + self.twiddle7.re * x419p.re
2774            + self.twiddle3.re * x518p.re
2775            + self.twiddle1.re * x617p.re
2776            + self.twiddle5.re * x716p.re
2777            + self.twiddle9.re * x815p.re
2778            + self.twiddle10.re * x914p.re
2779            + self.twiddle6.re * x1013p.re
2780            + self.twiddle2.re * x1112p.re;
2781        let b419re_b = self.twiddle4.im * x122n.im
2782            + self.twiddle8.im * x221n.im
2783            + -self.twiddle11.im * x320n.im
2784            + -self.twiddle7.im * x419n.im
2785            + -self.twiddle3.im * x518n.im
2786            + self.twiddle1.im * x617n.im
2787            + self.twiddle5.im * x716n.im
2788            + self.twiddle9.im * x815n.im
2789            + -self.twiddle10.im * x914n.im
2790            + -self.twiddle6.im * x1013n.im
2791            + -self.twiddle2.im * x1112n.im;
2792        let b518re_a = buffer.load(0).re
2793            + self.twiddle5.re * x122p.re
2794            + self.twiddle10.re * x221p.re
2795            + self.twiddle8.re * x320p.re
2796            + self.twiddle3.re * x419p.re
2797            + self.twiddle2.re * x518p.re
2798            + self.twiddle7.re * x617p.re
2799            + self.twiddle11.re * x716p.re
2800            + self.twiddle6.re * x815p.re
2801            + self.twiddle1.re * x914p.re
2802            + self.twiddle4.re * x1013p.re
2803            + self.twiddle9.re * x1112p.re;
2804        let b518re_b = self.twiddle5.im * x122n.im
2805            + self.twiddle10.im * x221n.im
2806            + -self.twiddle8.im * x320n.im
2807            + -self.twiddle3.im * x419n.im
2808            + self.twiddle2.im * x518n.im
2809            + self.twiddle7.im * x617n.im
2810            + -self.twiddle11.im * x716n.im
2811            + -self.twiddle6.im * x815n.im
2812            + -self.twiddle1.im * x914n.im
2813            + self.twiddle4.im * x1013n.im
2814            + self.twiddle9.im * x1112n.im;
2815        let b617re_a = buffer.load(0).re
2816            + self.twiddle6.re * x122p.re
2817            + self.twiddle11.re * x221p.re
2818            + self.twiddle5.re * x320p.re
2819            + self.twiddle1.re * x419p.re
2820            + self.twiddle7.re * x518p.re
2821            + self.twiddle10.re * x617p.re
2822            + self.twiddle4.re * x716p.re
2823            + self.twiddle2.re * x815p.re
2824            + self.twiddle8.re * x914p.re
2825            + self.twiddle9.re * x1013p.re
2826            + self.twiddle3.re * x1112p.re;
2827        let b617re_b = self.twiddle6.im * x122n.im
2828            + -self.twiddle11.im * x221n.im
2829            + -self.twiddle5.im * x320n.im
2830            + self.twiddle1.im * x419n.im
2831            + self.twiddle7.im * x518n.im
2832            + -self.twiddle10.im * x617n.im
2833            + -self.twiddle4.im * x716n.im
2834            + self.twiddle2.im * x815n.im
2835            + self.twiddle8.im * x914n.im
2836            + -self.twiddle9.im * x1013n.im
2837            + -self.twiddle3.im * x1112n.im;
2838        let b716re_a = buffer.load(0).re
2839            + self.twiddle7.re * x122p.re
2840            + self.twiddle9.re * x221p.re
2841            + self.twiddle2.re * x320p.re
2842            + self.twiddle5.re * x419p.re
2843            + self.twiddle11.re * x518p.re
2844            + self.twiddle4.re * x617p.re
2845            + self.twiddle3.re * x716p.re
2846            + self.twiddle10.re * x815p.re
2847            + self.twiddle6.re * x914p.re
2848            + self.twiddle1.re * x1013p.re
2849            + self.twiddle8.re * x1112p.re;
2850        let b716re_b = self.twiddle7.im * x122n.im
2851            + -self.twiddle9.im * x221n.im
2852            + -self.twiddle2.im * x320n.im
2853            + self.twiddle5.im * x419n.im
2854            + -self.twiddle11.im * x518n.im
2855            + -self.twiddle4.im * x617n.im
2856            + self.twiddle3.im * x716n.im
2857            + self.twiddle10.im * x815n.im
2858            + -self.twiddle6.im * x914n.im
2859            + self.twiddle1.im * x1013n.im
2860            + self.twiddle8.im * x1112n.im;
2861        let b815re_a = buffer.load(0).re
2862            + self.twiddle8.re * x122p.re
2863            + self.twiddle7.re * x221p.re
2864            + self.twiddle1.re * x320p.re
2865            + self.twiddle9.re * x419p.re
2866            + self.twiddle6.re * x518p.re
2867            + self.twiddle2.re * x617p.re
2868            + self.twiddle10.re * x716p.re
2869            + self.twiddle5.re * x815p.re
2870            + self.twiddle3.re * x914p.re
2871            + self.twiddle11.re * x1013p.re
2872            + self.twiddle4.re * x1112p.re;
2873        let b815re_b = self.twiddle8.im * x122n.im
2874            + -self.twiddle7.im * x221n.im
2875            + self.twiddle1.im * x320n.im
2876            + self.twiddle9.im * x419n.im
2877            + -self.twiddle6.im * x518n.im
2878            + self.twiddle2.im * x617n.im
2879            + self.twiddle10.im * x716n.im
2880            + -self.twiddle5.im * x815n.im
2881            + self.twiddle3.im * x914n.im
2882            + self.twiddle11.im * x1013n.im
2883            + -self.twiddle4.im * x1112n.im;
2884        let b914re_a = buffer.load(0).re
2885            + self.twiddle9.re * x122p.re
2886            + self.twiddle5.re * x221p.re
2887            + self.twiddle4.re * x320p.re
2888            + self.twiddle10.re * x419p.re
2889            + self.twiddle1.re * x518p.re
2890            + self.twiddle8.re * x617p.re
2891            + self.twiddle6.re * x716p.re
2892            + self.twiddle3.re * x815p.re
2893            + self.twiddle11.re * x914p.re
2894            + self.twiddle2.re * x1013p.re
2895            + self.twiddle7.re * x1112p.re;
2896        let b914re_b = self.twiddle9.im * x122n.im
2897            + -self.twiddle5.im * x221n.im
2898            + self.twiddle4.im * x320n.im
2899            + -self.twiddle10.im * x419n.im
2900            + -self.twiddle1.im * x518n.im
2901            + self.twiddle8.im * x617n.im
2902            + -self.twiddle6.im * x716n.im
2903            + self.twiddle3.im * x815n.im
2904            + -self.twiddle11.im * x914n.im
2905            + -self.twiddle2.im * x1013n.im
2906            + self.twiddle7.im * x1112n.im;
2907        let b1013re_a = buffer.load(0).re
2908            + self.twiddle10.re * x122p.re
2909            + self.twiddle3.re * x221p.re
2910            + self.twiddle7.re * x320p.re
2911            + self.twiddle6.re * x419p.re
2912            + self.twiddle4.re * x518p.re
2913            + self.twiddle9.re * x617p.re
2914            + self.twiddle1.re * x716p.re
2915            + self.twiddle11.re * x815p.re
2916            + self.twiddle2.re * x914p.re
2917            + self.twiddle8.re * x1013p.re
2918            + self.twiddle5.re * x1112p.re;
2919        let b1013re_b = self.twiddle10.im * x122n.im
2920            + -self.twiddle3.im * x221n.im
2921            + self.twiddle7.im * x320n.im
2922            + -self.twiddle6.im * x419n.im
2923            + self.twiddle4.im * x518n.im
2924            + -self.twiddle9.im * x617n.im
2925            + self.twiddle1.im * x716n.im
2926            + self.twiddle11.im * x815n.im
2927            + -self.twiddle2.im * x914n.im
2928            + self.twiddle8.im * x1013n.im
2929            + -self.twiddle5.im * x1112n.im;
2930        let b1112re_a = buffer.load(0).re
2931            + self.twiddle11.re * x122p.re
2932            + self.twiddle1.re * x221p.re
2933            + self.twiddle10.re * x320p.re
2934            + self.twiddle2.re * x419p.re
2935            + self.twiddle9.re * x518p.re
2936            + self.twiddle3.re * x617p.re
2937            + self.twiddle8.re * x716p.re
2938            + self.twiddle4.re * x815p.re
2939            + self.twiddle7.re * x914p.re
2940            + self.twiddle5.re * x1013p.re
2941            + self.twiddle6.re * x1112p.re;
2942        let b1112re_b = self.twiddle11.im * x122n.im
2943            + -self.twiddle1.im * x221n.im
2944            + self.twiddle10.im * x320n.im
2945            + -self.twiddle2.im * x419n.im
2946            + self.twiddle9.im * x518n.im
2947            + -self.twiddle3.im * x617n.im
2948            + self.twiddle8.im * x716n.im
2949            + -self.twiddle4.im * x815n.im
2950            + self.twiddle7.im * x914n.im
2951            + -self.twiddle5.im * x1013n.im
2952            + self.twiddle6.im * x1112n.im;
2953
2954        let b122im_a = buffer.load(0).im
2955            + self.twiddle1.re * x122p.im
2956            + self.twiddle2.re * x221p.im
2957            + self.twiddle3.re * x320p.im
2958            + self.twiddle4.re * x419p.im
2959            + self.twiddle5.re * x518p.im
2960            + self.twiddle6.re * x617p.im
2961            + self.twiddle7.re * x716p.im
2962            + self.twiddle8.re * x815p.im
2963            + self.twiddle9.re * x914p.im
2964            + self.twiddle10.re * x1013p.im
2965            + self.twiddle11.re * x1112p.im;
2966        let b122im_b = self.twiddle1.im * x122n.re
2967            + self.twiddle2.im * x221n.re
2968            + self.twiddle3.im * x320n.re
2969            + self.twiddle4.im * x419n.re
2970            + self.twiddle5.im * x518n.re
2971            + self.twiddle6.im * x617n.re
2972            + self.twiddle7.im * x716n.re
2973            + self.twiddle8.im * x815n.re
2974            + self.twiddle9.im * x914n.re
2975            + self.twiddle10.im * x1013n.re
2976            + self.twiddle11.im * x1112n.re;
2977        let b221im_a = buffer.load(0).im
2978            + self.twiddle2.re * x122p.im
2979            + self.twiddle4.re * x221p.im
2980            + self.twiddle6.re * x320p.im
2981            + self.twiddle8.re * x419p.im
2982            + self.twiddle10.re * x518p.im
2983            + self.twiddle11.re * x617p.im
2984            + self.twiddle9.re * x716p.im
2985            + self.twiddle7.re * x815p.im
2986            + self.twiddle5.re * x914p.im
2987            + self.twiddle3.re * x1013p.im
2988            + self.twiddle1.re * x1112p.im;
2989        let b221im_b = self.twiddle2.im * x122n.re
2990            + self.twiddle4.im * x221n.re
2991            + self.twiddle6.im * x320n.re
2992            + self.twiddle8.im * x419n.re
2993            + self.twiddle10.im * x518n.re
2994            + -self.twiddle11.im * x617n.re
2995            + -self.twiddle9.im * x716n.re
2996            + -self.twiddle7.im * x815n.re
2997            + -self.twiddle5.im * x914n.re
2998            + -self.twiddle3.im * x1013n.re
2999            + -self.twiddle1.im * x1112n.re;
3000        let b320im_a = buffer.load(0).im
3001            + self.twiddle3.re * x122p.im
3002            + self.twiddle6.re * x221p.im
3003            + self.twiddle9.re * x320p.im
3004            + self.twiddle11.re * x419p.im
3005            + self.twiddle8.re * x518p.im
3006            + self.twiddle5.re * x617p.im
3007            + self.twiddle2.re * x716p.im
3008            + self.twiddle1.re * x815p.im
3009            + self.twiddle4.re * x914p.im
3010            + self.twiddle7.re * x1013p.im
3011            + self.twiddle10.re * x1112p.im;
3012        let b320im_b = self.twiddle3.im * x122n.re
3013            + self.twiddle6.im * x221n.re
3014            + self.twiddle9.im * x320n.re
3015            + -self.twiddle11.im * x419n.re
3016            + -self.twiddle8.im * x518n.re
3017            + -self.twiddle5.im * x617n.re
3018            + -self.twiddle2.im * x716n.re
3019            + self.twiddle1.im * x815n.re
3020            + self.twiddle4.im * x914n.re
3021            + self.twiddle7.im * x1013n.re
3022            + self.twiddle10.im * x1112n.re;
3023        let b419im_a = buffer.load(0).im
3024            + self.twiddle4.re * x122p.im
3025            + self.twiddle8.re * x221p.im
3026            + self.twiddle11.re * x320p.im
3027            + self.twiddle7.re * x419p.im
3028            + self.twiddle3.re * x518p.im
3029            + self.twiddle1.re * x617p.im
3030            + self.twiddle5.re * x716p.im
3031            + self.twiddle9.re * x815p.im
3032            + self.twiddle10.re * x914p.im
3033            + self.twiddle6.re * x1013p.im
3034            + self.twiddle2.re * x1112p.im;
3035        let b419im_b = self.twiddle4.im * x122n.re
3036            + self.twiddle8.im * x221n.re
3037            + -self.twiddle11.im * x320n.re
3038            + -self.twiddle7.im * x419n.re
3039            + -self.twiddle3.im * x518n.re
3040            + self.twiddle1.im * x617n.re
3041            + self.twiddle5.im * x716n.re
3042            + self.twiddle9.im * x815n.re
3043            + -self.twiddle10.im * x914n.re
3044            + -self.twiddle6.im * x1013n.re
3045            + -self.twiddle2.im * x1112n.re;
3046        let b518im_a = buffer.load(0).im
3047            + self.twiddle5.re * x122p.im
3048            + self.twiddle10.re * x221p.im
3049            + self.twiddle8.re * x320p.im
3050            + self.twiddle3.re * x419p.im
3051            + self.twiddle2.re * x518p.im
3052            + self.twiddle7.re * x617p.im
3053            + self.twiddle11.re * x716p.im
3054            + self.twiddle6.re * x815p.im
3055            + self.twiddle1.re * x914p.im
3056            + self.twiddle4.re * x1013p.im
3057            + self.twiddle9.re * x1112p.im;
3058        let b518im_b = self.twiddle5.im * x122n.re
3059            + self.twiddle10.im * x221n.re
3060            + -self.twiddle8.im * x320n.re
3061            + -self.twiddle3.im * x419n.re
3062            + self.twiddle2.im * x518n.re
3063            + self.twiddle7.im * x617n.re
3064            + -self.twiddle11.im * x716n.re
3065            + -self.twiddle6.im * x815n.re
3066            + -self.twiddle1.im * x914n.re
3067            + self.twiddle4.im * x1013n.re
3068            + self.twiddle9.im * x1112n.re;
3069        let b617im_a = buffer.load(0).im
3070            + self.twiddle6.re * x122p.im
3071            + self.twiddle11.re * x221p.im
3072            + self.twiddle5.re * x320p.im
3073            + self.twiddle1.re * x419p.im
3074            + self.twiddle7.re * x518p.im
3075            + self.twiddle10.re * x617p.im
3076            + self.twiddle4.re * x716p.im
3077            + self.twiddle2.re * x815p.im
3078            + self.twiddle8.re * x914p.im
3079            + self.twiddle9.re * x1013p.im
3080            + self.twiddle3.re * x1112p.im;
3081        let b617im_b = self.twiddle6.im * x122n.re
3082            + -self.twiddle11.im * x221n.re
3083            + -self.twiddle5.im * x320n.re
3084            + self.twiddle1.im * x419n.re
3085            + self.twiddle7.im * x518n.re
3086            + -self.twiddle10.im * x617n.re
3087            + -self.twiddle4.im * x716n.re
3088            + self.twiddle2.im * x815n.re
3089            + self.twiddle8.im * x914n.re
3090            + -self.twiddle9.im * x1013n.re
3091            + -self.twiddle3.im * x1112n.re;
3092        let b716im_a = buffer.load(0).im
3093            + self.twiddle7.re * x122p.im
3094            + self.twiddle9.re * x221p.im
3095            + self.twiddle2.re * x320p.im
3096            + self.twiddle5.re * x419p.im
3097            + self.twiddle11.re * x518p.im
3098            + self.twiddle4.re * x617p.im
3099            + self.twiddle3.re * x716p.im
3100            + self.twiddle10.re * x815p.im
3101            + self.twiddle6.re * x914p.im
3102            + self.twiddle1.re * x1013p.im
3103            + self.twiddle8.re * x1112p.im;
3104        let b716im_b = self.twiddle7.im * x122n.re
3105            + -self.twiddle9.im * x221n.re
3106            + -self.twiddle2.im * x320n.re
3107            + self.twiddle5.im * x419n.re
3108            + -self.twiddle11.im * x518n.re
3109            + -self.twiddle4.im * x617n.re
3110            + self.twiddle3.im * x716n.re
3111            + self.twiddle10.im * x815n.re
3112            + -self.twiddle6.im * x914n.re
3113            + self.twiddle1.im * x1013n.re
3114            + self.twiddle8.im * x1112n.re;
3115        let b815im_a = buffer.load(0).im
3116            + self.twiddle8.re * x122p.im
3117            + self.twiddle7.re * x221p.im
3118            + self.twiddle1.re * x320p.im
3119            + self.twiddle9.re * x419p.im
3120            + self.twiddle6.re * x518p.im
3121            + self.twiddle2.re * x617p.im
3122            + self.twiddle10.re * x716p.im
3123            + self.twiddle5.re * x815p.im
3124            + self.twiddle3.re * x914p.im
3125            + self.twiddle11.re * x1013p.im
3126            + self.twiddle4.re * x1112p.im;
3127        let b815im_b = self.twiddle8.im * x122n.re
3128            + -self.twiddle7.im * x221n.re
3129            + self.twiddle1.im * x320n.re
3130            + self.twiddle9.im * x419n.re
3131            + -self.twiddle6.im * x518n.re
3132            + self.twiddle2.im * x617n.re
3133            + self.twiddle10.im * x716n.re
3134            + -self.twiddle5.im * x815n.re
3135            + self.twiddle3.im * x914n.re
3136            + self.twiddle11.im * x1013n.re
3137            + -self.twiddle4.im * x1112n.re;
3138        let b914im_a = buffer.load(0).im
3139            + self.twiddle9.re * x122p.im
3140            + self.twiddle5.re * x221p.im
3141            + self.twiddle4.re * x320p.im
3142            + self.twiddle10.re * x419p.im
3143            + self.twiddle1.re * x518p.im
3144            + self.twiddle8.re * x617p.im
3145            + self.twiddle6.re * x716p.im
3146            + self.twiddle3.re * x815p.im
3147            + self.twiddle11.re * x914p.im
3148            + self.twiddle2.re * x1013p.im
3149            + self.twiddle7.re * x1112p.im;
3150        let b914im_b = self.twiddle9.im * x122n.re
3151            + -self.twiddle5.im * x221n.re
3152            + self.twiddle4.im * x320n.re
3153            + -self.twiddle10.im * x419n.re
3154            + -self.twiddle1.im * x518n.re
3155            + self.twiddle8.im * x617n.re
3156            + -self.twiddle6.im * x716n.re
3157            + self.twiddle3.im * x815n.re
3158            + -self.twiddle11.im * x914n.re
3159            + -self.twiddle2.im * x1013n.re
3160            + self.twiddle7.im * x1112n.re;
3161        let b1013im_a = buffer.load(0).im
3162            + self.twiddle10.re * x122p.im
3163            + self.twiddle3.re * x221p.im
3164            + self.twiddle7.re * x320p.im
3165            + self.twiddle6.re * x419p.im
3166            + self.twiddle4.re * x518p.im
3167            + self.twiddle9.re * x617p.im
3168            + self.twiddle1.re * x716p.im
3169            + self.twiddle11.re * x815p.im
3170            + self.twiddle2.re * x914p.im
3171            + self.twiddle8.re * x1013p.im
3172            + self.twiddle5.re * x1112p.im;
3173        let b1013im_b = self.twiddle10.im * x122n.re
3174            + -self.twiddle3.im * x221n.re
3175            + self.twiddle7.im * x320n.re
3176            + -self.twiddle6.im * x419n.re
3177            + self.twiddle4.im * x518n.re
3178            + -self.twiddle9.im * x617n.re
3179            + self.twiddle1.im * x716n.re
3180            + self.twiddle11.im * x815n.re
3181            + -self.twiddle2.im * x914n.re
3182            + self.twiddle8.im * x1013n.re
3183            + -self.twiddle5.im * x1112n.re;
3184        let b1112im_a = buffer.load(0).im
3185            + self.twiddle11.re * x122p.im
3186            + self.twiddle1.re * x221p.im
3187            + self.twiddle10.re * x320p.im
3188            + self.twiddle2.re * x419p.im
3189            + self.twiddle9.re * x518p.im
3190            + self.twiddle3.re * x617p.im
3191            + self.twiddle8.re * x716p.im
3192            + self.twiddle4.re * x815p.im
3193            + self.twiddle7.re * x914p.im
3194            + self.twiddle5.re * x1013p.im
3195            + self.twiddle6.re * x1112p.im;
3196        let b1112im_b = self.twiddle11.im * x122n.re
3197            + -self.twiddle1.im * x221n.re
3198            + self.twiddle10.im * x320n.re
3199            + -self.twiddle2.im * x419n.re
3200            + self.twiddle9.im * x518n.re
3201            + -self.twiddle3.im * x617n.re
3202            + self.twiddle8.im * x716n.re
3203            + -self.twiddle4.im * x815n.re
3204            + self.twiddle7.im * x914n.re
3205            + -self.twiddle5.im * x1013n.re
3206            + self.twiddle6.im * x1112n.re;
3207
3208        let out1re = b122re_a - b122re_b;
3209        let out1im = b122im_a + b122im_b;
3210        let out2re = b221re_a - b221re_b;
3211        let out2im = b221im_a + b221im_b;
3212        let out3re = b320re_a - b320re_b;
3213        let out3im = b320im_a + b320im_b;
3214        let out4re = b419re_a - b419re_b;
3215        let out4im = b419im_a + b419im_b;
3216        let out5re = b518re_a - b518re_b;
3217        let out5im = b518im_a + b518im_b;
3218        let out6re = b617re_a - b617re_b;
3219        let out6im = b617im_a + b617im_b;
3220        let out7re = b716re_a - b716re_b;
3221        let out7im = b716im_a + b716im_b;
3222        let out8re = b815re_a - b815re_b;
3223        let out8im = b815im_a + b815im_b;
3224        let out9re = b914re_a - b914re_b;
3225        let out9im = b914im_a + b914im_b;
3226        let out10re = b1013re_a - b1013re_b;
3227        let out10im = b1013im_a + b1013im_b;
3228        let out11re = b1112re_a - b1112re_b;
3229        let out11im = b1112im_a + b1112im_b;
3230        let out12re = b1112re_a + b1112re_b;
3231        let out12im = b1112im_a - b1112im_b;
3232        let out13re = b1013re_a + b1013re_b;
3233        let out13im = b1013im_a - b1013im_b;
3234        let out14re = b914re_a + b914re_b;
3235        let out14im = b914im_a - b914im_b;
3236        let out15re = b815re_a + b815re_b;
3237        let out15im = b815im_a - b815im_b;
3238        let out16re = b716re_a + b716re_b;
3239        let out16im = b716im_a - b716im_b;
3240        let out17re = b617re_a + b617re_b;
3241        let out17im = b617im_a - b617im_b;
3242        let out18re = b518re_a + b518re_b;
3243        let out18im = b518im_a - b518im_b;
3244        let out19re = b419re_a + b419re_b;
3245        let out19im = b419im_a - b419im_b;
3246        let out20re = b320re_a + b320re_b;
3247        let out20im = b320im_a - b320im_b;
3248        let out21re = b221re_a + b221re_b;
3249        let out21im = b221im_a - b221im_b;
3250        let out22re = b122re_a + b122re_b;
3251        let out22im = b122im_a - b122im_b;
3252        buffer.store(sum, 0);
3253        buffer.store(
3254            Complex {
3255                re: out1re,
3256                im: out1im,
3257            },
3258            1,
3259        );
3260        buffer.store(
3261            Complex {
3262                re: out2re,
3263                im: out2im,
3264            },
3265            2,
3266        );
3267        buffer.store(
3268            Complex {
3269                re: out3re,
3270                im: out3im,
3271            },
3272            3,
3273        );
3274        buffer.store(
3275            Complex {
3276                re: out4re,
3277                im: out4im,
3278            },
3279            4,
3280        );
3281        buffer.store(
3282            Complex {
3283                re: out5re,
3284                im: out5im,
3285            },
3286            5,
3287        );
3288        buffer.store(
3289            Complex {
3290                re: out6re,
3291                im: out6im,
3292            },
3293            6,
3294        );
3295        buffer.store(
3296            Complex {
3297                re: out7re,
3298                im: out7im,
3299            },
3300            7,
3301        );
3302        buffer.store(
3303            Complex {
3304                re: out8re,
3305                im: out8im,
3306            },
3307            8,
3308        );
3309        buffer.store(
3310            Complex {
3311                re: out9re,
3312                im: out9im,
3313            },
3314            9,
3315        );
3316        buffer.store(
3317            Complex {
3318                re: out10re,
3319                im: out10im,
3320            },
3321            10,
3322        );
3323        buffer.store(
3324            Complex {
3325                re: out11re,
3326                im: out11im,
3327            },
3328            11,
3329        );
3330        buffer.store(
3331            Complex {
3332                re: out12re,
3333                im: out12im,
3334            },
3335            12,
3336        );
3337        buffer.store(
3338            Complex {
3339                re: out13re,
3340                im: out13im,
3341            },
3342            13,
3343        );
3344        buffer.store(
3345            Complex {
3346                re: out14re,
3347                im: out14im,
3348            },
3349            14,
3350        );
3351        buffer.store(
3352            Complex {
3353                re: out15re,
3354                im: out15im,
3355            },
3356            15,
3357        );
3358        buffer.store(
3359            Complex {
3360                re: out16re,
3361                im: out16im,
3362            },
3363            16,
3364        );
3365        buffer.store(
3366            Complex {
3367                re: out17re,
3368                im: out17im,
3369            },
3370            17,
3371        );
3372        buffer.store(
3373            Complex {
3374                re: out18re,
3375                im: out18im,
3376            },
3377            18,
3378        );
3379        buffer.store(
3380            Complex {
3381                re: out19re,
3382                im: out19im,
3383            },
3384            19,
3385        );
3386        buffer.store(
3387            Complex {
3388                re: out20re,
3389                im: out20im,
3390            },
3391            20,
3392        );
3393        buffer.store(
3394            Complex {
3395                re: out21re,
3396                im: out21im,
3397            },
3398            21,
3399        );
3400        buffer.store(
3401            Complex {
3402                re: out22re,
3403                im: out22im,
3404            },
3405            22,
3406        );
3407    }
3408}
3409
3410pub struct Butterfly24<T> {
3411    butterfly4: Butterfly4<T>,
3412    butterfly6: Butterfly6<T>,
3413    twiddle1: Complex<T>,
3414    twiddle2: Complex<T>,
3415    twiddle4: Complex<T>,
3416    twiddle5: Complex<T>,
3417    twiddle8: Complex<T>,
3418    twiddle10: Complex<T>,
3419    root2: T,
3420}
3421boilerplate_fft_butterfly!(Butterfly24, 24, |this: &Butterfly24<_>| this
3422    .butterfly4
3423    .fft_direction());
3424impl<T: FftNum> Butterfly24<T> {
3425    #[inline(always)]
3426    pub fn new(direction: FftDirection) -> Self {
3427        Self {
3428            butterfly4: Butterfly4::new(direction),
3429            butterfly6: Butterfly6::new(direction),
3430            twiddle1: twiddles::compute_twiddle(1, 24, direction),
3431            twiddle2: twiddles::compute_twiddle(2, 24, direction),
3432            twiddle4: twiddles::compute_twiddle(4, 24, direction),
3433            twiddle5: twiddles::compute_twiddle(5, 24, direction),
3434            twiddle8: twiddles::compute_twiddle(8, 24, direction),
3435            twiddle10: twiddles::compute_twiddle(10, 24, direction),
3436            root2: T::from_f64(0.5f64.sqrt()).unwrap(),
3437        }
3438    }
3439    #[inline(never)]
3440    unsafe fn perform_fft_contiguous(&self, mut buffer: impl LoadStore<T>) {
3441        // algorithm: 6x4 mixed radix
3442        // step 1: transpose the input directly into the scratch.
3443        let mut scratch0 = [
3444            buffer.load(0),
3445            buffer.load(4),
3446            buffer.load(8),
3447            buffer.load(12),
3448            buffer.load(16),
3449            buffer.load(20),
3450        ];
3451        let mut scratch1 = [
3452            buffer.load(1),
3453            buffer.load(5),
3454            buffer.load(9),
3455            buffer.load(13),
3456            buffer.load(17),
3457            buffer.load(21),
3458        ];
3459        let mut scratch2 = [
3460            buffer.load(2),
3461            buffer.load(6),
3462            buffer.load(10),
3463            buffer.load(14),
3464            buffer.load(18),
3465            buffer.load(22),
3466        ];
3467        let mut scratch3 = [
3468            buffer.load(3),
3469            buffer.load(7),
3470            buffer.load(11),
3471            buffer.load(15),
3472            buffer.load(19),
3473            buffer.load(23),
3474        ];
3475
3476        // step 2: column FFTs
3477        self.butterfly6.perform_fft_contiguous(&mut scratch0);
3478        self.butterfly6.perform_fft_contiguous(&mut scratch1);
3479        self.butterfly6.perform_fft_contiguous(&mut scratch2);
3480        self.butterfly6.perform_fft_contiguous(&mut scratch3);
3481
3482        // step 3: apply twiddle factors
3483        scratch1[1] = scratch1[1] * self.twiddle1;
3484        scratch1[2] = scratch1[2] * self.twiddle2;
3485        scratch1[3] =
3486            (twiddles::rotate_90(scratch1[3], self.fft_direction()) + scratch1[3]) * self.root2;
3487        scratch1[4] = scratch1[4] * self.twiddle4;
3488        scratch1[5] = scratch1[5] * self.twiddle5;
3489        scratch2[1] = scratch2[1] * self.twiddle2;
3490        scratch2[2] = scratch2[2] * self.twiddle4;
3491        scratch2[3] = twiddles::rotate_90(scratch2[3], self.fft_direction());
3492        scratch2[4] = scratch2[4] * self.twiddle8;
3493        scratch2[5] = scratch2[5] * self.twiddle10;
3494        scratch3[1] =
3495            (twiddles::rotate_90(scratch3[1], self.fft_direction()) + scratch3[1]) * self.root2;
3496        scratch3[2] = twiddles::rotate_90(scratch3[2], self.fft_direction());
3497        scratch3[3] =
3498            (twiddles::rotate_90(scratch3[3], self.fft_direction()) - scratch3[3]) * self.root2;
3499        scratch3[4] = -scratch3[4];
3500        scratch3[5] =
3501            (twiddles::rotate_90(scratch3[5], self.fft_direction()) + scratch3[5]) * -self.root2;
3502
3503        // step 4: SKIPPED because the next FFTs will be non-contiguous
3504
3505        // step 5: row FFTs
3506        self.butterfly4.perform_fft_strided(
3507            &mut scratch0[0],
3508            &mut scratch1[0],
3509            &mut scratch2[0],
3510            &mut scratch3[0],
3511        );
3512        self.butterfly4.perform_fft_strided(
3513            &mut scratch0[1],
3514            &mut scratch1[1],
3515            &mut scratch2[1],
3516            &mut scratch3[1],
3517        );
3518        self.butterfly4.perform_fft_strided(
3519            &mut scratch0[2],
3520            &mut scratch1[2],
3521            &mut scratch2[2],
3522            &mut scratch3[2],
3523        );
3524        self.butterfly4.perform_fft_strided(
3525            &mut scratch0[3],
3526            &mut scratch1[3],
3527            &mut scratch2[3],
3528            &mut scratch3[3],
3529        );
3530        self.butterfly4.perform_fft_strided(
3531            &mut scratch0[4],
3532            &mut scratch1[4],
3533            &mut scratch2[4],
3534            &mut scratch3[4],
3535        );
3536        self.butterfly4.perform_fft_strided(
3537            &mut scratch0[5],
3538            &mut scratch1[5],
3539            &mut scratch2[5],
3540            &mut scratch3[5],
3541        );
3542
3543        // step 6: copy back to the buffer. we can skip the transpose, because we skipped step 4
3544        buffer.store(scratch0[0], 0);
3545        buffer.store(scratch0[1], 1);
3546        buffer.store(scratch0[2], 2);
3547        buffer.store(scratch0[3], 3);
3548        buffer.store(scratch0[4], 4);
3549        buffer.store(scratch0[5], 5);
3550        buffer.store(scratch1[0], 6);
3551        buffer.store(scratch1[1], 7);
3552        buffer.store(scratch1[2], 8);
3553        buffer.store(scratch1[3], 9);
3554        buffer.store(scratch1[4], 10);
3555        buffer.store(scratch1[5], 11);
3556        buffer.store(scratch2[0], 12);
3557        buffer.store(scratch2[1], 13);
3558        buffer.store(scratch2[2], 14);
3559        buffer.store(scratch2[3], 15);
3560        buffer.store(scratch2[4], 16);
3561        buffer.store(scratch2[5], 17);
3562        buffer.store(scratch3[0], 18);
3563        buffer.store(scratch3[1], 19);
3564        buffer.store(scratch3[2], 20);
3565        buffer.store(scratch3[3], 21);
3566        buffer.store(scratch3[4], 22);
3567        buffer.store(scratch3[5], 23);
3568    }
3569}
3570
3571pub struct Butterfly27<T> {
3572    butterfly9: Butterfly9<T>,
3573    twiddles: [Complex<T>; 12],
3574}
3575boilerplate_fft_butterfly!(Butterfly27, 27, |this: &Butterfly27<_>| this
3576    .butterfly9
3577    .fft_direction());
3578impl<T: FftNum> Butterfly27<T> {
3579    #[inline(always)]
3580    pub fn new(direction: FftDirection) -> Self {
3581        Self {
3582            butterfly9: Butterfly9::new(direction),
3583            twiddles: [
3584                twiddles::compute_twiddle(1, 27, direction),
3585                twiddles::compute_twiddle(2, 27, direction),
3586                twiddles::compute_twiddle(3, 27, direction),
3587                twiddles::compute_twiddle(4, 27, direction),
3588                twiddles::compute_twiddle(5, 27, direction),
3589                twiddles::compute_twiddle(6, 27, direction),
3590                twiddles::compute_twiddle(7, 27, direction),
3591                twiddles::compute_twiddle(8, 27, direction),
3592                twiddles::compute_twiddle(10, 27, direction),
3593                twiddles::compute_twiddle(12, 27, direction),
3594                twiddles::compute_twiddle(14, 27, direction),
3595                twiddles::compute_twiddle(16, 27, direction),
3596            ],
3597        }
3598    }
3599
3600    #[inline(always)]
3601    unsafe fn perform_fft_contiguous(&self, mut buffer: impl LoadStore<T>) {
3602        // algorithm: mixed radix with width=9 and height=3
3603
3604        // step 1: transpose the input into the scratch
3605        let mut scratch0 = [
3606            buffer.load(0),
3607            buffer.load(3),
3608            buffer.load(6),
3609            buffer.load(9),
3610            buffer.load(12),
3611            buffer.load(15),
3612            buffer.load(18),
3613            buffer.load(21),
3614            buffer.load(24),
3615        ];
3616        let mut scratch1 = [
3617            buffer.load(1 + 0),
3618            buffer.load(1 + 3),
3619            buffer.load(1 + 6),
3620            buffer.load(1 + 9),
3621            buffer.load(1 + 12),
3622            buffer.load(1 + 15),
3623            buffer.load(1 + 18),
3624            buffer.load(1 + 21),
3625            buffer.load(1 + 24),
3626        ];
3627        let mut scratch2 = [
3628            buffer.load(2 + 0),
3629            buffer.load(2 + 3),
3630            buffer.load(2 + 6),
3631            buffer.load(2 + 9),
3632            buffer.load(2 + 12),
3633            buffer.load(2 + 15),
3634            buffer.load(2 + 18),
3635            buffer.load(2 + 21),
3636            buffer.load(2 + 24),
3637        ];
3638
3639        // step 2: column FFTs
3640        self.butterfly9.perform_fft_contiguous(&mut scratch0);
3641        self.butterfly9.perform_fft_contiguous(&mut scratch1);
3642        self.butterfly9.perform_fft_contiguous(&mut scratch2);
3643
3644        // step 3: apply twiddle factors
3645        scratch1[1] = scratch1[1] * self.twiddles[0];
3646        scratch1[2] = scratch1[2] * self.twiddles[1];
3647        scratch1[3] = scratch1[3] * self.twiddles[2];
3648        scratch1[4] = scratch1[4] * self.twiddles[3];
3649        scratch1[5] = scratch1[5] * self.twiddles[4];
3650        scratch1[6] = scratch1[6] * self.twiddles[5];
3651        scratch1[7] = scratch1[7] * self.twiddles[6];
3652        scratch1[8] = scratch1[8] * self.twiddles[7];
3653        scratch2[1] = scratch2[1] * self.twiddles[1];
3654        scratch2[2] = scratch2[2] * self.twiddles[3];
3655        scratch2[3] = scratch2[3] * self.twiddles[5];
3656        scratch2[4] = scratch2[4] * self.twiddles[7];
3657        scratch2[5] = scratch2[5] * self.twiddles[8];
3658        scratch2[6] = scratch2[6] * self.twiddles[9];
3659        scratch2[7] = scratch2[7] * self.twiddles[10];
3660        scratch2[8] = scratch2[8] * self.twiddles[11];
3661
3662        // step 4: SKIPPED because the next FFTs will be non-contiguous
3663
3664        // step 5: row FFTs
3665        self.butterfly9.butterfly3.perform_fft_strided(
3666            &mut scratch0[0],
3667            &mut scratch1[0],
3668            &mut scratch2[0],
3669        );
3670        self.butterfly9.butterfly3.perform_fft_strided(
3671            &mut scratch0[1],
3672            &mut scratch1[1],
3673            &mut scratch2[1],
3674        );
3675        self.butterfly9.butterfly3.perform_fft_strided(
3676            &mut scratch0[2],
3677            &mut scratch1[2],
3678            &mut scratch2[2],
3679        );
3680        self.butterfly9.butterfly3.perform_fft_strided(
3681            &mut scratch0[3],
3682            &mut scratch1[3],
3683            &mut scratch2[3],
3684        );
3685        self.butterfly9.butterfly3.perform_fft_strided(
3686            &mut scratch0[4],
3687            &mut scratch1[4],
3688            &mut scratch2[4],
3689        );
3690        self.butterfly9.butterfly3.perform_fft_strided(
3691            &mut scratch0[5],
3692            &mut scratch1[5],
3693            &mut scratch2[5],
3694        );
3695        self.butterfly9.butterfly3.perform_fft_strided(
3696            &mut scratch0[6],
3697            &mut scratch1[6],
3698            &mut scratch2[6],
3699        );
3700        self.butterfly9.butterfly3.perform_fft_strided(
3701            &mut scratch0[7],
3702            &mut scratch1[7],
3703            &mut scratch2[7],
3704        );
3705        self.butterfly9.butterfly3.perform_fft_strided(
3706            &mut scratch0[8],
3707            &mut scratch1[8],
3708            &mut scratch2[8],
3709        );
3710
3711        // step 6: copy the result into the output. normally we'd need to do a transpose here, but we can skip it because we skipped the transpose in step 4
3712        buffer.store(scratch0[0], 0);
3713        buffer.store(scratch0[1], 1);
3714        buffer.store(scratch0[2], 2);
3715        buffer.store(scratch0[3], 3);
3716        buffer.store(scratch0[4], 4);
3717        buffer.store(scratch0[5], 5);
3718        buffer.store(scratch0[6], 6);
3719        buffer.store(scratch0[7], 7);
3720        buffer.store(scratch0[8], 8);
3721
3722        buffer.store(scratch1[0], 9 + 0);
3723        buffer.store(scratch1[1], 9 + 1);
3724        buffer.store(scratch1[2], 9 + 2);
3725        buffer.store(scratch1[3], 9 + 3);
3726        buffer.store(scratch1[4], 9 + 4);
3727        buffer.store(scratch1[5], 9 + 5);
3728        buffer.store(scratch1[6], 9 + 6);
3729        buffer.store(scratch1[7], 9 + 7);
3730        buffer.store(scratch1[8], 9 + 8);
3731
3732        buffer.store(scratch2[0], 18 + 0);
3733        buffer.store(scratch2[1], 18 + 1);
3734        buffer.store(scratch2[2], 18 + 2);
3735        buffer.store(scratch2[3], 18 + 3);
3736        buffer.store(scratch2[4], 18 + 4);
3737        buffer.store(scratch2[5], 18 + 5);
3738        buffer.store(scratch2[6], 18 + 6);
3739        buffer.store(scratch2[7], 18 + 7);
3740        buffer.store(scratch2[8], 18 + 8);
3741    }
3742}
3743
/// Hard-coded FFT butterfly of length 29.
///
/// Holds the fourteen twiddle factors used by `perform_fft_contiguous`, plus the
/// direction the butterfly was constructed with.
3744pub struct Butterfly29<T> {
    // `twiddleN` is initialised in `new` to `twiddles::compute_twiddle(N, 29, direction)`.
3745    twiddle1: Complex<T>,
3746    twiddle2: Complex<T>,
3747    twiddle3: Complex<T>,
3748    twiddle4: Complex<T>,
3749    twiddle5: Complex<T>,
3750    twiddle6: Complex<T>,
3751    twiddle7: Complex<T>,
3752    twiddle8: Complex<T>,
3753    twiddle9: Complex<T>,
3754    twiddle10: Complex<T>,
3755    twiddle11: Complex<T>,
3756    twiddle12: Complex<T>,
3757    twiddle13: Complex<T>,
3758    twiddle14: Complex<T>,
    // The direction passed to `new`; handed to the boilerplate macro below.
3759    direction: FftDirection,
3760}
// Generates the shared butterfly boilerplate for this type (including its `Fft` impl),
// with length 29 and a closure that returns the stored direction.
3761boilerplate_fft_butterfly!(Butterfly29, 29, |this: &Butterfly29<_>| this.direction);
3762impl<T: FftNum> Butterfly29<T> {
3763    pub fn new(direction: FftDirection) -> Self {
3764        let twiddle1: Complex<T> = twiddles::compute_twiddle(1, 29, direction);
3765        let twiddle2: Complex<T> = twiddles::compute_twiddle(2, 29, direction);
3766        let twiddle3: Complex<T> = twiddles::compute_twiddle(3, 29, direction);
3767        let twiddle4: Complex<T> = twiddles::compute_twiddle(4, 29, direction);
3768        let twiddle5: Complex<T> = twiddles::compute_twiddle(5, 29, direction);
3769        let twiddle6: Complex<T> = twiddles::compute_twiddle(6, 29, direction);
3770        let twiddle7: Complex<T> = twiddles::compute_twiddle(7, 29, direction);
3771        let twiddle8: Complex<T> = twiddles::compute_twiddle(8, 29, direction);
3772        let twiddle9: Complex<T> = twiddles::compute_twiddle(9, 29, direction);
3773        let twiddle10: Complex<T> = twiddles::compute_twiddle(10, 29, direction);
3774        let twiddle11: Complex<T> = twiddles::compute_twiddle(11, 29, direction);
3775        let twiddle12: Complex<T> = twiddles::compute_twiddle(12, 29, direction);
3776        let twiddle13: Complex<T> = twiddles::compute_twiddle(13, 29, direction);
3777        let twiddle14: Complex<T> = twiddles::compute_twiddle(14, 29, direction);
3778        Self {
3779            twiddle1,
3780            twiddle2,
3781            twiddle3,
3782            twiddle4,
3783            twiddle5,
3784            twiddle6,
3785            twiddle7,
3786            twiddle8,
3787            twiddle9,
3788            twiddle10,
3789            twiddle11,
3790            twiddle12,
3791            twiddle13,
3792            twiddle14,
3793            direction,
3794        }
3795    }
3796
3797    #[inline(never)]
3798    unsafe fn perform_fft_contiguous(&self, mut buffer: impl LoadStore<T>) {
3799        // This function was derived in the same manner as the butterflies for length 3, 5 and 7.
3800        // However, instead of doing it by hand the actual code is autogenerated
3801        // with the `genbutterflies.py` script in the `tools` directory.
3802        let x128p = buffer.load(1) + buffer.load(28);
3803        let x128n = buffer.load(1) - buffer.load(28);
3804        let x227p = buffer.load(2) + buffer.load(27);
3805        let x227n = buffer.load(2) - buffer.load(27);
3806        let x326p = buffer.load(3) + buffer.load(26);
3807        let x326n = buffer.load(3) - buffer.load(26);
3808        let x425p = buffer.load(4) + buffer.load(25);
3809        let x425n = buffer.load(4) - buffer.load(25);
3810        let x524p = buffer.load(5) + buffer.load(24);
3811        let x524n = buffer.load(5) - buffer.load(24);
3812        let x623p = buffer.load(6) + buffer.load(23);
3813        let x623n = buffer.load(6) - buffer.load(23);
3814        let x722p = buffer.load(7) + buffer.load(22);
3815        let x722n = buffer.load(7) - buffer.load(22);
3816        let x821p = buffer.load(8) + buffer.load(21);
3817        let x821n = buffer.load(8) - buffer.load(21);
3818        let x920p = buffer.load(9) + buffer.load(20);
3819        let x920n = buffer.load(9) - buffer.load(20);
3820        let x1019p = buffer.load(10) + buffer.load(19);
3821        let x1019n = buffer.load(10) - buffer.load(19);
3822        let x1118p = buffer.load(11) + buffer.load(18);
3823        let x1118n = buffer.load(11) - buffer.load(18);
3824        let x1217p = buffer.load(12) + buffer.load(17);
3825        let x1217n = buffer.load(12) - buffer.load(17);
3826        let x1316p = buffer.load(13) + buffer.load(16);
3827        let x1316n = buffer.load(13) - buffer.load(16);
3828        let x1415p = buffer.load(14) + buffer.load(15);
3829        let x1415n = buffer.load(14) - buffer.load(15);
3830        let sum = buffer.load(0)
3831            + x128p
3832            + x227p
3833            + x326p
3834            + x425p
3835            + x524p
3836            + x623p
3837            + x722p
3838            + x821p
3839            + x920p
3840            + x1019p
3841            + x1118p
3842            + x1217p
3843            + x1316p
3844            + x1415p;
3845        let b128re_a = buffer.load(0).re
3846            + self.twiddle1.re * x128p.re
3847            + self.twiddle2.re * x227p.re
3848            + self.twiddle3.re * x326p.re
3849            + self.twiddle4.re * x425p.re
3850            + self.twiddle5.re * x524p.re
3851            + self.twiddle6.re * x623p.re
3852            + self.twiddle7.re * x722p.re
3853            + self.twiddle8.re * x821p.re
3854            + self.twiddle9.re * x920p.re
3855            + self.twiddle10.re * x1019p.re
3856            + self.twiddle11.re * x1118p.re
3857            + self.twiddle12.re * x1217p.re
3858            + self.twiddle13.re * x1316p.re
3859            + self.twiddle14.re * x1415p.re;
3860        let b128re_b = self.twiddle1.im * x128n.im
3861            + self.twiddle2.im * x227n.im
3862            + self.twiddle3.im * x326n.im
3863            + self.twiddle4.im * x425n.im
3864            + self.twiddle5.im * x524n.im
3865            + self.twiddle6.im * x623n.im
3866            + self.twiddle7.im * x722n.im
3867            + self.twiddle8.im * x821n.im
3868            + self.twiddle9.im * x920n.im
3869            + self.twiddle10.im * x1019n.im
3870            + self.twiddle11.im * x1118n.im
3871            + self.twiddle12.im * x1217n.im
3872            + self.twiddle13.im * x1316n.im
3873            + self.twiddle14.im * x1415n.im;
3874        let b227re_a = buffer.load(0).re
3875            + self.twiddle2.re * x128p.re
3876            + self.twiddle4.re * x227p.re
3877            + self.twiddle6.re * x326p.re
3878            + self.twiddle8.re * x425p.re
3879            + self.twiddle10.re * x524p.re
3880            + self.twiddle12.re * x623p.re
3881            + self.twiddle14.re * x722p.re
3882            + self.twiddle13.re * x821p.re
3883            + self.twiddle11.re * x920p.re
3884            + self.twiddle9.re * x1019p.re
3885            + self.twiddle7.re * x1118p.re
3886            + self.twiddle5.re * x1217p.re
3887            + self.twiddle3.re * x1316p.re
3888            + self.twiddle1.re * x1415p.re;
3889        let b227re_b = self.twiddle2.im * x128n.im
3890            + self.twiddle4.im * x227n.im
3891            + self.twiddle6.im * x326n.im
3892            + self.twiddle8.im * x425n.im
3893            + self.twiddle10.im * x524n.im
3894            + self.twiddle12.im * x623n.im
3895            + self.twiddle14.im * x722n.im
3896            + -self.twiddle13.im * x821n.im
3897            + -self.twiddle11.im * x920n.im
3898            + -self.twiddle9.im * x1019n.im
3899            + -self.twiddle7.im * x1118n.im
3900            + -self.twiddle5.im * x1217n.im
3901            + -self.twiddle3.im * x1316n.im
3902            + -self.twiddle1.im * x1415n.im;
3903        let b326re_a = buffer.load(0).re
3904            + self.twiddle3.re * x128p.re
3905            + self.twiddle6.re * x227p.re
3906            + self.twiddle9.re * x326p.re
3907            + self.twiddle12.re * x425p.re
3908            + self.twiddle14.re * x524p.re
3909            + self.twiddle11.re * x623p.re
3910            + self.twiddle8.re * x722p.re
3911            + self.twiddle5.re * x821p.re
3912            + self.twiddle2.re * x920p.re
3913            + self.twiddle1.re * x1019p.re
3914            + self.twiddle4.re * x1118p.re
3915            + self.twiddle7.re * x1217p.re
3916            + self.twiddle10.re * x1316p.re
3917            + self.twiddle13.re * x1415p.re;
3918        let b326re_b = self.twiddle3.im * x128n.im
3919            + self.twiddle6.im * x227n.im
3920            + self.twiddle9.im * x326n.im
3921            + self.twiddle12.im * x425n.im
3922            + -self.twiddle14.im * x524n.im
3923            + -self.twiddle11.im * x623n.im
3924            + -self.twiddle8.im * x722n.im
3925            + -self.twiddle5.im * x821n.im
3926            + -self.twiddle2.im * x920n.im
3927            + self.twiddle1.im * x1019n.im
3928            + self.twiddle4.im * x1118n.im
3929            + self.twiddle7.im * x1217n.im
3930            + self.twiddle10.im * x1316n.im
3931            + self.twiddle13.im * x1415n.im;
3932        let b425re_a = buffer.load(0).re
3933            + self.twiddle4.re * x128p.re
3934            + self.twiddle8.re * x227p.re
3935            + self.twiddle12.re * x326p.re
3936            + self.twiddle13.re * x425p.re
3937            + self.twiddle9.re * x524p.re
3938            + self.twiddle5.re * x623p.re
3939            + self.twiddle1.re * x722p.re
3940            + self.twiddle3.re * x821p.re
3941            + self.twiddle7.re * x920p.re
3942            + self.twiddle11.re * x1019p.re
3943            + self.twiddle14.re * x1118p.re
3944            + self.twiddle10.re * x1217p.re
3945            + self.twiddle6.re * x1316p.re
3946            + self.twiddle2.re * x1415p.re;
3947        let b425re_b = self.twiddle4.im * x128n.im
3948            + self.twiddle8.im * x227n.im
3949            + self.twiddle12.im * x326n.im
3950            + -self.twiddle13.im * x425n.im
3951            + -self.twiddle9.im * x524n.im
3952            + -self.twiddle5.im * x623n.im
3953            + -self.twiddle1.im * x722n.im
3954            + self.twiddle3.im * x821n.im
3955            + self.twiddle7.im * x920n.im
3956            + self.twiddle11.im * x1019n.im
3957            + -self.twiddle14.im * x1118n.im
3958            + -self.twiddle10.im * x1217n.im
3959            + -self.twiddle6.im * x1316n.im
3960            + -self.twiddle2.im * x1415n.im;
3961        let b524re_a = buffer.load(0).re
3962            + self.twiddle5.re * x128p.re
3963            + self.twiddle10.re * x227p.re
3964            + self.twiddle14.re * x326p.re
3965            + self.twiddle9.re * x425p.re
3966            + self.twiddle4.re * x524p.re
3967            + self.twiddle1.re * x623p.re
3968            + self.twiddle6.re * x722p.re
3969            + self.twiddle11.re * x821p.re
3970            + self.twiddle13.re * x920p.re
3971            + self.twiddle8.re * x1019p.re
3972            + self.twiddle3.re * x1118p.re
3973            + self.twiddle2.re * x1217p.re
3974            + self.twiddle7.re * x1316p.re
3975            + self.twiddle12.re * x1415p.re;
3976        let b524re_b = self.twiddle5.im * x128n.im
3977            + self.twiddle10.im * x227n.im
3978            + -self.twiddle14.im * x326n.im
3979            + -self.twiddle9.im * x425n.im
3980            + -self.twiddle4.im * x524n.im
3981            + self.twiddle1.im * x623n.im
3982            + self.twiddle6.im * x722n.im
3983            + self.twiddle11.im * x821n.im
3984            + -self.twiddle13.im * x920n.im
3985            + -self.twiddle8.im * x1019n.im
3986            + -self.twiddle3.im * x1118n.im
3987            + self.twiddle2.im * x1217n.im
3988            + self.twiddle7.im * x1316n.im
3989            + self.twiddle12.im * x1415n.im;
3990        let b623re_a = buffer.load(0).re
3991            + self.twiddle6.re * x128p.re
3992            + self.twiddle12.re * x227p.re
3993            + self.twiddle11.re * x326p.re
3994            + self.twiddle5.re * x425p.re
3995            + self.twiddle1.re * x524p.re
3996            + self.twiddle7.re * x623p.re
3997            + self.twiddle13.re * x722p.re
3998            + self.twiddle10.re * x821p.re
3999            + self.twiddle4.re * x920p.re
4000            + self.twiddle2.re * x1019p.re
4001            + self.twiddle8.re * x1118p.re
4002            + self.twiddle14.re * x1217p.re
4003            + self.twiddle9.re * x1316p.re
4004            + self.twiddle3.re * x1415p.re;
4005        let b623re_b = self.twiddle6.im * x128n.im
4006            + self.twiddle12.im * x227n.im
4007            + -self.twiddle11.im * x326n.im
4008            + -self.twiddle5.im * x425n.im
4009            + self.twiddle1.im * x524n.im
4010            + self.twiddle7.im * x623n.im
4011            + self.twiddle13.im * x722n.im
4012            + -self.twiddle10.im * x821n.im
4013            + -self.twiddle4.im * x920n.im
4014            + self.twiddle2.im * x1019n.im
4015            + self.twiddle8.im * x1118n.im
4016            + self.twiddle14.im * x1217n.im
4017            + -self.twiddle9.im * x1316n.im
4018            + -self.twiddle3.im * x1415n.im;
4019        let b722re_a = buffer.load(0).re
4020            + self.twiddle7.re * x128p.re
4021            + self.twiddle14.re * x227p.re
4022            + self.twiddle8.re * x326p.re
4023            + self.twiddle1.re * x425p.re
4024            + self.twiddle6.re * x524p.re
4025            + self.twiddle13.re * x623p.re
4026            + self.twiddle9.re * x722p.re
4027            + self.twiddle2.re * x821p.re
4028            + self.twiddle5.re * x920p.re
4029            + self.twiddle12.re * x1019p.re
4030            + self.twiddle10.re * x1118p.re
4031            + self.twiddle3.re * x1217p.re
4032            + self.twiddle4.re * x1316p.re
4033            + self.twiddle11.re * x1415p.re;
4034        let b722re_b = self.twiddle7.im * x128n.im
4035            + self.twiddle14.im * x227n.im
4036            + -self.twiddle8.im * x326n.im
4037            + -self.twiddle1.im * x425n.im
4038            + self.twiddle6.im * x524n.im
4039            + self.twiddle13.im * x623n.im
4040            + -self.twiddle9.im * x722n.im
4041            + -self.twiddle2.im * x821n.im
4042            + self.twiddle5.im * x920n.im
4043            + self.twiddle12.im * x1019n.im
4044            + -self.twiddle10.im * x1118n.im
4045            + -self.twiddle3.im * x1217n.im
4046            + self.twiddle4.im * x1316n.im
4047            + self.twiddle11.im * x1415n.im;
4048        let b821re_a = buffer.load(0).re
4049            + self.twiddle8.re * x128p.re
4050            + self.twiddle13.re * x227p.re
4051            + self.twiddle5.re * x326p.re
4052            + self.twiddle3.re * x425p.re
4053            + self.twiddle11.re * x524p.re
4054            + self.twiddle10.re * x623p.re
4055            + self.twiddle2.re * x722p.re
4056            + self.twiddle6.re * x821p.re
4057            + self.twiddle14.re * x920p.re
4058            + self.twiddle7.re * x1019p.re
4059            + self.twiddle1.re * x1118p.re
4060            + self.twiddle9.re * x1217p.re
4061            + self.twiddle12.re * x1316p.re
4062            + self.twiddle4.re * x1415p.re;
4063        let b821re_b = self.twiddle8.im * x128n.im
4064            + -self.twiddle13.im * x227n.im
4065            + -self.twiddle5.im * x326n.im
4066            + self.twiddle3.im * x425n.im
4067            + self.twiddle11.im * x524n.im
4068            + -self.twiddle10.im * x623n.im
4069            + -self.twiddle2.im * x722n.im
4070            + self.twiddle6.im * x821n.im
4071            + self.twiddle14.im * x920n.im
4072            + -self.twiddle7.im * x1019n.im
4073            + self.twiddle1.im * x1118n.im
4074            + self.twiddle9.im * x1217n.im
4075            + -self.twiddle12.im * x1316n.im
4076            + -self.twiddle4.im * x1415n.im;
4077        let b920re_a = buffer.load(0).re
4078            + self.twiddle9.re * x128p.re
4079            + self.twiddle11.re * x227p.re
4080            + self.twiddle2.re * x326p.re
4081            + self.twiddle7.re * x425p.re
4082            + self.twiddle13.re * x524p.re
4083            + self.twiddle4.re * x623p.re
4084            + self.twiddle5.re * x722p.re
4085            + self.twiddle14.re * x821p.re
4086            + self.twiddle6.re * x920p.re
4087            + self.twiddle3.re * x1019p.re
4088            + self.twiddle12.re * x1118p.re
4089            + self.twiddle8.re * x1217p.re
4090            + self.twiddle1.re * x1316p.re
4091            + self.twiddle10.re * x1415p.re;
4092        let b920re_b = self.twiddle9.im * x128n.im
4093            + -self.twiddle11.im * x227n.im
4094            + -self.twiddle2.im * x326n.im
4095            + self.twiddle7.im * x425n.im
4096            + -self.twiddle13.im * x524n.im
4097            + -self.twiddle4.im * x623n.im
4098            + self.twiddle5.im * x722n.im
4099            + self.twiddle14.im * x821n.im
4100            + -self.twiddle6.im * x920n.im
4101            + self.twiddle3.im * x1019n.im
4102            + self.twiddle12.im * x1118n.im
4103            + -self.twiddle8.im * x1217n.im
4104            + self.twiddle1.im * x1316n.im
4105            + self.twiddle10.im * x1415n.im;
4106        let b1019re_a = buffer.load(0).re
4107            + self.twiddle10.re * x128p.re
4108            + self.twiddle9.re * x227p.re
4109            + self.twiddle1.re * x326p.re
4110            + self.twiddle11.re * x425p.re
4111            + self.twiddle8.re * x524p.re
4112            + self.twiddle2.re * x623p.re
4113            + self.twiddle12.re * x722p.re
4114            + self.twiddle7.re * x821p.re
4115            + self.twiddle3.re * x920p.re
4116            + self.twiddle13.re * x1019p.re
4117            + self.twiddle6.re * x1118p.re
4118            + self.twiddle4.re * x1217p.re
4119            + self.twiddle14.re * x1316p.re
4120            + self.twiddle5.re * x1415p.re;
4121        let b1019re_b = self.twiddle10.im * x128n.im
4122            + -self.twiddle9.im * x227n.im
4123            + self.twiddle1.im * x326n.im
4124            + self.twiddle11.im * x425n.im
4125            + -self.twiddle8.im * x524n.im
4126            + self.twiddle2.im * x623n.im
4127            + self.twiddle12.im * x722n.im
4128            + -self.twiddle7.im * x821n.im
4129            + self.twiddle3.im * x920n.im
4130            + self.twiddle13.im * x1019n.im
4131            + -self.twiddle6.im * x1118n.im
4132            + self.twiddle4.im * x1217n.im
4133            + self.twiddle14.im * x1316n.im
4134            + -self.twiddle5.im * x1415n.im;
4135        let b1118re_a = buffer.load(0).re
4136            + self.twiddle11.re * x128p.re
4137            + self.twiddle7.re * x227p.re
4138            + self.twiddle4.re * x326p.re
4139            + self.twiddle14.re * x425p.re
4140            + self.twiddle3.re * x524p.re
4141            + self.twiddle8.re * x623p.re
4142            + self.twiddle10.re * x722p.re
4143            + self.twiddle1.re * x821p.re
4144            + self.twiddle12.re * x920p.re
4145            + self.twiddle6.re * x1019p.re
4146            + self.twiddle5.re * x1118p.re
4147            + self.twiddle13.re * x1217p.re
4148            + self.twiddle2.re * x1316p.re
4149            + self.twiddle9.re * x1415p.re;
4150        let b1118re_b = self.twiddle11.im * x128n.im
4151            + -self.twiddle7.im * x227n.im
4152            + self.twiddle4.im * x326n.im
4153            + -self.twiddle14.im * x425n.im
4154            + -self.twiddle3.im * x524n.im
4155            + self.twiddle8.im * x623n.im
4156            + -self.twiddle10.im * x722n.im
4157            + self.twiddle1.im * x821n.im
4158            + self.twiddle12.im * x920n.im
4159            + -self.twiddle6.im * x1019n.im
4160            + self.twiddle5.im * x1118n.im
4161            + -self.twiddle13.im * x1217n.im
4162            + -self.twiddle2.im * x1316n.im
4163            + self.twiddle9.im * x1415n.im;
4164        let b1217re_a = buffer.load(0).re
4165            + self.twiddle12.re * x128p.re
4166            + self.twiddle5.re * x227p.re
4167            + self.twiddle7.re * x326p.re
4168            + self.twiddle10.re * x425p.re
4169            + self.twiddle2.re * x524p.re
4170            + self.twiddle14.re * x623p.re
4171            + self.twiddle3.re * x722p.re
4172            + self.twiddle9.re * x821p.re
4173            + self.twiddle8.re * x920p.re
4174            + self.twiddle4.re * x1019p.re
4175            + self.twiddle13.re * x1118p.re
4176            + self.twiddle1.re * x1217p.re
4177            + self.twiddle11.re * x1316p.re
4178            + self.twiddle6.re * x1415p.re;
4179        let b1217re_b = self.twiddle12.im * x128n.im
4180            + -self.twiddle5.im * x227n.im
4181            + self.twiddle7.im * x326n.im
4182            + -self.twiddle10.im * x425n.im
4183            + self.twiddle2.im * x524n.im
4184            + self.twiddle14.im * x623n.im
4185            + -self.twiddle3.im * x722n.im
4186            + self.twiddle9.im * x821n.im
4187            + -self.twiddle8.im * x920n.im
4188            + self.twiddle4.im * x1019n.im
4189            + -self.twiddle13.im * x1118n.im
4190            + -self.twiddle1.im * x1217n.im
4191            + self.twiddle11.im * x1316n.im
4192            + -self.twiddle6.im * x1415n.im;
4193        let b1316re_a = buffer.load(0).re
4194            + self.twiddle13.re * x128p.re
4195            + self.twiddle3.re * x227p.re
4196            + self.twiddle10.re * x326p.re
4197            + self.twiddle6.re * x425p.re
4198            + self.twiddle7.re * x524p.re
4199            + self.twiddle9.re * x623p.re
4200            + self.twiddle4.re * x722p.re
4201            + self.twiddle12.re * x821p.re
4202            + self.twiddle1.re * x920p.re
4203            + self.twiddle14.re * x1019p.re
4204            + self.twiddle2.re * x1118p.re
4205            + self.twiddle11.re * x1217p.re
4206            + self.twiddle5.re * x1316p.re
4207            + self.twiddle8.re * x1415p.re;
4208        let b1316re_b = self.twiddle13.im * x128n.im
4209            + -self.twiddle3.im * x227n.im
4210            + self.twiddle10.im * x326n.im
4211            + -self.twiddle6.im * x425n.im
4212            + self.twiddle7.im * x524n.im
4213            + -self.twiddle9.im * x623n.im
4214            + self.twiddle4.im * x722n.im
4215            + -self.twiddle12.im * x821n.im
4216            + self.twiddle1.im * x920n.im
4217            + self.twiddle14.im * x1019n.im
4218            + -self.twiddle2.im * x1118n.im
4219            + self.twiddle11.im * x1217n.im
4220            + -self.twiddle5.im * x1316n.im
4221            + self.twiddle8.im * x1415n.im;
4222        let b1415re_a = buffer.load(0).re
4223            + self.twiddle14.re * x128p.re
4224            + self.twiddle1.re * x227p.re
4225            + self.twiddle13.re * x326p.re
4226            + self.twiddle2.re * x425p.re
4227            + self.twiddle12.re * x524p.re
4228            + self.twiddle3.re * x623p.re
4229            + self.twiddle11.re * x722p.re
4230            + self.twiddle4.re * x821p.re
4231            + self.twiddle10.re * x920p.re
4232            + self.twiddle5.re * x1019p.re
4233            + self.twiddle9.re * x1118p.re
4234            + self.twiddle6.re * x1217p.re
4235            + self.twiddle8.re * x1316p.re
4236            + self.twiddle7.re * x1415p.re;
4237        let b1415re_b = self.twiddle14.im * x128n.im
4238            + -self.twiddle1.im * x227n.im
4239            + self.twiddle13.im * x326n.im
4240            + -self.twiddle2.im * x425n.im
4241            + self.twiddle12.im * x524n.im
4242            + -self.twiddle3.im * x623n.im
4243            + self.twiddle11.im * x722n.im
4244            + -self.twiddle4.im * x821n.im
4245            + self.twiddle10.im * x920n.im
4246            + -self.twiddle5.im * x1019n.im
4247            + self.twiddle9.im * x1118n.im
4248            + -self.twiddle6.im * x1217n.im
4249            + self.twiddle8.im * x1316n.im
4250            + -self.twiddle7.im * x1415n.im;
4251
4252        let b128im_a = buffer.load(0).im
4253            + self.twiddle1.re * x128p.im
4254            + self.twiddle2.re * x227p.im
4255            + self.twiddle3.re * x326p.im
4256            + self.twiddle4.re * x425p.im
4257            + self.twiddle5.re * x524p.im
4258            + self.twiddle6.re * x623p.im
4259            + self.twiddle7.re * x722p.im
4260            + self.twiddle8.re * x821p.im
4261            + self.twiddle9.re * x920p.im
4262            + self.twiddle10.re * x1019p.im
4263            + self.twiddle11.re * x1118p.im
4264            + self.twiddle12.re * x1217p.im
4265            + self.twiddle13.re * x1316p.im
4266            + self.twiddle14.re * x1415p.im;
4267        let b128im_b = self.twiddle1.im * x128n.re
4268            + self.twiddle2.im * x227n.re
4269            + self.twiddle3.im * x326n.re
4270            + self.twiddle4.im * x425n.re
4271            + self.twiddle5.im * x524n.re
4272            + self.twiddle6.im * x623n.re
4273            + self.twiddle7.im * x722n.re
4274            + self.twiddle8.im * x821n.re
4275            + self.twiddle9.im * x920n.re
4276            + self.twiddle10.im * x1019n.re
4277            + self.twiddle11.im * x1118n.re
4278            + self.twiddle12.im * x1217n.re
4279            + self.twiddle13.im * x1316n.re
4280            + self.twiddle14.im * x1415n.re;
4281        let b227im_a = buffer.load(0).im
4282            + self.twiddle2.re * x128p.im
4283            + self.twiddle4.re * x227p.im
4284            + self.twiddle6.re * x326p.im
4285            + self.twiddle8.re * x425p.im
4286            + self.twiddle10.re * x524p.im
4287            + self.twiddle12.re * x623p.im
4288            + self.twiddle14.re * x722p.im
4289            + self.twiddle13.re * x821p.im
4290            + self.twiddle11.re * x920p.im
4291            + self.twiddle9.re * x1019p.im
4292            + self.twiddle7.re * x1118p.im
4293            + self.twiddle5.re * x1217p.im
4294            + self.twiddle3.re * x1316p.im
4295            + self.twiddle1.re * x1415p.im;
4296        let b227im_b = self.twiddle2.im * x128n.re
4297            + self.twiddle4.im * x227n.re
4298            + self.twiddle6.im * x326n.re
4299            + self.twiddle8.im * x425n.re
4300            + self.twiddle10.im * x524n.re
4301            + self.twiddle12.im * x623n.re
4302            + self.twiddle14.im * x722n.re
4303            + -self.twiddle13.im * x821n.re
4304            + -self.twiddle11.im * x920n.re
4305            + -self.twiddle9.im * x1019n.re
4306            + -self.twiddle7.im * x1118n.re
4307            + -self.twiddle5.im * x1217n.re
4308            + -self.twiddle3.im * x1316n.re
4309            + -self.twiddle1.im * x1415n.re;
4310        let b326im_a = buffer.load(0).im
4311            + self.twiddle3.re * x128p.im
4312            + self.twiddle6.re * x227p.im
4313            + self.twiddle9.re * x326p.im
4314            + self.twiddle12.re * x425p.im
4315            + self.twiddle14.re * x524p.im
4316            + self.twiddle11.re * x623p.im
4317            + self.twiddle8.re * x722p.im
4318            + self.twiddle5.re * x821p.im
4319            + self.twiddle2.re * x920p.im
4320            + self.twiddle1.re * x1019p.im
4321            + self.twiddle4.re * x1118p.im
4322            + self.twiddle7.re * x1217p.im
4323            + self.twiddle10.re * x1316p.im
4324            + self.twiddle13.re * x1415p.im;
4325        let b326im_b = self.twiddle3.im * x128n.re
4326            + self.twiddle6.im * x227n.re
4327            + self.twiddle9.im * x326n.re
4328            + self.twiddle12.im * x425n.re
4329            + -self.twiddle14.im * x524n.re
4330            + -self.twiddle11.im * x623n.re
4331            + -self.twiddle8.im * x722n.re
4332            + -self.twiddle5.im * x821n.re
4333            + -self.twiddle2.im * x920n.re
4334            + self.twiddle1.im * x1019n.re
4335            + self.twiddle4.im * x1118n.re
4336            + self.twiddle7.im * x1217n.re
4337            + self.twiddle10.im * x1316n.re
4338            + self.twiddle13.im * x1415n.re;
4339        let b425im_a = buffer.load(0).im
4340            + self.twiddle4.re * x128p.im
4341            + self.twiddle8.re * x227p.im
4342            + self.twiddle12.re * x326p.im
4343            + self.twiddle13.re * x425p.im
4344            + self.twiddle9.re * x524p.im
4345            + self.twiddle5.re * x623p.im
4346            + self.twiddle1.re * x722p.im
4347            + self.twiddle3.re * x821p.im
4348            + self.twiddle7.re * x920p.im
4349            + self.twiddle11.re * x1019p.im
4350            + self.twiddle14.re * x1118p.im
4351            + self.twiddle10.re * x1217p.im
4352            + self.twiddle6.re * x1316p.im
4353            + self.twiddle2.re * x1415p.im;
4354        let b425im_b = self.twiddle4.im * x128n.re
4355            + self.twiddle8.im * x227n.re
4356            + self.twiddle12.im * x326n.re
4357            + -self.twiddle13.im * x425n.re
4358            + -self.twiddle9.im * x524n.re
4359            + -self.twiddle5.im * x623n.re
4360            + -self.twiddle1.im * x722n.re
4361            + self.twiddle3.im * x821n.re
4362            + self.twiddle7.im * x920n.re
4363            + self.twiddle11.im * x1019n.re
4364            + -self.twiddle14.im * x1118n.re
4365            + -self.twiddle10.im * x1217n.re
4366            + -self.twiddle6.im * x1316n.re
4367            + -self.twiddle2.im * x1415n.re;
4368        let b524im_a = buffer.load(0).im
4369            + self.twiddle5.re * x128p.im
4370            + self.twiddle10.re * x227p.im
4371            + self.twiddle14.re * x326p.im
4372            + self.twiddle9.re * x425p.im
4373            + self.twiddle4.re * x524p.im
4374            + self.twiddle1.re * x623p.im
4375            + self.twiddle6.re * x722p.im
4376            + self.twiddle11.re * x821p.im
4377            + self.twiddle13.re * x920p.im
4378            + self.twiddle8.re * x1019p.im
4379            + self.twiddle3.re * x1118p.im
4380            + self.twiddle2.re * x1217p.im
4381            + self.twiddle7.re * x1316p.im
4382            + self.twiddle12.re * x1415p.im;
4383        let b524im_b = self.twiddle5.im * x128n.re
4384            + self.twiddle10.im * x227n.re
4385            + -self.twiddle14.im * x326n.re
4386            + -self.twiddle9.im * x425n.re
4387            + -self.twiddle4.im * x524n.re
4388            + self.twiddle1.im * x623n.re
4389            + self.twiddle6.im * x722n.re
4390            + self.twiddle11.im * x821n.re
4391            + -self.twiddle13.im * x920n.re
4392            + -self.twiddle8.im * x1019n.re
4393            + -self.twiddle3.im * x1118n.re
4394            + self.twiddle2.im * x1217n.re
4395            + self.twiddle7.im * x1316n.re
4396            + self.twiddle12.im * x1415n.re;
4397        let b623im_a = buffer.load(0).im
4398            + self.twiddle6.re * x128p.im
4399            + self.twiddle12.re * x227p.im
4400            + self.twiddle11.re * x326p.im
4401            + self.twiddle5.re * x425p.im
4402            + self.twiddle1.re * x524p.im
4403            + self.twiddle7.re * x623p.im
4404            + self.twiddle13.re * x722p.im
4405            + self.twiddle10.re * x821p.im
4406            + self.twiddle4.re * x920p.im
4407            + self.twiddle2.re * x1019p.im
4408            + self.twiddle8.re * x1118p.im
4409            + self.twiddle14.re * x1217p.im
4410            + self.twiddle9.re * x1316p.im
4411            + self.twiddle3.re * x1415p.im;
4412        let b623im_b = self.twiddle6.im * x128n.re
4413            + self.twiddle12.im * x227n.re
4414            + -self.twiddle11.im * x326n.re
4415            + -self.twiddle5.im * x425n.re
4416            + self.twiddle1.im * x524n.re
4417            + self.twiddle7.im * x623n.re
4418            + self.twiddle13.im * x722n.re
4419            + -self.twiddle10.im * x821n.re
4420            + -self.twiddle4.im * x920n.re
4421            + self.twiddle2.im * x1019n.re
4422            + self.twiddle8.im * x1118n.re
4423            + self.twiddle14.im * x1217n.re
4424            + -self.twiddle9.im * x1316n.re
4425            + -self.twiddle3.im * x1415n.re;
4426        let b722im_a = buffer.load(0).im
4427            + self.twiddle7.re * x128p.im
4428            + self.twiddle14.re * x227p.im
4429            + self.twiddle8.re * x326p.im
4430            + self.twiddle1.re * x425p.im
4431            + self.twiddle6.re * x524p.im
4432            + self.twiddle13.re * x623p.im
4433            + self.twiddle9.re * x722p.im
4434            + self.twiddle2.re * x821p.im
4435            + self.twiddle5.re * x920p.im
4436            + self.twiddle12.re * x1019p.im
4437            + self.twiddle10.re * x1118p.im
4438            + self.twiddle3.re * x1217p.im
4439            + self.twiddle4.re * x1316p.im
4440            + self.twiddle11.re * x1415p.im;
4441        let b722im_b = self.twiddle7.im * x128n.re
4442            + self.twiddle14.im * x227n.re
4443            + -self.twiddle8.im * x326n.re
4444            + -self.twiddle1.im * x425n.re
4445            + self.twiddle6.im * x524n.re
4446            + self.twiddle13.im * x623n.re
4447            + -self.twiddle9.im * x722n.re
4448            + -self.twiddle2.im * x821n.re
4449            + self.twiddle5.im * x920n.re
4450            + self.twiddle12.im * x1019n.re
4451            + -self.twiddle10.im * x1118n.re
4452            + -self.twiddle3.im * x1217n.re
4453            + self.twiddle4.im * x1316n.re
4454            + self.twiddle11.im * x1415n.re;
4455        let b821im_a = buffer.load(0).im
4456            + self.twiddle8.re * x128p.im
4457            + self.twiddle13.re * x227p.im
4458            + self.twiddle5.re * x326p.im
4459            + self.twiddle3.re * x425p.im
4460            + self.twiddle11.re * x524p.im
4461            + self.twiddle10.re * x623p.im
4462            + self.twiddle2.re * x722p.im
4463            + self.twiddle6.re * x821p.im
4464            + self.twiddle14.re * x920p.im
4465            + self.twiddle7.re * x1019p.im
4466            + self.twiddle1.re * x1118p.im
4467            + self.twiddle9.re * x1217p.im
4468            + self.twiddle12.re * x1316p.im
4469            + self.twiddle4.re * x1415p.im;
4470        let b821im_b = self.twiddle8.im * x128n.re
4471            + -self.twiddle13.im * x227n.re
4472            + -self.twiddle5.im * x326n.re
4473            + self.twiddle3.im * x425n.re
4474            + self.twiddle11.im * x524n.re
4475            + -self.twiddle10.im * x623n.re
4476            + -self.twiddle2.im * x722n.re
4477            + self.twiddle6.im * x821n.re
4478            + self.twiddle14.im * x920n.re
4479            + -self.twiddle7.im * x1019n.re
4480            + self.twiddle1.im * x1118n.re
4481            + self.twiddle9.im * x1217n.re
4482            + -self.twiddle12.im * x1316n.re
4483            + -self.twiddle4.im * x1415n.re;
4484        let b920im_a = buffer.load(0).im
4485            + self.twiddle9.re * x128p.im
4486            + self.twiddle11.re * x227p.im
4487            + self.twiddle2.re * x326p.im
4488            + self.twiddle7.re * x425p.im
4489            + self.twiddle13.re * x524p.im
4490            + self.twiddle4.re * x623p.im
4491            + self.twiddle5.re * x722p.im
4492            + self.twiddle14.re * x821p.im
4493            + self.twiddle6.re * x920p.im
4494            + self.twiddle3.re * x1019p.im
4495            + self.twiddle12.re * x1118p.im
4496            + self.twiddle8.re * x1217p.im
4497            + self.twiddle1.re * x1316p.im
4498            + self.twiddle10.re * x1415p.im;
4499        let b920im_b = self.twiddle9.im * x128n.re
4500            + -self.twiddle11.im * x227n.re
4501            + -self.twiddle2.im * x326n.re
4502            + self.twiddle7.im * x425n.re
4503            + -self.twiddle13.im * x524n.re
4504            + -self.twiddle4.im * x623n.re
4505            + self.twiddle5.im * x722n.re
4506            + self.twiddle14.im * x821n.re
4507            + -self.twiddle6.im * x920n.re
4508            + self.twiddle3.im * x1019n.re
4509            + self.twiddle12.im * x1118n.re
4510            + -self.twiddle8.im * x1217n.re
4511            + self.twiddle1.im * x1316n.re
4512            + self.twiddle10.im * x1415n.re;
4513        let b1019im_a = buffer.load(0).im
4514            + self.twiddle10.re * x128p.im
4515            + self.twiddle9.re * x227p.im
4516            + self.twiddle1.re * x326p.im
4517            + self.twiddle11.re * x425p.im
4518            + self.twiddle8.re * x524p.im
4519            + self.twiddle2.re * x623p.im
4520            + self.twiddle12.re * x722p.im
4521            + self.twiddle7.re * x821p.im
4522            + self.twiddle3.re * x920p.im
4523            + self.twiddle13.re * x1019p.im
4524            + self.twiddle6.re * x1118p.im
4525            + self.twiddle4.re * x1217p.im
4526            + self.twiddle14.re * x1316p.im
4527            + self.twiddle5.re * x1415p.im;
4528        let b1019im_b = self.twiddle10.im * x128n.re
4529            + -self.twiddle9.im * x227n.re
4530            + self.twiddle1.im * x326n.re
4531            + self.twiddle11.im * x425n.re
4532            + -self.twiddle8.im * x524n.re
4533            + self.twiddle2.im * x623n.re
4534            + self.twiddle12.im * x722n.re
4535            + -self.twiddle7.im * x821n.re
4536            + self.twiddle3.im * x920n.re
4537            + self.twiddle13.im * x1019n.re
4538            + -self.twiddle6.im * x1118n.re
4539            + self.twiddle4.im * x1217n.re
4540            + self.twiddle14.im * x1316n.re
4541            + -self.twiddle5.im * x1415n.re;
4542        let b1118im_a = buffer.load(0).im
4543            + self.twiddle11.re * x128p.im
4544            + self.twiddle7.re * x227p.im
4545            + self.twiddle4.re * x326p.im
4546            + self.twiddle14.re * x425p.im
4547            + self.twiddle3.re * x524p.im
4548            + self.twiddle8.re * x623p.im
4549            + self.twiddle10.re * x722p.im
4550            + self.twiddle1.re * x821p.im
4551            + self.twiddle12.re * x920p.im
4552            + self.twiddle6.re * x1019p.im
4553            + self.twiddle5.re * x1118p.im
4554            + self.twiddle13.re * x1217p.im
4555            + self.twiddle2.re * x1316p.im
4556            + self.twiddle9.re * x1415p.im;
4557        let b1118im_b = self.twiddle11.im * x128n.re
4558            + -self.twiddle7.im * x227n.re
4559            + self.twiddle4.im * x326n.re
4560            + -self.twiddle14.im * x425n.re
4561            + -self.twiddle3.im * x524n.re
4562            + self.twiddle8.im * x623n.re
4563            + -self.twiddle10.im * x722n.re
4564            + self.twiddle1.im * x821n.re
4565            + self.twiddle12.im * x920n.re
4566            + -self.twiddle6.im * x1019n.re
4567            + self.twiddle5.im * x1118n.re
4568            + -self.twiddle13.im * x1217n.re
4569            + -self.twiddle2.im * x1316n.re
4570            + self.twiddle9.im * x1415n.re;
4571        let b1217im_a = buffer.load(0).im
4572            + self.twiddle12.re * x128p.im
4573            + self.twiddle5.re * x227p.im
4574            + self.twiddle7.re * x326p.im
4575            + self.twiddle10.re * x425p.im
4576            + self.twiddle2.re * x524p.im
4577            + self.twiddle14.re * x623p.im
4578            + self.twiddle3.re * x722p.im
4579            + self.twiddle9.re * x821p.im
4580            + self.twiddle8.re * x920p.im
4581            + self.twiddle4.re * x1019p.im
4582            + self.twiddle13.re * x1118p.im
4583            + self.twiddle1.re * x1217p.im
4584            + self.twiddle11.re * x1316p.im
4585            + self.twiddle6.re * x1415p.im;
4586        let b1217im_b = self.twiddle12.im * x128n.re
4587            + -self.twiddle5.im * x227n.re
4588            + self.twiddle7.im * x326n.re
4589            + -self.twiddle10.im * x425n.re
4590            + self.twiddle2.im * x524n.re
4591            + self.twiddle14.im * x623n.re
4592            + -self.twiddle3.im * x722n.re
4593            + self.twiddle9.im * x821n.re
4594            + -self.twiddle8.im * x920n.re
4595            + self.twiddle4.im * x1019n.re
4596            + -self.twiddle13.im * x1118n.re
4597            + -self.twiddle1.im * x1217n.re
4598            + self.twiddle11.im * x1316n.re
4599            + -self.twiddle6.im * x1415n.re;
4600        let b1316im_a = buffer.load(0).im
4601            + self.twiddle13.re * x128p.im
4602            + self.twiddle3.re * x227p.im
4603            + self.twiddle10.re * x326p.im
4604            + self.twiddle6.re * x425p.im
4605            + self.twiddle7.re * x524p.im
4606            + self.twiddle9.re * x623p.im
4607            + self.twiddle4.re * x722p.im
4608            + self.twiddle12.re * x821p.im
4609            + self.twiddle1.re * x920p.im
4610            + self.twiddle14.re * x1019p.im
4611            + self.twiddle2.re * x1118p.im
4612            + self.twiddle11.re * x1217p.im
4613            + self.twiddle5.re * x1316p.im
4614            + self.twiddle8.re * x1415p.im;
4615        let b1316im_b = self.twiddle13.im * x128n.re
4616            + -self.twiddle3.im * x227n.re
4617            + self.twiddle10.im * x326n.re
4618            + -self.twiddle6.im * x425n.re
4619            + self.twiddle7.im * x524n.re
4620            + -self.twiddle9.im * x623n.re
4621            + self.twiddle4.im * x722n.re
4622            + -self.twiddle12.im * x821n.re
4623            + self.twiddle1.im * x920n.re
4624            + self.twiddle14.im * x1019n.re
4625            + -self.twiddle2.im * x1118n.re
4626            + self.twiddle11.im * x1217n.re
4627            + -self.twiddle5.im * x1316n.re
4628            + self.twiddle8.im * x1415n.re;
4629        let b1415im_a = buffer.load(0).im
4630            + self.twiddle14.re * x128p.im
4631            + self.twiddle1.re * x227p.im
4632            + self.twiddle13.re * x326p.im
4633            + self.twiddle2.re * x425p.im
4634            + self.twiddle12.re * x524p.im
4635            + self.twiddle3.re * x623p.im
4636            + self.twiddle11.re * x722p.im
4637            + self.twiddle4.re * x821p.im
4638            + self.twiddle10.re * x920p.im
4639            + self.twiddle5.re * x1019p.im
4640            + self.twiddle9.re * x1118p.im
4641            + self.twiddle6.re * x1217p.im
4642            + self.twiddle8.re * x1316p.im
4643            + self.twiddle7.re * x1415p.im;
4644        let b1415im_b = self.twiddle14.im * x128n.re
4645            + -self.twiddle1.im * x227n.re
4646            + self.twiddle13.im * x326n.re
4647            + -self.twiddle2.im * x425n.re
4648            + self.twiddle12.im * x524n.re
4649            + -self.twiddle3.im * x623n.re
4650            + self.twiddle11.im * x722n.re
4651            + -self.twiddle4.im * x821n.re
4652            + self.twiddle10.im * x920n.re
4653            + -self.twiddle5.im * x1019n.re
4654            + self.twiddle9.im * x1118n.re
4655            + -self.twiddle6.im * x1217n.re
4656            + self.twiddle8.im * x1316n.re
4657            + -self.twiddle7.im * x1415n.re;
4658
4659        let out1re = b128re_a - b128re_b;
4660        let out1im = b128im_a + b128im_b;
4661        let out2re = b227re_a - b227re_b;
4662        let out2im = b227im_a + b227im_b;
4663        let out3re = b326re_a - b326re_b;
4664        let out3im = b326im_a + b326im_b;
4665        let out4re = b425re_a - b425re_b;
4666        let out4im = b425im_a + b425im_b;
4667        let out5re = b524re_a - b524re_b;
4668        let out5im = b524im_a + b524im_b;
4669        let out6re = b623re_a - b623re_b;
4670        let out6im = b623im_a + b623im_b;
4671        let out7re = b722re_a - b722re_b;
4672        let out7im = b722im_a + b722im_b;
4673        let out8re = b821re_a - b821re_b;
4674        let out8im = b821im_a + b821im_b;
4675        let out9re = b920re_a - b920re_b;
4676        let out9im = b920im_a + b920im_b;
4677        let out10re = b1019re_a - b1019re_b;
4678        let out10im = b1019im_a + b1019im_b;
4679        let out11re = b1118re_a - b1118re_b;
4680        let out11im = b1118im_a + b1118im_b;
4681        let out12re = b1217re_a - b1217re_b;
4682        let out12im = b1217im_a + b1217im_b;
4683        let out13re = b1316re_a - b1316re_b;
4684        let out13im = b1316im_a + b1316im_b;
4685        let out14re = b1415re_a - b1415re_b;
4686        let out14im = b1415im_a + b1415im_b;
4687        let out15re = b1415re_a + b1415re_b;
4688        let out15im = b1415im_a - b1415im_b;
4689        let out16re = b1316re_a + b1316re_b;
4690        let out16im = b1316im_a - b1316im_b;
4691        let out17re = b1217re_a + b1217re_b;
4692        let out17im = b1217im_a - b1217im_b;
4693        let out18re = b1118re_a + b1118re_b;
4694        let out18im = b1118im_a - b1118im_b;
4695        let out19re = b1019re_a + b1019re_b;
4696        let out19im = b1019im_a - b1019im_b;
4697        let out20re = b920re_a + b920re_b;
4698        let out20im = b920im_a - b920im_b;
4699        let out21re = b821re_a + b821re_b;
4700        let out21im = b821im_a - b821im_b;
4701        let out22re = b722re_a + b722re_b;
4702        let out22im = b722im_a - b722im_b;
4703        let out23re = b623re_a + b623re_b;
4704        let out23im = b623im_a - b623im_b;
4705        let out24re = b524re_a + b524re_b;
4706        let out24im = b524im_a - b524im_b;
4707        let out25re = b425re_a + b425re_b;
4708        let out25im = b425im_a - b425im_b;
4709        let out26re = b326re_a + b326re_b;
4710        let out26im = b326im_a - b326im_b;
4711        let out27re = b227re_a + b227re_b;
4712        let out27im = b227im_a - b227im_b;
4713        let out28re = b128re_a + b128re_b;
4714        let out28im = b128im_a - b128im_b;
4715        buffer.store(sum, 0);
4716        buffer.store(
4717            Complex {
4718                re: out1re,
4719                im: out1im,
4720            },
4721            1,
4722        );
4723        buffer.store(
4724            Complex {
4725                re: out2re,
4726                im: out2im,
4727            },
4728            2,
4729        );
4730        buffer.store(
4731            Complex {
4732                re: out3re,
4733                im: out3im,
4734            },
4735            3,
4736        );
4737        buffer.store(
4738            Complex {
4739                re: out4re,
4740                im: out4im,
4741            },
4742            4,
4743        );
4744        buffer.store(
4745            Complex {
4746                re: out5re,
4747                im: out5im,
4748            },
4749            5,
4750        );
4751        buffer.store(
4752            Complex {
4753                re: out6re,
4754                im: out6im,
4755            },
4756            6,
4757        );
4758        buffer.store(
4759            Complex {
4760                re: out7re,
4761                im: out7im,
4762            },
4763            7,
4764        );
4765        buffer.store(
4766            Complex {
4767                re: out8re,
4768                im: out8im,
4769            },
4770            8,
4771        );
4772        buffer.store(
4773            Complex {
4774                re: out9re,
4775                im: out9im,
4776            },
4777            9,
4778        );
4779        buffer.store(
4780            Complex {
4781                re: out10re,
4782                im: out10im,
4783            },
4784            10,
4785        );
4786        buffer.store(
4787            Complex {
4788                re: out11re,
4789                im: out11im,
4790            },
4791            11,
4792        );
4793        buffer.store(
4794            Complex {
4795                re: out12re,
4796                im: out12im,
4797            },
4798            12,
4799        );
4800        buffer.store(
4801            Complex {
4802                re: out13re,
4803                im: out13im,
4804            },
4805            13,
4806        );
4807        buffer.store(
4808            Complex {
4809                re: out14re,
4810                im: out14im,
4811            },
4812            14,
4813        );
4814        buffer.store(
4815            Complex {
4816                re: out15re,
4817                im: out15im,
4818            },
4819            15,
4820        );
4821        buffer.store(
4822            Complex {
4823                re: out16re,
4824                im: out16im,
4825            },
4826            16,
4827        );
4828        buffer.store(
4829            Complex {
4830                re: out17re,
4831                im: out17im,
4832            },
4833            17,
4834        );
4835        buffer.store(
4836            Complex {
4837                re: out18re,
4838                im: out18im,
4839            },
4840            18,
4841        );
4842        buffer.store(
4843            Complex {
4844                re: out19re,
4845                im: out19im,
4846            },
4847            19,
4848        );
4849        buffer.store(
4850            Complex {
4851                re: out20re,
4852                im: out20im,
4853            },
4854            20,
4855        );
4856        buffer.store(
4857            Complex {
4858                re: out21re,
4859                im: out21im,
4860            },
4861            21,
4862        );
4863        buffer.store(
4864            Complex {
4865                re: out22re,
4866                im: out22im,
4867            },
4868            22,
4869        );
4870        buffer.store(
4871            Complex {
4872                re: out23re,
4873                im: out23im,
4874            },
4875            23,
4876        );
4877        buffer.store(
4878            Complex {
4879                re: out24re,
4880                im: out24im,
4881            },
4882            24,
4883        );
4884        buffer.store(
4885            Complex {
4886                re: out25re,
4887                im: out25im,
4888            },
4889            25,
4890        );
4891        buffer.store(
4892            Complex {
4893                re: out26re,
4894                im: out26im,
4895            },
4896            26,
4897        );
4898        buffer.store(
4899            Complex {
4900                re: out27re,
4901                im: out27im,
4902            },
4903            27,
4904        );
4905        buffer.store(
4906            Complex {
4907                re: out28re,
4908                im: out28im,
4909            },
4910            28,
4911        );
4912    }
4913}
4914pub struct Butterfly31<T> {
4915    twiddle1: Complex<T>,
4916    twiddle2: Complex<T>,
4917    twiddle3: Complex<T>,
4918    twiddle4: Complex<T>,
4919    twiddle5: Complex<T>,
4920    twiddle6: Complex<T>,
4921    twiddle7: Complex<T>,
4922    twiddle8: Complex<T>,
4923    twiddle9: Complex<T>,
4924    twiddle10: Complex<T>,
4925    twiddle11: Complex<T>,
4926    twiddle12: Complex<T>,
4927    twiddle13: Complex<T>,
4928    twiddle14: Complex<T>,
4929    twiddle15: Complex<T>,
4930    direction: FftDirection,
4931}
4932boilerplate_fft_butterfly!(Butterfly31, 31, |this: &Butterfly31<_>| this.direction);
4933impl<T: FftNum> Butterfly31<T> {
4934    pub fn new(direction: FftDirection) -> Self {
4935        let twiddle1: Complex<T> = twiddles::compute_twiddle(1, 31, direction);
4936        let twiddle2: Complex<T> = twiddles::compute_twiddle(2, 31, direction);
4937        let twiddle3: Complex<T> = twiddles::compute_twiddle(3, 31, direction);
4938        let twiddle4: Complex<T> = twiddles::compute_twiddle(4, 31, direction);
4939        let twiddle5: Complex<T> = twiddles::compute_twiddle(5, 31, direction);
4940        let twiddle6: Complex<T> = twiddles::compute_twiddle(6, 31, direction);
4941        let twiddle7: Complex<T> = twiddles::compute_twiddle(7, 31, direction);
4942        let twiddle8: Complex<T> = twiddles::compute_twiddle(8, 31, direction);
4943        let twiddle9: Complex<T> = twiddles::compute_twiddle(9, 31, direction);
4944        let twiddle10: Complex<T> = twiddles::compute_twiddle(10, 31, direction);
4945        let twiddle11: Complex<T> = twiddles::compute_twiddle(11, 31, direction);
4946        let twiddle12: Complex<T> = twiddles::compute_twiddle(12, 31, direction);
4947        let twiddle13: Complex<T> = twiddles::compute_twiddle(13, 31, direction);
4948        let twiddle14: Complex<T> = twiddles::compute_twiddle(14, 31, direction);
4949        let twiddle15: Complex<T> = twiddles::compute_twiddle(15, 31, direction);
4950        Self {
4951            twiddle1,
4952            twiddle2,
4953            twiddle3,
4954            twiddle4,
4955            twiddle5,
4956            twiddle6,
4957            twiddle7,
4958            twiddle8,
4959            twiddle9,
4960            twiddle10,
4961            twiddle11,
4962            twiddle12,
4963            twiddle13,
4964            twiddle14,
4965            twiddle15,
4966            direction,
4967        }
4968    }
4969
4970    #[inline(never)]
4971    unsafe fn perform_fft_contiguous(&self, mut buffer: impl LoadStore<T>) {
4972        // This function was derived in the same manner as the butterflies for length 3, 5 and 7.
4973        // However, instead of doing it by hand the actual code is autogenerated
4974        // with the `genbutterflies.py` script in the `tools` directory.
4975        let x130p = buffer.load(1) + buffer.load(30);
4976        let x130n = buffer.load(1) - buffer.load(30);
4977        let x229p = buffer.load(2) + buffer.load(29);
4978        let x229n = buffer.load(2) - buffer.load(29);
4979        let x328p = buffer.load(3) + buffer.load(28);
4980        let x328n = buffer.load(3) - buffer.load(28);
4981        let x427p = buffer.load(4) + buffer.load(27);
4982        let x427n = buffer.load(4) - buffer.load(27);
4983        let x526p = buffer.load(5) + buffer.load(26);
4984        let x526n = buffer.load(5) - buffer.load(26);
4985        let x625p = buffer.load(6) + buffer.load(25);
4986        let x625n = buffer.load(6) - buffer.load(25);
4987        let x724p = buffer.load(7) + buffer.load(24);
4988        let x724n = buffer.load(7) - buffer.load(24);
4989        let x823p = buffer.load(8) + buffer.load(23);
4990        let x823n = buffer.load(8) - buffer.load(23);
4991        let x922p = buffer.load(9) + buffer.load(22);
4992        let x922n = buffer.load(9) - buffer.load(22);
4993        let x1021p = buffer.load(10) + buffer.load(21);
4994        let x1021n = buffer.load(10) - buffer.load(21);
4995        let x1120p = buffer.load(11) + buffer.load(20);
4996        let x1120n = buffer.load(11) - buffer.load(20);
4997        let x1219p = buffer.load(12) + buffer.load(19);
4998        let x1219n = buffer.load(12) - buffer.load(19);
4999        let x1318p = buffer.load(13) + buffer.load(18);
5000        let x1318n = buffer.load(13) - buffer.load(18);
5001        let x1417p = buffer.load(14) + buffer.load(17);
5002        let x1417n = buffer.load(14) - buffer.load(17);
5003        let x1516p = buffer.load(15) + buffer.load(16);
5004        let x1516n = buffer.load(15) - buffer.load(16);
5005        let sum = buffer.load(0)
5006            + x130p
5007            + x229p
5008            + x328p
5009            + x427p
5010            + x526p
5011            + x625p
5012            + x724p
5013            + x823p
5014            + x922p
5015            + x1021p
5016            + x1120p
5017            + x1219p
5018            + x1318p
5019            + x1417p
5020            + x1516p;
5021        let b130re_a = buffer.load(0).re
5022            + self.twiddle1.re * x130p.re
5023            + self.twiddle2.re * x229p.re
5024            + self.twiddle3.re * x328p.re
5025            + self.twiddle4.re * x427p.re
5026            + self.twiddle5.re * x526p.re
5027            + self.twiddle6.re * x625p.re
5028            + self.twiddle7.re * x724p.re
5029            + self.twiddle8.re * x823p.re
5030            + self.twiddle9.re * x922p.re
5031            + self.twiddle10.re * x1021p.re
5032            + self.twiddle11.re * x1120p.re
5033            + self.twiddle12.re * x1219p.re
5034            + self.twiddle13.re * x1318p.re
5035            + self.twiddle14.re * x1417p.re
5036            + self.twiddle15.re * x1516p.re;
5037        let b130re_b = self.twiddle1.im * x130n.im
5038            + self.twiddle2.im * x229n.im
5039            + self.twiddle3.im * x328n.im
5040            + self.twiddle4.im * x427n.im
5041            + self.twiddle5.im * x526n.im
5042            + self.twiddle6.im * x625n.im
5043            + self.twiddle7.im * x724n.im
5044            + self.twiddle8.im * x823n.im
5045            + self.twiddle9.im * x922n.im
5046            + self.twiddle10.im * x1021n.im
5047            + self.twiddle11.im * x1120n.im
5048            + self.twiddle12.im * x1219n.im
5049            + self.twiddle13.im * x1318n.im
5050            + self.twiddle14.im * x1417n.im
5051            + self.twiddle15.im * x1516n.im;
5052        let b229re_a = buffer.load(0).re
5053            + self.twiddle2.re * x130p.re
5054            + self.twiddle4.re * x229p.re
5055            + self.twiddle6.re * x328p.re
5056            + self.twiddle8.re * x427p.re
5057            + self.twiddle10.re * x526p.re
5058            + self.twiddle12.re * x625p.re
5059            + self.twiddle14.re * x724p.re
5060            + self.twiddle15.re * x823p.re
5061            + self.twiddle13.re * x922p.re
5062            + self.twiddle11.re * x1021p.re
5063            + self.twiddle9.re * x1120p.re
5064            + self.twiddle7.re * x1219p.re
5065            + self.twiddle5.re * x1318p.re
5066            + self.twiddle3.re * x1417p.re
5067            + self.twiddle1.re * x1516p.re;
5068        let b229re_b = self.twiddle2.im * x130n.im
5069            + self.twiddle4.im * x229n.im
5070            + self.twiddle6.im * x328n.im
5071            + self.twiddle8.im * x427n.im
5072            + self.twiddle10.im * x526n.im
5073            + self.twiddle12.im * x625n.im
5074            + self.twiddle14.im * x724n.im
5075            + -self.twiddle15.im * x823n.im
5076            + -self.twiddle13.im * x922n.im
5077            + -self.twiddle11.im * x1021n.im
5078            + -self.twiddle9.im * x1120n.im
5079            + -self.twiddle7.im * x1219n.im
5080            + -self.twiddle5.im * x1318n.im
5081            + -self.twiddle3.im * x1417n.im
5082            + -self.twiddle1.im * x1516n.im;
5083        let b328re_a = buffer.load(0).re
5084            + self.twiddle3.re * x130p.re
5085            + self.twiddle6.re * x229p.re
5086            + self.twiddle9.re * x328p.re
5087            + self.twiddle12.re * x427p.re
5088            + self.twiddle15.re * x526p.re
5089            + self.twiddle13.re * x625p.re
5090            + self.twiddle10.re * x724p.re
5091            + self.twiddle7.re * x823p.re
5092            + self.twiddle4.re * x922p.re
5093            + self.twiddle1.re * x1021p.re
5094            + self.twiddle2.re * x1120p.re
5095            + self.twiddle5.re * x1219p.re
5096            + self.twiddle8.re * x1318p.re
5097            + self.twiddle11.re * x1417p.re
5098            + self.twiddle14.re * x1516p.re;
5099        let b328re_b = self.twiddle3.im * x130n.im
5100            + self.twiddle6.im * x229n.im
5101            + self.twiddle9.im * x328n.im
5102            + self.twiddle12.im * x427n.im
5103            + self.twiddle15.im * x526n.im
5104            + -self.twiddle13.im * x625n.im
5105            + -self.twiddle10.im * x724n.im
5106            + -self.twiddle7.im * x823n.im
5107            + -self.twiddle4.im * x922n.im
5108            + -self.twiddle1.im * x1021n.im
5109            + self.twiddle2.im * x1120n.im
5110            + self.twiddle5.im * x1219n.im
5111            + self.twiddle8.im * x1318n.im
5112            + self.twiddle11.im * x1417n.im
5113            + self.twiddle14.im * x1516n.im;
5114        let b427re_a = buffer.load(0).re
5115            + self.twiddle4.re * x130p.re
5116            + self.twiddle8.re * x229p.re
5117            + self.twiddle12.re * x328p.re
5118            + self.twiddle15.re * x427p.re
5119            + self.twiddle11.re * x526p.re
5120            + self.twiddle7.re * x625p.re
5121            + self.twiddle3.re * x724p.re
5122            + self.twiddle1.re * x823p.re
5123            + self.twiddle5.re * x922p.re
5124            + self.twiddle9.re * x1021p.re
5125            + self.twiddle13.re * x1120p.re
5126            + self.twiddle14.re * x1219p.re
5127            + self.twiddle10.re * x1318p.re
5128            + self.twiddle6.re * x1417p.re
5129            + self.twiddle2.re * x1516p.re;
5130        let b427re_b = self.twiddle4.im * x130n.im
5131            + self.twiddle8.im * x229n.im
5132            + self.twiddle12.im * x328n.im
5133            + -self.twiddle15.im * x427n.im
5134            + -self.twiddle11.im * x526n.im
5135            + -self.twiddle7.im * x625n.im
5136            + -self.twiddle3.im * x724n.im
5137            + self.twiddle1.im * x823n.im
5138            + self.twiddle5.im * x922n.im
5139            + self.twiddle9.im * x1021n.im
5140            + self.twiddle13.im * x1120n.im
5141            + -self.twiddle14.im * x1219n.im
5142            + -self.twiddle10.im * x1318n.im
5143            + -self.twiddle6.im * x1417n.im
5144            + -self.twiddle2.im * x1516n.im;
5145        let b526re_a = buffer.load(0).re
5146            + self.twiddle5.re * x130p.re
5147            + self.twiddle10.re * x229p.re
5148            + self.twiddle15.re * x328p.re
5149            + self.twiddle11.re * x427p.re
5150            + self.twiddle6.re * x526p.re
5151            + self.twiddle1.re * x625p.re
5152            + self.twiddle4.re * x724p.re
5153            + self.twiddle9.re * x823p.re
5154            + self.twiddle14.re * x922p.re
5155            + self.twiddle12.re * x1021p.re
5156            + self.twiddle7.re * x1120p.re
5157            + self.twiddle2.re * x1219p.re
5158            + self.twiddle3.re * x1318p.re
5159            + self.twiddle8.re * x1417p.re
5160            + self.twiddle13.re * x1516p.re;
5161        let b526re_b = self.twiddle5.im * x130n.im
5162            + self.twiddle10.im * x229n.im
5163            + self.twiddle15.im * x328n.im
5164            + -self.twiddle11.im * x427n.im
5165            + -self.twiddle6.im * x526n.im
5166            + -self.twiddle1.im * x625n.im
5167            + self.twiddle4.im * x724n.im
5168            + self.twiddle9.im * x823n.im
5169            + self.twiddle14.im * x922n.im
5170            + -self.twiddle12.im * x1021n.im
5171            + -self.twiddle7.im * x1120n.im
5172            + -self.twiddle2.im * x1219n.im
5173            + self.twiddle3.im * x1318n.im
5174            + self.twiddle8.im * x1417n.im
5175            + self.twiddle13.im * x1516n.im;
5176        let b625re_a = buffer.load(0).re
5177            + self.twiddle6.re * x130p.re
5178            + self.twiddle12.re * x229p.re
5179            + self.twiddle13.re * x328p.re
5180            + self.twiddle7.re * x427p.re
5181            + self.twiddle1.re * x526p.re
5182            + self.twiddle5.re * x625p.re
5183            + self.twiddle11.re * x724p.re
5184            + self.twiddle14.re * x823p.re
5185            + self.twiddle8.re * x922p.re
5186            + self.twiddle2.re * x1021p.re
5187            + self.twiddle4.re * x1120p.re
5188            + self.twiddle10.re * x1219p.re
5189            + self.twiddle15.re * x1318p.re
5190            + self.twiddle9.re * x1417p.re
5191            + self.twiddle3.re * x1516p.re;
5192        let b625re_b = self.twiddle6.im * x130n.im
5193            + self.twiddle12.im * x229n.im
5194            + -self.twiddle13.im * x328n.im
5195            + -self.twiddle7.im * x427n.im
5196            + -self.twiddle1.im * x526n.im
5197            + self.twiddle5.im * x625n.im
5198            + self.twiddle11.im * x724n.im
5199            + -self.twiddle14.im * x823n.im
5200            + -self.twiddle8.im * x922n.im
5201            + -self.twiddle2.im * x1021n.im
5202            + self.twiddle4.im * x1120n.im
5203            + self.twiddle10.im * x1219n.im
5204            + -self.twiddle15.im * x1318n.im
5205            + -self.twiddle9.im * x1417n.im
5206            + -self.twiddle3.im * x1516n.im;
5207        let b724re_a = buffer.load(0).re
5208            + self.twiddle7.re * x130p.re
5209            + self.twiddle14.re * x229p.re
5210            + self.twiddle10.re * x328p.re
5211            + self.twiddle3.re * x427p.re
5212            + self.twiddle4.re * x526p.re
5213            + self.twiddle11.re * x625p.re
5214            + self.twiddle13.re * x724p.re
5215            + self.twiddle6.re * x823p.re
5216            + self.twiddle1.re * x922p.re
5217            + self.twiddle8.re * x1021p.re
5218            + self.twiddle15.re * x1120p.re
5219            + self.twiddle9.re * x1219p.re
5220            + self.twiddle2.re * x1318p.re
5221            + self.twiddle5.re * x1417p.re
5222            + self.twiddle12.re * x1516p.re;
5223        let b724re_b = self.twiddle7.im * x130n.im
5224            + self.twiddle14.im * x229n.im
5225            + -self.twiddle10.im * x328n.im
5226            + -self.twiddle3.im * x427n.im
5227            + self.twiddle4.im * x526n.im
5228            + self.twiddle11.im * x625n.im
5229            + -self.twiddle13.im * x724n.im
5230            + -self.twiddle6.im * x823n.im
5231            + self.twiddle1.im * x922n.im
5232            + self.twiddle8.im * x1021n.im
5233            + self.twiddle15.im * x1120n.im
5234            + -self.twiddle9.im * x1219n.im
5235            + -self.twiddle2.im * x1318n.im
5236            + self.twiddle5.im * x1417n.im
5237            + self.twiddle12.im * x1516n.im;
5238        let b823re_a = buffer.load(0).re
5239            + self.twiddle8.re * x130p.re
5240            + self.twiddle15.re * x229p.re
5241            + self.twiddle7.re * x328p.re
5242            + self.twiddle1.re * x427p.re
5243            + self.twiddle9.re * x526p.re
5244            + self.twiddle14.re * x625p.re
5245            + self.twiddle6.re * x724p.re
5246            + self.twiddle2.re * x823p.re
5247            + self.twiddle10.re * x922p.re
5248            + self.twiddle13.re * x1021p.re
5249            + self.twiddle5.re * x1120p.re
5250            + self.twiddle3.re * x1219p.re
5251            + self.twiddle11.re * x1318p.re
5252            + self.twiddle12.re * x1417p.re
5253            + self.twiddle4.re * x1516p.re;
5254        let b823re_b = self.twiddle8.im * x130n.im
5255            + -self.twiddle15.im * x229n.im
5256            + -self.twiddle7.im * x328n.im
5257            + self.twiddle1.im * x427n.im
5258            + self.twiddle9.im * x526n.im
5259            + -self.twiddle14.im * x625n.im
5260            + -self.twiddle6.im * x724n.im
5261            + self.twiddle2.im * x823n.im
5262            + self.twiddle10.im * x922n.im
5263            + -self.twiddle13.im * x1021n.im
5264            + -self.twiddle5.im * x1120n.im
5265            + self.twiddle3.im * x1219n.im
5266            + self.twiddle11.im * x1318n.im
5267            + -self.twiddle12.im * x1417n.im
5268            + -self.twiddle4.im * x1516n.im;
5269        let b922re_a = buffer.load(0).re
5270            + self.twiddle9.re * x130p.re
5271            + self.twiddle13.re * x229p.re
5272            + self.twiddle4.re * x328p.re
5273            + self.twiddle5.re * x427p.re
5274            + self.twiddle14.re * x526p.re
5275            + self.twiddle8.re * x625p.re
5276            + self.twiddle1.re * x724p.re
5277            + self.twiddle10.re * x823p.re
5278            + self.twiddle12.re * x922p.re
5279            + self.twiddle3.re * x1021p.re
5280            + self.twiddle6.re * x1120p.re
5281            + self.twiddle15.re * x1219p.re
5282            + self.twiddle7.re * x1318p.re
5283            + self.twiddle2.re * x1417p.re
5284            + self.twiddle11.re * x1516p.re;
5285        let b922re_b = self.twiddle9.im * x130n.im
5286            + -self.twiddle13.im * x229n.im
5287            + -self.twiddle4.im * x328n.im
5288            + self.twiddle5.im * x427n.im
5289            + self.twiddle14.im * x526n.im
5290            + -self.twiddle8.im * x625n.im
5291            + self.twiddle1.im * x724n.im
5292            + self.twiddle10.im * x823n.im
5293            + -self.twiddle12.im * x922n.im
5294            + -self.twiddle3.im * x1021n.im
5295            + self.twiddle6.im * x1120n.im
5296            + self.twiddle15.im * x1219n.im
5297            + -self.twiddle7.im * x1318n.im
5298            + self.twiddle2.im * x1417n.im
5299            + self.twiddle11.im * x1516n.im;
5300        let b1021re_a = buffer.load(0).re
5301            + self.twiddle10.re * x130p.re
5302            + self.twiddle11.re * x229p.re
5303            + self.twiddle1.re * x328p.re
5304            + self.twiddle9.re * x427p.re
5305            + self.twiddle12.re * x526p.re
5306            + self.twiddle2.re * x625p.re
5307            + self.twiddle8.re * x724p.re
5308            + self.twiddle13.re * x823p.re
5309            + self.twiddle3.re * x922p.re
5310            + self.twiddle7.re * x1021p.re
5311            + self.twiddle14.re * x1120p.re
5312            + self.twiddle4.re * x1219p.re
5313            + self.twiddle6.re * x1318p.re
5314            + self.twiddle15.re * x1417p.re
5315            + self.twiddle5.re * x1516p.re;
5316        let b1021re_b = self.twiddle10.im * x130n.im
5317            + -self.twiddle11.im * x229n.im
5318            + -self.twiddle1.im * x328n.im
5319            + self.twiddle9.im * x427n.im
5320            + -self.twiddle12.im * x526n.im
5321            + -self.twiddle2.im * x625n.im
5322            + self.twiddle8.im * x724n.im
5323            + -self.twiddle13.im * x823n.im
5324            + -self.twiddle3.im * x922n.im
5325            + self.twiddle7.im * x1021n.im
5326            + -self.twiddle14.im * x1120n.im
5327            + -self.twiddle4.im * x1219n.im
5328            + self.twiddle6.im * x1318n.im
5329            + -self.twiddle15.im * x1417n.im
5330            + -self.twiddle5.im * x1516n.im;
5331        let b1120re_a = buffer.load(0).re
5332            + self.twiddle11.re * x130p.re
5333            + self.twiddle9.re * x229p.re
5334            + self.twiddle2.re * x328p.re
5335            + self.twiddle13.re * x427p.re
5336            + self.twiddle7.re * x526p.re
5337            + self.twiddle4.re * x625p.re
5338            + self.twiddle15.re * x724p.re
5339            + self.twiddle5.re * x823p.re
5340            + self.twiddle6.re * x922p.re
5341            + self.twiddle14.re * x1021p.re
5342            + self.twiddle3.re * x1120p.re
5343            + self.twiddle8.re * x1219p.re
5344            + self.twiddle12.re * x1318p.re
5345            + self.twiddle1.re * x1417p.re
5346            + self.twiddle10.re * x1516p.re;
5347        let b1120re_b = self.twiddle11.im * x130n.im
5348            + -self.twiddle9.im * x229n.im
5349            + self.twiddle2.im * x328n.im
5350            + self.twiddle13.im * x427n.im
5351            + -self.twiddle7.im * x526n.im
5352            + self.twiddle4.im * x625n.im
5353            + self.twiddle15.im * x724n.im
5354            + -self.twiddle5.im * x823n.im
5355            + self.twiddle6.im * x922n.im
5356            + -self.twiddle14.im * x1021n.im
5357            + -self.twiddle3.im * x1120n.im
5358            + self.twiddle8.im * x1219n.im
5359            + -self.twiddle12.im * x1318n.im
5360            + -self.twiddle1.im * x1417n.im
5361            + self.twiddle10.im * x1516n.im;
5362        let b1219re_a = buffer.load(0).re
5363            + self.twiddle12.re * x130p.re
5364            + self.twiddle7.re * x229p.re
5365            + self.twiddle5.re * x328p.re
5366            + self.twiddle14.re * x427p.re
5367            + self.twiddle2.re * x526p.re
5368            + self.twiddle10.re * x625p.re
5369            + self.twiddle9.re * x724p.re
5370            + self.twiddle3.re * x823p.re
5371            + self.twiddle15.re * x922p.re
5372            + self.twiddle4.re * x1021p.re
5373            + self.twiddle8.re * x1120p.re
5374            + self.twiddle11.re * x1219p.re
5375            + self.twiddle1.re * x1318p.re
5376            + self.twiddle13.re * x1417p.re
5377            + self.twiddle6.re * x1516p.re;
5378        let b1219re_b = self.twiddle12.im * x130n.im
5379            + -self.twiddle7.im * x229n.im
5380            + self.twiddle5.im * x328n.im
5381            + -self.twiddle14.im * x427n.im
5382            + -self.twiddle2.im * x526n.im
5383            + self.twiddle10.im * x625n.im
5384            + -self.twiddle9.im * x724n.im
5385            + self.twiddle3.im * x823n.im
5386            + self.twiddle15.im * x922n.im
5387            + -self.twiddle4.im * x1021n.im
5388            + self.twiddle8.im * x1120n.im
5389            + -self.twiddle11.im * x1219n.im
5390            + self.twiddle1.im * x1318n.im
5391            + self.twiddle13.im * x1417n.im
5392            + -self.twiddle6.im * x1516n.im;
5393        let b1318re_a = buffer.load(0).re
5394            + self.twiddle13.re * x130p.re
5395            + self.twiddle5.re * x229p.re
5396            + self.twiddle8.re * x328p.re
5397            + self.twiddle10.re * x427p.re
5398            + self.twiddle3.re * x526p.re
5399            + self.twiddle15.re * x625p.re
5400            + self.twiddle2.re * x724p.re
5401            + self.twiddle11.re * x823p.re
5402            + self.twiddle7.re * x922p.re
5403            + self.twiddle6.re * x1021p.re
5404            + self.twiddle12.re * x1120p.re
5405            + self.twiddle1.re * x1219p.re
5406            + self.twiddle14.re * x1318p.re
5407            + self.twiddle4.re * x1417p.re
5408            + self.twiddle9.re * x1516p.re;
5409        let b1318re_b = self.twiddle13.im * x130n.im
5410            + -self.twiddle5.im * x229n.im
5411            + self.twiddle8.im * x328n.im
5412            + -self.twiddle10.im * x427n.im
5413            + self.twiddle3.im * x526n.im
5414            + -self.twiddle15.im * x625n.im
5415            + -self.twiddle2.im * x724n.im
5416            + self.twiddle11.im * x823n.im
5417            + -self.twiddle7.im * x922n.im
5418            + self.twiddle6.im * x1021n.im
5419            + -self.twiddle12.im * x1120n.im
5420            + self.twiddle1.im * x1219n.im
5421            + self.twiddle14.im * x1318n.im
5422            + -self.twiddle4.im * x1417n.im
5423            + self.twiddle9.im * x1516n.im;
5424        let b1417re_a = buffer.load(0).re
5425            + self.twiddle14.re * x130p.re
5426            + self.twiddle3.re * x229p.re
5427            + self.twiddle11.re * x328p.re
5428            + self.twiddle6.re * x427p.re
5429            + self.twiddle8.re * x526p.re
5430            + self.twiddle9.re * x625p.re
5431            + self.twiddle5.re * x724p.re
5432            + self.twiddle12.re * x823p.re
5433            + self.twiddle2.re * x922p.re
5434            + self.twiddle15.re * x1021p.re
5435            + self.twiddle1.re * x1120p.re
5436            + self.twiddle13.re * x1219p.re
5437            + self.twiddle4.re * x1318p.re
5438            + self.twiddle10.re * x1417p.re
5439            + self.twiddle7.re * x1516p.re;
5440        let b1417re_b = self.twiddle14.im * x130n.im
5441            + -self.twiddle3.im * x229n.im
5442            + self.twiddle11.im * x328n.im
5443            + -self.twiddle6.im * x427n.im
5444            + self.twiddle8.im * x526n.im
5445            + -self.twiddle9.im * x625n.im
5446            + self.twiddle5.im * x724n.im
5447            + -self.twiddle12.im * x823n.im
5448            + self.twiddle2.im * x922n.im
5449            + -self.twiddle15.im * x1021n.im
5450            + -self.twiddle1.im * x1120n.im
5451            + self.twiddle13.im * x1219n.im
5452            + -self.twiddle4.im * x1318n.im
5453            + self.twiddle10.im * x1417n.im
5454            + -self.twiddle7.im * x1516n.im;
5455        let b1516re_a = buffer.load(0).re
5456            + self.twiddle15.re * x130p.re
5457            + self.twiddle1.re * x229p.re
5458            + self.twiddle14.re * x328p.re
5459            + self.twiddle2.re * x427p.re
5460            + self.twiddle13.re * x526p.re
5461            + self.twiddle3.re * x625p.re
5462            + self.twiddle12.re * x724p.re
5463            + self.twiddle4.re * x823p.re
5464            + self.twiddle11.re * x922p.re
5465            + self.twiddle5.re * x1021p.re
5466            + self.twiddle10.re * x1120p.re
5467            + self.twiddle6.re * x1219p.re
5468            + self.twiddle9.re * x1318p.re
5469            + self.twiddle7.re * x1417p.re
5470            + self.twiddle8.re * x1516p.re;
5471        let b1516re_b = self.twiddle15.im * x130n.im
5472            + -self.twiddle1.im * x229n.im
5473            + self.twiddle14.im * x328n.im
5474            + -self.twiddle2.im * x427n.im
5475            + self.twiddle13.im * x526n.im
5476            + -self.twiddle3.im * x625n.im
5477            + self.twiddle12.im * x724n.im
5478            + -self.twiddle4.im * x823n.im
5479            + self.twiddle11.im * x922n.im
5480            + -self.twiddle5.im * x1021n.im
5481            + self.twiddle10.im * x1120n.im
5482            + -self.twiddle6.im * x1219n.im
5483            + self.twiddle9.im * x1318n.im
5484            + -self.twiddle7.im * x1417n.im
5485            + self.twiddle8.im * x1516n.im;
5486
5487        let b130im_a = buffer.load(0).im
5488            + self.twiddle1.re * x130p.im
5489            + self.twiddle2.re * x229p.im
5490            + self.twiddle3.re * x328p.im
5491            + self.twiddle4.re * x427p.im
5492            + self.twiddle5.re * x526p.im
5493            + self.twiddle6.re * x625p.im
5494            + self.twiddle7.re * x724p.im
5495            + self.twiddle8.re * x823p.im
5496            + self.twiddle9.re * x922p.im
5497            + self.twiddle10.re * x1021p.im
5498            + self.twiddle11.re * x1120p.im
5499            + self.twiddle12.re * x1219p.im
5500            + self.twiddle13.re * x1318p.im
5501            + self.twiddle14.re * x1417p.im
5502            + self.twiddle15.re * x1516p.im;
5503        let b130im_b = self.twiddle1.im * x130n.re
5504            + self.twiddle2.im * x229n.re
5505            + self.twiddle3.im * x328n.re
5506            + self.twiddle4.im * x427n.re
5507            + self.twiddle5.im * x526n.re
5508            + self.twiddle6.im * x625n.re
5509            + self.twiddle7.im * x724n.re
5510            + self.twiddle8.im * x823n.re
5511            + self.twiddle9.im * x922n.re
5512            + self.twiddle10.im * x1021n.re
5513            + self.twiddle11.im * x1120n.re
5514            + self.twiddle12.im * x1219n.re
5515            + self.twiddle13.im * x1318n.re
5516            + self.twiddle14.im * x1417n.re
5517            + self.twiddle15.im * x1516n.re;
5518        let b229im_a = buffer.load(0).im
5519            + self.twiddle2.re * x130p.im
5520            + self.twiddle4.re * x229p.im
5521            + self.twiddle6.re * x328p.im
5522            + self.twiddle8.re * x427p.im
5523            + self.twiddle10.re * x526p.im
5524            + self.twiddle12.re * x625p.im
5525            + self.twiddle14.re * x724p.im
5526            + self.twiddle15.re * x823p.im
5527            + self.twiddle13.re * x922p.im
5528            + self.twiddle11.re * x1021p.im
5529            + self.twiddle9.re * x1120p.im
5530            + self.twiddle7.re * x1219p.im
5531            + self.twiddle5.re * x1318p.im
5532            + self.twiddle3.re * x1417p.im
5533            + self.twiddle1.re * x1516p.im;
5534        let b229im_b = self.twiddle2.im * x130n.re
5535            + self.twiddle4.im * x229n.re
5536            + self.twiddle6.im * x328n.re
5537            + self.twiddle8.im * x427n.re
5538            + self.twiddle10.im * x526n.re
5539            + self.twiddle12.im * x625n.re
5540            + self.twiddle14.im * x724n.re
5541            + -self.twiddle15.im * x823n.re
5542            + -self.twiddle13.im * x922n.re
5543            + -self.twiddle11.im * x1021n.re
5544            + -self.twiddle9.im * x1120n.re
5545            + -self.twiddle7.im * x1219n.re
5546            + -self.twiddle5.im * x1318n.re
5547            + -self.twiddle3.im * x1417n.re
5548            + -self.twiddle1.im * x1516n.re;
5549        let b328im_a = buffer.load(0).im
5550            + self.twiddle3.re * x130p.im
5551            + self.twiddle6.re * x229p.im
5552            + self.twiddle9.re * x328p.im
5553            + self.twiddle12.re * x427p.im
5554            + self.twiddle15.re * x526p.im
5555            + self.twiddle13.re * x625p.im
5556            + self.twiddle10.re * x724p.im
5557            + self.twiddle7.re * x823p.im
5558            + self.twiddle4.re * x922p.im
5559            + self.twiddle1.re * x1021p.im
5560            + self.twiddle2.re * x1120p.im
5561            + self.twiddle5.re * x1219p.im
5562            + self.twiddle8.re * x1318p.im
5563            + self.twiddle11.re * x1417p.im
5564            + self.twiddle14.re * x1516p.im;
5565        let b328im_b = self.twiddle3.im * x130n.re
5566            + self.twiddle6.im * x229n.re
5567            + self.twiddle9.im * x328n.re
5568            + self.twiddle12.im * x427n.re
5569            + self.twiddle15.im * x526n.re
5570            + -self.twiddle13.im * x625n.re
5571            + -self.twiddle10.im * x724n.re
5572            + -self.twiddle7.im * x823n.re
5573            + -self.twiddle4.im * x922n.re
5574            + -self.twiddle1.im * x1021n.re
5575            + self.twiddle2.im * x1120n.re
5576            + self.twiddle5.im * x1219n.re
5577            + self.twiddle8.im * x1318n.re
5578            + self.twiddle11.im * x1417n.re
5579            + self.twiddle14.im * x1516n.re;
5580        let b427im_a = buffer.load(0).im
5581            + self.twiddle4.re * x130p.im
5582            + self.twiddle8.re * x229p.im
5583            + self.twiddle12.re * x328p.im
5584            + self.twiddle15.re * x427p.im
5585            + self.twiddle11.re * x526p.im
5586            + self.twiddle7.re * x625p.im
5587            + self.twiddle3.re * x724p.im
5588            + self.twiddle1.re * x823p.im
5589            + self.twiddle5.re * x922p.im
5590            + self.twiddle9.re * x1021p.im
5591            + self.twiddle13.re * x1120p.im
5592            + self.twiddle14.re * x1219p.im
5593            + self.twiddle10.re * x1318p.im
5594            + self.twiddle6.re * x1417p.im
5595            + self.twiddle2.re * x1516p.im;
5596        let b427im_b = self.twiddle4.im * x130n.re
5597            + self.twiddle8.im * x229n.re
5598            + self.twiddle12.im * x328n.re
5599            + -self.twiddle15.im * x427n.re
5600            + -self.twiddle11.im * x526n.re
5601            + -self.twiddle7.im * x625n.re
5602            + -self.twiddle3.im * x724n.re
5603            + self.twiddle1.im * x823n.re
5604            + self.twiddle5.im * x922n.re
5605            + self.twiddle9.im * x1021n.re
5606            + self.twiddle13.im * x1120n.re
5607            + -self.twiddle14.im * x1219n.re
5608            + -self.twiddle10.im * x1318n.re
5609            + -self.twiddle6.im * x1417n.re
5610            + -self.twiddle2.im * x1516n.re;
5611        let b526im_a = buffer.load(0).im
5612            + self.twiddle5.re * x130p.im
5613            + self.twiddle10.re * x229p.im
5614            + self.twiddle15.re * x328p.im
5615            + self.twiddle11.re * x427p.im
5616            + self.twiddle6.re * x526p.im
5617            + self.twiddle1.re * x625p.im
5618            + self.twiddle4.re * x724p.im
5619            + self.twiddle9.re * x823p.im
5620            + self.twiddle14.re * x922p.im
5621            + self.twiddle12.re * x1021p.im
5622            + self.twiddle7.re * x1120p.im
5623            + self.twiddle2.re * x1219p.im
5624            + self.twiddle3.re * x1318p.im
5625            + self.twiddle8.re * x1417p.im
5626            + self.twiddle13.re * x1516p.im;
5627        let b526im_b = self.twiddle5.im * x130n.re
5628            + self.twiddle10.im * x229n.re
5629            + self.twiddle15.im * x328n.re
5630            + -self.twiddle11.im * x427n.re
5631            + -self.twiddle6.im * x526n.re
5632            + -self.twiddle1.im * x625n.re
5633            + self.twiddle4.im * x724n.re
5634            + self.twiddle9.im * x823n.re
5635            + self.twiddle14.im * x922n.re
5636            + -self.twiddle12.im * x1021n.re
5637            + -self.twiddle7.im * x1120n.re
5638            + -self.twiddle2.im * x1219n.re
5639            + self.twiddle3.im * x1318n.re
5640            + self.twiddle8.im * x1417n.re
5641            + self.twiddle13.im * x1516n.re;
5642        let b625im_a = buffer.load(0).im
5643            + self.twiddle6.re * x130p.im
5644            + self.twiddle12.re * x229p.im
5645            + self.twiddle13.re * x328p.im
5646            + self.twiddle7.re * x427p.im
5647            + self.twiddle1.re * x526p.im
5648            + self.twiddle5.re * x625p.im
5649            + self.twiddle11.re * x724p.im
5650            + self.twiddle14.re * x823p.im
5651            + self.twiddle8.re * x922p.im
5652            + self.twiddle2.re * x1021p.im
5653            + self.twiddle4.re * x1120p.im
5654            + self.twiddle10.re * x1219p.im
5655            + self.twiddle15.re * x1318p.im
5656            + self.twiddle9.re * x1417p.im
5657            + self.twiddle3.re * x1516p.im;
5658        let b625im_b = self.twiddle6.im * x130n.re
5659            + self.twiddle12.im * x229n.re
5660            + -self.twiddle13.im * x328n.re
5661            + -self.twiddle7.im * x427n.re
5662            + -self.twiddle1.im * x526n.re
5663            + self.twiddle5.im * x625n.re
5664            + self.twiddle11.im * x724n.re
5665            + -self.twiddle14.im * x823n.re
5666            + -self.twiddle8.im * x922n.re
5667            + -self.twiddle2.im * x1021n.re
5668            + self.twiddle4.im * x1120n.re
5669            + self.twiddle10.im * x1219n.re
5670            + -self.twiddle15.im * x1318n.re
5671            + -self.twiddle9.im * x1417n.re
5672            + -self.twiddle3.im * x1516n.re;
5673        let b724im_a = buffer.load(0).im
5674            + self.twiddle7.re * x130p.im
5675            + self.twiddle14.re * x229p.im
5676            + self.twiddle10.re * x328p.im
5677            + self.twiddle3.re * x427p.im
5678            + self.twiddle4.re * x526p.im
5679            + self.twiddle11.re * x625p.im
5680            + self.twiddle13.re * x724p.im
5681            + self.twiddle6.re * x823p.im
5682            + self.twiddle1.re * x922p.im
5683            + self.twiddle8.re * x1021p.im
5684            + self.twiddle15.re * x1120p.im
5685            + self.twiddle9.re * x1219p.im
5686            + self.twiddle2.re * x1318p.im
5687            + self.twiddle5.re * x1417p.im
5688            + self.twiddle12.re * x1516p.im;
5689        let b724im_b = self.twiddle7.im * x130n.re
5690            + self.twiddle14.im * x229n.re
5691            + -self.twiddle10.im * x328n.re
5692            + -self.twiddle3.im * x427n.re
5693            + self.twiddle4.im * x526n.re
5694            + self.twiddle11.im * x625n.re
5695            + -self.twiddle13.im * x724n.re
5696            + -self.twiddle6.im * x823n.re
5697            + self.twiddle1.im * x922n.re
5698            + self.twiddle8.im * x1021n.re
5699            + self.twiddle15.im * x1120n.re
5700            + -self.twiddle9.im * x1219n.re
5701            + -self.twiddle2.im * x1318n.re
5702            + self.twiddle5.im * x1417n.re
5703            + self.twiddle12.im * x1516n.re;
5704        let b823im_a = buffer.load(0).im
5705            + self.twiddle8.re * x130p.im
5706            + self.twiddle15.re * x229p.im
5707            + self.twiddle7.re * x328p.im
5708            + self.twiddle1.re * x427p.im
5709            + self.twiddle9.re * x526p.im
5710            + self.twiddle14.re * x625p.im
5711            + self.twiddle6.re * x724p.im
5712            + self.twiddle2.re * x823p.im
5713            + self.twiddle10.re * x922p.im
5714            + self.twiddle13.re * x1021p.im
5715            + self.twiddle5.re * x1120p.im
5716            + self.twiddle3.re * x1219p.im
5717            + self.twiddle11.re * x1318p.im
5718            + self.twiddle12.re * x1417p.im
5719            + self.twiddle4.re * x1516p.im;
5720        let b823im_b = self.twiddle8.im * x130n.re
5721            + -self.twiddle15.im * x229n.re
5722            + -self.twiddle7.im * x328n.re
5723            + self.twiddle1.im * x427n.re
5724            + self.twiddle9.im * x526n.re
5725            + -self.twiddle14.im * x625n.re
5726            + -self.twiddle6.im * x724n.re
5727            + self.twiddle2.im * x823n.re
5728            + self.twiddle10.im * x922n.re
5729            + -self.twiddle13.im * x1021n.re
5730            + -self.twiddle5.im * x1120n.re
5731            + self.twiddle3.im * x1219n.re
5732            + self.twiddle11.im * x1318n.re
5733            + -self.twiddle12.im * x1417n.re
5734            + -self.twiddle4.im * x1516n.re;
5735        let b922im_a = buffer.load(0).im
5736            + self.twiddle9.re * x130p.im
5737            + self.twiddle13.re * x229p.im
5738            + self.twiddle4.re * x328p.im
5739            + self.twiddle5.re * x427p.im
5740            + self.twiddle14.re * x526p.im
5741            + self.twiddle8.re * x625p.im
5742            + self.twiddle1.re * x724p.im
5743            + self.twiddle10.re * x823p.im
5744            + self.twiddle12.re * x922p.im
5745            + self.twiddle3.re * x1021p.im
5746            + self.twiddle6.re * x1120p.im
5747            + self.twiddle15.re * x1219p.im
5748            + self.twiddle7.re * x1318p.im
5749            + self.twiddle2.re * x1417p.im
5750            + self.twiddle11.re * x1516p.im;
5751        let b922im_b = self.twiddle9.im * x130n.re
5752            + -self.twiddle13.im * x229n.re
5753            + -self.twiddle4.im * x328n.re
5754            + self.twiddle5.im * x427n.re
5755            + self.twiddle14.im * x526n.re
5756            + -self.twiddle8.im * x625n.re
5757            + self.twiddle1.im * x724n.re
5758            + self.twiddle10.im * x823n.re
5759            + -self.twiddle12.im * x922n.re
5760            + -self.twiddle3.im * x1021n.re
5761            + self.twiddle6.im * x1120n.re
5762            + self.twiddle15.im * x1219n.re
5763            + -self.twiddle7.im * x1318n.re
5764            + self.twiddle2.im * x1417n.re
5765            + self.twiddle11.im * x1516n.re;
5766        let b1021im_a = buffer.load(0).im
5767            + self.twiddle10.re * x130p.im
5768            + self.twiddle11.re * x229p.im
5769            + self.twiddle1.re * x328p.im
5770            + self.twiddle9.re * x427p.im
5771            + self.twiddle12.re * x526p.im
5772            + self.twiddle2.re * x625p.im
5773            + self.twiddle8.re * x724p.im
5774            + self.twiddle13.re * x823p.im
5775            + self.twiddle3.re * x922p.im
5776            + self.twiddle7.re * x1021p.im
5777            + self.twiddle14.re * x1120p.im
5778            + self.twiddle4.re * x1219p.im
5779            + self.twiddle6.re * x1318p.im
5780            + self.twiddle15.re * x1417p.im
5781            + self.twiddle5.re * x1516p.im;
5782        let b1021im_b = self.twiddle10.im * x130n.re
5783            + -self.twiddle11.im * x229n.re
5784            + -self.twiddle1.im * x328n.re
5785            + self.twiddle9.im * x427n.re
5786            + -self.twiddle12.im * x526n.re
5787            + -self.twiddle2.im * x625n.re
5788            + self.twiddle8.im * x724n.re
5789            + -self.twiddle13.im * x823n.re
5790            + -self.twiddle3.im * x922n.re
5791            + self.twiddle7.im * x1021n.re
5792            + -self.twiddle14.im * x1120n.re
5793            + -self.twiddle4.im * x1219n.re
5794            + self.twiddle6.im * x1318n.re
5795            + -self.twiddle15.im * x1417n.re
5796            + -self.twiddle5.im * x1516n.re;
5797        let b1120im_a = buffer.load(0).im
5798            + self.twiddle11.re * x130p.im
5799            + self.twiddle9.re * x229p.im
5800            + self.twiddle2.re * x328p.im
5801            + self.twiddle13.re * x427p.im
5802            + self.twiddle7.re * x526p.im
5803            + self.twiddle4.re * x625p.im
5804            + self.twiddle15.re * x724p.im
5805            + self.twiddle5.re * x823p.im
5806            + self.twiddle6.re * x922p.im
5807            + self.twiddle14.re * x1021p.im
5808            + self.twiddle3.re * x1120p.im
5809            + self.twiddle8.re * x1219p.im
5810            + self.twiddle12.re * x1318p.im
5811            + self.twiddle1.re * x1417p.im
5812            + self.twiddle10.re * x1516p.im;
5813        let b1120im_b = self.twiddle11.im * x130n.re
5814            + -self.twiddle9.im * x229n.re
5815            + self.twiddle2.im * x328n.re
5816            + self.twiddle13.im * x427n.re
5817            + -self.twiddle7.im * x526n.re
5818            + self.twiddle4.im * x625n.re
5819            + self.twiddle15.im * x724n.re
5820            + -self.twiddle5.im * x823n.re
5821            + self.twiddle6.im * x922n.re
5822            + -self.twiddle14.im * x1021n.re
5823            + -self.twiddle3.im * x1120n.re
5824            + self.twiddle8.im * x1219n.re
5825            + -self.twiddle12.im * x1318n.re
5826            + -self.twiddle1.im * x1417n.re
5827            + self.twiddle10.im * x1516n.re;
5828        let b1219im_a = buffer.load(0).im
5829            + self.twiddle12.re * x130p.im
5830            + self.twiddle7.re * x229p.im
5831            + self.twiddle5.re * x328p.im
5832            + self.twiddle14.re * x427p.im
5833            + self.twiddle2.re * x526p.im
5834            + self.twiddle10.re * x625p.im
5835            + self.twiddle9.re * x724p.im
5836            + self.twiddle3.re * x823p.im
5837            + self.twiddle15.re * x922p.im
5838            + self.twiddle4.re * x1021p.im
5839            + self.twiddle8.re * x1120p.im
5840            + self.twiddle11.re * x1219p.im
5841            + self.twiddle1.re * x1318p.im
5842            + self.twiddle13.re * x1417p.im
5843            + self.twiddle6.re * x1516p.im;
5844        let b1219im_b = self.twiddle12.im * x130n.re
5845            + -self.twiddle7.im * x229n.re
5846            + self.twiddle5.im * x328n.re
5847            + -self.twiddle14.im * x427n.re
5848            + -self.twiddle2.im * x526n.re
5849            + self.twiddle10.im * x625n.re
5850            + -self.twiddle9.im * x724n.re
5851            + self.twiddle3.im * x823n.re
5852            + self.twiddle15.im * x922n.re
5853            + -self.twiddle4.im * x1021n.re
5854            + self.twiddle8.im * x1120n.re
5855            + -self.twiddle11.im * x1219n.re
5856            + self.twiddle1.im * x1318n.re
5857            + self.twiddle13.im * x1417n.re
5858            + -self.twiddle6.im * x1516n.re;
5859        let b1318im_a = buffer.load(0).im
5860            + self.twiddle13.re * x130p.im
5861            + self.twiddle5.re * x229p.im
5862            + self.twiddle8.re * x328p.im
5863            + self.twiddle10.re * x427p.im
5864            + self.twiddle3.re * x526p.im
5865            + self.twiddle15.re * x625p.im
5866            + self.twiddle2.re * x724p.im
5867            + self.twiddle11.re * x823p.im
5868            + self.twiddle7.re * x922p.im
5869            + self.twiddle6.re * x1021p.im
5870            + self.twiddle12.re * x1120p.im
5871            + self.twiddle1.re * x1219p.im
5872            + self.twiddle14.re * x1318p.im
5873            + self.twiddle4.re * x1417p.im
5874            + self.twiddle9.re * x1516p.im;
5875        let b1318im_b = self.twiddle13.im * x130n.re
5876            + -self.twiddle5.im * x229n.re
5877            + self.twiddle8.im * x328n.re
5878            + -self.twiddle10.im * x427n.re
5879            + self.twiddle3.im * x526n.re
5880            + -self.twiddle15.im * x625n.re
5881            + -self.twiddle2.im * x724n.re
5882            + self.twiddle11.im * x823n.re
5883            + -self.twiddle7.im * x922n.re
5884            + self.twiddle6.im * x1021n.re
5885            + -self.twiddle12.im * x1120n.re
5886            + self.twiddle1.im * x1219n.re
5887            + self.twiddle14.im * x1318n.re
5888            + -self.twiddle4.im * x1417n.re
5889            + self.twiddle9.im * x1516n.re;
5890        let b1417im_a = buffer.load(0).im
5891            + self.twiddle14.re * x130p.im
5892            + self.twiddle3.re * x229p.im
5893            + self.twiddle11.re * x328p.im
5894            + self.twiddle6.re * x427p.im
5895            + self.twiddle8.re * x526p.im
5896            + self.twiddle9.re * x625p.im
5897            + self.twiddle5.re * x724p.im
5898            + self.twiddle12.re * x823p.im
5899            + self.twiddle2.re * x922p.im
5900            + self.twiddle15.re * x1021p.im
5901            + self.twiddle1.re * x1120p.im
5902            + self.twiddle13.re * x1219p.im
5903            + self.twiddle4.re * x1318p.im
5904            + self.twiddle10.re * x1417p.im
5905            + self.twiddle7.re * x1516p.im;
5906        let b1417im_b = self.twiddle14.im * x130n.re
5907            + -self.twiddle3.im * x229n.re
5908            + self.twiddle11.im * x328n.re
5909            + -self.twiddle6.im * x427n.re
5910            + self.twiddle8.im * x526n.re
5911            + -self.twiddle9.im * x625n.re
5912            + self.twiddle5.im * x724n.re
5913            + -self.twiddle12.im * x823n.re
5914            + self.twiddle2.im * x922n.re
5915            + -self.twiddle15.im * x1021n.re
5916            + -self.twiddle1.im * x1120n.re
5917            + self.twiddle13.im * x1219n.re
5918            + -self.twiddle4.im * x1318n.re
5919            + self.twiddle10.im * x1417n.re
5920            + -self.twiddle7.im * x1516n.re;
5921        let b1516im_a = buffer.load(0).im
5922            + self.twiddle15.re * x130p.im
5923            + self.twiddle1.re * x229p.im
5924            + self.twiddle14.re * x328p.im
5925            + self.twiddle2.re * x427p.im
5926            + self.twiddle13.re * x526p.im
5927            + self.twiddle3.re * x625p.im
5928            + self.twiddle12.re * x724p.im
5929            + self.twiddle4.re * x823p.im
5930            + self.twiddle11.re * x922p.im
5931            + self.twiddle5.re * x1021p.im
5932            + self.twiddle10.re * x1120p.im
5933            + self.twiddle6.re * x1219p.im
5934            + self.twiddle9.re * x1318p.im
5935            + self.twiddle7.re * x1417p.im
5936            + self.twiddle8.re * x1516p.im;
5937        let b1516im_b = self.twiddle15.im * x130n.re
5938            + -self.twiddle1.im * x229n.re
5939            + self.twiddle14.im * x328n.re
5940            + -self.twiddle2.im * x427n.re
5941            + self.twiddle13.im * x526n.re
5942            + -self.twiddle3.im * x625n.re
5943            + self.twiddle12.im * x724n.re
5944            + -self.twiddle4.im * x823n.re
5945            + self.twiddle11.im * x922n.re
5946            + -self.twiddle5.im * x1021n.re
5947            + self.twiddle10.im * x1120n.re
5948            + -self.twiddle6.im * x1219n.re
5949            + self.twiddle9.im * x1318n.re
5950            + -self.twiddle7.im * x1417n.re
5951            + self.twiddle8.im * x1516n.re;
5952
5953        let out1re = b130re_a - b130re_b;
5954        let out1im = b130im_a + b130im_b;
5955        let out2re = b229re_a - b229re_b;
5956        let out2im = b229im_a + b229im_b;
5957        let out3re = b328re_a - b328re_b;
5958        let out3im = b328im_a + b328im_b;
5959        let out4re = b427re_a - b427re_b;
5960        let out4im = b427im_a + b427im_b;
5961        let out5re = b526re_a - b526re_b;
5962        let out5im = b526im_a + b526im_b;
5963        let out6re = b625re_a - b625re_b;
5964        let out6im = b625im_a + b625im_b;
5965        let out7re = b724re_a - b724re_b;
5966        let out7im = b724im_a + b724im_b;
5967        let out8re = b823re_a - b823re_b;
5968        let out8im = b823im_a + b823im_b;
5969        let out9re = b922re_a - b922re_b;
5970        let out9im = b922im_a + b922im_b;
5971        let out10re = b1021re_a - b1021re_b;
5972        let out10im = b1021im_a + b1021im_b;
5973        let out11re = b1120re_a - b1120re_b;
5974        let out11im = b1120im_a + b1120im_b;
5975        let out12re = b1219re_a - b1219re_b;
5976        let out12im = b1219im_a + b1219im_b;
5977        let out13re = b1318re_a - b1318re_b;
5978        let out13im = b1318im_a + b1318im_b;
5979        let out14re = b1417re_a - b1417re_b;
5980        let out14im = b1417im_a + b1417im_b;
5981        let out15re = b1516re_a - b1516re_b;
5982        let out15im = b1516im_a + b1516im_b;
5983        let out16re = b1516re_a + b1516re_b;
5984        let out16im = b1516im_a - b1516im_b;
5985        let out17re = b1417re_a + b1417re_b;
5986        let out17im = b1417im_a - b1417im_b;
5987        let out18re = b1318re_a + b1318re_b;
5988        let out18im = b1318im_a - b1318im_b;
5989        let out19re = b1219re_a + b1219re_b;
5990        let out19im = b1219im_a - b1219im_b;
5991        let out20re = b1120re_a + b1120re_b;
5992        let out20im = b1120im_a - b1120im_b;
5993        let out21re = b1021re_a + b1021re_b;
5994        let out21im = b1021im_a - b1021im_b;
5995        let out22re = b922re_a + b922re_b;
5996        let out22im = b922im_a - b922im_b;
5997        let out23re = b823re_a + b823re_b;
5998        let out23im = b823im_a - b823im_b;
5999        let out24re = b724re_a + b724re_b;
6000        let out24im = b724im_a - b724im_b;
6001        let out25re = b625re_a + b625re_b;
6002        let out25im = b625im_a - b625im_b;
6003        let out26re = b526re_a + b526re_b;
6004        let out26im = b526im_a - b526im_b;
6005        let out27re = b427re_a + b427re_b;
6006        let out27im = b427im_a - b427im_b;
6007        let out28re = b328re_a + b328re_b;
6008        let out28im = b328im_a - b328im_b;
6009        let out29re = b229re_a + b229re_b;
6010        let out29im = b229im_a - b229im_b;
6011        let out30re = b130re_a + b130re_b;
6012        let out30im = b130im_a - b130im_b;
6013        buffer.store(sum, 0);
6014        buffer.store(
6015            Complex {
6016                re: out1re,
6017                im: out1im,
6018            },
6019            1,
6020        );
6021        buffer.store(
6022            Complex {
6023                re: out2re,
6024                im: out2im,
6025            },
6026            2,
6027        );
6028        buffer.store(
6029            Complex {
6030                re: out3re,
6031                im: out3im,
6032            },
6033            3,
6034        );
6035        buffer.store(
6036            Complex {
6037                re: out4re,
6038                im: out4im,
6039            },
6040            4,
6041        );
6042        buffer.store(
6043            Complex {
6044                re: out5re,
6045                im: out5im,
6046            },
6047            5,
6048        );
6049        buffer.store(
6050            Complex {
6051                re: out6re,
6052                im: out6im,
6053            },
6054            6,
6055        );
6056        buffer.store(
6057            Complex {
6058                re: out7re,
6059                im: out7im,
6060            },
6061            7,
6062        );
6063        buffer.store(
6064            Complex {
6065                re: out8re,
6066                im: out8im,
6067            },
6068            8,
6069        );
6070        buffer.store(
6071            Complex {
6072                re: out9re,
6073                im: out9im,
6074            },
6075            9,
6076        );
6077        buffer.store(
6078            Complex {
6079                re: out10re,
6080                im: out10im,
6081            },
6082            10,
6083        );
6084        buffer.store(
6085            Complex {
6086                re: out11re,
6087                im: out11im,
6088            },
6089            11,
6090        );
6091        buffer.store(
6092            Complex {
6093                re: out12re,
6094                im: out12im,
6095            },
6096            12,
6097        );
6098        buffer.store(
6099            Complex {
6100                re: out13re,
6101                im: out13im,
6102            },
6103            13,
6104        );
6105        buffer.store(
6106            Complex {
6107                re: out14re,
6108                im: out14im,
6109            },
6110            14,
6111        );
6112        buffer.store(
6113            Complex {
6114                re: out15re,
6115                im: out15im,
6116            },
6117            15,
6118        );
6119        buffer.store(
6120            Complex {
6121                re: out16re,
6122                im: out16im,
6123            },
6124            16,
6125        );
6126        buffer.store(
6127            Complex {
6128                re: out17re,
6129                im: out17im,
6130            },
6131            17,
6132        );
6133        buffer.store(
6134            Complex {
6135                re: out18re,
6136                im: out18im,
6137            },
6138            18,
6139        );
6140        buffer.store(
6141            Complex {
6142                re: out19re,
6143                im: out19im,
6144            },
6145            19,
6146        );
6147        buffer.store(
6148            Complex {
6149                re: out20re,
6150                im: out20im,
6151            },
6152            20,
6153        );
6154        buffer.store(
6155            Complex {
6156                re: out21re,
6157                im: out21im,
6158            },
6159            21,
6160        );
6161        buffer.store(
6162            Complex {
6163                re: out22re,
6164                im: out22im,
6165            },
6166            22,
6167        );
6168        buffer.store(
6169            Complex {
6170                re: out23re,
6171                im: out23im,
6172            },
6173            23,
6174        );
6175        buffer.store(
6176            Complex {
6177                re: out24re,
6178                im: out24im,
6179            },
6180            24,
6181        );
6182        buffer.store(
6183            Complex {
6184                re: out25re,
6185                im: out25im,
6186            },
6187            25,
6188        );
6189        buffer.store(
6190            Complex {
6191                re: out26re,
6192                im: out26im,
6193            },
6194            26,
6195        );
6196        buffer.store(
6197            Complex {
6198                re: out27re,
6199                im: out27im,
6200            },
6201            27,
6202        );
6203        buffer.store(
6204            Complex {
6205                re: out28re,
6206                im: out28im,
6207            },
6208            28,
6209        );
6210        buffer.store(
6211            Complex {
6212                re: out29re,
6213                im: out29im,
6214            },
6215            29,
6216        );
6217        buffer.store(
6218            Complex {
6219                re: out30re,
6220                im: out30im,
6221            },
6222            30,
6223        );
6224    }
6225}
6226pub struct Butterfly32<T> {
6227    butterfly16: Butterfly16<T>,
6228    butterfly8: Butterfly8<T>,
6229    twiddles: [Complex<T>; 7],
6230}
6231boilerplate_fft_butterfly!(Butterfly32, 32, |this: &Butterfly32<_>| this
6232    .butterfly8
6233    .fft_direction());
6234impl<T: FftNum> Butterfly32<T> {
6235    pub fn new(direction: FftDirection) -> Self {
6236        Self {
6237            butterfly16: Butterfly16::new(direction),
6238            butterfly8: Butterfly8::new(direction),
6239            twiddles: [
6240                twiddles::compute_twiddle(1, 32, direction),
6241                twiddles::compute_twiddle(2, 32, direction),
6242                twiddles::compute_twiddle(3, 32, direction),
6243                twiddles::compute_twiddle(4, 32, direction),
6244                twiddles::compute_twiddle(5, 32, direction),
6245                twiddles::compute_twiddle(6, 32, direction),
6246                twiddles::compute_twiddle(7, 32, direction),
6247            ],
6248        }
6249    }
6250
6251    #[inline(never)]
6252    unsafe fn perform_fft_contiguous(&self, mut buffer: impl LoadStore<T>) {
6253        // we're going to hardcode a step of split radix
6254        // step 1: copy and reorder the  input into the scratch
6255        let mut scratch_evens = [
6256            buffer.load(0),
6257            buffer.load(2),
6258            buffer.load(4),
6259            buffer.load(6),
6260            buffer.load(8),
6261            buffer.load(10),
6262            buffer.load(12),
6263            buffer.load(14),
6264            buffer.load(16),
6265            buffer.load(18),
6266            buffer.load(20),
6267            buffer.load(22),
6268            buffer.load(24),
6269            buffer.load(26),
6270            buffer.load(28),
6271            buffer.load(30),
6272        ];
6273
6274        let mut scratch_odds_n1 = [
6275            buffer.load(1),
6276            buffer.load(5),
6277            buffer.load(9),
6278            buffer.load(13),
6279            buffer.load(17),
6280            buffer.load(21),
6281            buffer.load(25),
6282            buffer.load(29),
6283        ];
6284        let mut scratch_odds_n3 = [
6285            buffer.load(31),
6286            buffer.load(3),
6287            buffer.load(7),
6288            buffer.load(11),
6289            buffer.load(15),
6290            buffer.load(19),
6291            buffer.load(23),
6292            buffer.load(27),
6293        ];
6294
6295        // step 2: column FFTs
6296        self.butterfly16.perform_fft_contiguous(&mut scratch_evens);
6297        self.butterfly8.perform_fft_contiguous(&mut scratch_odds_n1);
6298        self.butterfly8.perform_fft_contiguous(&mut scratch_odds_n3);
6299
6300        // step 3: apply twiddle factors
6301        scratch_odds_n1[1] = scratch_odds_n1[1] * self.twiddles[0];
6302        scratch_odds_n3[1] = scratch_odds_n3[1] * self.twiddles[0].conj();
6303
6304        scratch_odds_n1[2] = scratch_odds_n1[2] * self.twiddles[1];
6305        scratch_odds_n3[2] = scratch_odds_n3[2] * self.twiddles[1].conj();
6306
6307        scratch_odds_n1[3] = scratch_odds_n1[3] * self.twiddles[2];
6308        scratch_odds_n3[3] = scratch_odds_n3[3] * self.twiddles[2].conj();
6309
6310        scratch_odds_n1[4] = scratch_odds_n1[4] * self.twiddles[3];
6311        scratch_odds_n3[4] = scratch_odds_n3[4] * self.twiddles[3].conj();
6312
6313        scratch_odds_n1[5] = scratch_odds_n1[5] * self.twiddles[4];
6314        scratch_odds_n3[5] = scratch_odds_n3[5] * self.twiddles[4].conj();
6315
6316        scratch_odds_n1[6] = scratch_odds_n1[6] * self.twiddles[5];
6317        scratch_odds_n3[6] = scratch_odds_n3[6] * self.twiddles[5].conj();
6318
6319        scratch_odds_n1[7] = scratch_odds_n1[7] * self.twiddles[6];
6320        scratch_odds_n3[7] = scratch_odds_n3[7] * self.twiddles[6].conj();
6321
6322        // step 4: cross FFTs
6323        Butterfly2::perform_fft_strided(&mut scratch_odds_n1[0], &mut scratch_odds_n3[0]);
6324        Butterfly2::perform_fft_strided(&mut scratch_odds_n1[1], &mut scratch_odds_n3[1]);
6325        Butterfly2::perform_fft_strided(&mut scratch_odds_n1[2], &mut scratch_odds_n3[2]);
6326        Butterfly2::perform_fft_strided(&mut scratch_odds_n1[3], &mut scratch_odds_n3[3]);
6327        Butterfly2::perform_fft_strided(&mut scratch_odds_n1[4], &mut scratch_odds_n3[4]);
6328        Butterfly2::perform_fft_strided(&mut scratch_odds_n1[5], &mut scratch_odds_n3[5]);
6329        Butterfly2::perform_fft_strided(&mut scratch_odds_n1[6], &mut scratch_odds_n3[6]);
6330        Butterfly2::perform_fft_strided(&mut scratch_odds_n1[7], &mut scratch_odds_n3[7]);
6331
6332        // apply the butterfly 4 twiddle factor, which is just a rotation
6333        scratch_odds_n3[0] = twiddles::rotate_90(scratch_odds_n3[0], self.fft_direction());
6334        scratch_odds_n3[1] = twiddles::rotate_90(scratch_odds_n3[1], self.fft_direction());
6335        scratch_odds_n3[2] = twiddles::rotate_90(scratch_odds_n3[2], self.fft_direction());
6336        scratch_odds_n3[3] = twiddles::rotate_90(scratch_odds_n3[3], self.fft_direction());
6337        scratch_odds_n3[4] = twiddles::rotate_90(scratch_odds_n3[4], self.fft_direction());
6338        scratch_odds_n3[5] = twiddles::rotate_90(scratch_odds_n3[5], self.fft_direction());
6339        scratch_odds_n3[6] = twiddles::rotate_90(scratch_odds_n3[6], self.fft_direction());
6340        scratch_odds_n3[7] = twiddles::rotate_90(scratch_odds_n3[7], self.fft_direction());
6341
6342        //step 5: copy/add/subtract data back to buffer
6343        buffer.store(scratch_evens[0] + scratch_odds_n1[0], 0);
6344        buffer.store(scratch_evens[1] + scratch_odds_n1[1], 1);
6345        buffer.store(scratch_evens[2] + scratch_odds_n1[2], 2);
6346        buffer.store(scratch_evens[3] + scratch_odds_n1[3], 3);
6347        buffer.store(scratch_evens[4] + scratch_odds_n1[4], 4);
6348        buffer.store(scratch_evens[5] + scratch_odds_n1[5], 5);
6349        buffer.store(scratch_evens[6] + scratch_odds_n1[6], 6);
6350        buffer.store(scratch_evens[7] + scratch_odds_n1[7], 7);
6351        buffer.store(scratch_evens[8] + scratch_odds_n3[0], 8);
6352        buffer.store(scratch_evens[9] + scratch_odds_n3[1], 9);
6353        buffer.store(scratch_evens[10] + scratch_odds_n3[2], 10);
6354        buffer.store(scratch_evens[11] + scratch_odds_n3[3], 11);
6355        buffer.store(scratch_evens[12] + scratch_odds_n3[4], 12);
6356        buffer.store(scratch_evens[13] + scratch_odds_n3[5], 13);
6357        buffer.store(scratch_evens[14] + scratch_odds_n3[6], 14);
6358        buffer.store(scratch_evens[15] + scratch_odds_n3[7], 15);
6359        buffer.store(scratch_evens[0] - scratch_odds_n1[0], 16);
6360        buffer.store(scratch_evens[1] - scratch_odds_n1[1], 17);
6361        buffer.store(scratch_evens[2] - scratch_odds_n1[2], 18);
6362        buffer.store(scratch_evens[3] - scratch_odds_n1[3], 19);
6363        buffer.store(scratch_evens[4] - scratch_odds_n1[4], 20);
6364        buffer.store(scratch_evens[5] - scratch_odds_n1[5], 21);
6365        buffer.store(scratch_evens[6] - scratch_odds_n1[6], 22);
6366        buffer.store(scratch_evens[7] - scratch_odds_n1[7], 23);
6367        buffer.store(scratch_evens[8] - scratch_odds_n3[0], 24);
6368        buffer.store(scratch_evens[9] - scratch_odds_n3[1], 25);
6369        buffer.store(scratch_evens[10] - scratch_odds_n3[2], 26);
6370        buffer.store(scratch_evens[11] - scratch_odds_n3[3], 27);
6371        buffer.store(scratch_evens[12] - scratch_odds_n3[4], 28);
6372        buffer.store(scratch_evens[13] - scratch_odds_n3[5], 29);
6373        buffer.store(scratch_evens[14] - scratch_odds_n3[6], 30);
6374        buffer.store(scratch_evens[15] - scratch_odds_n3[7], 31);
6375    }
6376}
6377
#[cfg(test)]
mod unit_tests {
    use super::*;
    use crate::test_utils::check_fft_algorithm;

    // Every butterfly is tested the same way: construct it for a direction and hand it to
    // `check_fft_algorithm`, first for forward and then for inverse. Only the test name, the
    // struct and the FFT size differ, so a macro stamps out the individual tests.
    macro_rules! test_butterfly_func {
        ($test_name:ident, $struct_name:ident, $size:expr) => {
            #[test]
            fn $test_name() {
                for &direction in &[FftDirection::Forward, FftDirection::Inverse] {
                    let butterfly = $struct_name::new(direction);
                    check_fft_algorithm::<f32>(&butterfly, $size, direction);
                }
            }
        };
    }

    // One test per butterfly size.
    test_butterfly_func!(test_butterfly2, Butterfly2, 2);
    test_butterfly_func!(test_butterfly3, Butterfly3, 3);
    test_butterfly_func!(test_butterfly4, Butterfly4, 4);
    test_butterfly_func!(test_butterfly5, Butterfly5, 5);
    test_butterfly_func!(test_butterfly6, Butterfly6, 6);
    test_butterfly_func!(test_butterfly7, Butterfly7, 7);
    test_butterfly_func!(test_butterfly8, Butterfly8, 8);
    test_butterfly_func!(test_butterfly9, Butterfly9, 9);
    test_butterfly_func!(test_butterfly11, Butterfly11, 11);
    test_butterfly_func!(test_butterfly12, Butterfly12, 12);
    test_butterfly_func!(test_butterfly13, Butterfly13, 13);
    test_butterfly_func!(test_butterfly16, Butterfly16, 16);
    test_butterfly_func!(test_butterfly17, Butterfly17, 17);
    test_butterfly_func!(test_butterfly19, Butterfly19, 19);
    test_butterfly_func!(test_butterfly23, Butterfly23, 23);
    test_butterfly_func!(test_butterfly24, Butterfly24, 24);
    test_butterfly_func!(test_butterfly27, Butterfly27, 27);
    test_butterfly_func!(test_butterfly29, Butterfly29, 29);
    test_butterfly_func!(test_butterfly31, Butterfly31, 31);
    test_butterfly_func!(test_butterfly32, Butterfly32, 32);
}