rustfft/sse/
sse_planner.rs

1use num_integer::gcd;
2use std::any::TypeId;
3use std::collections::HashMap;
4
5use std::sync::Arc;
6
7use crate::{common::FftNum, fft_cache::FftCache, FftDirection};
8
9use crate::algorithm::*;
10use crate::sse::sse_butterflies::*;
11use crate::sse::sse_prime_butterflies;
12use crate::sse::sse_radix4::*;
13use crate::Fft;
14
15use crate::math_utils::{PrimeFactor, PrimeFactors};
16
17const MIN_RADIX4_BITS: u32 = 6; // smallest size to consider radix 4 an option is 2^6 = 64
18const MAX_RADER_PRIME_FACTOR: usize = 23; // don't use Raders if the inner fft length has prime factor larger than this
19
20/// A Recipe is a structure that describes the design of a FFT, without actually creating it.
21/// It is used as a middle step in the planning process.
22#[derive(Debug, PartialEq, Clone)]
23pub enum Recipe {
24    Dft(usize),
25    MixedRadix {
26        left_fft: Arc<Recipe>,
27        right_fft: Arc<Recipe>,
28    },
29    #[allow(dead_code)]
30    GoodThomasAlgorithm {
31        left_fft: Arc<Recipe>,
32        right_fft: Arc<Recipe>,
33    },
34    MixedRadixSmall {
35        left_fft: Arc<Recipe>,
36        right_fft: Arc<Recipe>,
37    },
38    GoodThomasAlgorithmSmall {
39        left_fft: Arc<Recipe>,
40        right_fft: Arc<Recipe>,
41    },
42    RadersAlgorithm {
43        inner_fft: Arc<Recipe>,
44    },
45    BluesteinsAlgorithm {
46        len: usize,
47        inner_fft: Arc<Recipe>,
48    },
49    Radix4 {
50        k: u32,
51        base_fft: Arc<Recipe>,
52    },
53    Butterfly1,
54    Butterfly2,
55    Butterfly3,
56    Butterfly4,
57    Butterfly5,
58    Butterfly6,
59    Butterfly8,
60    Butterfly9,
61    Butterfly10,
62    Butterfly12,
63    Butterfly15,
64    Butterfly16,
65    Butterfly24,
66    Butterfly32,
67    PrimeButterfly {
68        len: usize,
69    },
70}
71
72impl Recipe {
73    pub fn len(&self) -> usize {
74        match self {
75            Recipe::Dft(length) => *length,
76            Recipe::Radix4 { k, base_fft } => base_fft.len() * (1 << (k * 2)),
77            Recipe::Butterfly1 => 1,
78            Recipe::Butterfly2 => 2,
79            Recipe::Butterfly3 => 3,
80            Recipe::Butterfly4 => 4,
81            Recipe::Butterfly5 => 5,
82            Recipe::Butterfly6 => 6,
83            Recipe::Butterfly8 => 8,
84            Recipe::Butterfly9 => 9,
85            Recipe::Butterfly10 => 10,
86            Recipe::Butterfly12 => 12,
87            Recipe::Butterfly15 => 15,
88            Recipe::Butterfly16 => 16,
89            Recipe::Butterfly24 => 24,
90            Recipe::Butterfly32 => 32,
91            Recipe::PrimeButterfly { len } => *len,
92            Recipe::MixedRadix {
93                left_fft,
94                right_fft,
95            } => left_fft.len() * right_fft.len(),
96            Recipe::GoodThomasAlgorithm {
97                left_fft,
98                right_fft,
99            } => left_fft.len() * right_fft.len(),
100            Recipe::MixedRadixSmall {
101                left_fft,
102                right_fft,
103            } => left_fft.len() * right_fft.len(),
104            Recipe::GoodThomasAlgorithmSmall {
105                left_fft,
106                right_fft,
107            } => left_fft.len() * right_fft.len(),
108            Recipe::RadersAlgorithm { inner_fft } => inner_fft.len() + 1,
109            Recipe::BluesteinsAlgorithm { len, .. } => *len,
110        }
111    }
112}
113
114/// The SSE FFT planner creates new FFT algorithm instances using a mix of scalar and SSE accelerated algorithms.
115/// It requires at least SSE4.1, which is available on all reasonably recent x86_64 cpus.
116///
117/// RustFFT has several FFT algorithms available. For a given FFT size, the `FftPlannerSse` decides which of the
118/// available FFT algorithms to use and then initializes them.
119///
120/// ~~~
121/// // Perform a forward Fft of size 1234
122/// use std::sync::Arc;
123/// use rustfft::{FftPlannerSse, num_complex::Complex};
124///
125/// if let Ok(mut planner) = FftPlannerSse::new() {
126///   let fft = planner.plan_fft_forward(1234);
127///
128///   let mut buffer = vec![Complex{ re: 0.0f32, im: 0.0f32 }; 1234];
129///   fft.process(&mut buffer);
130///
131///   // The FFT instance returned by the planner has the type `Arc<dyn Fft<T>>`,
132///   // where T is the numeric type, ie f32 or f64, so it's cheap to clone
133///   let fft_clone = Arc::clone(&fft);
134/// }
135/// ~~~
136///
137/// If you plan on creating multiple FFT instances, it is recommended to re-use the same planner for all of them. This
138/// is because the planner re-uses internal data across FFT instances wherever possible, saving memory and reducing
139/// setup time. (FFT instances created with one planner will never re-use data and buffers with FFT instances created
140/// by a different planner)
141///
142/// Each FFT instance owns [`Arc`s](std::sync::Arc) to its internal data, rather than borrowing it from the planner, so it's perfectly
143/// safe to drop the planner after creating Fft instances.
144pub struct FftPlannerSse<T: FftNum> {
145    algorithm_cache: FftCache<T>,
146    recipe_cache: HashMap<usize, Arc<Recipe>>,
147    all_butterflies: Box<[usize]>,
148}
149
150impl<T: FftNum> FftPlannerSse<T> {
151    /// Creates a new `FftPlannerSse` instance.
152    ///
153    /// Returns `Ok(planner_instance)` if we're compiling for X86_64, SSE support was enabled in feature flags, and the current CPU supports the `sse4.1` CPU feature.
154    /// Returns `Err(())` if SSE support is not available.
155    pub fn new() -> Result<Self, ()> {
156        if is_x86_feature_detected!("sse4.1") {
157            // Ideally, we would implement the planner with specialization.
158            // Specialization won't be on stable rust for a long time though, so in the meantime, we can hack around it.
159            //
160            // We use TypeID to determine if T is f32, f64, or neither. If neither, we don't want to do any SSE acceleration
161            // If it's f32 or f64, then construct and return a SSE planner instance.
162            //
163            // All SSE accelerated algorithms come in separate versions for f32 and f64. The type is checked when a new one is created, and if it does not
164            // match the type the FFT is meant for, it will panic. This will never be a problem if using a planner to construct the FFTs.
165            //
166            // An annoying snag with this setup is that we frequently have to transmute buffers from &mut [Complex<T>] to &mut [Complex<f32 or f64>] or vice versa.
167            // We know this is safe because we assert everywhere that Type(f32 or f64)==Type(T), so it's just a matter of "doing it right" every time.
168            // These transmutes are required because the FFT algorithm's input will come through the FFT trait, which may only be bounded by FftNum.
169            // So the buffers will have the type &mut [Complex<T>].
170            let id_f32 = TypeId::of::<f32>();
171            let id_f64 = TypeId::of::<f64>();
172            let id_t = TypeId::of::<T>();
173
174            if id_t == id_f32 || id_t == id_f64 {
175                let hand_butterflies: [usize; 13] = [2, 3, 4, 5, 6, 8, 9, 10, 12, 15, 16, 24, 32];
176                let prime_butterflies = sse_prime_butterflies::prime_butterfly_lens();
177                let mut all_butterflies: Box<[usize]> = hand_butterflies
178                    .iter()
179                    .chain(prime_butterflies.iter())
180                    .copied()
181                    .collect();
182                all_butterflies.sort();
183
184                // make sure we didn't get any duplicate butterflies from multiple sources
185                // since it's sorted, any duplicates will be adjacent
186                for window in all_butterflies.windows(2) {
187                    assert_ne!(window[0], window[1]);
188                }
189
190                return Ok(Self {
191                    algorithm_cache: FftCache::new(),
192                    recipe_cache: HashMap::new(),
193                    all_butterflies,
194                });
195            }
196        }
197        Err(())
198    }
199
200    /// Returns a `Fft` instance which uses SSE4.1 instructions to compute FFTs of size `len`.
201    ///
202    /// If the provided `direction` is `FftDirection::Forward`, the returned instance will compute forward FFTs. If it's `FftDirection::Inverse`, it will compute inverse FFTs.
203    ///
204    /// If this is called multiple times, the planner will attempt to re-use internal data between calls, reducing memory usage and FFT initialization time.
205    pub fn plan_fft(&mut self, len: usize, direction: FftDirection) -> Arc<dyn Fft<T>> {
206        // Step 1: Create a "recipe" for this FFT, which will tell us exactly which combination of algorithms to use
207        let recipe = self.design_fft_for_len(len);
208
209        // Step 2: Use our recipe to construct a Fft trait object
210        self.build_fft(&recipe, direction)
211    }
212
213    /// Returns a `Fft` instance which uses SSE4.1 instructions to compute forward FFTs of size `len`
214    ///
215    /// If this is called multiple times, the planner will attempt to re-use internal data between calls, reducing memory usage and FFT initialization time.
216    pub fn plan_fft_forward(&mut self, len: usize) -> Arc<dyn Fft<T>> {
217        self.plan_fft(len, FftDirection::Forward)
218    }
219
220    /// Returns a `Fft` instance which uses SSE4.1 instructions to compute inverse FFTs of size `len.
221    ///
222    /// If this is called multiple times, the planner will attempt to re-use internal data between calls, reducing memory usage and FFT initialization time.
223    pub fn plan_fft_inverse(&mut self, len: usize) -> Arc<dyn Fft<T>> {
224        self.plan_fft(len, FftDirection::Inverse)
225    }
226
227    // Make a recipe for a length
228    fn design_fft_for_len(&mut self, len: usize) -> Arc<Recipe> {
229        if len < 1 {
230            Arc::new(Recipe::Dft(len))
231        } else if let Some(recipe) = self.recipe_cache.get(&len) {
232            Arc::clone(&recipe)
233        } else {
234            let factors = PrimeFactors::compute(len);
235            let recipe = self.design_fft_with_factors(len, factors);
236            self.recipe_cache.insert(len, Arc::clone(&recipe));
237            recipe
238        }
239    }
240
241    // Create the fft from a recipe, take from cache if possible
242    fn build_fft(&mut self, recipe: &Recipe, direction: FftDirection) -> Arc<dyn Fft<T>> {
243        let len = recipe.len();
244        if let Some(instance) = self.algorithm_cache.get(len, direction) {
245            instance
246        } else {
247            let fft = self.build_new_fft(recipe, direction);
248            self.algorithm_cache.insert(&fft);
249            fft
250        }
251    }
252
253    // Create a new fft from a recipe
254    fn build_new_fft(&mut self, recipe: &Recipe, direction: FftDirection) -> Arc<dyn Fft<T>> {
255        let id_f32 = TypeId::of::<f32>();
256        let id_f64 = TypeId::of::<f64>();
257        let id_t = TypeId::of::<T>();
258
259        match recipe {
260            Recipe::Dft(len) => Arc::new(Dft::new(*len, direction)) as Arc<dyn Fft<T>>,
261            Recipe::Radix4 { k, base_fft } => {
262                let base_fft = self.build_fft(&base_fft, direction);
263                if id_t == id_f32 {
264                    Arc::new(SseRadix4::<f32, T>::new(*k, base_fft).unwrap()) as Arc<dyn Fft<T>>
265                } else if id_t == id_f64 {
266                    Arc::new(SseRadix4::<f64, T>::new(*k, base_fft).unwrap()) as Arc<dyn Fft<T>>
267                } else {
268                    panic!("Not f32 or f64");
269                }
270            }
271            Recipe::Butterfly1 => {
272                if id_t == id_f32 {
273                    Arc::new(SseF32Butterfly1::new(direction)) as Arc<dyn Fft<T>>
274                } else if id_t == id_f64 {
275                    Arc::new(SseF64Butterfly1::new(direction)) as Arc<dyn Fft<T>>
276                } else {
277                    panic!("Not f32 or f64");
278                }
279            }
280            Recipe::Butterfly2 => {
281                if id_t == id_f32 {
282                    Arc::new(SseF32Butterfly2::new(direction)) as Arc<dyn Fft<T>>
283                } else if id_t == id_f64 {
284                    Arc::new(SseF64Butterfly2::new(direction)) as Arc<dyn Fft<T>>
285                } else {
286                    panic!("Not f32 or f64");
287                }
288            }
289            Recipe::Butterfly3 => {
290                if id_t == id_f32 {
291                    Arc::new(SseF32Butterfly3::new(direction)) as Arc<dyn Fft<T>>
292                } else if id_t == id_f64 {
293                    Arc::new(SseF64Butterfly3::new(direction)) as Arc<dyn Fft<T>>
294                } else {
295                    panic!("Not f32 or f64");
296                }
297            }
298            Recipe::Butterfly4 => {
299                if id_t == id_f32 {
300                    Arc::new(SseF32Butterfly4::new(direction)) as Arc<dyn Fft<T>>
301                } else if id_t == id_f64 {
302                    Arc::new(SseF64Butterfly4::new(direction)) as Arc<dyn Fft<T>>
303                } else {
304                    panic!("Not f32 or f64");
305                }
306            }
307            Recipe::Butterfly5 => {
308                if id_t == id_f32 {
309                    Arc::new(SseF32Butterfly5::new(direction)) as Arc<dyn Fft<T>>
310                } else if id_t == id_f64 {
311                    Arc::new(SseF64Butterfly5::new(direction)) as Arc<dyn Fft<T>>
312                } else {
313                    panic!("Not f32 or f64");
314                }
315            }
316            Recipe::Butterfly6 => {
317                if id_t == id_f32 {
318                    Arc::new(SseF32Butterfly6::new(direction)) as Arc<dyn Fft<T>>
319                } else if id_t == id_f64 {
320                    Arc::new(SseF64Butterfly6::new(direction)) as Arc<dyn Fft<T>>
321                } else {
322                    panic!("Not f32 or f64");
323                }
324            }
325            Recipe::Butterfly8 => {
326                if id_t == id_f32 {
327                    Arc::new(SseF32Butterfly8::new(direction)) as Arc<dyn Fft<T>>
328                } else if id_t == id_f64 {
329                    Arc::new(SseF64Butterfly8::new(direction)) as Arc<dyn Fft<T>>
330                } else {
331                    panic!("Not f32 or f64");
332                }
333            }
334            Recipe::Butterfly9 => {
335                if id_t == id_f32 {
336                    Arc::new(SseF32Butterfly9::new(direction)) as Arc<dyn Fft<T>>
337                } else if id_t == id_f64 {
338                    Arc::new(SseF64Butterfly9::new(direction)) as Arc<dyn Fft<T>>
339                } else {
340                    panic!("Not f32 or f64");
341                }
342            }
343            Recipe::Butterfly10 => {
344                if id_t == id_f32 {
345                    Arc::new(SseF32Butterfly10::new(direction)) as Arc<dyn Fft<T>>
346                } else if id_t == id_f64 {
347                    Arc::new(SseF64Butterfly10::new(direction)) as Arc<dyn Fft<T>>
348                } else {
349                    panic!("Not f32 or f64");
350                }
351            }
352            Recipe::Butterfly12 => {
353                if id_t == id_f32 {
354                    Arc::new(SseF32Butterfly12::new(direction)) as Arc<dyn Fft<T>>
355                } else if id_t == id_f64 {
356                    Arc::new(SseF64Butterfly12::new(direction)) as Arc<dyn Fft<T>>
357                } else {
358                    panic!("Not f32 or f64");
359                }
360            }
361            Recipe::Butterfly15 => {
362                if id_t == id_f32 {
363                    Arc::new(SseF32Butterfly15::new(direction)) as Arc<dyn Fft<T>>
364                } else if id_t == id_f64 {
365                    Arc::new(SseF64Butterfly15::new(direction)) as Arc<dyn Fft<T>>
366                } else {
367                    panic!("Not f32 or f64");
368                }
369            }
370            Recipe::Butterfly16 => {
371                if id_t == id_f32 {
372                    Arc::new(SseF32Butterfly16::new(direction)) as Arc<dyn Fft<T>>
373                } else if id_t == id_f64 {
374                    Arc::new(SseF64Butterfly16::new(direction)) as Arc<dyn Fft<T>>
375                } else {
376                    panic!("Not f32 or f64");
377                }
378            }
379            Recipe::Butterfly24 => {
380                if id_t == id_f32 {
381                    Arc::new(SseF32Butterfly24::new(direction)) as Arc<dyn Fft<T>>
382                } else if id_t == id_f64 {
383                    Arc::new(SseF64Butterfly24::new(direction)) as Arc<dyn Fft<T>>
384                } else {
385                    panic!("Not f32 or f64");
386                }
387            }
388            Recipe::Butterfly32 => {
389                if id_t == id_f32 {
390                    Arc::new(SseF32Butterfly32::new(direction)) as Arc<dyn Fft<T>>
391                } else if id_t == id_f64 {
392                    Arc::new(SseF64Butterfly32::new(direction)) as Arc<dyn Fft<T>>
393                } else {
394                    panic!("Not f32 or f64");
395                }
396            }
397            Recipe::PrimeButterfly { len } => {
398                // Safety: construct_prime_butterfly() requires the sse4.1 instruction set, and we checked that in the constructor
399                unsafe { sse_prime_butterflies::construct_prime_butterfly(*len, direction) }
400            }
401            Recipe::MixedRadix {
402                left_fft,
403                right_fft,
404            } => {
405                let left_fft = self.build_fft(&left_fft, direction);
406                let right_fft = self.build_fft(&right_fft, direction);
407                Arc::new(MixedRadix::new(left_fft, right_fft)) as Arc<dyn Fft<T>>
408            }
409            Recipe::GoodThomasAlgorithm {
410                left_fft,
411                right_fft,
412            } => {
413                let left_fft = self.build_fft(&left_fft, direction);
414                let right_fft = self.build_fft(&right_fft, direction);
415                Arc::new(GoodThomasAlgorithm::new(left_fft, right_fft)) as Arc<dyn Fft<T>>
416            }
417            Recipe::MixedRadixSmall {
418                left_fft,
419                right_fft,
420            } => {
421                let left_fft = self.build_fft(&left_fft, direction);
422                let right_fft = self.build_fft(&right_fft, direction);
423                Arc::new(MixedRadixSmall::new(left_fft, right_fft)) as Arc<dyn Fft<T>>
424            }
425            Recipe::GoodThomasAlgorithmSmall {
426                left_fft,
427                right_fft,
428            } => {
429                let left_fft = self.build_fft(&left_fft, direction);
430                let right_fft = self.build_fft(&right_fft, direction);
431                Arc::new(GoodThomasAlgorithmSmall::new(left_fft, right_fft)) as Arc<dyn Fft<T>>
432            }
433            Recipe::RadersAlgorithm { inner_fft } => {
434                let inner_fft = self.build_fft(&inner_fft, direction);
435                Arc::new(RadersAlgorithm::new(inner_fft)) as Arc<dyn Fft<T>>
436            }
437            Recipe::BluesteinsAlgorithm { len, inner_fft } => {
438                let inner_fft = self.build_fft(&inner_fft, direction);
439                Arc::new(BluesteinsAlgorithm::new(*len, inner_fft)) as Arc<dyn Fft<T>>
440            }
441        }
442    }
443
444    fn design_fft_with_factors(&mut self, len: usize, factors: PrimeFactors) -> Arc<Recipe> {
445        if let Some(fft_instance) = self.design_butterfly_algorithm(len) {
446            fft_instance
447        } else if factors.is_prime() {
448            self.design_prime(len)
449        } else if len.trailing_zeros() >= MIN_RADIX4_BITS {
450            if factors.get_other_factors().is_empty() && factors.get_power_of_three() < 2 {
451                self.design_radix4(factors)
452            } else {
453                let non_power_of_two = factors
454                    .remove_factors(PrimeFactor {
455                        value: 2,
456                        count: len.trailing_zeros(),
457                    })
458                    .unwrap();
459                let power_of_two = PrimeFactors::compute(1 << len.trailing_zeros());
460                self.design_mixed_radix(power_of_two, non_power_of_two)
461            }
462        } else {
463            // Can we do this as a mixed radix with just two butterflies?
464            // Loop through and find all combinations
465            // If more than one is found, keep the one where the factors are closer together.
466            // For example length 20 where 10x2 and 5x4 are possible, we use 5x4.
467            let mut bf_left = 0;
468            let mut bf_right = 0;
469            // If the length is below 14, or over 1024 we don't need to try this.
470            if len > 13 && len <= 1024 {
471                for (n, bf_l) in self.all_butterflies.iter().enumerate() {
472                    if len % bf_l == 0 {
473                        let bf_r = len / bf_l;
474                        if self.all_butterflies.iter().skip(n).any(|&m| m == bf_r) {
475                            bf_left = *bf_l;
476                            bf_right = bf_r;
477                        }
478                    }
479                }
480                if bf_left > 0 {
481                    let fact_l = PrimeFactors::compute(bf_left);
482                    let fact_r = PrimeFactors::compute(bf_right);
483                    return self.design_mixed_radix(fact_l, fact_r);
484                }
485            }
486            // Not possible with just butterflies, go with the general solution.
487            let (left_factors, right_factors) = factors.partition_factors();
488            self.design_mixed_radix(left_factors, right_factors)
489        }
490    }
491
492    fn design_mixed_radix(
493        &mut self,
494        left_factors: PrimeFactors,
495        right_factors: PrimeFactors,
496    ) -> Arc<Recipe> {
497        let left_len = left_factors.get_product();
498        let right_len = right_factors.get_product();
499
500        //neither size is a butterfly, so go with the normal algorithm
501        let left_fft = self.design_fft_with_factors(left_len, left_factors);
502        let right_fft = self.design_fft_with_factors(right_len, right_factors);
503
504        //if both left_len and right_len are small, use algorithms optimized for small FFTs
505        if left_len < 33 && right_len < 33 {
506            // for small FFTs, if gcd is 1, good-thomas is faster
507            if gcd(left_len, right_len) == 1 {
508                Arc::new(Recipe::GoodThomasAlgorithmSmall {
509                    left_fft,
510                    right_fft,
511                })
512            } else {
513                Arc::new(Recipe::MixedRadixSmall {
514                    left_fft,
515                    right_fft,
516                })
517            }
518        } else {
519            Arc::new(Recipe::MixedRadix {
520                left_fft,
521                right_fft,
522            })
523        }
524    }
525
526    // Returns Some(instance) if we have a butterfly available for this size. Returns None if there is no butterfly available for this size
527    fn design_butterfly_algorithm(&mut self, len: usize) -> Option<Arc<Recipe>> {
528        match len {
529            1 => Some(Arc::new(Recipe::Butterfly1)),
530            2 => Some(Arc::new(Recipe::Butterfly2)),
531            3 => Some(Arc::new(Recipe::Butterfly3)),
532            4 => Some(Arc::new(Recipe::Butterfly4)),
533            5 => Some(Arc::new(Recipe::Butterfly5)),
534            6 => Some(Arc::new(Recipe::Butterfly6)),
535            8 => Some(Arc::new(Recipe::Butterfly8)),
536            9 => Some(Arc::new(Recipe::Butterfly9)),
537            10 => Some(Arc::new(Recipe::Butterfly10)),
538            12 => Some(Arc::new(Recipe::Butterfly12)),
539            15 => Some(Arc::new(Recipe::Butterfly15)),
540            16 => Some(Arc::new(Recipe::Butterfly16)),
541            24 => Some(Arc::new(Recipe::Butterfly24)),
542            32 => Some(Arc::new(Recipe::Butterfly32)),
543            _ => {
544                if sse_prime_butterflies::prime_butterfly_lens().contains(&len) {
545                    Some(Arc::new(Recipe::PrimeButterfly { len }))
546                } else {
547                    None
548                }
549            }
550        }
551    }
552
553    fn design_prime(&mut self, len: usize) -> Arc<Recipe> {
554        let inner_fft_len_rader = len - 1;
555        let raders_factors = PrimeFactors::compute(inner_fft_len_rader);
556        // If any of the prime factors is too large, Rader's gets slow and Bluestein's is the better choice
557        if raders_factors
558            .get_other_factors()
559            .iter()
560            .any(|val| val.value > MAX_RADER_PRIME_FACTOR)
561        {
562            // we want to use bluestein's algorithm. we have a free choice of which inner FFT length to use
563            // the only restriction is that it has to be (2 * len - 1) or larger. So we want the fastest FFT we can compute at or above that size.
564
565            // the most obvious choice is the next-highest power of two, but there's one trick we can pull to get a smaller fft that we can be 100% certain will be faster
566            let min_inner_len = 2 * len - 1;
567            let inner_len_pow2 = min_inner_len.checked_next_power_of_two().unwrap();
568            let inner_len_factor3 = inner_len_pow2 / 4 * 3;
569
570            let inner_len = if inner_len_factor3 >= min_inner_len {
571                inner_len_factor3
572            } else {
573                inner_len_pow2
574            };
575            let inner_fft = self.design_fft_for_len(inner_len);
576
577            Arc::new(Recipe::BluesteinsAlgorithm { len, inner_fft })
578        } else {
579            let inner_fft = self.design_fft_with_factors(inner_fft_len_rader, raders_factors);
580            Arc::new(Recipe::RadersAlgorithm { inner_fft })
581        }
582    }
583
584    fn design_radix4(&mut self, factors: PrimeFactors) -> Arc<Recipe> {
585        // We can eventually relax this restriction -- it's not instrinsic to radix4, it's just that anything besides 2^n and 3*2^n hasn't been measured yet
586        assert!(factors.get_other_factors().is_empty() && factors.get_power_of_three() < 2);
587
588        let p2 = factors.get_power_of_two();
589        let base_len: usize = if factors.get_power_of_three() == 0 {
590            // pure power of 2
591            match p2 {
592                // base cases. we shouldn't hit these but we might as well be ready for them
593                0 => 1,
594                1 => 2,
595                2 => 4,
596                3 => 8,
597                // main case: if len is a power of 4, use a base of 16, otherwise use a base of 32
598                _ => {
599                    if p2 % 2 == 1 {
600                        32
601                    } else {
602                        16
603                    }
604                }
605            }
606        } else {
607            // we have a factor 3 that we're going to stick into the butterflies
608            match p2 {
609                // base cases. we shouldn't hit these but we might as well be ready for them
610                0 => 3,
611                1 => 6,
612                // main case: if len is 3*4^k, use a base of 12, otherwise use a base of 24
613                _ => {
614                    if p2 % 2 == 1 {
615                        24
616                    } else {
617                        12
618                    }
619                }
620            }
621        };
622
623        // now that we know the base length, divide it out get what radix4 needs to compute
624        let cross_len = factors.get_product() / base_len;
625        assert!(cross_len.is_power_of_two());
626
627        let cross_bits = cross_len.trailing_zeros();
628        assert!(cross_bits % 2 == 0);
629        let k = cross_bits / 2;
630
631        let base_fft = self.design_fft_for_len(base_len);
632        Arc::new(Recipe::Radix4 { k, base_fft })
633    }
634}
635
636#[cfg(test)]
637mod unit_tests {
638    use super::*;
639
640    fn is_mixedradix(plan: &Recipe) -> bool {
641        match plan {
642            &Recipe::MixedRadix { .. } => true,
643            _ => false,
644        }
645    }
646
647    fn is_mixedradixsmall(plan: &Recipe) -> bool {
648        match plan {
649            &Recipe::MixedRadixSmall { .. } => true,
650            _ => false,
651        }
652    }
653
654    fn is_goodthomassmall(plan: &Recipe) -> bool {
655        match plan {
656            &Recipe::GoodThomasAlgorithmSmall { .. } => true,
657            _ => false,
658        }
659    }
660
661    fn is_raders(plan: &Recipe) -> bool {
662        match plan {
663            &Recipe::RadersAlgorithm { .. } => true,
664            _ => false,
665        }
666    }
667
668    fn is_bluesteins(plan: &Recipe) -> bool {
669        match plan {
670            &Recipe::BluesteinsAlgorithm { .. } => true,
671            _ => false,
672        }
673    }
674
675    #[test]
676    fn test_plan_sse_trivial() {
677        // Length 0 and 1 should use Dft
678        let mut planner = FftPlannerSse::<f64>::new().unwrap();
679        for len in 0..1 {
680            let plan = planner.design_fft_for_len(len);
681            assert_eq!(*plan, Recipe::Dft(len));
682            assert_eq!(plan.len(), len, "Recipe reports wrong length");
683        }
684    }
685
686    #[test]
687    fn test_plan_sse_largepoweroftwo() {
688        // Powers of 2 above 6 should use Radix4
689        let mut planner = FftPlannerSse::<f64>::new().unwrap();
690        for pow in 6..32 {
691            let len = 1 << pow;
692            let plan = planner.design_fft_for_len(len);
693            assert!(matches!(*plan, Recipe::Radix4 { k: _, base_fft: _ }));
694            assert_eq!(plan.len(), len, "Recipe reports wrong length");
695        }
696    }
697
698    #[test]
699    fn test_plan_sse_butterflies() {
700        // Check that all butterflies are used
701        let mut planner = FftPlannerSse::<f64>::new().unwrap();
702        assert_eq!(*planner.design_fft_for_len(2), Recipe::Butterfly2);
703        assert_eq!(*planner.design_fft_for_len(3), Recipe::Butterfly3);
704        assert_eq!(*planner.design_fft_for_len(4), Recipe::Butterfly4);
705        assert_eq!(*planner.design_fft_for_len(5), Recipe::Butterfly5);
706        assert_eq!(*planner.design_fft_for_len(6), Recipe::Butterfly6);
707        assert_eq!(*planner.design_fft_for_len(8), Recipe::Butterfly8);
708        assert_eq!(*planner.design_fft_for_len(9), Recipe::Butterfly9);
709        assert_eq!(*planner.design_fft_for_len(10), Recipe::Butterfly10);
710        assert_eq!(*planner.design_fft_for_len(12), Recipe::Butterfly12);
711        assert_eq!(*planner.design_fft_for_len(15), Recipe::Butterfly15);
712        assert_eq!(*planner.design_fft_for_len(16), Recipe::Butterfly16);
713        assert_eq!(*planner.design_fft_for_len(24), Recipe::Butterfly24);
714        assert_eq!(*planner.design_fft_for_len(32), Recipe::Butterfly32);
715        for len in sse_prime_butterflies::prime_butterfly_lens() {
716            assert_eq!(
717                *planner.design_fft_for_len(*len),
718                Recipe::PrimeButterfly { len: *len }
719            );
720        }
721    }
722
723    #[test]
724    fn test_plan_sse_mixedradix() {
725        // Products of several different primes should become MixedRadix
726        let mut planner = FftPlannerSse::<f64>::new().unwrap();
727        for pow2 in 2..5 {
728            for pow3 in 2..5 {
729                for pow5 in 2..5 {
730                    for pow7 in 2..5 {
731                        let len = 2usize.pow(pow2)
732                            * 3usize.pow(pow3)
733                            * 5usize.pow(pow5)
734                            * 7usize.pow(pow7);
735                        let plan = planner.design_fft_for_len(len);
736                        assert!(is_mixedradix(&plan), "Expected MixedRadix, got {:?}", plan);
737                        assert_eq!(plan.len(), len, "Recipe reports wrong length");
738                    }
739                }
740            }
741        }
742    }
743
744    #[test]
745    fn test_plan_sse_mixedradixsmall() {
746        // Products of two "small" lengths < 31 that have a common divisor >1, and isn't a power of 2 should be MixedRadixSmall
747        let mut planner = FftPlannerSse::<f64>::new().unwrap();
748        for len in [5 * 20, 5 * 25].iter() {
749            let plan = planner.design_fft_for_len(*len);
750            assert!(
751                is_mixedradixsmall(&plan),
752                "Expected MixedRadixSmall, got {:?}",
753                plan
754            );
755            assert_eq!(plan.len(), *len, "Recipe reports wrong length");
756        }
757    }
758
759    #[test]
760    fn test_plan_sse_goodthomasbutterfly() {
761        let mut planner = FftPlannerSse::<f64>::new().unwrap();
762        for len in [3 * 7, 5 * 7, 11 * 13, 2 * 29].iter() {
763            let plan = planner.design_fft_for_len(*len);
764            assert!(
765                is_goodthomassmall(&plan),
766                "Expected GoodThomasAlgorithmSmall, got {:?}",
767                plan
768            );
769            assert_eq!(plan.len(), *len, "Recipe reports wrong length");
770        }
771    }
772
773    #[test]
774    fn test_plan_sse_bluestein_vs_rader() {
775        let difficultprimes: [usize; 11] = [59, 83, 107, 149, 167, 173, 179, 359, 719, 1439, 2879];
776        let easyprimes: [usize; 24] = [
777            53, 61, 67, 71, 73, 79, 89, 97, 101, 103, 109, 113, 127, 131, 137, 139, 151, 157, 163,
778            181, 191, 193, 197, 199,
779        ];
780
781        let mut planner = FftPlannerSse::<f64>::new().unwrap();
782        for len in difficultprimes.iter() {
783            let plan = planner.design_fft_for_len(*len);
784            assert!(
785                is_bluesteins(&plan),
786                "Expected BluesteinsAlgorithm, got {:?}",
787                plan
788            );
789            assert_eq!(plan.len(), *len, "Recipe reports wrong length");
790        }
791        for len in easyprimes.iter() {
792            let plan = planner.design_fft_for_len(*len);
793            assert!(is_raders(&plan), "Expected RadersAlgorithm, got {:?}", plan);
794            assert_eq!(plan.len(), *len, "Recipe reports wrong length");
795        }
796    }
797
798    #[test]
799    fn test_sse_fft_cache() {
800        {
801            // Check that FFTs are reused if they're both forward
802            let mut planner = FftPlannerSse::<f64>::new().unwrap();
803            let fft_a = planner.plan_fft(1234, FftDirection::Forward);
804            let fft_b = planner.plan_fft(1234, FftDirection::Forward);
805            assert!(Arc::ptr_eq(&fft_a, &fft_b), "Existing fft was not reused");
806        }
807        {
808            // Check that FFTs are reused if they're both inverse
809            let mut planner = FftPlannerSse::<f64>::new().unwrap();
810            let fft_a = planner.plan_fft(1234, FftDirection::Inverse);
811            let fft_b = planner.plan_fft(1234, FftDirection::Inverse);
812            assert!(Arc::ptr_eq(&fft_a, &fft_b), "Existing fft was not reused");
813        }
814        {
815            // Check that FFTs are NOT resued if they don't both have the same direction
816            let mut planner = FftPlannerSse::<f64>::new().unwrap();
817            let fft_a = planner.plan_fft(1234, FftDirection::Forward);
818            let fft_b = planner.plan_fft(1234, FftDirection::Inverse);
819            assert!(
820                !Arc::ptr_eq(&fft_a, &fft_b),
821                "Existing fft was reused, even though directions don't match"
822            );
823        }
824    }
825
826    #[test]
827    fn test_sse_recipe_cache() {
828        // Check that all butterflies are used
829        let mut planner = FftPlannerSse::<f64>::new().unwrap();
830        let fft_a = planner.design_fft_for_len(1234);
831        let fft_b = planner.design_fft_for_len(1234);
832        assert!(
833            Arc::ptr_eq(&fft_a, &fft_b),
834            "Existing recipe was not reused"
835        );
836    }
837}