polars_arrow/legacy/kernels/take_agg/
var.rs

1use super::*;
2
3/// Numerical stable online variance aggregation.
4///
5/// See:
6/// Welford, B. P. (1962). "Note on a method for calculating corrected sums of squares and products".
7/// Technometrics. 4 (3): 419–420. doi:10.2307/1266577. JSTOR 1266577.
8/// and:
9/// Ling, Robert F. (1974). "Comparison of Several Algorithms for Computing Sample Means and Variances".
10/// Journal of the American Statistical Association. 69 (348): 859–866. doi:10.2307/2286154. JSTOR 2286154.
11pub fn online_variance<I>(
12    // iterator producing values
13    iter: I,
14    ddof: u8,
15) -> Option<f64>
16where
17    I: IntoIterator<Item = f64>,
18{
19    let mut m2 = 0.0;
20    let mut mean = 0.0;
21    let mut count = 0u64;
22
23    for value in iter {
24        let new_count = count + 1;
25        let delta_1 = value - mean;
26        let new_mean = delta_1 / new_count as f64 + mean;
27        let delta_2 = value - new_mean;
28        let new_m2 = m2 + delta_1 * delta_2;
29
30        count += 1;
31        mean = new_mean;
32        m2 = new_m2;
33    }
34
35    if count <= ddof as u64 {
36        return None;
37    }
38
39    Some(m2 / (count as f64 - ddof as f64))
40}
41
42/// Take kernel for single chunk and an iterator as index.
43/// # Safety
44/// caller must ensure iterators indexes are in bounds
45pub unsafe fn take_var_no_null_primitive_iter_unchecked<T, I>(
46    arr: &PrimitiveArray<T>,
47    indices: I,
48    ddof: u8,
49) -> Option<f64>
50where
51    T: NativeType + ToPrimitive,
52    I: IntoIterator<Item = usize>,
53{
54    debug_assert!(arr.null_count() == 0);
55    let array_values = arr.values().as_slice();
56    let iter = unsafe {
57        indices.into_iter().map(|idx| {
58            let value = *array_values.get_unchecked(idx);
59            value.to_f64().unwrap_unchecked()
60        })
61    };
62    online_variance(iter, ddof)
63}
64
65/// Take kernel for single chunk and an iterator as index.
66/// # Safety
67/// caller must ensure iterators indexes are in bounds
68pub unsafe fn take_var_nulls_primitive_iter_unchecked<T, I>(
69    arr: &PrimitiveArray<T>,
70    indices: I,
71    ddof: u8,
72) -> Option<f64>
73where
74    T: NativeType + ToPrimitive,
75    I: IntoIterator<Item = usize>,
76{
77    debug_assert!(arr.null_count() > 0);
78    let array_values = arr.values().as_slice();
79    let validity = arr.validity().unwrap();
80
81    let iter = unsafe {
82        indices.into_iter().flat_map(|idx| {
83            if validity.get_bit_unchecked(idx) {
84                let value = *array_values.get_unchecked(idx);
85                value.to_f64()
86            } else {
87                None
88            }
89        })
90    };
91    online_variance(iter, ddof)
92}