statrs/statistics/
iter_statistics.rs

1use crate::error::StatsError;
2use crate::statistics::*;
3use std::borrow::Borrow;
4use std::f64;
5
6impl<T> Statistics<f64> for T
7where
8    T: IntoIterator,
9    T::Item: Borrow<f64>,
10{
11    fn min(self) -> f64 {
12        let mut iter = self.into_iter();
13        match iter.next() {
14            None => f64::NAN,
15            Some(x) => iter.map(|x| *x.borrow()).fold(*x.borrow(), |acc, x| {
16                if x < acc || x.is_nan() {
17                    x
18                } else {
19                    acc
20                }
21            }),
22        }
23    }
24
25    fn max(self) -> f64 {
26        let mut iter = self.into_iter();
27        match iter.next() {
28            None => f64::NAN,
29            Some(x) => iter.map(|x| *x.borrow()).fold(*x.borrow(), |acc, x| {
30                if x > acc || x.is_nan() {
31                    x
32                } else {
33                    acc
34                }
35            }),
36        }
37    }
38
39    fn abs_min(self) -> f64 {
40        let mut iter = self.into_iter();
41        match iter.next() {
42            None => f64::NAN,
43            Some(init) => iter
44                .map(|x| x.borrow().abs())
45                .fold(init.borrow().abs(), |acc, x| {
46                    if x < acc || x.is_nan() {
47                        x
48                    } else {
49                        acc
50                    }
51                }),
52        }
53    }
54
55    fn abs_max(self) -> f64 {
56        let mut iter = self.into_iter();
57        match iter.next() {
58            None => f64::NAN,
59            Some(init) => iter
60                .map(|x| x.borrow().abs())
61                .fold(init.borrow().abs(), |acc, x| {
62                    if x > acc || x.is_nan() {
63                        x
64                    } else {
65                        acc
66                    }
67                }),
68        }
69    }
70
71    fn mean(self) -> f64 {
72        let mut i = 0.0;
73        let mut mean = 0.0;
74        for x in self {
75            i += 1.0;
76            mean += (x.borrow() - mean) / i;
77        }
78        if i > 0.0 {
79            mean
80        } else {
81            f64::NAN
82        }
83    }
84
85    fn geometric_mean(self) -> f64 {
86        let mut i = 0.0;
87        let mut sum = 0.0;
88        for x in self {
89            i += 1.0;
90            sum += x.borrow().ln();
91        }
92        if i > 0.0 {
93            (sum / i).exp()
94        } else {
95            f64::NAN
96        }
97    }
98
99    fn harmonic_mean(self) -> f64 {
100        let mut i = 0.0;
101        let mut sum = 0.0;
102        for x in self {
103            i += 1.0;
104
105            let borrow = *x.borrow();
106            if borrow < 0f64 {
107                return f64::NAN;
108            }
109            sum += 1.0 / borrow;
110        }
111        if i > 0.0 {
112            i / sum
113        } else {
114            f64::NAN
115        }
116    }
117
118    fn variance(self) -> f64 {
119        let mut iter = self.into_iter();
120        let mut sum = match iter.next() {
121            None => f64::NAN,
122            Some(x) => *x.borrow(),
123        };
124        let mut i = 1.0;
125        let mut variance = 0.0;
126
127        for x in iter {
128            i += 1.0;
129            let borrow = *x.borrow();
130            sum += borrow;
131            let diff = i * borrow - sum;
132            variance += diff * diff / (i * (i - 1.0))
133        }
134        if i > 1.0 {
135            variance / (i - 1.0)
136        } else {
137            f64::NAN
138        }
139    }
140
141    fn std_dev(self) -> f64 {
142        self.variance().sqrt()
143    }
144
145    fn population_variance(self) -> f64 {
146        let mut iter = self.into_iter();
147        let mut sum = match iter.next() {
148            None => return f64::NAN,
149            Some(x) => *x.borrow(),
150        };
151        let mut i = 1.0;
152        let mut variance = 0.0;
153
154        for x in iter {
155            i += 1.0;
156            let borrow = *x.borrow();
157            sum += borrow;
158            let diff = i * borrow - sum;
159            variance += diff * diff / (i * (i - 1.0));
160        }
161        variance / i
162    }
163
164    fn population_std_dev(self) -> f64 {
165        self.population_variance().sqrt()
166    }
167
168    fn covariance(self, other: Self) -> f64 {
169        let mut n = 0.0;
170        let mut mean1 = 0.0;
171        let mut mean2 = 0.0;
172        let mut comoment = 0.0;
173
174        let mut iter = other.into_iter();
175        for x in self {
176            let borrow = *x.borrow();
177            let borrow2 = match iter.next() {
178                None => panic!("{}", StatsError::ContainersMustBeSameLength),
179                Some(x) => *x.borrow(),
180            };
181            let old_mean2 = mean2;
182            n += 1.0;
183            mean1 += (borrow - mean1) / n;
184            mean2 += (borrow2 - mean2) / n;
185            comoment += (borrow - mean1) * (borrow2 - old_mean2);
186        }
187        if iter.next().is_some() {
188            panic!("{}", StatsError::ContainersMustBeSameLength);
189        }
190
191        if n > 1.0 {
192            comoment / (n - 1.0)
193        } else {
194            f64::NAN
195        }
196    }
197
198    fn population_covariance(self, other: Self) -> f64 {
199        let mut n = 0.0;
200        let mut mean1 = 0.0;
201        let mut mean2 = 0.0;
202        let mut comoment = 0.0;
203
204        let mut iter = other.into_iter();
205        for x in self {
206            let borrow = *x.borrow();
207            let borrow2 = match iter.next() {
208                None => panic!("{}", StatsError::ContainersMustBeSameLength),
209                Some(x) => *x.borrow(),
210            };
211            let old_mean2 = mean2;
212            n += 1.0;
213            mean1 += (borrow - mean1) / n;
214            mean2 += (borrow2 - mean2) / n;
215            comoment += (borrow - mean1) * (borrow2 - old_mean2);
216        }
217        if iter.next().is_some() {
218            panic!("{}", StatsError::ContainersMustBeSameLength)
219        }
220        if n > 0.0 {
221            comoment / n
222        } else {
223            f64::NAN
224        }
225    }
226
227    fn quadratic_mean(self) -> f64 {
228        let mut i = 0.0;
229        let mut mean = 0.0;
230        for x in self {
231            let borrow = *x.borrow();
232            i += 1.0;
233            mean += (borrow * borrow - mean) / i;
234        }
235        if i > 0.0 {
236            mean.sqrt()
237        } else {
238            f64::NAN
239        }
240    }
241}
242
243#[rustfmt::skip]
244#[cfg(test)]
245mod tests {
246    use std::f64::consts;
247    use rand::rngs::StdRng;
248    use rand::{SeedableRng};
249    use rand::distributions::Distribution;
250    use crate::distribution::Normal;
251    use crate::statistics::Statistics;
252    use crate::generate::{InfinitePeriodic, InfiniteSinusoidal};
253    use crate::testing;
254
255    #[test]
256    fn test_mean() {
257        let mut data = testing::load_data("nist/lottery.txt");
258        assert_almost_eq!((&data).mean(), 518.958715596330, 1e-12);
259
260        data = testing::load_data("nist/lew.txt");
261        assert_almost_eq!((&data).mean(), -177.435000000000, 1e-13);
262
263        data = testing::load_data("nist/mavro.txt");
264        assert_almost_eq!((&data).mean(), 2.00185600000000, 1e-15);
265
266        data = testing::load_data("nist/michaelso.txt");
267        assert_almost_eq!((&data).mean(), 299.852400000000, 1e-13);
268
269        data = testing::load_data("nist/numacc1.txt");
270        assert_eq!((&data).mean(), 10000002.0);
271
272        data = testing::load_data("nist/numacc2.txt");
273        assert_almost_eq!((&data).mean(), 1.2, 1e-15);
274
275        data = testing::load_data("nist/numacc3.txt");
276        assert_eq!((&data).mean(), 1000000.2);
277
278        data = testing::load_data("nist/numacc4.txt");
279        assert_almost_eq!((&data).mean(), 10000000.2, 1e-8);
280    }
281
282    #[test]
283    fn test_std_dev() {
284        let mut data = testing::load_data("nist/lottery.txt");
285        assert_almost_eq!((&data).std_dev(), 291.699727470969, 1e-13);
286
287        data = testing::load_data("nist/lew.txt");
288        assert_almost_eq!((&data).std_dev(), 277.332168044316, 1e-12);
289
290        data = testing::load_data("nist/mavro.txt");
291        assert_almost_eq!((&data).std_dev(), 0.000429123454003053, 1e-15);
292
293        data = testing::load_data("nist/michaelso.txt");
294        assert_almost_eq!((&data).std_dev(), 0.0790105478190518, 1e-13);
295
296        data = testing::load_data("nist/numacc1.txt");
297        assert_eq!((&data).std_dev(), 1.0);
298
299        data = testing::load_data("nist/numacc2.txt");
300        assert_almost_eq!((&data).std_dev(), 0.1, 1e-16);
301
302        data = testing::load_data("nist/numacc3.txt");
303        assert_almost_eq!((&data).std_dev(), 0.1, 1e-10);
304
305        data = testing::load_data("nist/numacc4.txt");
306        assert_almost_eq!((&data).std_dev(), 0.1, 1e-9);
307    }
308
309    #[test]
310    fn test_min_max_short() {
311        let data = [-1.0, 5.0, 0.0, -3.0, 10.0, -0.5, 4.0];
312        assert_eq!(data.min(), -3.0);
313        assert_eq!(data.max(), 10.0);
314    }
315
316    #[test]
317    fn test_mean_variance_stability() {
318        let seed = [
319            0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 17, 18,
320            19, 20, 21, 22, 23, 24, 25, 26, 27, 28, 29, 30, 31
321        ];
322        let mut rng: StdRng = SeedableRng::from_seed(seed);
323        let normal = Normal::new(1e9, 2.0).unwrap();
324        let samples = (0..10000).map(|_| normal.sample::<StdRng>(&mut rng)).collect::<Vec<f64>>();
325        assert_almost_eq!((&samples).mean(), 1e9, 10.0);
326        assert_almost_eq!((&samples).variance(), 4.0, 0.1);
327        assert_almost_eq!((&samples).std_dev(), 2.0, 0.01);
328        assert_almost_eq!((&samples).quadratic_mean(), 1e9, 10.0);
329    }
330
331    #[test]
332    fn test_covariance_consistent_with_variance() {
333        let mut data = testing::load_data("nist/lottery.txt");
334        assert_almost_eq!((&data).variance(), (&data).covariance(&data), 1e-10);
335
336        data = testing::load_data("nist/lew.txt");
337        assert_almost_eq!((&data).variance(), (&data).covariance(&data), 1e-10);
338
339        data = testing::load_data("nist/mavro.txt");
340        assert_almost_eq!((&data).variance(), (&data).covariance(&data), 1e-10);
341
342        data = testing::load_data("nist/michaelso.txt");
343        assert_almost_eq!((&data).variance(), (&data).covariance(&data), 1e-10);
344
345        data = testing::load_data("nist/numacc1.txt");
346        assert_almost_eq!((&data).variance(), (&data).covariance(&data), 1e-10);
347    }
348
349    #[test]
350    fn test_pop_covar_consistent_with_pop_var() {
351        let mut data = testing::load_data("nist/lottery.txt");
352        assert_almost_eq!((&data).population_variance(), (&data).population_covariance(&data), 1e-10);
353
354        data = testing::load_data("nist/lew.txt");
355        assert_almost_eq!((&data).population_variance(), (&data).population_covariance(&data), 1e-10);
356
357        data = testing::load_data("nist/mavro.txt");
358        assert_almost_eq!((&data).population_variance(), (&data).population_covariance(&data), 1e-10);
359
360        data = testing::load_data("nist/michaelso.txt");
361        assert_almost_eq!((&data).population_variance(), (&data).population_covariance(&data), 1e-10);
362
363        data = testing::load_data("nist/numacc1.txt");
364        assert_almost_eq!((&data).population_variance(), (&data).population_covariance(&data), 1e-10);
365    }
366
367    #[test]
368    fn test_covariance_is_symmetric() {
369        let data_a = &testing::load_data("nist/lottery.txt")[0..200];
370        let data_b = &testing::load_data("nist/lew.txt")[0..200];
371        assert_almost_eq!(data_a.covariance(data_b), data_b.covariance(data_a), 1e-10);
372        assert_almost_eq!(data_a.population_covariance(data_b), data_b.population_covariance(data_a), 1e-11);
373    }
374
375    #[test]
376    fn test_empty_data_returns_nan() {
377        let data = [0.0; 0];
378        assert!(data.min().is_nan());
379        assert!(data.max().is_nan());
380        assert!(data.mean().is_nan());
381        assert!(data.quadratic_mean().is_nan());
382        assert!(data.variance().is_nan());
383        assert!(data.population_variance().is_nan());
384    }
385
386    // TODO: test github issue 137 (Math.NET)
387
388    #[test]
389    fn test_large_samples() {
390        let shorter = InfinitePeriodic::default(4.0, 1.0).take(4*4096).collect::<Vec<f64>>();
391        let longer = InfinitePeriodic::default(4.0, 1.0).take(4*32768).collect::<Vec<f64>>();
392        assert_almost_eq!((&shorter).mean(), 0.375, 1e-14);
393        assert_almost_eq!((&longer).mean(), 0.375, 1e-14);
394        assert_almost_eq!((&shorter).quadratic_mean(), (0.21875f64).sqrt(), 1e-14);
395        assert_almost_eq!((&longer).quadratic_mean(), (0.21875f64).sqrt(), 1e-14);
396    }
397
398    #[test]
399    fn test_quadratic_mean_of_sinusoidal() {
400        let data = InfiniteSinusoidal::default(64.0, 16.0, 2.0).take(128).collect::<Vec<f64>>();
401        assert_almost_eq!((&data).quadratic_mean(), 2.0 / consts::SQRT_2, 1e-15);
402    }
403}