statrs/statistics/
slice_statistics.rs

1use crate::statistics::*;
2use core::ops::{Index, IndexMut};
3use rand::prelude::SliceRandom;
4
5#[derive(Clone, Debug, PartialEq, Eq)]
6pub struct Data<D>(D);
7
8impl<D: AsRef<[f64]>> Index<usize> for Data<D> {
9    type Output = f64;
10    fn index(&self, i: usize) -> &f64 {
11        &self.0.as_ref()[i]
12    }
13}
14
15impl<D: AsMut<[f64]> + AsRef<[f64]>> IndexMut<usize> for Data<D> {
16    fn index_mut(&mut self, i: usize) -> &mut f64 {
17        &mut self.0.as_mut()[i]
18    }
19}
20
21impl<D: AsMut<[f64]> + AsRef<[f64]>> Data<D> {
22    pub fn new(data: D) -> Self {
23        Data(data)
24    }
25    pub fn swap(&mut self, i: usize, j: usize) {
26        self.0.as_mut().swap(i, j)
27    }
28    pub fn len(&self) -> usize {
29        self.0.as_ref().len()
30    }
31    pub fn is_empty(&self) -> bool {
32        self.0.as_ref().len() == 0
33    }
34    pub fn iter(&self) -> core::slice::Iter<'_, f64> {
35        self.0.as_ref().iter()
36    }
37    // Selection algorithm from Numerical Recipes
38    // See: https://en.wikipedia.org/wiki/Selection_algorithm
39    fn select_inplace(&mut self, rank: usize) -> f64 {
40        if rank == 0 {
41            return self.min();
42        }
43        if rank > self.len() - 1 {
44            return self.max();
45        }
46
47        let mut low = 0;
48        let mut high = self.len() - 1;
49        loop {
50            if high <= low + 1 {
51                if high == low + 1 && self[high] < self[low] {
52                    self.swap(low, high)
53                }
54                return self[rank];
55            }
56
57            let middle = (low + high) / 2;
58            self.swap(middle, low + 1);
59
60            if self[low] > self[high] {
61                self.swap(low, high);
62            }
63            if self[low + 1] > self[high] {
64                self.swap(low + 1, high);
65            }
66            if self[low] > self[low + 1] {
67                self.swap(low, low + 1);
68            }
69
70            let mut begin = low + 1;
71            let mut end = high;
72            let pivot = self[begin];
73            loop {
74                loop {
75                    begin += 1;
76                    if self[begin] >= pivot {
77                        break;
78                    }
79                }
80                loop {
81                    end -= 1;
82                    if self[end] <= pivot {
83                        break;
84                    }
85                }
86                if end < begin {
87                    break;
88                }
89                self.swap(begin, end);
90            }
91
92            self[low + 1] = self[end];
93            self[end] = pivot;
94
95            if end >= rank {
96                high = end - 1;
97            }
98            if end <= rank {
99                low = begin;
100            }
101        }
102    }
103}
104
105impl<D: AsRef<[f64]>> ::rand::distributions::Distribution<f64> for Data<D> {
106    fn sample<R: ::rand::Rng + ?Sized>(&self, rng: &mut R) -> f64 {
107        *self.0.as_ref().choose(rng).unwrap()
108    }
109}
110
111impl<D: AsMut<[f64]> + AsRef<[f64]>> OrderStatistics<f64> for Data<D> {
112    fn order_statistic(&mut self, order: usize) -> f64 {
113        let n = self.len();
114        match order {
115            1 => self.min(),
116            _ if order == n => self.max(),
117            _ if order < 1 || order > n => f64::NAN,
118            _ => self.select_inplace(order - 1),
119        }
120    }
121
122    fn median(&mut self) -> f64 {
123        let k = self.len() / 2;
124        if self.len() % 2 != 0 {
125            self.select_inplace(k)
126        } else {
127            (self.select_inplace(k.saturating_sub(1)) + self.select_inplace(k)) / 2.0
128        }
129    }
130
131    fn quantile(&mut self, tau: f64) -> f64 {
132        if !(0.0..=1.0).contains(&tau) || self.is_empty() {
133            return f64::NAN;
134        }
135
136        let h = (self.len() as f64 + 1.0 / 3.0) * tau + 1.0 / 3.0;
137        let hf = h as i64;
138
139        if hf <= 0 || tau == 0.0 {
140            return self.min();
141        }
142        if hf >= self.len() as i64 || ulps_eq!(tau, 1.0) {
143            return self.max();
144        }
145
146        let a = self.select_inplace((hf as usize).saturating_sub(1));
147        let b = self.select_inplace(hf as usize);
148        a + (h - hf as f64) * (b - a)
149    }
150
151    fn percentile(&mut self, p: usize) -> f64 {
152        self.quantile(p as f64 / 100.0)
153    }
154
155    fn lower_quartile(&mut self) -> f64 {
156        self.quantile(0.25)
157    }
158
159    fn upper_quartile(&mut self) -> f64 {
160        self.quantile(0.75)
161    }
162
163    fn interquartile_range(&mut self) -> f64 {
164        self.upper_quartile() - self.lower_quartile()
165    }
166
167    fn ranks(&mut self, tie_breaker: RankTieBreaker) -> Vec<f64> {
168        let n = self.len();
169        let mut ranks: Vec<f64> = vec![0.0; n];
170        let mut enumerated: Vec<_> = self.iter().enumerate().collect();
171        enumerated.sort_by(|(_, el_a), (_, el_b)| el_a.partial_cmp(el_b).unwrap());
172        match tie_breaker {
173            RankTieBreaker::First => {
174                for (i, idx) in enumerated.into_iter().map(|(idx, _)| idx).enumerate() {
175                    ranks[idx] = (i + 1) as f64
176                }
177                ranks
178            }
179            _ => {
180                let mut prev = 0;
181                let mut prev_idx = 0;
182                let mut prev_elt = 0.0;
183                for (i, (idx, elt)) in enumerated.iter().cloned().enumerate() {
184                    if i == 0 {
185                        prev_idx = idx;
186                        prev_elt = *elt;
187                    }
188                    if (*elt - prev_elt).abs() <= 0.0 {
189                        continue;
190                    }
191                    if i == prev + 1 {
192                        ranks[prev_idx] = i as f64;
193                    } else {
194                        handle_rank_ties(&mut ranks, &enumerated, prev, i, tie_breaker);
195                    }
196                    prev = i;
197                    prev_idx = idx;
198                    prev_elt = *elt;
199                }
200
201                handle_rank_ties(&mut ranks, &enumerated, prev, n, tie_breaker);
202                ranks
203            }
204        }
205    }
206}
207
208impl<D: AsMut<[f64]> + AsRef<[f64]>> Min<f64> for Data<D> {
209    /// Returns the minimum value in the data
210    ///
211    /// # Remarks
212    ///
213    /// Returns `f64::NAN` if data is empty or an entry is `f64::NAN`
214    ///
215    /// # Examples
216    ///
217    /// ```
218    /// use statrs::statistics::Min;
219    /// use statrs::statistics::Data;
220    ///
221    /// let x = [];
222    /// let x = Data::new(x);
223    /// assert!(x.min().is_nan());
224    ///
225    /// let y = [0.0, f64::NAN, 3.0, -2.0];
226    /// let y = Data::new(y);
227    /// assert!(y.min().is_nan());
228    ///
229    /// let z = [0.0, 3.0, -2.0];
230    /// let z = Data::new(z);
231    /// assert_eq!(z.min(), -2.0);
232    /// ```
233    fn min(&self) -> f64 {
234        Statistics::min(self.iter())
235    }
236}
237
238impl<D: AsMut<[f64]> + AsRef<[f64]>> Max<f64> for Data<D> {
239    /// Returns the maximum value in the data
240    ///
241    /// # Remarks
242    ///
243    /// Returns `f64::NAN` if data is empty or an entry is `f64::NAN`
244    ///
245    /// # Examples
246    ///
247    /// ```
248    /// use statrs::statistics::Max;
249    /// use statrs::statistics::Data;
250    ///
251    /// let x = [];
252    /// let x = Data::new(x);
253    /// assert!(x.max().is_nan());
254    ///
255    /// let y = [0.0, f64::NAN, 3.0, -2.0];
256    /// let y = Data::new(y);
257    /// assert!(y.max().is_nan());
258    ///
259    /// let z = [0.0, 3.0, -2.0];
260    /// let z = Data::new(z);
261    /// assert_eq!(z.max(), 3.0);
262    /// ```
263    fn max(&self) -> f64 {
264        Statistics::max(self.iter())
265    }
266}
267
268impl<D: AsMut<[f64]> + AsRef<[f64]>> Distribution<f64> for Data<D> {
269    /// Evaluates the sample mean, an estimate of the population
270    /// mean.
271    ///
272    /// # Remarks
273    ///
274    /// Returns `f64::NAN` if data is empty or an entry is `f64::NAN`
275    ///
276    /// # Examples
277    ///
278    /// ```
279    /// #[macro_use]
280    /// extern crate statrs;
281    ///
282    /// use statrs::statistics::Distribution;
283    /// use statrs::statistics::Data;
284    ///
285    /// # fn main() {
286    /// let x = [];
287    /// let x = Data::new(x);
288    /// assert!(x.mean().unwrap().is_nan());
289    ///
290    /// let y = [0.0, f64::NAN, 3.0, -2.0];
291    /// let y = Data::new(y);
292    /// assert!(y.mean().unwrap().is_nan());
293    ///
294    /// let z = [0.0, 3.0, -2.0];
295    /// let z = Data::new(z);
296    /// assert_almost_eq!(z.mean().unwrap(), 1.0 / 3.0, 1e-15);
297    /// # }
298    /// ```
299    fn mean(&self) -> Option<f64> {
300        Some(Statistics::mean(self.iter()))
301    }
302    /// Estimates the unbiased population variance from the provided samples
303    ///
304    /// # Remarks
305    ///
306    /// On a dataset of size `N`, `N-1` is used as a normalizer (Bessel's
307    /// correction).
308    ///
309    /// Returns `f64::NAN` if data has less than two entries or if any entry is
310    /// `f64::NAN`
311    ///
312    /// # Examples
313    ///
314    /// ```
315    /// use statrs::statistics::Distribution;
316    /// use statrs::statistics::Data;
317    ///
318    /// let x = [];
319    /// let x = Data::new(x);
320    /// assert!(x.variance().unwrap().is_nan());
321    ///
322    /// let y = [0.0, f64::NAN, 3.0, -2.0];
323    /// let y = Data::new(y);
324    /// assert!(y.variance().unwrap().is_nan());
325    ///
326    /// let z = [0.0, 3.0, -2.0];
327    /// let z = Data::new(z);
328    /// assert_eq!(z.variance().unwrap(), 19.0 / 3.0);
329    /// ```
330    fn variance(&self) -> Option<f64> {
331        Some(Statistics::variance(self.iter()))
332    }
333}
334
335impl<D: AsMut<[f64]> + AsRef<[f64]> + Clone> Median<f64> for Data<D> {
336    /// Returns the median value from the data
337    ///
338    /// # Remarks
339    ///
340    /// Returns `f64::NAN` if data is empty
341    ///
342    /// # Examples
343    ///
344    /// ```
345    /// use statrs::statistics::Median;
346    /// use statrs::statistics::Data;
347    ///
348    /// let x = [];
349    /// let x = Data::new(x);
350    /// assert!(x.median().is_nan());
351    ///
352    /// let y = [0.0, 3.0, -2.0];
353    /// let y = Data::new(y);
354    /// assert_eq!(y.median(), 0.0);
355    fn median(&self) -> f64 {
356        let mut v = self.clone();
357        OrderStatistics::median(&mut v)
358    }
359}
360
361fn handle_rank_ties(
362    ranks: &mut [f64],
363    index: &[(usize, &f64)],
364    a: usize,
365    b: usize,
366    tie_breaker: RankTieBreaker,
367) {
368    let rank = match tie_breaker {
369        // equivalent to (b + a - 1) as f64 / 2.0 + 1.0 but less overflow issues
370        RankTieBreaker::Average => b as f64 / 2.0 + a as f64 / 2.0 + 0.5,
371        RankTieBreaker::Min => (a + 1) as f64,
372        RankTieBreaker::Max => b as f64,
373        RankTieBreaker::First => unreachable!(),
374    };
375    for i in &index[a..b] {
376        ranks[i.0] = rank
377    }
378}
379
380#[cfg(test)]
381mod tests {
382    use super::*;
383    use crate::statistics::*;
384
385    #[test]
386    fn test_order_statistic_short() {
387        let data = [-1.0, 5.0, 0.0, -3.0, 10.0, -0.5, 4.0, 1.0, 6.0];
388        let mut data = Data::new(data);
389        assert!(data.order_statistic(0).is_nan());
390        assert_eq!(data.order_statistic(1), -3.0);
391        assert_eq!(data.order_statistic(2), -1.0);
392        assert_eq!(data.order_statistic(3), -0.5);
393        assert_eq!(data.order_statistic(7), 5.0);
394        assert_eq!(data.order_statistic(8), 6.0);
395        assert_eq!(data.order_statistic(9), 10.0);
396        assert!(data.order_statistic(10).is_nan());
397    }
398
399    #[test]
400    fn test_quantile_short() {
401        let data = [-1.0, 5.0, 0.0, -3.0, 10.0, -0.5, 4.0, 0.2, 1.0, 6.0];
402        let mut data = Data::new(data);
403        assert_eq!(data.quantile(0.0), -3.0);
404        assert_eq!(data.quantile(1.0), 10.0);
405        assert_almost_eq!(data.quantile(0.5), 3.0 / 5.0, 1e-15);
406        assert_almost_eq!(data.quantile(0.2), -4.0 / 5.0, 1e-15);
407        assert_eq!(data.quantile(0.7), 137.0 / 30.0);
408        assert_eq!(data.quantile(0.01), -3.0);
409        assert_eq!(data.quantile(0.99), 10.0);
410        assert_almost_eq!(data.quantile(0.52), 287.0 / 375.0, 1e-15);
411        assert_almost_eq!(data.quantile(0.325), -37.0 / 240.0, 1e-15);
412    }
413
414    #[test]
415    fn test_ranks() {
416        let sorted_distinct = [1.0, 2.0, 4.0, 7.0, 8.0, 9.0, 10.0, 12.0];
417        let mut sorted_distinct = Data::new(sorted_distinct);
418        let sorted_ties = [1.0, 2.0, 2.0, 7.0, 9.0, 9.0, 10.0, 12.0];
419        let mut sorted_ties = Data::new(sorted_ties);
420        assert_eq!(
421            sorted_distinct.ranks(RankTieBreaker::Average),
422            [1.0, 2.0, 3.0, 4.0, 5.0, 6.0, 7.0, 8.0]
423        );
424        assert_eq!(
425            sorted_ties.ranks(RankTieBreaker::Average),
426            [1.0, 2.5, 2.5, 4.0, 5.5, 5.5, 7.0, 8.0]
427        );
428        assert_eq!(
429            sorted_distinct.ranks(RankTieBreaker::Min),
430            [1.0, 2.0, 3.0, 4.0, 5.0, 6.0, 7.0, 8.0]
431        );
432        assert_eq!(
433            sorted_ties.ranks(RankTieBreaker::Min),
434            [1.0, 2.0, 2.0, 4.0, 5.0, 5.0, 7.0, 8.0]
435        );
436        assert_eq!(
437            sorted_distinct.ranks(RankTieBreaker::Max),
438            [1.0, 2.0, 3.0, 4.0, 5.0, 6.0, 7.0, 8.0]
439        );
440        assert_eq!(
441            sorted_ties.ranks(RankTieBreaker::Max),
442            [1.0, 3.0, 3.0, 4.0, 6.0, 6.0, 7.0, 8.0]
443        );
444        assert_eq!(
445            sorted_distinct.ranks(RankTieBreaker::First),
446            [1.0, 2.0, 3.0, 4.0, 5.0, 6.0, 7.0, 8.0]
447        );
448        assert_eq!(
449            sorted_ties.ranks(RankTieBreaker::First),
450            [1.0, 2.0, 3.0, 4.0, 5.0, 6.0, 7.0, 8.0]
451        );
452
453        let distinct = [1.0, 8.0, 12.0, 7.0, 2.0, 9.0, 10.0, 4.0];
454        let distinct = Data::new(distinct);
455        let ties = [1.0, 9.0, 12.0, 7.0, 2.0, 9.0, 10.0, 2.0];
456        let ties = Data::new(ties);
457        assert_eq!(
458            distinct.clone().ranks(RankTieBreaker::Average),
459            [1.0, 5.0, 8.0, 4.0, 2.0, 6.0, 7.0, 3.0]
460        );
461        assert_eq!(
462            ties.clone().ranks(RankTieBreaker::Average),
463            [1.0, 5.5, 8.0, 4.0, 2.5, 5.5, 7.0, 2.5]
464        );
465        assert_eq!(
466            distinct.clone().ranks(RankTieBreaker::Min),
467            [1.0, 5.0, 8.0, 4.0, 2.0, 6.0, 7.0, 3.0]
468        );
469        assert_eq!(
470            ties.clone().ranks(RankTieBreaker::Min),
471            [1.0, 5.0, 8.0, 4.0, 2.0, 5.0, 7.0, 2.0]
472        );
473        assert_eq!(
474            distinct.clone().ranks(RankTieBreaker::Max),
475            [1.0, 5.0, 8.0, 4.0, 2.0, 6.0, 7.0, 3.0]
476        );
477        assert_eq!(
478            ties.clone().ranks(RankTieBreaker::Max),
479            [1.0, 6.0, 8.0, 4.0, 3.0, 6.0, 7.0, 3.0]
480        );
481        assert_eq!(
482            distinct.clone().ranks(RankTieBreaker::First),
483            [1.0, 5.0, 8.0, 4.0, 2.0, 6.0, 7.0, 3.0]
484        );
485        assert_eq!(
486            ties.clone().ranks(RankTieBreaker::First),
487            [1.0, 5.0, 8.0, 4.0, 2.0, 6.0, 7.0, 3.0]
488        );
489    }
490
491    #[test]
492    fn test_median_short() {
493        let even = [-1.0, 5.0, 0.0, -3.0, 10.0, -0.5, 4.0, 0.2, 1.0, 6.0];
494        let even = Data::new(even);
495        assert_eq!(even.median(), 0.6);
496
497        let odd = [-1.0, 5.0, 0.0, -3.0, 10.0, -0.5, 4.0, 0.2, 1.0];
498        let odd = Data::new(odd);
499        assert_eq!(odd.median(), 0.2);
500    }
501
502    #[test]
503    fn test_median_long_constant_seq() {
504        let even = vec![2.0; 100000];
505        let even = Data::new(even);
506        assert_eq!(2.0, even.median());
507
508        let odd = vec![2.0; 100001];
509        let odd = Data::new(odd);
510        assert_eq!(2.0, odd.median());
511    }
512
513    // TODO: test codeplex issue 5667 (Math.NET)
514
515    #[test]
516    fn test_median_robust_on_infinities() {
517        let data3 = [2.0, f64::NEG_INFINITY, f64::INFINITY];
518        let data3 = Data::new(data3);
519        assert_eq!(data3.median(), 2.0);
520        assert_eq!(data3.median(), 2.0);
521
522        let data3 = [f64::NEG_INFINITY, 2.0, f64::INFINITY];
523        let data3 = Data::new(data3);
524        assert_eq!(data3.median(), 2.0);
525        assert_eq!(data3.median(), 2.0);
526
527        let data3 = [f64::NEG_INFINITY, f64::INFINITY, 2.0];
528        let data3 = Data::new(data3);
529        assert_eq!(data3.median(), 2.0);
530        assert_eq!(data3.median(), 2.0);
531
532        let data4 = [f64::NEG_INFINITY, 2.0, 3.0, f64::INFINITY];
533        let data4 = Data::new(data4);
534        assert_eq!(data4.median(), 2.5);
535        assert_eq!(data4.median(), 2.5);
536    }
537    #[test]
538    fn test_foo() {
539        let arr = [0.0, 1.0, 2.0, 3.0];
540        let mut arr = Data::new(arr);
541        arr.order_statistic(2);
542    }
543}