1use crate::statistics::*;
2use core::ops::{Index, IndexMut};
3use rand::prelude::SliceRandom;
4
5#[derive(Clone, Debug, PartialEq, Eq)]
6pub struct Data<D>(D);
7
8impl<D: AsRef<[f64]>> Index<usize> for Data<D> {
9 type Output = f64;
10 fn index(&self, i: usize) -> &f64 {
11 &self.0.as_ref()[i]
12 }
13}
14
15impl<D: AsMut<[f64]> + AsRef<[f64]>> IndexMut<usize> for Data<D> {
16 fn index_mut(&mut self, i: usize) -> &mut f64 {
17 &mut self.0.as_mut()[i]
18 }
19}
20
21impl<D: AsMut<[f64]> + AsRef<[f64]>> Data<D> {
22 pub fn new(data: D) -> Self {
23 Data(data)
24 }
25 pub fn swap(&mut self, i: usize, j: usize) {
26 self.0.as_mut().swap(i, j)
27 }
28 pub fn len(&self) -> usize {
29 self.0.as_ref().len()
30 }
31 pub fn is_empty(&self) -> bool {
32 self.0.as_ref().len() == 0
33 }
34 pub fn iter(&self) -> core::slice::Iter<'_, f64> {
35 self.0.as_ref().iter()
36 }
37 fn select_inplace(&mut self, rank: usize) -> f64 {
40 if rank == 0 {
41 return self.min();
42 }
43 if rank > self.len() - 1 {
44 return self.max();
45 }
46
47 let mut low = 0;
48 let mut high = self.len() - 1;
49 loop {
50 if high <= low + 1 {
51 if high == low + 1 && self[high] < self[low] {
52 self.swap(low, high)
53 }
54 return self[rank];
55 }
56
57 let middle = (low + high) / 2;
58 self.swap(middle, low + 1);
59
60 if self[low] > self[high] {
61 self.swap(low, high);
62 }
63 if self[low + 1] > self[high] {
64 self.swap(low + 1, high);
65 }
66 if self[low] > self[low + 1] {
67 self.swap(low, low + 1);
68 }
69
70 let mut begin = low + 1;
71 let mut end = high;
72 let pivot = self[begin];
73 loop {
74 loop {
75 begin += 1;
76 if self[begin] >= pivot {
77 break;
78 }
79 }
80 loop {
81 end -= 1;
82 if self[end] <= pivot {
83 break;
84 }
85 }
86 if end < begin {
87 break;
88 }
89 self.swap(begin, end);
90 }
91
92 self[low + 1] = self[end];
93 self[end] = pivot;
94
95 if end >= rank {
96 high = end - 1;
97 }
98 if end <= rank {
99 low = begin;
100 }
101 }
102 }
103}
104
105impl<D: AsRef<[f64]>> ::rand::distributions::Distribution<f64> for Data<D> {
106 fn sample<R: ::rand::Rng + ?Sized>(&self, rng: &mut R) -> f64 {
107 *self.0.as_ref().choose(rng).unwrap()
108 }
109}
110
111impl<D: AsMut<[f64]> + AsRef<[f64]>> OrderStatistics<f64> for Data<D> {
112 fn order_statistic(&mut self, order: usize) -> f64 {
113 let n = self.len();
114 match order {
115 1 => self.min(),
116 _ if order == n => self.max(),
117 _ if order < 1 || order > n => f64::NAN,
118 _ => self.select_inplace(order - 1),
119 }
120 }
121
122 fn median(&mut self) -> f64 {
123 let k = self.len() / 2;
124 if self.len() % 2 != 0 {
125 self.select_inplace(k)
126 } else {
127 (self.select_inplace(k.saturating_sub(1)) + self.select_inplace(k)) / 2.0
128 }
129 }
130
131 fn quantile(&mut self, tau: f64) -> f64 {
132 if !(0.0..=1.0).contains(&tau) || self.is_empty() {
133 return f64::NAN;
134 }
135
136 let h = (self.len() as f64 + 1.0 / 3.0) * tau + 1.0 / 3.0;
137 let hf = h as i64;
138
139 if hf <= 0 || tau == 0.0 {
140 return self.min();
141 }
142 if hf >= self.len() as i64 || ulps_eq!(tau, 1.0) {
143 return self.max();
144 }
145
146 let a = self.select_inplace((hf as usize).saturating_sub(1));
147 let b = self.select_inplace(hf as usize);
148 a + (h - hf as f64) * (b - a)
149 }
150
151 fn percentile(&mut self, p: usize) -> f64 {
152 self.quantile(p as f64 / 100.0)
153 }
154
155 fn lower_quartile(&mut self) -> f64 {
156 self.quantile(0.25)
157 }
158
159 fn upper_quartile(&mut self) -> f64 {
160 self.quantile(0.75)
161 }
162
163 fn interquartile_range(&mut self) -> f64 {
164 self.upper_quartile() - self.lower_quartile()
165 }
166
167 fn ranks(&mut self, tie_breaker: RankTieBreaker) -> Vec<f64> {
168 let n = self.len();
169 let mut ranks: Vec<f64> = vec![0.0; n];
170 let mut enumerated: Vec<_> = self.iter().enumerate().collect();
171 enumerated.sort_by(|(_, el_a), (_, el_b)| el_a.partial_cmp(el_b).unwrap());
172 match tie_breaker {
173 RankTieBreaker::First => {
174 for (i, idx) in enumerated.into_iter().map(|(idx, _)| idx).enumerate() {
175 ranks[idx] = (i + 1) as f64
176 }
177 ranks
178 }
179 _ => {
180 let mut prev = 0;
181 let mut prev_idx = 0;
182 let mut prev_elt = 0.0;
183 for (i, (idx, elt)) in enumerated.iter().cloned().enumerate() {
184 if i == 0 {
185 prev_idx = idx;
186 prev_elt = *elt;
187 }
188 if (*elt - prev_elt).abs() <= 0.0 {
189 continue;
190 }
191 if i == prev + 1 {
192 ranks[prev_idx] = i as f64;
193 } else {
194 handle_rank_ties(&mut ranks, &enumerated, prev, i, tie_breaker);
195 }
196 prev = i;
197 prev_idx = idx;
198 prev_elt = *elt;
199 }
200
201 handle_rank_ties(&mut ranks, &enumerated, prev, n, tie_breaker);
202 ranks
203 }
204 }
205 }
206}
207
208impl<D: AsMut<[f64]> + AsRef<[f64]>> Min<f64> for Data<D> {
209 fn min(&self) -> f64 {
234 Statistics::min(self.iter())
235 }
236}
237
238impl<D: AsMut<[f64]> + AsRef<[f64]>> Max<f64> for Data<D> {
239 fn max(&self) -> f64 {
264 Statistics::max(self.iter())
265 }
266}
267
268impl<D: AsMut<[f64]> + AsRef<[f64]>> Distribution<f64> for Data<D> {
269 fn mean(&self) -> Option<f64> {
300 Some(Statistics::mean(self.iter()))
301 }
302 fn variance(&self) -> Option<f64> {
331 Some(Statistics::variance(self.iter()))
332 }
333}
334
335impl<D: AsMut<[f64]> + AsRef<[f64]> + Clone> Median<f64> for Data<D> {
336 fn median(&self) -> f64 {
356 let mut v = self.clone();
357 OrderStatistics::median(&mut v)
358 }
359}
360
361fn handle_rank_ties(
362 ranks: &mut [f64],
363 index: &[(usize, &f64)],
364 a: usize,
365 b: usize,
366 tie_breaker: RankTieBreaker,
367) {
368 let rank = match tie_breaker {
369 RankTieBreaker::Average => b as f64 / 2.0 + a as f64 / 2.0 + 0.5,
371 RankTieBreaker::Min => (a + 1) as f64,
372 RankTieBreaker::Max => b as f64,
373 RankTieBreaker::First => unreachable!(),
374 };
375 for i in &index[a..b] {
376 ranks[i.0] = rank
377 }
378}
379
380#[cfg(test)]
381mod tests {
382 use super::*;
383 use crate::statistics::*;
384
385 #[test]
386 fn test_order_statistic_short() {
387 let data = [-1.0, 5.0, 0.0, -3.0, 10.0, -0.5, 4.0, 1.0, 6.0];
388 let mut data = Data::new(data);
389 assert!(data.order_statistic(0).is_nan());
390 assert_eq!(data.order_statistic(1), -3.0);
391 assert_eq!(data.order_statistic(2), -1.0);
392 assert_eq!(data.order_statistic(3), -0.5);
393 assert_eq!(data.order_statistic(7), 5.0);
394 assert_eq!(data.order_statistic(8), 6.0);
395 assert_eq!(data.order_statistic(9), 10.0);
396 assert!(data.order_statistic(10).is_nan());
397 }
398
399 #[test]
400 fn test_quantile_short() {
401 let data = [-1.0, 5.0, 0.0, -3.0, 10.0, -0.5, 4.0, 0.2, 1.0, 6.0];
402 let mut data = Data::new(data);
403 assert_eq!(data.quantile(0.0), -3.0);
404 assert_eq!(data.quantile(1.0), 10.0);
405 assert_almost_eq!(data.quantile(0.5), 3.0 / 5.0, 1e-15);
406 assert_almost_eq!(data.quantile(0.2), -4.0 / 5.0, 1e-15);
407 assert_eq!(data.quantile(0.7), 137.0 / 30.0);
408 assert_eq!(data.quantile(0.01), -3.0);
409 assert_eq!(data.quantile(0.99), 10.0);
410 assert_almost_eq!(data.quantile(0.52), 287.0 / 375.0, 1e-15);
411 assert_almost_eq!(data.quantile(0.325), -37.0 / 240.0, 1e-15);
412 }
413
414 #[test]
415 fn test_ranks() {
416 let sorted_distinct = [1.0, 2.0, 4.0, 7.0, 8.0, 9.0, 10.0, 12.0];
417 let mut sorted_distinct = Data::new(sorted_distinct);
418 let sorted_ties = [1.0, 2.0, 2.0, 7.0, 9.0, 9.0, 10.0, 12.0];
419 let mut sorted_ties = Data::new(sorted_ties);
420 assert_eq!(
421 sorted_distinct.ranks(RankTieBreaker::Average),
422 [1.0, 2.0, 3.0, 4.0, 5.0, 6.0, 7.0, 8.0]
423 );
424 assert_eq!(
425 sorted_ties.ranks(RankTieBreaker::Average),
426 [1.0, 2.5, 2.5, 4.0, 5.5, 5.5, 7.0, 8.0]
427 );
428 assert_eq!(
429 sorted_distinct.ranks(RankTieBreaker::Min),
430 [1.0, 2.0, 3.0, 4.0, 5.0, 6.0, 7.0, 8.0]
431 );
432 assert_eq!(
433 sorted_ties.ranks(RankTieBreaker::Min),
434 [1.0, 2.0, 2.0, 4.0, 5.0, 5.0, 7.0, 8.0]
435 );
436 assert_eq!(
437 sorted_distinct.ranks(RankTieBreaker::Max),
438 [1.0, 2.0, 3.0, 4.0, 5.0, 6.0, 7.0, 8.0]
439 );
440 assert_eq!(
441 sorted_ties.ranks(RankTieBreaker::Max),
442 [1.0, 3.0, 3.0, 4.0, 6.0, 6.0, 7.0, 8.0]
443 );
444 assert_eq!(
445 sorted_distinct.ranks(RankTieBreaker::First),
446 [1.0, 2.0, 3.0, 4.0, 5.0, 6.0, 7.0, 8.0]
447 );
448 assert_eq!(
449 sorted_ties.ranks(RankTieBreaker::First),
450 [1.0, 2.0, 3.0, 4.0, 5.0, 6.0, 7.0, 8.0]
451 );
452
453 let distinct = [1.0, 8.0, 12.0, 7.0, 2.0, 9.0, 10.0, 4.0];
454 let distinct = Data::new(distinct);
455 let ties = [1.0, 9.0, 12.0, 7.0, 2.0, 9.0, 10.0, 2.0];
456 let ties = Data::new(ties);
457 assert_eq!(
458 distinct.clone().ranks(RankTieBreaker::Average),
459 [1.0, 5.0, 8.0, 4.0, 2.0, 6.0, 7.0, 3.0]
460 );
461 assert_eq!(
462 ties.clone().ranks(RankTieBreaker::Average),
463 [1.0, 5.5, 8.0, 4.0, 2.5, 5.5, 7.0, 2.5]
464 );
465 assert_eq!(
466 distinct.clone().ranks(RankTieBreaker::Min),
467 [1.0, 5.0, 8.0, 4.0, 2.0, 6.0, 7.0, 3.0]
468 );
469 assert_eq!(
470 ties.clone().ranks(RankTieBreaker::Min),
471 [1.0, 5.0, 8.0, 4.0, 2.0, 5.0, 7.0, 2.0]
472 );
473 assert_eq!(
474 distinct.clone().ranks(RankTieBreaker::Max),
475 [1.0, 5.0, 8.0, 4.0, 2.0, 6.0, 7.0, 3.0]
476 );
477 assert_eq!(
478 ties.clone().ranks(RankTieBreaker::Max),
479 [1.0, 6.0, 8.0, 4.0, 3.0, 6.0, 7.0, 3.0]
480 );
481 assert_eq!(
482 distinct.clone().ranks(RankTieBreaker::First),
483 [1.0, 5.0, 8.0, 4.0, 2.0, 6.0, 7.0, 3.0]
484 );
485 assert_eq!(
486 ties.clone().ranks(RankTieBreaker::First),
487 [1.0, 5.0, 8.0, 4.0, 2.0, 6.0, 7.0, 3.0]
488 );
489 }
490
491 #[test]
492 fn test_median_short() {
493 let even = [-1.0, 5.0, 0.0, -3.0, 10.0, -0.5, 4.0, 0.2, 1.0, 6.0];
494 let even = Data::new(even);
495 assert_eq!(even.median(), 0.6);
496
497 let odd = [-1.0, 5.0, 0.0, -3.0, 10.0, -0.5, 4.0, 0.2, 1.0];
498 let odd = Data::new(odd);
499 assert_eq!(odd.median(), 0.2);
500 }
501
502 #[test]
503 fn test_median_long_constant_seq() {
504 let even = vec![2.0; 100000];
505 let even = Data::new(even);
506 assert_eq!(2.0, even.median());
507
508 let odd = vec![2.0; 100001];
509 let odd = Data::new(odd);
510 assert_eq!(2.0, odd.median());
511 }
512
513 #[test]
516 fn test_median_robust_on_infinities() {
517 let data3 = [2.0, f64::NEG_INFINITY, f64::INFINITY];
518 let data3 = Data::new(data3);
519 assert_eq!(data3.median(), 2.0);
520 assert_eq!(data3.median(), 2.0);
521
522 let data3 = [f64::NEG_INFINITY, 2.0, f64::INFINITY];
523 let data3 = Data::new(data3);
524 assert_eq!(data3.median(), 2.0);
525 assert_eq!(data3.median(), 2.0);
526
527 let data3 = [f64::NEG_INFINITY, f64::INFINITY, 2.0];
528 let data3 = Data::new(data3);
529 assert_eq!(data3.median(), 2.0);
530 assert_eq!(data3.median(), 2.0);
531
532 let data4 = [f64::NEG_INFINITY, 2.0, 3.0, f64::INFINITY];
533 let data4 = Data::new(data4);
534 assert_eq!(data4.median(), 2.5);
535 assert_eq!(data4.median(), 2.5);
536 }
537 #[test]
538 fn test_foo() {
539 let arr = [0.0, 1.0, 2.0, 3.0];
540 let mut arr = Data::new(arr);
541 arr.order_statistic(2);
542 }
543}