statistical/
stats_.rs

1// Copyright (c) 2015 Jeff Belgum
2//
3// Permission is hereby granted, free of charge, to any person obtaining a copy of this software and associated
4// documentation files (the "Software"), to deal in the Software without restriction, including without
5// limitation the rights to use, copy, modify, merge, publish, distribute, sublicense, and/or sell copies of
6// the Software, and to permit persons to whom the Software is furnished to do so, subject to the following
7// conditions:
8//
9// The above copyright notice and this permission notice shall be included in all copies or substantial portions
10// of the Software.
11//
12// THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED
13// TO THE WARRANTIES OF MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT
14// SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION
15// OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER
16// DEALINGS IN THE SOFTWARE.
17
18extern crate rand;
19extern crate num;
20
21use num::{Float,
22          Num,
23          NumCast,
24          One,
25          Zero};
26
27
28pub enum Degree {
29    One,
30    Two,
31    Three,
32    Four
33}
34
35pub fn std_moment<T>(v: &[T], r: Degree, _mean: Option<T>, pstdev: Option<T>) -> T
36    where T: Float
37{
38    let _mean = _mean.unwrap_or_else(|| mean(v));
39    let pstdev = pstdev.unwrap_or_else(|| population_standard_deviation(v, Some(_mean)));
40    let r = match r {
41        Degree::One => 1,
42        Degree::Two => 2,
43        Degree::Three => 3,
44        Degree::Four => 4
45    };
46    v.iter().map(|&x| ((x-_mean)/pstdev).powi(r)).fold(T::zero(), |acc, elem| acc + elem)
47}
48
49/// The mean is the sum of a collection of numbers divided by the number of numbers in the collection.
50/// (reference)[http://en.wikipedia.org/wiki/Arithmetic_mean]
51pub fn mean<T>(v: &[T]) -> T
52    where T: Float
53{
54    let len = num::cast(v.len()).unwrap();
55    v.iter().fold(T::zero(), |acc: T, elem| acc + *elem) / len
56}
57
58/// The median is the number separating the higher half of a data sample, a population, or
59/// a probability distribution, from the lower half (reference)[http://en.wikipedia.org/wiki/Median)
60pub fn median<T>(v: &[T]) -> T
61    where T: Copy + Num + NumCast + PartialOrd
62{
63    assert!(v.len() > 0);
64    let mut scratch: Vec<&T> = Vec::with_capacity(v.len());
65    scratch.extend(v.iter());
66    quicksort(&mut scratch);
67
68    let mid = scratch.len() / 2;
69    if scratch.len() % 2 == 1 {
70        *scratch[mid]
71    } else {
72        (*scratch[mid] + *scratch[mid-1]) / num::cast(2).unwrap()
73    }
74}
75
76pub fn sum_square_deviations<T>(v: &[T], c: Option<T>) -> T
77    where T: Float
78{
79    let c = match c {
80        Some(c) => c,
81        None => mean(v),
82    };
83
84    let sum = v.iter().map( |x| (*x - c) * (*x - c) ).fold(T::zero(), |acc, elem| acc + elem);
85    assert!(sum >= T::zero(), "negative sum of square root deviations");
86    sum
87}
88
89/// (Sample variance)[http://en.wikipedia.org/wiki/Variance#Sample_variance]
90pub fn variance<T>(v: &[T], xbar: Option<T>) -> T
91    where T: Float
92{
93    assert!(v.len() > 1, "variance requires at least two data points");
94    let len: T = num::cast(v.len()).unwrap();
95    let sum = sum_square_deviations(v, xbar);
96    sum / (len - T::one())
97}
98
99/// (Population variance)[http://en.wikipedia.org/wiki/Variance#Population_variance]
100pub fn population_variance<T>(v: &[T], mu: Option<T>) -> T
101    where T: Float
102{
103    assert!(v.len() > 0, "population variance requires at least one data point");
104    let len: T = num::cast(v.len()).unwrap();
105    let sum = sum_square_deviations(v, mu);
106    sum / len
107}
108
109///  Standard deviation is a measure that is used to quantify the amount of variation or
110///  dispersion of a set of data values. (reference)[http://en.wikipedia.org/wiki/Standard_deviation]
111pub fn standard_deviation<T>(v: &[T], xbar: Option<T>) -> T
112    where T: Float
113{
114    let var = variance(v, xbar);
115    var.sqrt()
116}
117
118///  Population standard deviation is a measure that is used to quantify the amount of variation or
119///  dispersion of a set of data values. (reference)[http://en.wikipedia.org/wiki/Standard_deviation]
120pub fn population_standard_deviation<T>(v: &[T], mu: Option<T>) -> T
121    where T: Float
122{
123    let pvar = population_variance(v, mu);
124    pvar.sqrt()
125}
126
127
128/// Standard score is a given datum's (signed) number of standard deviations above the mean.
129/// (reference)[http://en.wikipedia.org/wiki/Standard_score]
130/// Method returns a vector of scores for a vector of inputs. scores[n] is the score of v[n]
131pub fn standard_scores<T>(v: &[T]) -> Vec<T>
132    where T: Float
133{
134    let mean = mean(&v);
135    let standard_deviation = standard_deviation(&v, None);
136    let scores: Vec<T> = v.iter().map(|val| (*val - mean)/standard_deviation).collect();
137    return scores;
138}
139
140#[inline(always)]
141fn select_pivot<T>(v: &mut [T])
142    where T: Copy
143{
144    let idx = rand::random::<usize>() % v.len();
145    let tmp = v[0];
146    v[0] = v[idx];
147    v[idx] = tmp;
148}
149
150fn partition<T>(v: &mut [T]) -> usize
151    where T: PartialOrd + Copy
152{
153    select_pivot(v);
154    let pivot = v[0];
155    let mut i = 0;
156    let mut j = 0;
157    let end = v.len() - 1;
158    while i < end {
159        i += 1;
160        if v[i] < pivot {
161            v[j] = v[i];
162            j += 1;
163            v[i] = v[j];
164        }
165
166    }
167    v[j] = pivot;
168    j
169
170}
171
172pub fn quicksort<T>(v: &mut [T])
173    where T: PartialOrd + Copy
174{
175    if v.len() <= 1 {
176        return
177    }
178    let pivot = partition(v);
179    quicksort(&mut v[..pivot]);
180    quicksort(&mut v[(pivot+1)..]);
181}
182
183
184#[cfg(test)]
185mod tests {
186    extern crate rand;
187    extern crate num;
188
189    use super::*;
190    use num::Float;
191    use num::abs;
192
193    const EPSILON: f32 = 1e-6;
194
195    #[test]
196    fn test_mean() {
197        let vec = vec![0.0, 0.25, 0.25, 1.25, 1.5, 1.75, 2.75, 3.25];
198
199        let diff = abs(mean(&vec) - 1.375);
200
201        assert!(diff <= EPSILON);
202    }
203
204    #[test]
205    fn test_median() {
206        let vec = vec![1.0, 3.0];
207        let diff = abs(median(&vec) - 2.0);
208
209        assert!(diff <= EPSILON);
210
211        let vec = vec![1.0, 3.0, 5.0];
212        let diff = abs(median(&vec) - 3.0);
213
214        assert!(diff <= EPSILON);
215
216        let vec = vec![1.0, 3.0, 5.0, 7.0];
217        let diff = abs(median(&vec) - 4.0);
218
219        assert!(diff <= EPSILON);
220    }
221
222    #[test]
223    fn test_variance() {
224        let v = vec![0.0, 0.25, 0.25, 1.25, 1.5, 1.75, 2.75, 3.25];
225        // result is within `epsilon` of expected value
226        let expected = 1.428571;
227
228        assert!((expected - variance(&v, None)).abs() < EPSILON);
229    }
230
231    #[test]
232    fn test_population_variance() {
233        let v = vec![0.0, 0.25, 0.25, 1.25, 1.5, 1.75, 2.75, 3.25];
234        // result is within `epsilon` of expected value
235        let expected = 1.25;
236
237        assert!((expected - population_variance(&v, None)).abs() < EPSILON);
238    }
239
240    #[test]
241    fn test_standard_deviation() {
242        let v = vec![0.0, 0.25, 0.25, 1.25, 1.5, 1.75, 2.75, 3.25];
243        // result is within `epsilon` of expected value
244        let expected = 1.195229;
245
246        assert!((expected - standard_deviation(&v, None)).abs() < EPSILON);
247    }
248
249    #[test]
250    fn test_population_standard_deviation() {
251        let v = vec![0.0, 0.25, 0.25, 1.25, 1.5, 1.75, 2.75, 3.25];
252        // result is within `epsilon` of expected value
253        let expected = 1.118034;
254
255        assert!((expected - population_standard_deviation(&v, None)).abs() < EPSILON);
256    }
257
258    #[test]
259    fn test_standard_scores() {
260        let v = vec![0.0, 0.25, 0.25, 1.25, 1.5, 1.75, 2.75, 3.25];
261        let expected = vec![-1.150407536484354, -0.941242529850835, -0.941242529850835, -0.10458250331675945, 0.10458250331675945, 0.31374750995027834, 1.150407536484354, 1.5687375497513918];
262        assert!(expected == standard_scores(&v));
263    }
264
265    #[test]
266    fn test_qsort_empty() {
267        let mut vec: Vec<f64> = vec![];
268        quicksort(&mut vec);
269        assert_eq!(vec, vec![]);
270    }
271
272    #[test]
273    fn test_qsort_small() {
274        let len = 10;
275        let mut vec = Vec::with_capacity(len);
276        for _ in 0..len { vec.push(rand::random::<f64>()); }
277        quicksort(&mut vec);
278        for i in 0..(len-1) {
279            assert!(vec[i] < vec[i+1], "sorted vectors must be monotonically increasing");
280        }
281    }
282
283    #[test]
284    fn test_qsort_large() {
285        let len = 1_000_000;
286        let mut vec = Vec::with_capacity(len);
287        for _ in 0..len { vec.push(rand::random::<f64>()); }
288        quicksort(&mut vec);
289        for i in 0..(len-1) {
290            assert!(vec[i] < vec[i+1], "sorted vectors must be monotonically increasing");
291        }
292    }
293
294    #[test]
295    fn test_qsort_sorted() {
296        let len = 1_000;
297        let mut vec = Vec::with_capacity(len);
298        for n in 0..len { vec.push(n); }
299        quicksort(&mut vec);
300        for i in 0..(len-1) {
301            assert!(vec[i] < vec[i+1], "sorted vectors must be monotonically increasing");
302        }
303    }
304
305    #[test]
306    fn test_qsort_reverse_sorted() {
307        let len = 1_000;
308        let mut vec = Vec::with_capacity(len);
309        for n in 0..len { vec.push(len-n); }
310        quicksort(&mut vec);
311        for i in 0..(len-1) {
312            assert!(vec[i] < vec[i+1], "sorted vectors must be monotonically increasing");
313        }
314    }
315
316}