gondola_core/
stats.rs

1//! Statistics tools 
2//!
3// This file is part of gaps-online-software and published 
4// under the GPLv3 license
5
6use statrs::distribution::{Gamma, Continuous};
7use num_traits::{
8  Float,
9  NumAssign
10};
11
12#[cfg(feature="pybindings")]
13use pyo3::prelude::*;
14
15#[cfg(feature="pybindings")]
16use numpy::{
17  PyArray1,
18  PyArrayMethods,
19};
20
21/// Calculates the standard deviation of a vector.
22///
23/// This function returns an `Option<f32>` because the standard deviation is
24/// undefined for an empty vector or a vector with a single element.
25pub fn standard_deviation(data: &Vec<f32>) -> Option<f32> {
26  // The standard deviation is not defined for vectors with less than 2 elements.
27  if data.len() < 2 {
28    return None;
29  }
30
31  // First, calculate the mean of the data.
32  let mean = mean(data);
33
34  // Then, calculate the sum of the squared differences from the mean.
35  // This is the variance.
36  let variance_sum: f32 = data.iter()
37  .map(|x| (x - mean).powi(2))
38  .sum();
39
40  // The sample standard deviation is the square root of the variance
41  // divided by (n-1), where n is the number of data points.
42  let variance = variance_sum / ((data.len() - 1) as f32);
43
44  // Take the square root to get the standard deviation.
45  Some(variance.sqrt())
46}
47
48
49
50
51/// Calculate the provided statistic over the columns of the given matrix 
52/// representation, respecting nans 
53///
54/// <div class="info">
55/// **Note:** We don't want an external dependency for this allone,
56/// but might switch to ndarray in the future.
57/// </div>
58///
59/// # Arguments:
60///
61///   * data: input data in form of a 2d matrix
62///   * func: statistics to apply, e.g. mean or median
63pub fn calculate_column_stat<T, F>(data: &Vec<Vec<T>>, func: F) -> Vec<T> 
64  where T: Float + NumAssign + Copy,
65        F: Fn(&[T]) -> T {  
66  let num_columns = data[0].len();
67  let num_rows    = data.len();
68  // Initialize a Vec to store the column-wise medians
69  let mut column_stats: Vec<T> = vec![T::zero(); num_columns];
70  debug!("Calculating stat for {} columns!", num_columns);
71  debug!("Calculating stat for {} rows!", num_rows);
72  // Calculate the median for each column across all sub-vectors, ignoring NaN values
73  for col in 0..num_columns  {
74    let mut col_vals: Vec<T> = vec![T::zero(); num_rows];
75    //let mut col_vals = Vec::<f32>::new();
76    for k in 0..num_rows {
77      col_vals[k] = data[k][col];
78    }
79    col_vals.retain(|x| !x.is_nan());
80    if col_vals.len() == 0 {
81      column_stats[col] = T::nan();
82    } else {
83      //column_medians[col] = statistical::median(col_vals.as_slice());//.unwrap_or(f32::NAN);
84      column_stats[col] = func(col_vals.as_slice());
85    }
86  }
87  column_stats
88}
89
90
91/// Simply calculate the mean of a vector of numbers
92///
93/// <div class="info">
94/// **Note:** We don't want an external dependency for thisallone,
95/// but might switch to ndarray in the future.
96/// </div>
97pub fn mean<T>(input: &[T]) -> T 
98  where T: Float + NumAssign + Copy {  
99  if input.len() == 0 {
100    error!("Vector is empty, can not calculate mean!");
101    return T::nan();
102  }
103  let mut n_entries = T::zero();
104  let mut sum       = T::zero();
105  for k in input.iter() {
106    if k.is_nan() {
107      continue;
108      
109    }
110    sum += *k;
111    n_entries += T::one();
112  }
113  sum / n_entries
114}
115
116/// A simple gamma function e.g. to generate fake waveforms
117pub fn gamma_pdf(xs : &[f32], shape : f64, scale : f64) -> Vec<f32> {
118  let mut ys = Vec::<f32>::with_capacity(xs.len());
119  
120  let gamma = Gamma::new(shape, scale).unwrap();
121  for x in xs {
122    ys.push(gamma.pdf(*x as f64) as f32);
123  }
124  return ys;
125}
126
127//---------------------------------------------------
128
129#[cfg(feature="pybindings")]
130#[pyfunction]
131#[pyo3(name="gamma_pdf")]
132pub fn py_gamma_pdf<'_py>(xs    : Bound<'_py,PyArray1<f32>>,
133                          shape : f64,
134                          scale : f64) -> Vec<f32> {
135  let ys : Vec::<f32>;
136  unsafe {
137    ys = gamma_pdf(xs.as_slice().unwrap(), shape, scale);
138  }
139  return ys;
140}
141
142#[cfg(feature="pybindings")]
143#[pyfunction]
144#[pyo3(name="mean")]
145pub fn py_mean<'_py>(xs    : Bound<'_py,PyArray1<f32>>) -> f32 { 
146  let mean_val : f32;
147  unsafe {
148    mean_val = mean(xs.as_slice().unwrap());
149  }
150  mean_val
151}
152