polars_core/chunked_array/ops/aggregate/
quantile.rs

1use super::*;
2
3pub trait QuantileAggSeries {
4    /// Get the median of the [`ChunkedArray`] as a new [`Series`] of length 1.
5    fn median_reduce(&self) -> Scalar;
6    /// Get the quantile of the [`ChunkedArray`] as a new [`Series`] of length 1.
7    fn quantile_reduce(&self, _quantile: f64, _method: QuantileMethod) -> PolarsResult<Scalar>;
8}
9
10/// helper
11fn quantile_idx(
12    quantile: f64,
13    length: usize,
14    null_count: usize,
15    method: QuantileMethod,
16) -> (usize, f64, usize) {
17    let nonnull_count = (length - null_count) as f64;
18    let float_idx = (nonnull_count - 1.0) * quantile + null_count as f64;
19    let mut base_idx = match method {
20        QuantileMethod::Nearest => {
21            let idx = float_idx.round() as usize;
22            return (idx, 0.0, idx);
23        },
24        QuantileMethod::Lower | QuantileMethod::Midpoint | QuantileMethod::Linear => {
25            float_idx as usize
26        },
27        QuantileMethod::Higher => float_idx.ceil() as usize,
28        QuantileMethod::Equiprobable => {
29            let idx = ((nonnull_count * quantile).ceil() - 1.0).max(0.0) as usize + null_count;
30            return (idx, 0.0, idx);
31        },
32    };
33
34    base_idx = base_idx.clamp(0, length - 1);
35    let top_idx = f64::ceil(float_idx) as usize;
36
37    (base_idx, float_idx, top_idx)
38}
39
40/// helper
41fn linear_interpol<T: Float>(lower: T, upper: T, idx: usize, float_idx: f64) -> T {
42    if lower == upper {
43        lower
44    } else {
45        let proportion: T = T::from(float_idx).unwrap() - T::from(idx).unwrap();
46        proportion * (upper - lower) + lower
47    }
48}
49fn midpoint_interpol<T: Float>(lower: T, upper: T) -> T {
50    if lower == upper {
51        lower
52    } else {
53        (lower + upper) / (T::one() + T::one())
54    }
55}
56
57// Uses quickselect instead of sorting all data
58fn quantile_slice<T: ToPrimitive + TotalOrd + Copy>(
59    vals: &mut [T],
60    quantile: f64,
61    method: QuantileMethod,
62) -> PolarsResult<Option<f64>> {
63    polars_ensure!((0.0..=1.0).contains(&quantile),
64        ComputeError: "quantile should be between 0.0 and 1.0",
65    );
66    if vals.is_empty() {
67        return Ok(None);
68    }
69    if vals.len() == 1 {
70        return Ok(vals[0].to_f64());
71    }
72    let (idx, float_idx, top_idx) = quantile_idx(quantile, vals.len(), 0, method);
73
74    let (_lhs, lower, rhs) = vals.select_nth_unstable_by(idx, TotalOrd::tot_cmp);
75    if idx == top_idx {
76        Ok(lower.to_f64())
77    } else {
78        match method {
79            QuantileMethod::Midpoint => {
80                let upper = rhs.iter().copied().min_by(TotalOrd::tot_cmp).unwrap();
81                Ok(Some(midpoint_interpol(
82                    lower.to_f64().unwrap(),
83                    upper.to_f64().unwrap(),
84                )))
85            },
86            QuantileMethod::Linear => {
87                let upper = rhs.iter().copied().min_by(TotalOrd::tot_cmp).unwrap();
88                Ok(linear_interpol(
89                    lower.to_f64().unwrap(),
90                    upper.to_f64().unwrap(),
91                    idx,
92                    float_idx,
93                )
94                .to_f64())
95            },
96            _ => Ok(lower.to_f64()),
97        }
98    }
99}
100
101fn generic_quantile<T>(
102    ca: ChunkedArray<T>,
103    quantile: f64,
104    method: QuantileMethod,
105) -> PolarsResult<Option<f64>>
106where
107    T: PolarsNumericType,
108{
109    polars_ensure!(
110        (0.0..=1.0).contains(&quantile),
111        ComputeError: "`quantile` should be between 0.0 and 1.0",
112    );
113
114    let null_count = ca.null_count();
115    let length = ca.len();
116
117    if null_count == length {
118        return Ok(None);
119    }
120
121    let (idx, float_idx, top_idx) = quantile_idx(quantile, length, null_count, method);
122    let sorted = ca.sort(false);
123    let lower = sorted.get(idx).map(|v| v.to_f64().unwrap());
124
125    let opt = match method {
126        QuantileMethod::Midpoint => {
127            if top_idx == idx {
128                lower
129            } else {
130                let upper = sorted.get(idx + 1).map(|v| v.to_f64().unwrap());
131                midpoint_interpol(lower.unwrap(), upper.unwrap()).to_f64()
132            }
133        },
134        QuantileMethod::Linear => {
135            if top_idx == idx {
136                lower
137            } else {
138                let upper = sorted.get(idx + 1).map(|v| v.to_f64().unwrap());
139
140                linear_interpol(lower.unwrap(), upper.unwrap(), idx, float_idx).to_f64()
141            }
142        },
143        _ => lower,
144    };
145    Ok(opt)
146}
147
148impl<T> ChunkQuantile<f64> for ChunkedArray<T>
149where
150    T: PolarsIntegerType,
151    T::Native: TotalOrd,
152{
153    fn quantile(&self, quantile: f64, method: QuantileMethod) -> PolarsResult<Option<f64>> {
154        // in case of sorted data, the sort is free, so don't take quickselect route
155        if let (Ok(slice), false) = (self.cont_slice(), self.is_sorted_ascending_flag()) {
156            let mut owned = slice.to_vec();
157            quantile_slice(&mut owned, quantile, method)
158        } else {
159            generic_quantile(self.clone(), quantile, method)
160        }
161    }
162
163    fn median(&self) -> Option<f64> {
164        self.quantile(0.5, QuantileMethod::Linear).unwrap() // unwrap fine since quantile in range
165    }
166}
167
168// Version of quantile/median that don't need a memcpy
169impl<T> ChunkedArray<T>
170where
171    T: PolarsIntegerType,
172    T::Native: TotalOrd,
173{
174    pub(crate) fn quantile_faster(
175        mut self,
176        quantile: f64,
177        method: QuantileMethod,
178    ) -> PolarsResult<Option<f64>> {
179        // in case of sorted data, the sort is free, so don't take quickselect route
180        let is_sorted = self.is_sorted_ascending_flag();
181        if let (Some(slice), false) = (self.cont_slice_mut(), is_sorted) {
182            quantile_slice(slice, quantile, method)
183        } else {
184            self.quantile(quantile, method)
185        }
186    }
187
188    pub(crate) fn median_faster(self) -> Option<f64> {
189        self.quantile_faster(0.5, QuantileMethod::Linear).unwrap()
190    }
191}
192
193impl ChunkQuantile<f32> for Float32Chunked {
194    fn quantile(&self, quantile: f64, method: QuantileMethod) -> PolarsResult<Option<f32>> {
195        // in case of sorted data, the sort is free, so don't take quickselect route
196        let out = if let (Ok(slice), false) = (self.cont_slice(), self.is_sorted_ascending_flag()) {
197            let mut owned = slice.to_vec();
198            quantile_slice(&mut owned, quantile, method)
199        } else {
200            generic_quantile(self.clone(), quantile, method)
201        };
202        out.map(|v| v.map(|v| v as f32))
203    }
204
205    fn median(&self) -> Option<f32> {
206        self.quantile(0.5, QuantileMethod::Linear).unwrap() // unwrap fine since quantile in range
207    }
208}
209
210impl ChunkQuantile<f64> for Float64Chunked {
211    fn quantile(&self, quantile: f64, method: QuantileMethod) -> PolarsResult<Option<f64>> {
212        // in case of sorted data, the sort is free, so don't take quickselect route
213        if let (Ok(slice), false) = (self.cont_slice(), self.is_sorted_ascending_flag()) {
214            let mut owned = slice.to_vec();
215            quantile_slice(&mut owned, quantile, method)
216        } else {
217            generic_quantile(self.clone(), quantile, method)
218        }
219    }
220
221    fn median(&self) -> Option<f64> {
222        self.quantile(0.5, QuantileMethod::Linear).unwrap() // unwrap fine since quantile in range
223    }
224}
225
226impl Float64Chunked {
227    pub(crate) fn quantile_faster(
228        mut self,
229        quantile: f64,
230        method: QuantileMethod,
231    ) -> PolarsResult<Option<f64>> {
232        // in case of sorted data, the sort is free, so don't take quickselect route
233        let is_sorted = self.is_sorted_ascending_flag();
234        if let (Some(slice), false) = (self.cont_slice_mut(), is_sorted) {
235            quantile_slice(slice, quantile, method)
236        } else {
237            self.quantile(quantile, method)
238        }
239    }
240
241    pub(crate) fn median_faster(self) -> Option<f64> {
242        self.quantile_faster(0.5, QuantileMethod::Linear).unwrap()
243    }
244}
245
246impl Float32Chunked {
247    pub(crate) fn quantile_faster(
248        mut self,
249        quantile: f64,
250        method: QuantileMethod,
251    ) -> PolarsResult<Option<f32>> {
252        // in case of sorted data, the sort is free, so don't take quickselect route
253        let is_sorted = self.is_sorted_ascending_flag();
254        if let (Some(slice), false) = (self.cont_slice_mut(), is_sorted) {
255            quantile_slice(slice, quantile, method).map(|v| v.map(|v| v as f32))
256        } else {
257            self.quantile(quantile, method)
258        }
259    }
260
261    pub(crate) fn median_faster(self) -> Option<f32> {
262        self.quantile_faster(0.5, QuantileMethod::Linear).unwrap()
263    }
264}
265
266impl ChunkQuantile<String> for StringChunked {}
267impl ChunkQuantile<Series> for ListChunked {}
268#[cfg(feature = "dtype-array")]
269impl ChunkQuantile<Series> for ArrayChunked {}
270#[cfg(feature = "object")]
271impl<T: PolarsObject> ChunkQuantile<Series> for ObjectChunked<T> {}
272impl ChunkQuantile<bool> for BooleanChunked {}