polars_core/chunked_array/ops/
mod.rs

1//! Traits for miscellaneous operations on ChunkedArray
2use arrow::offset::OffsetsBuffer;
3
4use crate::prelude::*;
5
6pub(crate) mod aggregate;
7pub(crate) mod any_value;
8pub(crate) mod append;
9mod apply;
10#[cfg(feature = "approx_unique")]
11mod approx_n_unique;
12pub mod arity;
13mod bit_repr;
14mod bits;
15#[cfg(feature = "bitwise")]
16mod bitwise_reduce;
17pub(crate) mod chunkops;
18pub(crate) mod compare_inner;
19#[cfg(feature = "dtype-decimal")]
20mod decimal;
21pub(crate) mod downcast;
22pub(crate) mod explode;
23mod explode_and_offsets;
24mod extend;
25pub mod fill_null;
26mod filter;
27pub mod float_sorted_arg_max;
28mod for_each;
29pub mod full;
30pub mod gather;
31pub(crate) mod nulls;
32mod reverse;
33#[cfg(feature = "rolling_window")]
34pub(crate) mod rolling_window;
35pub mod row_encode;
36pub mod search_sorted;
37mod set;
38mod shift;
39pub mod sort;
40#[cfg(feature = "algorithm_group_by")]
41pub(crate) mod unique;
42#[cfg(feature = "zip_with")]
43pub mod zip;
44
45pub use chunkops::_set_check_length;
46#[cfg(feature = "serde-lazy")]
47use serde::{Deserialize, Serialize};
48pub use sort::options::*;
49
50use crate::chunked_array::cast::CastOptions;
51use crate::series::{BitRepr, IsSorted};
/// Reinterpretation of the underlying data as a signed or unsigned [`Series`].
///
/// Both methods ship with a default body that panics, so an implementor only
/// overrides the direction(s) it actually supports.
#[cfg(feature = "reinterpret")]
pub trait Reinterpret {
    /// Returns the signed reinterpretation of `self`.
    ///
    /// # Panics
    /// The default implementation always panics via `unimplemented!()`.
    fn reinterpret_signed(&self) -> Series {
        unimplemented!()
    }

    /// Returns the unsigned reinterpretation of `self`.
    ///
    /// # Panics
    /// The default implementation always panics via `unimplemented!()`.
    fn reinterpret_unsigned(&self) -> Series {
        unimplemented!()
    }
}
62
63/// Transmute [`ChunkedArray`] to bit representation.
64/// This is useful in hashing context and reduces no.
65/// of compiled code paths.
66pub(crate) trait ToBitRepr {
67    fn to_bit_repr(&self) -> BitRepr;
68}
69
70pub trait ChunkAnyValue {
71    /// Get a single value. Beware this is slow.
72    /// If you need to use this slightly performant, cast Categorical to UInt32
73    ///
74    /// # Safety
75    /// Does not do any bounds checking.
76    unsafe fn get_any_value_unchecked(&self, index: usize) -> AnyValue;
77
78    /// Get a single value. Beware this is slow.
79    fn get_any_value(&self, index: usize) -> PolarsResult<AnyValue>;
80}
81
82/// Explode/flatten a List or String Series
83pub trait ChunkExplode {
84    fn explode(&self) -> PolarsResult<Series> {
85        self.explode_and_offsets().map(|t| t.0)
86    }
87    fn offsets(&self) -> PolarsResult<OffsetsBuffer<i64>>;
88    fn explode_and_offsets(&self) -> PolarsResult<(Series, OffsetsBuffer<i64>)>;
89}
90
/// Byte-level view of an array.
pub trait ChunkBytes {
    /// Returns byte slices borrowed from `self`.
    // NOTE(review): presumably one slice per chunk — confirm against the implementors.
    fn to_byte_slices(&self) -> Vec<&[u8]>;
}
94
/// Rolling application of a user-supplied `Series -> Series` function.
///
/// Unlike ChunkWindowCustom and ChunkWindow, this does not use a fold
/// aggregator; it reuses a `Series` wrapper and calls `Series` aggregators.
/// That is likely a bit slower than ChunkWindow.
#[cfg(feature = "rolling_window")]
pub trait ChunkRollApply: AsRefDataType {
    /// Apply `_f` over rolling windows configured by `_options`.
    ///
    /// # Errors
    /// The default implementation never succeeds: it bails with an error for
    /// the `rolling_map` operation, reporting `self.as_ref_dtype()`.
    fn rolling_map(
        &self,
        _f: &dyn Fn(&Series) -> Series,
        _options: RollingOptionsFixedWindow,
    ) -> PolarsResult<Series>
    where
        Self: Sized,
    {
        polars_bail!(opq = rolling_map, self.as_ref_dtype());
    }
}
111
112pub trait ChunkTake<Idx: ?Sized>: ChunkTakeUnchecked<Idx> {
113    /// Gather values from ChunkedArray by index.
114    fn take(&self, indices: &Idx) -> PolarsResult<Self>
115    where
116        Self: Sized;
117}
118
/// Gather without validating the indices.
pub trait ChunkTakeUnchecked<Idx: ?Sized> {
    /// Gather values from the ChunkedArray at the positions given by `indices`.
    ///
    /// # Safety
    /// Every non-null index must be valid.
    unsafe fn take_unchecked(&self, indices: &Idx) -> Self;
}
126
127/// Create a `ChunkedArray` with new values by index or by boolean mask.
128///
129/// Note that these operations clone data. This is however the only way we can modify at mask or
130/// index level as the underlying Arrow arrays are immutable.
131pub trait ChunkSet<'a, A, B> {
132    /// Set the values at indexes `idx` to some optional value `Option<T>`.
133    ///
134    /// # Example
135    ///
136    /// ```rust
137    /// # use polars_core::prelude::*;
138    /// let ca = UInt32Chunked::new("a".into(), &[1, 2, 3]);
139    /// let new = ca.scatter_single(vec![0, 1], Some(10)).unwrap();
140    ///
141    /// assert_eq!(Vec::from(&new), &[Some(10), Some(10), Some(3)]);
142    /// ```
143    fn scatter_single<I: IntoIterator<Item = IdxSize>>(
144        &'a self,
145        idx: I,
146        opt_value: Option<A>,
147    ) -> PolarsResult<Self>
148    where
149        Self: Sized;
150
151    /// Set the values at indexes `idx` by applying a closure to these values.
152    ///
153    /// # Example
154    ///
155    /// ```rust
156    /// # use polars_core::prelude::*;
157    /// let ca = Int32Chunked::new("a".into(), &[1, 2, 3]);
158    /// let new = ca.scatter_with(vec![0, 1], |opt_v| opt_v.map(|v| v - 5)).unwrap();
159    ///
160    /// assert_eq!(Vec::from(&new), &[Some(-4), Some(-3), Some(3)]);
161    /// ```
162    fn scatter_with<I: IntoIterator<Item = IdxSize>, F>(
163        &'a self,
164        idx: I,
165        f: F,
166    ) -> PolarsResult<Self>
167    where
168        Self: Sized,
169        F: Fn(Option<A>) -> Option<B>;
170    /// Set the values where the mask evaluates to `true` to some optional value `Option<T>`.
171    ///
172    /// # Example
173    ///
174    /// ```rust
175    /// # use polars_core::prelude::*;
176    /// let ca = Int32Chunked::new("a".into(), &[1, 2, 3]);
177    /// let mask = BooleanChunked::new("mask".into(), &[false, true, false]);
178    /// let new = ca.set(&mask, Some(5)).unwrap();
179    /// assert_eq!(Vec::from(&new), &[Some(1), Some(5), Some(3)]);
180    /// ```
181    fn set(&'a self, mask: &BooleanChunked, opt_value: Option<A>) -> PolarsResult<Self>
182    where
183        Self: Sized;
184}
185
186/// Cast `ChunkedArray<T>` to `ChunkedArray<N>`
187pub trait ChunkCast {
188    /// Cast a [`ChunkedArray`] to [`DataType`]
189    fn cast(&self, dtype: &DataType) -> PolarsResult<Series> {
190        self.cast_with_options(dtype, CastOptions::NonStrict)
191    }
192
193    /// Cast a [`ChunkedArray`] to [`DataType`]
194    fn cast_with_options(&self, dtype: &DataType, options: CastOptions) -> PolarsResult<Series>;
195
196    /// Does not check if the cast is a valid one and may over/underflow
197    ///
198    /// # Safety
199    /// - This doesn't do utf8 validation checking when casting from binary
200    /// - This doesn't do categorical bound checking when casting from UInt32
201    unsafe fn cast_unchecked(&self, dtype: &DataType) -> PolarsResult<Series>;
202}
203
/// Elementwise operations on a [`ChunkedArray<T>`].
///
/// This is the fastest route when the operation itself is cheaper than the
/// branching that null checking would introduce.
pub trait ChunkApply<'a, T> {
    /// Value type produced by the closures passed to this trait's methods.
    type FuncRet;

    /// Apply `f` to every value. Fastest when branching on nulls would cost
    /// more than running the closure, which is often the case.
    ///
    /// Null values remain null.
    ///
    /// # Example
    ///
    /// ```
    /// use polars_core::prelude::*;
    /// fn double(ca: &UInt32Chunked) -> UInt32Chunked {
    ///     ca.apply_values(|v| v * 2)
    /// }
    /// ```
    #[must_use]
    fn apply_values<F>(&'a self, f: F) -> Self
    where
        F: Fn(T) -> Self::FuncRet + Copy;

    /// Apply `f` to every element, nulls included: the closure receives an
    /// `Option` and decides whether the output is null.
    #[must_use]
    fn apply<F>(&'a self, f: F) -> Self
    where
        F: Fn(Option<T>) -> Option<Self::FuncRet> + Copy;

    /// Apply `f` elementwise and write the results into `slice`.
    ///
    /// Closure shape: (value of chunkedarray, value of slice) -> value of slice.
    fn apply_to_slice<F, S>(&'a self, f: F, slice: &mut [S])
    where
        F: Fn(Option<T>, &S) -> S;
}
239
/// Aggregation operations.
///
/// Apart from `_sum_as_f64`, every method has a default body, and those
/// defaults report "nothing" (`None`) until an implementor overrides them.
pub trait ChunkAgg<T> {
    /// Sum of the ChunkedArray.
    ///
    /// Returns `None` if not implemented for `T`. An empty array sums to `0`.
    fn sum(&self) -> Option<T> {
        None
    }

    /// Sum as an `f64`; has no default, so every implementor provides it.
    fn _sum_as_f64(&self) -> f64;

    /// Minimum value; the default implementation returns `None`.
    fn min(&self) -> Option<T> {
        None
    }

    /// Maximum value in the array, according to the natural order.
    ///
    /// Returns `None` if the array is empty or only contains null values.
    fn max(&self) -> Option<T> {
        None
    }

    /// Minimum and maximum as a `(min, max)` pair.
    ///
    /// The default queries [`ChunkAgg::min`] first and stops there if it is
    /// `None`; only then is [`ChunkAgg::max`] queried.
    fn min_max(&self) -> Option<(T, T)> {
        let lowest = self.min()?;
        let highest = self.max()?;
        Some((lowest, highest))
    }

    /// Mean value of the array.
    ///
    /// Returns `None` if the array is empty or only contains null values.
    fn mean(&self) -> Option<f64> {
        None
    }
}
271
272/// Quantile and median aggregation.
273pub trait ChunkQuantile<T> {
274    /// Returns the mean value in the array.
275    /// Returns `None` if the array is empty or only contains null values.
276    fn median(&self) -> Option<T> {
277        None
278    }
279    /// Aggregate a given quantile of the ChunkedArray.
280    /// Returns `None` if the array is empty or only contains null values.
281    fn quantile(&self, _quantile: f64, _method: QuantileMethod) -> PolarsResult<Option<T>> {
282        Ok(None)
283    }
284}
285
/// Variance and standard deviation aggregation.
///
/// Both defaults return `None`; implementors override them.
pub trait ChunkVar {
    /// Variance of this ChunkedArray/Series.
    // NOTE(review): `_ddof` presumably means "delta degrees of freedom" — confirm.
    fn var(&self, _ddof: u8) -> Option<f64> {
        None
    }

    /// Standard deviation of this ChunkedArray/Series.
    fn std(&self, _ddof: u8) -> Option<f64> {
        None
    }
}
298
/// Bitwise reduction operations.
#[cfg(feature = "bitwise")]
pub trait ChunkBitwiseReduce {
    /// Type of the reduced value.
    type Physical;

    /// Reduce all values with bitwise AND.
    fn and_reduce(&self) -> Option<Self::Physical>;

    /// Reduce all values with bitwise OR.
    fn or_reduce(&self) -> Option<Self::Physical>;

    /// Reduce all values with bitwise XOR.
    fn xor_reduce(&self) -> Option<Self::Physical>;
}
308
/// Equality comparisons between [`Series`] and [`ChunkedArray`]s, yielding a
/// `boolean` mask that can be used to filter rows.
///
/// # Example
///
/// ```
/// use polars_core::prelude::*;
/// fn filter_all_ones(df: &DataFrame) -> PolarsResult<DataFrame> {
///     let mask = df
///     .column("column_a")?
///     .as_materialized_series()
///     .equal(1)?;
///
///     df.filter(&mask)
/// }
/// ```
pub trait ChunkCompareEq<Rhs> {
    /// Result type of each comparison.
    type Item;

    /// `self == rhs`.
    fn equal(&self, rhs: Rhs) -> Self::Item;

    /// `self == rhs`, where `None == None`.
    fn equal_missing(&self, rhs: Rhs) -> Self::Item;

    /// `self != rhs`.
    fn not_equal(&self, rhs: Rhs) -> Self::Item;

    /// `self != rhs`, where `None == None`.
    fn not_equal_missing(&self, rhs: Rhs) -> Self::Item;
}
340
/// Ordering comparisons (`<`, `>=`, etc.) between [`Series`] and
/// [`ChunkedArray`]s, yielding a `boolean` mask that can be used to filter rows.
pub trait ChunkCompareIneq<Rhs> {
    /// Result type of each comparison.
    type Item;

    /// `self > rhs`.
    fn gt(&self, rhs: Rhs) -> Self::Item;

    /// `self >= rhs`.
    fn gt_eq(&self, rhs: Rhs) -> Self::Item;

    /// `self < rhs`.
    fn lt(&self, rhs: Rhs) -> Self::Item;

    /// `self <= rhs`.
    fn lt_eq(&self, rhs: Rhs) -> Self::Item;
}
358
359/// Get unique values in a `ChunkedArray`
360pub trait ChunkUnique {
361    // We don't return Self to be able to use AutoRef specialization
362    /// Get unique values of a ChunkedArray
363    fn unique(&self) -> PolarsResult<Self>
364    where
365        Self: Sized;
366
367    /// Get first index of the unique values in a `ChunkedArray`.
368    /// This Vec is sorted.
369    fn arg_unique(&self) -> PolarsResult<IdxCa>;
370
371    /// Number of unique values in the `ChunkedArray`
372    fn n_unique(&self) -> PolarsResult<usize> {
373        self.arg_unique().map(|v| v.len())
374    }
375}
376
/// Approximate distinct count.
#[cfg(feature = "approx_unique")]
pub trait ChunkApproxNUnique {
    /// Returns an approximation of the number of unique values.
    fn approx_n_unique(&self) -> IdxSize;
}
381
382/// Sort operations on `ChunkedArray`.
383pub trait ChunkSort<T: PolarsDataType> {
384    #[allow(unused_variables)]
385    fn sort_with(&self, options: SortOptions) -> ChunkedArray<T>;
386
387    /// Returned a sorted `ChunkedArray`.
388    fn sort(&self, descending: bool) -> ChunkedArray<T>;
389
390    /// Retrieve the indexes needed to sort this array.
391    fn arg_sort(&self, options: SortOptions) -> IdxCa;
392
393    /// Retrieve the indexes need to sort this and the other arrays.
394    #[allow(unused_variables)]
395    fn arg_sort_multiple(
396        &self,
397        by: &[Column],
398        _options: &SortMultipleOptions,
399    ) -> PolarsResult<IdxCa> {
400        polars_bail!(opq = arg_sort_multiple, T::get_dtype());
401    }
402}
403
404pub type FillNullLimit = Option<IdxSize>;
405
406#[derive(Copy, Clone, Debug, PartialEq, Hash)]
407#[cfg_attr(feature = "serde-lazy", derive(Serialize, Deserialize))]
408pub enum FillNullStrategy {
409    /// previous value in array
410    Backward(FillNullLimit),
411    /// next value in array
412    Forward(FillNullLimit),
413    /// mean value of array
414    Mean,
415    /// minimal value in array
416    Min,
417    /// maximum value in array
418    Max,
419    /// replace with the value zero
420    Zero,
421    /// replace with the value one
422    One,
423    /// replace with the maximum value of that data type
424    MaxBound,
425    /// replace with the minimal value of that data type
426    MinBound,
427}
428
429impl FillNullStrategy {
430    pub fn is_elementwise(&self) -> bool {
431        matches!(self, Self::One | Self::Zero)
432    }
433}
434
435/// Replace None values with a value
436pub trait ChunkFillNullValue<T> {
437    /// Replace None values with a give value `T`.
438    fn fill_null_with_values(&self, value: T) -> PolarsResult<Self>
439    where
440        Self: Sized;
441}
442
443/// Fill a ChunkedArray with one value.
444pub trait ChunkFull<T> {
445    /// Create a ChunkedArray with a single value.
446    fn full(name: PlSmallStr, value: T, length: usize) -> Self
447    where
448        Self: Sized;
449}
450
451pub trait ChunkFullNull {
452    fn full_null(_name: PlSmallStr, _length: usize) -> Self
453    where
454        Self: Sized;
455}
456
/// Reverse a [`ChunkedArray<T>`].
pub trait ChunkReverse {
    /// Returns a copy of this array with its elements in reverse order.
    fn reverse(&self) -> Self;
}
462
463/// Filter values by a boolean mask.
464pub trait ChunkFilter<T: PolarsDataType> {
465    /// Filter values in the ChunkedArray with a boolean mask.
466    ///
467    /// ```rust
468    /// # use polars_core::prelude::*;
469    /// let array = Int32Chunked::new("array".into(), &[1, 2, 3]);
470    /// let mask = BooleanChunked::new("mask".into(), &[true, false, true]);
471    ///
472    /// let filtered = array.filter(&mask).unwrap();
473    /// assert_eq!(Vec::from(&filtered), [Some(1), Some(3)])
474    /// ```
475    fn filter(&self, filter: &BooleanChunked) -> PolarsResult<ChunkedArray<T>>
476    where
477        Self: Sized;
478}
479
480/// Create a new ChunkedArray filled with values at that index.
481pub trait ChunkExpandAtIndex<T: PolarsDataType> {
482    /// Create a new ChunkedArray filled with values at that index.
483    fn new_from_index(&self, index: usize, length: usize) -> ChunkedArray<T>;
484}
485
// Shared body for the `new_from_index` implementations: expands to an
// expression producing a `ChunkedArray` of `$length` copies of the value at
// `$index` (or of nulls when that value is null).
//
// Beware: for an empty `$self` the expansion `return`s a clone of `$self` from
// the *enclosing function*, skipping whatever follows the macro call there.
macro_rules! impl_chunk_expand {
    ($self:ident, $length:ident, $index:ident) => {{
        if $self.is_empty() {
            return $self.clone();
        }
        match $self.get($index) {
            None => ChunkedArray::full_null($self.name().clone(), $length),
            Some(val) => ChunkedArray::full($self.name().clone(), val, $length),
        }
    }};
}
498
499impl<T: PolarsNumericType> ChunkExpandAtIndex<T> for ChunkedArray<T>
500where
501    ChunkedArray<T>: ChunkFull<T::Native>,
502{
503    fn new_from_index(&self, index: usize, length: usize) -> ChunkedArray<T> {
504        let mut out = impl_chunk_expand!(self, length, index);
505        out.set_sorted_flag(IsSorted::Ascending);
506        out
507    }
508}
509
510impl ChunkExpandAtIndex<BooleanType> for BooleanChunked {
511    fn new_from_index(&self, index: usize, length: usize) -> BooleanChunked {
512        let mut out = impl_chunk_expand!(self, length, index);
513        out.set_sorted_flag(IsSorted::Ascending);
514        out
515    }
516}
517
518impl ChunkExpandAtIndex<StringType> for StringChunked {
519    fn new_from_index(&self, index: usize, length: usize) -> StringChunked {
520        let mut out = impl_chunk_expand!(self, length, index);
521        out.set_sorted_flag(IsSorted::Ascending);
522        out
523    }
524}
525
526impl ChunkExpandAtIndex<BinaryType> for BinaryChunked {
527    fn new_from_index(&self, index: usize, length: usize) -> BinaryChunked {
528        let mut out = impl_chunk_expand!(self, length, index);
529        out.set_sorted_flag(IsSorted::Ascending);
530        out
531    }
532}
533
534impl ChunkExpandAtIndex<BinaryOffsetType> for BinaryOffsetChunked {
535    fn new_from_index(&self, index: usize, length: usize) -> BinaryOffsetChunked {
536        let mut out = impl_chunk_expand!(self, length, index);
537        out.set_sorted_flag(IsSorted::Ascending);
538        out
539    }
540}
541
542impl ChunkExpandAtIndex<ListType> for ListChunked {
543    fn new_from_index(&self, index: usize, length: usize) -> ListChunked {
544        let opt_val = self.get_as_series(index);
545        match opt_val {
546            Some(val) => {
547                let mut ca = ListChunked::full(self.name().clone(), &val, length);
548                unsafe { ca.to_logical(self.inner_dtype().clone()) };
549                ca
550            },
551            None => {
552                ListChunked::full_null_with_dtype(self.name().clone(), length, self.inner_dtype())
553            },
554        }
555    }
556}
557
#[cfg(feature = "dtype-struct")]
impl ChunkExpandAtIndex<StructType> for StructChunked {
    /// Struct broadcast: `length` copies of the row at `index`, built field by
    /// field.
    ///
    /// # Panics
    /// Panics if the chunk index computed for `index` does not address an
    /// existing chunk (the lookup is unwrapped).
    fn new_from_index(&self, index: usize, length: usize) -> ChunkedArray<StructType> {
        let (chunk_idx, idx) = self.index_to_chunked_index(index);
        let source = self.downcast_chunks().get(chunk_idx).unwrap();

        let expanded = if chunk_is_null_at(source, idx) {
            // The whole row is null: an all-null array of the same dtype suffices.
            new_null_array(source.dtype().clone(), length)
        } else {
            // Broadcast every field array separately, then reassemble the
            // struct without a validity mask.
            let mut fields = Vec::new();
            for arr in source.values().iter() {
                let field = Series::try_from((PlSmallStr::EMPTY, arr.clone())).unwrap();
                let field = field.new_from_index(idx, length);
                fields.push(field.chunks()[0].clone());
            }

            StructArray::new(source.dtype().clone(), length, fields, None).boxed()
        };

        // Tiny local helper so the branch condition above reads as a sentence.
        #[inline(always)]
        fn chunk_is_null_at(chunk: &StructArray, idx: usize) -> bool {
            chunk.is_null(idx)
        }

        // SAFETY: chunks are from self.
        unsafe { self.copy_with_chunks(vec![expanded]) }
    }
}
583
#[cfg(feature = "dtype-array")]
impl ChunkExpandAtIndex<FixedSizeListType> for ArrayChunked {
    /// Fixed-size-list broadcast: `length` copies of the sub-array at `index`.
    ///
    /// When `get_as_series(index)` yields nothing, the result is an all-null
    /// array that keeps `self`'s inner dtype and width.
    fn new_from_index(&self, index: usize, length: usize) -> ArrayChunked {
        match self.get_as_series(index) {
            None => ArrayChunked::full_null_with_dtype(
                self.name().clone(),
                length,
                self.inner_dtype(),
                self.width(),
            ),
            Some(row) => {
                let mut expanded = ArrayChunked::full(self.name().clone(), &row, length);
                // SAFETY (presumed): `row` was read out of `self`, so re-tagging the
                // result with `self`'s inner dtype should match the data — TODO confirm.
                unsafe { expanded.to_logical(self.inner_dtype().clone()) };
                expanded
            },
        }
    }
}
603
#[cfg(feature = "object")]
impl<T: PolarsObject> ChunkExpandAtIndex<ObjectType<T>> for ObjectChunked<T> {
    /// Object broadcast: `length` clones of the object at `index`, or an
    /// all-null array when `get(index)` yields `None`.
    fn new_from_index(&self, index: usize, length: usize) -> ObjectChunked<T> {
        let name = self.name().clone();
        if let Some(obj) = self.get(index) {
            ObjectChunked::<T>::full(name, obj.clone(), length)
        } else {
            ObjectChunked::<T>::full_null(name, length)
        }
    }
}
614
615/// Shift the values of a [`ChunkedArray`] by a number of periods.
616pub trait ChunkShiftFill<T: PolarsDataType, V> {
617    /// Shift the values by a given period and fill the parts that will be empty due to this operation
618    /// with `fill_value`.
619    fn shift_and_fill(&self, periods: i64, fill_value: V) -> ChunkedArray<T>;
620}
621
622pub trait ChunkShift<T: PolarsDataType> {
623    fn shift(&self, periods: i64) -> ChunkedArray<T>;
624}
625
626/// Combine two [`ChunkedArray`] based on some predicate.
627pub trait ChunkZip<T: PolarsDataType> {
628    /// Create a new ChunkedArray with values from self where the mask evaluates `true` and values
629    /// from `other` where the mask evaluates `false`
630    fn zip_with(
631        &self,
632        mask: &BooleanChunked,
633        other: &ChunkedArray<T>,
634    ) -> PolarsResult<ChunkedArray<T>>;
635}
636
637/// Apply kernels on the arrow array chunks in a ChunkedArray.
638pub trait ChunkApplyKernel<A: Array> {
639    /// Apply kernel and return result as a new ChunkedArray.
640    #[must_use]
641    fn apply_kernel(&self, f: &dyn Fn(&A) -> ArrayRef) -> Self;
642
643    /// Apply a kernel that outputs an array of different type.
644    fn apply_kernel_cast<S>(&self, f: &dyn Fn(&A) -> ArrayRef) -> ChunkedArray<S>
645    where
646        S: PolarsDataType;
647}
648
#[cfg(feature = "is_first_distinct")]
/// Mask the first occurrence of each unique value as `true`.
pub trait IsFirstDistinct<T: PolarsDataType> {
    /// Returns the boolean mask.
    ///
    /// # Errors
    /// The default implementation never succeeds: it bails with an error for
    /// the `is_first_distinct` operation, reporting `T::get_dtype()`.
    fn is_first_distinct(&self) -> PolarsResult<BooleanChunked> {
        polars_bail!(opq = is_first_distinct, T::get_dtype());
    }
}
656
#[cfg(feature = "is_last_distinct")]
/// Mask the last occurrence of each unique value as `true`.
pub trait IsLastDistinct<T: PolarsDataType> {
    /// Returns the boolean mask.
    ///
    /// # Errors
    /// The default implementation never succeeds: it bails with an error for
    /// the `is_last_distinct` operation, reporting `T::get_dtype()`.
    fn is_last_distinct(&self) -> PolarsResult<BooleanChunked> {
        polars_bail!(opq = is_last_distinct, T::get_dtype());
    }
}