polars_compute/unique/
mod.rs

1use arrow::array::Array;
2
3/// Kernel to calculate the number of unique elements where the elements are already sorted.
4pub trait SortedUniqueKernel: Array {
5    /// Calculate the set of unique elements in `fst` and `others` and fold the result into one
6    /// array.
7    fn unique_fold<'a>(fst: &'a Self, others: impl Iterator<Item = &'a Self>) -> Self;
8
9    /// Calculate the set of unique elements in [`Self`] where we have no further information about
10    /// `self`.
11    fn unique(&self) -> Self;
12
13    /// Calculate the number of unique elements in [`Self`]
14    ///
15    /// A null is also considered a unique value
16    fn n_unique(&self) -> usize;
17
18    /// Calculate the number of unique non-null elements in [`Self`]
19    fn n_unique_non_null(&self) -> usize;
20}
21
22/// Optimized kernel to calculate the unique elements of an array.
23///
24/// This kernel is a specialized for where all values are known to be in some small range of
25/// values. In this case, you can usually get by with a bitset and bit-arithmetic instead of using
26/// vectors and hashsets. Consequently, this kernel is usually called when further information is
27/// known about the underlying array.
28///
29/// This trait is not implemented directly on the `Array` as with many other kernels. Rather, it is
30/// implemented on a `State` struct to which `Array`s can be appended. This allows for sharing of
31/// `State` between many chunks and allows for different implementations for the same array (e.g. a
32/// maintain order and no maintain-order variant).
33pub trait RangedUniqueKernel {
34    type Array: Array;
35
36    /// Returns whether all the values in the whole range are in the state
37    fn has_seen_all(&self) -> bool;
38
39    /// Append an `Array`'s values to the `State`
40    fn append(&mut self, array: &Self::Array);
41
42    /// Append another state into the `State`
43    fn append_state(&mut self, other: &Self);
44
45    /// Consume the state to get the unique elements
46    fn finalize_unique(self) -> Self::Array;
47    /// Consume the state to get the number of unique elements including null
48    fn finalize_n_unique(&self) -> usize;
49    /// Consume the state to get the number of unique elements excluding null
50    fn finalize_n_unique_non_null(&self) -> usize;
51}
52
53/// A generic unique kernel that selects the generally applicable unique kernel for an `Array`.
54pub trait GenericUniqueKernel {
55    /// Calculate the set of unique elements
56    fn unique(&self) -> Self;
57    /// Calculate the number of unique elements including null
58    fn n_unique(&self) -> usize;
59    /// Calculate the number of unique elements excluding null
60    fn n_unique_non_null(&self) -> usize;
61}
62
63mod boolean;
64mod dictionary;
65mod primitive;
66
67pub use boolean::BooleanUniqueKernelState;
68pub use dictionary::DictionaryRangedUniqueState;
69pub use primitive::PrimitiveRangedUniqueState;