polars_compute/unique/mod.rs
1use arrow::array::Array;
2
3/// Kernel to calculate the number of unique elements where the elements are already sorted.
4pub trait SortedUniqueKernel: Array {
5 /// Calculate the set of unique elements in `fst` and `others` and fold the result into one
6 /// array.
7 fn unique_fold<'a>(fst: &'a Self, others: impl Iterator<Item = &'a Self>) -> Self;
8
9 /// Calculate the set of unique elements in [`Self`] where we have no further information about
10 /// `self`.
11 fn unique(&self) -> Self;
12
13 /// Calculate the number of unique elements in [`Self`]
14 ///
15 /// A null is also considered a unique value
16 fn n_unique(&self) -> usize;
17
18 /// Calculate the number of unique non-null elements in [`Self`]
19 fn n_unique_non_null(&self) -> usize;
20}
21
22/// Optimized kernel to calculate the unique elements of an array.
23///
24/// This kernel is a specialized for where all values are known to be in some small range of
25/// values. In this case, you can usually get by with a bitset and bit-arithmetic instead of using
26/// vectors and hashsets. Consequently, this kernel is usually called when further information is
27/// known about the underlying array.
28///
29/// This trait is not implemented directly on the `Array` as with many other kernels. Rather, it is
30/// implemented on a `State` struct to which `Array`s can be appended. This allows for sharing of
31/// `State` between many chunks and allows for different implementations for the same array (e.g. a
32/// maintain order and no maintain-order variant).
33pub trait RangedUniqueKernel {
34 type Array: Array;
35
36 /// Returns whether all the values in the whole range are in the state
37 fn has_seen_all(&self) -> bool;
38
39 /// Append an `Array`'s values to the `State`
40 fn append(&mut self, array: &Self::Array);
41
42 /// Append another state into the `State`
43 fn append_state(&mut self, other: &Self);
44
45 /// Consume the state to get the unique elements
46 fn finalize_unique(self) -> Self::Array;
47 /// Consume the state to get the number of unique elements including null
48 fn finalize_n_unique(&self) -> usize;
49 /// Consume the state to get the number of unique elements excluding null
50 fn finalize_n_unique_non_null(&self) -> usize;
51}
52
53/// A generic unique kernel that selects the generally applicable unique kernel for an `Array`.
54pub trait GenericUniqueKernel {
55 /// Calculate the set of unique elements
56 fn unique(&self) -> Self;
57 /// Calculate the number of unique elements including null
58 fn n_unique(&self) -> usize;
59 /// Calculate the number of unique elements excluding null
60 fn n_unique_non_null(&self) -> usize;
61}
62
63mod boolean;
64mod dictionary;
65mod primitive;
66
67pub use boolean::BooleanUniqueKernelState;
68pub use dictionary::DictionaryRangedUniqueState;
69pub use primitive::PrimitiveRangedUniqueState;