polars_compute/unique/
dictionary.rs

1use arrow::array::{Array, DictionaryArray};
2use arrow::datatypes::ArrowDataType;
3
4use super::{PrimitiveRangedUniqueState, RangedUniqueKernel};
5
6/// A specialized unique kernel for [`DictionaryArray`] for when all values are in a small known
7/// range.
8pub struct DictionaryRangedUniqueState {
9    key_state: PrimitiveRangedUniqueState<u32>,
10    values: Box<dyn Array>,
11}
12
13impl DictionaryRangedUniqueState {
14    pub fn new(values: Box<dyn Array>) -> Self {
15        Self {
16            key_state: PrimitiveRangedUniqueState::new(0, values.len() as u32 + 1),
17            values,
18        }
19    }
20
21    pub fn key_state(&mut self) -> &mut PrimitiveRangedUniqueState<u32> {
22        &mut self.key_state
23    }
24}
25
26impl RangedUniqueKernel for DictionaryRangedUniqueState {
27    type Array = DictionaryArray<u32>;
28
29    fn has_seen_all(&self) -> bool {
30        self.key_state.has_seen_all()
31    }
32
33    fn append(&mut self, array: &Self::Array) {
34        self.key_state.append(array.keys());
35    }
36
37    fn append_state(&mut self, other: &Self) {
38        debug_assert_eq!(self.values, other.values);
39        self.key_state.append_state(&other.key_state);
40    }
41
42    fn finalize_unique(self) -> Self::Array {
43        let keys = self.key_state.finalize_unique();
44        DictionaryArray::<u32>::try_new(
45            ArrowDataType::Dictionary(
46                arrow::datatypes::IntegerType::UInt32,
47                Box::new(self.values.dtype().clone()),
48                false,
49            ),
50            keys,
51            self.values,
52        )
53        .unwrap()
54    }
55
56    fn finalize_n_unique(&self) -> usize {
57        self.key_state.finalize_n_unique()
58    }
59
60    fn finalize_n_unique_non_null(&self) -> usize {
61        self.key_state.finalize_n_unique_non_null()
62    }
63}