polars_arrow/array/growable/
dictionary.rs1use std::sync::Arc;
2
3use super::{make_growable, Growable};
4use crate::array::growable::utils::{extend_validity, prepare_validity};
5use crate::array::{Array, DictionaryArray, DictionaryKey, PrimitiveArray};
6use crate::bitmap::BitmapBuilder;
7use crate::datatypes::ArrowDataType;
8
9pub struct GrowableDictionary<'a, K: DictionaryKey> {
14 dtype: ArrowDataType,
15 keys: Vec<&'a PrimitiveArray<K>>,
16 key_values: Vec<K>,
17 validity: Option<BitmapBuilder>,
18 offsets: Vec<usize>,
19 values: Box<dyn Array>,
20}
21
22fn concatenate_values<K: DictionaryKey>(
23 arrays_keys: &[&PrimitiveArray<K>],
24 arrays_values: &[&dyn Array],
25 capacity: usize,
26) -> (Box<dyn Array>, Vec<usize>) {
27 let mut mutable = make_growable(arrays_values, false, capacity);
28 let mut offsets = Vec::with_capacity(arrays_keys.len() + 1);
29 offsets.push(0);
30 for (i, values) in arrays_values.iter().enumerate() {
31 unsafe { mutable.extend(i, 0, values.len()) };
32 offsets.push(offsets[i] + values.len());
33 }
34 (mutable.as_box(), offsets)
35}
36
37impl<'a, T: DictionaryKey> GrowableDictionary<'a, T> {
38 pub fn new(arrays: &[&'a DictionaryArray<T>], mut use_validity: bool, capacity: usize) -> Self {
42 let dtype = arrays[0].dtype().clone();
43
44 if arrays.iter().any(|array| array.null_count() > 0) {
47 use_validity = true;
48 };
49
50 let arrays_keys = arrays.iter().map(|array| array.keys()).collect::<Vec<_>>();
51 let arrays_values = arrays
52 .iter()
53 .map(|array| array.values().as_ref())
54 .collect::<Vec<_>>();
55
56 let (values, offsets) = concatenate_values(&arrays_keys, &arrays_values, capacity);
57
58 Self {
59 dtype,
60 offsets,
61 values,
62 keys: arrays_keys,
63 key_values: Vec::with_capacity(capacity),
64 validity: prepare_validity(use_validity, capacity),
65 }
66 }
67
68 #[inline]
69 fn to(&mut self) -> DictionaryArray<T> {
70 let validity = self.validity.take();
71 let key_values = std::mem::take(&mut self.key_values);
72
73 #[cfg(debug_assertions)]
74 {
75 crate::array::specification::check_indexes(&key_values, self.values.len()).unwrap();
76 }
77 let keys = PrimitiveArray::<T>::new(
78 T::PRIMITIVE.into(),
79 key_values.into(),
80 validity.map(|v| v.freeze()),
81 );
82
83 unsafe {
85 DictionaryArray::<T>::try_new_unchecked(self.dtype.clone(), keys, self.values.clone())
86 .unwrap()
87 }
88 }
89}
90
91impl<'a, T: DictionaryKey> Growable<'a> for GrowableDictionary<'a, T> {
92 #[inline]
93 unsafe fn extend(&mut self, index: usize, start: usize, len: usize) {
94 let keys_array = *self.keys.get_unchecked(index);
95 extend_validity(&mut self.validity, keys_array, start, len);
96
97 let values = &keys_array.values().get_unchecked(start..start + len);
98 let offset = self.offsets.get_unchecked(index);
99 self.key_values.extend(
100 values
101 .iter()
102 .map(|x| {
104 let x: usize = offset + (*x).try_into().unwrap_or(0);
105 let x: T = match x.try_into() {
106 Ok(key) => key,
107 Err(_) => {
109 panic!("The maximum key is too small")
110 },
111 };
112 x
113 }),
114 );
115 }
116
117 #[inline]
118 fn len(&self) -> usize {
119 self.key_values.len()
120 }
121
122 #[inline]
123 fn extend_validity(&mut self, additional: usize) {
124 self.key_values
125 .resize(self.key_values.len() + additional, T::default());
126 if let Some(validity) = &mut self.validity {
127 validity.extend_constant(additional, false);
128 }
129 }
130
131 #[inline]
132 fn as_arc(&mut self) -> Arc<dyn Array> {
133 Arc::new(self.to())
134 }
135
136 #[inline]
137 fn as_box(&mut self) -> Box<dyn Array> {
138 Box::new(self.to())
139 }
140}
141
142impl<'a, T: DictionaryKey> From<GrowableDictionary<'a, T>> for DictionaryArray<T> {
143 #[inline]
144 fn from(mut val: GrowableDictionary<'a, T>) -> Self {
145 val.to()
146 }
147}