polars_arrow/array/growable/
utf8.rs

1use std::sync::Arc;
2
3use super::utils::extend_offset_values;
4use super::Growable;
5use crate::array::growable::utils::{extend_validity, prepare_validity};
6use crate::array::{Array, Utf8Array};
7use crate::bitmap::BitmapBuilder;
8use crate::offset::{Offset, Offsets};
9
/// Concrete [`Growable`] for the [`Utf8Array`].
///
/// Accumulates offsets, string bytes and (optionally) validity bits copied out of the
/// borrowed source `arrays`, and assembles them into a new [`Utf8Array`] on request.
pub struct GrowableUtf8<'a, O: Offset> {
    // Borrowed source arrays; `extend` picks one of them by index.
    arrays: Vec<&'a Utf8Array<O>>,
    // Validity bits of the array being built; `None` when no validity is tracked.
    validity: Option<BitmapBuilder>,
    // Bytes of the string data appended so far.
    values: Vec<u8>,
    // Offsets of the array being built; holds one more entry than the number of
    // elements (see `len`).
    offsets: Offsets<O>,
}
17
18impl<'a, O: Offset> GrowableUtf8<'a, O> {
19    /// Creates a new [`GrowableUtf8`] bound to `arrays` with a pre-allocated `capacity`.
20    /// # Panics
21    /// If `arrays` is empty.
22    pub fn new(arrays: Vec<&'a Utf8Array<O>>, mut use_validity: bool, capacity: usize) -> Self {
23        // if any of the arrays has nulls, insertions from any array requires setting bits
24        // as there is at least one array with nulls.
25        if arrays.iter().any(|array| array.null_count() > 0) {
26            use_validity = true;
27        };
28
29        Self {
30            arrays: arrays.to_vec(),
31            values: Vec::with_capacity(0),
32            offsets: Offsets::with_capacity(capacity),
33            validity: prepare_validity(use_validity, capacity),
34        }
35    }
36
37    fn to(&mut self) -> Utf8Array<O> {
38        let validity = std::mem::take(&mut self.validity);
39        let offsets = std::mem::take(&mut self.offsets);
40        let values = std::mem::take(&mut self.values);
41
42        #[cfg(debug_assertions)]
43        {
44            crate::array::specification::try_check_utf8(offsets.as_slice(), &values).unwrap();
45        }
46
47        unsafe {
48            Utf8Array::<O>::new_unchecked(
49                self.arrays[0].dtype().clone(),
50                offsets.into(),
51                values.into(),
52                validity.map(|v| v.freeze()),
53            )
54        }
55    }
56}
57
58impl<'a, O: Offset> Growable<'a> for GrowableUtf8<'a, O> {
59    unsafe fn extend(&mut self, index: usize, start: usize, len: usize) {
60        let array = *self.arrays.get_unchecked(index);
61        extend_validity(&mut self.validity, array, start, len);
62
63        let offsets = array.offsets();
64        let values = array.values();
65
66        self.offsets
67            .try_extend_from_slice(offsets, start, len)
68            .unwrap();
69
70        // values
71        extend_offset_values::<O>(&mut self.values, offsets.as_slice(), values, start, len);
72    }
73
74    fn extend_validity(&mut self, additional: usize) {
75        self.offsets.extend_constant(additional);
76        if let Some(validity) = &mut self.validity {
77            validity.extend_constant(additional, false);
78        }
79    }
80
81    #[inline]
82    fn len(&self) -> usize {
83        self.offsets.len() - 1
84    }
85
86    fn as_arc(&mut self) -> Arc<dyn Array> {
87        Arc::new(self.to())
88    }
89
90    fn as_box(&mut self) -> Box<dyn Array> {
91        Box::new(self.to())
92    }
93}
94
95impl<'a, O: Offset> From<GrowableUtf8<'a, O>> for Utf8Array<O> {
96    fn from(mut val: GrowableUtf8<'a, O>) -> Self {
97        val.to()
98    }
99}