polars_arrow/array/growable/
binary.rs

1use std::sync::Arc;
2
3use super::utils::extend_offset_values;
4use super::Growable;
5use crate::array::growable::utils::{extend_validity, prepare_validity};
6use crate::array::{Array, BinaryArray};
7use crate::bitmap::BitmapBuilder;
8use crate::datatypes::ArrowDataType;
9use crate::offset::{Offset, Offsets};
10
11/// Concrete [`Growable`] for the [`BinaryArray`].
12pub struct GrowableBinary<'a, O: Offset> {
13    arrays: Vec<&'a BinaryArray<O>>,
14    dtype: ArrowDataType,
15    validity: Option<BitmapBuilder>,
16    values: Vec<u8>,
17    offsets: Offsets<O>,
18}
19
20impl<'a, O: Offset> GrowableBinary<'a, O> {
21    /// Creates a new [`GrowableBinary`] bound to `arrays` with a pre-allocated `capacity`.
22    /// # Panics
23    /// If `arrays` is empty.
24    pub fn new(arrays: Vec<&'a BinaryArray<O>>, mut use_validity: bool, capacity: usize) -> Self {
25        let dtype = arrays[0].dtype().clone();
26
27        // if any of the arrays has nulls, insertions from any array requires setting bits
28        // as there is at least one array with nulls.
29        if !use_validity & arrays.iter().any(|array| array.null_count() > 0) {
30            use_validity = true;
31        };
32
33        Self {
34            arrays,
35            dtype,
36            values: Vec::with_capacity(0),
37            offsets: Offsets::with_capacity(capacity),
38            validity: prepare_validity(use_validity, capacity),
39        }
40    }
41
42    fn to(&mut self) -> BinaryArray<O> {
43        let dtype = self.dtype.clone();
44        let validity = std::mem::take(&mut self.validity);
45        let offsets = std::mem::take(&mut self.offsets);
46        let values = std::mem::take(&mut self.values);
47
48        BinaryArray::<O>::new(
49            dtype,
50            offsets.into(),
51            values.into(),
52            validity.map(|v| v.freeze()),
53        )
54    }
55}
56
57impl<'a, O: Offset> Growable<'a> for GrowableBinary<'a, O> {
58    unsafe fn extend(&mut self, index: usize, start: usize, len: usize) {
59        let array = *self.arrays.get_unchecked(index);
60        extend_validity(&mut self.validity, array, start, len);
61
62        let offsets = array.offsets();
63        let values = array.values();
64
65        self.offsets
66            .try_extend_from_slice(offsets, start, len)
67            .unwrap();
68
69        // values
70        extend_offset_values::<O>(&mut self.values, offsets.buffer(), values, start, len);
71    }
72
73    fn extend_validity(&mut self, additional: usize) {
74        self.offsets.extend_constant(additional);
75        if let Some(validity) = &mut self.validity {
76            validity.extend_constant(additional, false);
77        }
78    }
79
80    #[inline]
81    fn len(&self) -> usize {
82        self.offsets.len() - 1
83    }
84
85    fn as_arc(&mut self) -> Arc<dyn Array> {
86        self.to().arced()
87    }
88
89    fn as_box(&mut self) -> Box<dyn Array> {
90        self.to().boxed()
91    }
92}
93
94impl<'a, O: Offset> From<GrowableBinary<'a, O>> for BinaryArray<O> {
95    fn from(val: GrowableBinary<'a, O>) -> Self {
96        BinaryArray::<O>::new(
97            val.dtype,
98            val.offsets.into(),
99            val.values.into(),
100            val.validity.map(|v| v.freeze()),
101        )
102    }
103}