polars_arrow/legacy/array/
list.rs

1use polars_error::PolarsResult;
2
3use crate::array::{new_null_array, Array, ArrayRef, ListArray, NullArray};
4use crate::bitmap::BitmapBuilder;
5use crate::compute::concatenate;
6use crate::datatypes::ArrowDataType;
7use crate::legacy::array::is_nested_null;
8use crate::legacy::kernels::concatenate::concatenate_owned_unchecked;
9use crate::legacy::prelude::*;
10use crate::offset::Offsets;
11
12pub struct AnonymousBuilder<'a> {
13    arrays: Vec<&'a dyn Array>,
14    offsets: Vec<i64>,
15    validity: Option<BitmapBuilder>,
16    size: i64,
17}
18
19impl<'a> AnonymousBuilder<'a> {
20    pub fn new(size: usize) -> Self {
21        let mut offsets = Vec::with_capacity(size + 1);
22        offsets.push(0i64);
23        Self {
24            arrays: Vec::with_capacity(size),
25            offsets,
26            validity: None,
27            size: 0,
28        }
29    }
30    #[inline]
31    fn last_offset(&self) -> i64 {
32        *self.offsets.last().unwrap()
33    }
34
35    pub fn is_empty(&self) -> bool {
36        self.offsets.len() == 1
37    }
38
39    pub fn offsets(&self) -> &[i64] {
40        &self.offsets
41    }
42
43    pub fn take_offsets(self) -> Offsets<i64> {
44        // SAFETY: offsets are correct
45        unsafe { Offsets::new_unchecked(self.offsets) }
46    }
47
48    #[inline]
49    pub fn push(&mut self, arr: &'a dyn Array) {
50        self.size += arr.len() as i64;
51        self.offsets.push(self.size);
52        self.arrays.push(arr);
53
54        if let Some(validity) = &mut self.validity {
55            validity.push(true)
56        }
57    }
58
59    pub fn push_multiple(&mut self, arrs: &'a [ArrayRef]) {
60        for arr in arrs {
61            self.size += arr.len() as i64;
62            self.arrays.push(arr.as_ref());
63        }
64        self.offsets.push(self.size);
65        self.update_validity()
66    }
67
68    #[inline]
69    pub fn push_null(&mut self) {
70        self.offsets.push(self.last_offset());
71        match &mut self.validity {
72            Some(validity) => validity.push(false),
73            None => self.init_validity(),
74        }
75    }
76
77    #[inline]
78    pub fn push_opt(&mut self, arr: Option<&'a dyn Array>) {
79        match arr {
80            None => self.push_null(),
81            Some(arr) => self.push(arr),
82        }
83    }
84
85    pub fn push_empty(&mut self) {
86        self.offsets.push(self.last_offset());
87        self.update_validity()
88    }
89
90    fn init_validity(&mut self) {
91        let len = self.offsets.len() - 1;
92        let mut validity = BitmapBuilder::with_capacity(self.offsets.capacity());
93        if len > 0 {
94            validity.extend_constant(len - 1, true);
95            validity.push(false);
96        }
97        self.validity = Some(validity)
98    }
99
100    fn update_validity(&mut self) {
101        if let Some(validity) = &mut self.validity {
102            validity.push(true)
103        }
104    }
105
106    pub fn finish(self, inner_dtype: Option<&ArrowDataType>) -> PolarsResult<ListArray<i64>> {
107        // SAFETY:
108        // offsets are monotonically increasing
109        let offsets = unsafe { Offsets::new_unchecked(self.offsets) };
110        let (inner_dtype, values) = if self.arrays.is_empty() {
111            let len = *offsets.last() as usize;
112            match inner_dtype {
113                None => {
114                    let values = NullArray::new(ArrowDataType::Null, len).boxed();
115                    (ArrowDataType::Null, values)
116                },
117                Some(inner_dtype) => {
118                    let values = new_null_array(inner_dtype.clone(), len);
119                    (inner_dtype.clone(), values)
120                },
121            }
122        } else {
123            let inner_dtype = inner_dtype.unwrap_or_else(|| self.arrays[0].dtype());
124
125            // check if there is a dtype that is not `Null`
126            // if we find it, we will convert the null arrays
127            // to empty arrays of this dtype, otherwise the concat kernel fails.
128            let mut non_null_dtype = None;
129            if is_nested_null(inner_dtype) {
130                for arr in &self.arrays {
131                    if !is_nested_null(arr.dtype()) {
132                        non_null_dtype = Some(arr.dtype());
133                        break;
134                    }
135                }
136            };
137
138            // there are null arrays found, ensure the types are correct.
139            if let Some(dtype) = non_null_dtype {
140                let arrays = self
141                    .arrays
142                    .iter()
143                    .map(|arr| {
144                        if is_nested_null(arr.dtype()) {
145                            convert_inner_type(&**arr, dtype)
146                        } else {
147                            arr.to_boxed()
148                        }
149                    })
150                    .collect::<Vec<_>>();
151
152                let values = concatenate_owned_unchecked(&arrays)?;
153                (dtype.clone(), values)
154            } else {
155                let values = concatenate::concatenate(&self.arrays)?;
156                (inner_dtype.clone(), values)
157            }
158        };
159        let dtype = ListArray::<i64>::default_datatype(inner_dtype);
160        Ok(ListArray::<i64>::new(
161            dtype,
162            offsets.into(),
163            values,
164            self.validity
165                .and_then(|validity| validity.into_opt_validity()),
166        ))
167    }
168}