polars_arrow/array/list/
mod.rs1use super::specification::try_check_offsets_bounds;
2use super::{new_empty_array, Array, Splitable};
3use crate::bitmap::Bitmap;
4use crate::datatypes::{ArrowDataType, Field};
5use crate::offset::{Offset, Offsets, OffsetsBuffer};
6
7mod ffi;
8pub(super) mod fmt;
9mod iterator;
10pub use iterator::*;
11mod mutable;
12pub use mutable::*;
13use polars_error::{polars_bail, PolarsResult};
14use polars_utils::pl_str::PlSmallStr;
15
16#[derive(Clone)]
18pub struct ListArray<O: Offset> {
19 dtype: ArrowDataType,
20 offsets: OffsetsBuffer<O>,
21 values: Box<dyn Array>,
22 validity: Option<Bitmap>,
23}
24
25impl<O: Offset> ListArray<O> {
26 pub fn try_new(
37 dtype: ArrowDataType,
38 offsets: OffsetsBuffer<O>,
39 values: Box<dyn Array>,
40 validity: Option<Bitmap>,
41 ) -> PolarsResult<Self> {
42 try_check_offsets_bounds(&offsets, values.len())?;
43
44 if validity
45 .as_ref()
46 .is_some_and(|validity| validity.len() != offsets.len_proxy())
47 {
48 polars_bail!(ComputeError: "validity mask length must match the number of values")
49 }
50
51 let child_dtype = Self::try_get_child(&dtype)?.dtype();
52 let values_dtype = values.dtype();
53 if child_dtype != values_dtype {
54 polars_bail!(ComputeError: "ListArray's child's DataType must match. However, the expected DataType is {child_dtype:?} while it got {values_dtype:?}.");
55 }
56
57 Ok(Self {
58 dtype,
59 offsets,
60 values,
61 validity,
62 })
63 }
64
65 pub fn new(
76 dtype: ArrowDataType,
77 offsets: OffsetsBuffer<O>,
78 values: Box<dyn Array>,
79 validity: Option<Bitmap>,
80 ) -> Self {
81 Self::try_new(dtype, offsets, values, validity).unwrap()
82 }
83
84 pub fn new_empty(dtype: ArrowDataType) -> Self {
86 let values = new_empty_array(Self::get_child_type(&dtype).clone());
87 Self::new(dtype, OffsetsBuffer::default(), values, None)
88 }
89
90 #[inline]
92 pub fn new_null(dtype: ArrowDataType, length: usize) -> Self {
93 let child = Self::get_child_type(&dtype).clone();
94 Self::new(
95 dtype,
96 Offsets::new_zeroed(length).into(),
97 new_empty_array(child),
98 Some(Bitmap::new_zeroed(length)),
99 )
100 }
101}
102
103impl<O: Offset> ListArray<O> {
104 pub fn slice(&mut self, offset: usize, length: usize) {
108 assert!(
109 offset + length <= self.len(),
110 "the offset of the new Buffer cannot exceed the existing length"
111 );
112 unsafe { self.slice_unchecked(offset, length) }
113 }
114
115 pub unsafe fn slice_unchecked(&mut self, offset: usize, length: usize) {
120 self.validity = self
121 .validity
122 .take()
123 .map(|bitmap| bitmap.sliced_unchecked(offset, length))
124 .filter(|bitmap| bitmap.unset_bits() > 0);
125 self.offsets.slice_unchecked(offset, length + 1);
126 }
127
128 impl_sliced!();
129 impl_mut_validity!();
130 impl_into_array!();
131
132 pub fn trim_to_normalized_offsets_recursive(&self) -> Self {
133 let offsets = self.offsets();
134 let values = self.values();
135
136 let first_idx = *offsets.first();
137 let len = offsets.range().to_usize();
138
139 if first_idx.to_usize() == 0 && values.len() == len {
140 return self.clone();
141 }
142
143 let offsets = if first_idx.to_usize() == 0 {
144 offsets.clone()
145 } else {
146 let v = offsets.iter().map(|x| *x - first_idx).collect::<Vec<_>>();
147 unsafe { OffsetsBuffer::<O>::new_unchecked(v.into()) }
148 };
149
150 let values = values.sliced(first_idx.to_usize(), len);
151
152 let values = match values.dtype() {
153 ArrowDataType::List(_) => {
154 let inner: &ListArray<i32> = values.as_ref().as_any().downcast_ref().unwrap();
155 Box::new(inner.trim_to_normalized_offsets_recursive()) as Box<dyn Array>
156 },
157 ArrowDataType::LargeList(_) => {
158 let inner: &ListArray<i64> = values.as_ref().as_any().downcast_ref().unwrap();
159 Box::new(inner.trim_to_normalized_offsets_recursive()) as Box<dyn Array>
160 },
161 _ => values,
162 };
163
164 assert_eq!(offsets.first().to_usize(), 0);
165 assert_eq!(values.len(), offsets.range().to_usize());
166
167 Self::new(
168 self.dtype().clone(),
169 offsets,
170 values,
171 self.validity().cloned(),
172 )
173 }
174}
175
176impl<O: Offset> ListArray<O> {
178 #[inline]
180 pub fn len(&self) -> usize {
181 self.offsets.len_proxy()
182 }
183
184 #[inline]
188 pub fn value(&self, i: usize) -> Box<dyn Array> {
189 assert!(i < self.len());
190 unsafe { self.value_unchecked(i) }
192 }
193
194 #[inline]
199 pub unsafe fn value_unchecked(&self, i: usize) -> Box<dyn Array> {
200 let (start, end) = self.offsets.start_end_unchecked(i);
202 let length = end - start;
203
204 self.values.sliced_unchecked(start, length)
206 }
207
208 #[inline]
210 pub fn validity(&self) -> Option<&Bitmap> {
211 self.validity.as_ref()
212 }
213
214 #[inline]
216 pub fn offsets(&self) -> &OffsetsBuffer<O> {
217 &self.offsets
218 }
219
220 #[inline]
222 pub fn values(&self) -> &Box<dyn Array> {
223 &self.values
224 }
225}
226
227impl<O: Offset> ListArray<O> {
228 pub fn default_datatype(dtype: ArrowDataType) -> ArrowDataType {
230 let field = Box::new(Field::new(PlSmallStr::from_static("item"), dtype, true));
231 if O::IS_LARGE {
232 ArrowDataType::LargeList(field)
233 } else {
234 ArrowDataType::List(field)
235 }
236 }
237
238 pub fn get_child_field(dtype: &ArrowDataType) -> &Field {
242 Self::try_get_child(dtype).unwrap()
243 }
244
245 pub fn try_get_child(dtype: &ArrowDataType) -> PolarsResult<&Field> {
249 if O::IS_LARGE {
250 match dtype.to_logical_type() {
251 ArrowDataType::LargeList(child) => Ok(child.as_ref()),
252 _ => polars_bail!(ComputeError: "ListArray<i64> expects DataType::LargeList"),
253 }
254 } else {
255 match dtype.to_logical_type() {
256 ArrowDataType::List(child) => Ok(child.as_ref()),
257 _ => polars_bail!(ComputeError: "ListArray<i32> expects DataType::List"),
258 }
259 }
260 }
261
262 pub fn get_child_type(dtype: &ArrowDataType) -> &ArrowDataType {
266 Self::get_child_field(dtype).dtype()
267 }
268}
269
270impl<O: Offset> Array for ListArray<O> {
271 impl_common_array!();
272
273 fn validity(&self) -> Option<&Bitmap> {
274 self.validity.as_ref()
275 }
276
277 #[inline]
278 fn with_validity(&self, validity: Option<Bitmap>) -> Box<dyn Array> {
279 Box::new(self.clone().with_validity(validity))
280 }
281}
282
283impl<O: Offset> Splitable for ListArray<O> {
284 fn check_bound(&self, offset: usize) -> bool {
285 offset <= self.len()
286 }
287
288 unsafe fn _split_at_unchecked(&self, offset: usize) -> (Self, Self) {
289 let (lhs_offsets, rhs_offsets) = unsafe { self.offsets.split_at_unchecked(offset) };
290 let (lhs_validity, rhs_validity) = unsafe { self.validity.split_at_unchecked(offset) };
291
292 (
293 Self {
294 dtype: self.dtype.clone(),
295 offsets: lhs_offsets,
296 validity: lhs_validity,
297 values: self.values.clone(),
298 },
299 Self {
300 dtype: self.dtype.clone(),
301 offsets: rhs_offsets,
302 validity: rhs_validity,
303 values: self.values.clone(),
304 },
305 )
306 }
307}