1use std::any::Any;
21use std::sync::Arc;
22
23use crate::bitmap::{Bitmap, MutableBitmap};
24use crate::datatypes::ArrowDataType;
25
26pub mod physical_binary;
27
/// A value that can be cut into a left and a right part at an offset.
///
/// Implementors supply [`Splitable::check_bound`] and [`Splitable::_split_at_unchecked`]; the
/// checked and unchecked entry points are provided on top of those two.
pub trait Splitable: Sized {
    /// Returns `true` when `offset` is an acceptable split position for `self`.
    fn check_bound(&self, offset: usize) -> bool;

    /// Splits `self` at `offset`, returning the two resulting parts.
    ///
    /// # Panics
    ///
    /// Panics if [`Splitable::check_bound`] returns `false` for `offset`.
    #[inline]
    #[must_use]
    fn split_at(&self, offset: usize) -> (Self, Self) {
        assert!(self.check_bound(offset));
        // SAFETY: the assertion above verified the bound.
        unsafe { Self::_split_at_unchecked(self, offset) }
    }

    /// Splits `self` at `offset` without enforcing the bound in release builds.
    ///
    /// The bound is still verified through a `debug_assert!`.
    ///
    /// # Safety
    ///
    /// The caller must guarantee that [`Splitable::check_bound`] holds for `offset`.
    #[inline]
    #[must_use]
    unsafe fn split_at_unchecked(&self, offset: usize) -> (Self, Self) {
        debug_assert!(self.check_bound(offset));
        // SAFETY: the bound is guaranteed by the caller.
        unsafe { Self::_split_at_unchecked(self, offset) }
    }

    /// Implementation hook that performs the split itself.
    ///
    /// Both provided entry points funnel into this method once the bound has been dealt with.
    ///
    /// # Safety
    ///
    /// Callers must only pass an `offset` for which [`Splitable::check_bound`] holds.
    unsafe fn _split_at_unchecked(&self, offset: usize) -> (Self, Self);
}
59
60pub trait Array: Send + Sync + dyn_clone::DynClone + 'static {
63 fn as_any(&self) -> &dyn Any;
65
66 fn as_any_mut(&mut self) -> &mut dyn Any;
68
69 fn len(&self) -> usize;
72
73 fn is_empty(&self) -> bool {
75 self.len() == 0
76 }
77
78 fn dtype(&self) -> &ArrowDataType;
81
82 fn validity(&self) -> Option<&Bitmap>;
86
87 #[inline]
91 fn null_count(&self) -> usize {
92 if self.dtype() == &ArrowDataType::Null {
93 return self.len();
94 };
95 self.validity()
96 .as_ref()
97 .map(|x| x.unset_bits())
98 .unwrap_or(0)
99 }
100
101 #[inline]
102 fn has_nulls(&self) -> bool {
103 self.null_count() > 0
104 }
105
106 #[inline]
110 fn is_null(&self, i: usize) -> bool {
111 assert!(i < self.len());
112 unsafe { self.is_null_unchecked(i) }
113 }
114
115 #[inline]
120 unsafe fn is_null_unchecked(&self, i: usize) -> bool {
121 self.validity()
122 .as_ref()
123 .map(|x| !x.get_bit_unchecked(i))
124 .unwrap_or(false)
125 }
126
127 #[inline]
131 fn is_valid(&self, i: usize) -> bool {
132 !self.is_null(i)
133 }
134
135 #[must_use]
137 fn split_at_boxed(&self, offset: usize) -> (Box<dyn Array>, Box<dyn Array>);
138
139 #[must_use]
145 unsafe fn split_at_boxed_unchecked(&self, offset: usize) -> (Box<dyn Array>, Box<dyn Array>);
146
147 fn slice(&mut self, offset: usize, length: usize);
153
154 unsafe fn slice_unchecked(&mut self, offset: usize, length: usize);
161
162 #[must_use]
168 fn sliced(&self, offset: usize, length: usize) -> Box<dyn Array> {
169 if length == 0 {
170 return new_empty_array(self.dtype().clone());
171 }
172 let mut new = self.to_boxed();
173 new.slice(offset, length);
174 new
175 }
176
177 #[must_use]
185 unsafe fn sliced_unchecked(&self, offset: usize, length: usize) -> Box<dyn Array> {
186 debug_assert!(offset + length <= self.len());
187 let mut new = self.to_boxed();
188 new.slice_unchecked(offset, length);
189 new
190 }
191
192 fn with_validity(&self, validity: Option<Bitmap>) -> Box<dyn Array>;
196
197 fn to_boxed(&self) -> Box<dyn Array>;
199}
200
201dyn_clone::clone_trait_object!(Array);
202
203pub trait MutableArray: std::fmt::Debug + Send + Sync {
209 fn dtype(&self) -> &ArrowDataType;
211
212 fn len(&self) -> usize;
214
215 fn is_empty(&self) -> bool {
217 self.len() == 0
218 }
219
220 fn validity(&self) -> Option<&MutableBitmap>;
222
223 fn as_box(&mut self) -> Box<dyn Array>;
225
226 fn as_arc(&mut self) -> std::sync::Arc<dyn Array> {
231 self.as_box().into()
232 }
233
234 fn as_any(&self) -> &dyn Any;
236
237 fn as_mut_any(&mut self) -> &mut dyn Any;
239
240 fn push_null(&mut self);
242
243 #[inline]
247 fn is_valid(&self, index: usize) -> bool {
248 self.validity()
249 .as_ref()
250 .map(|x| x.get(index))
251 .unwrap_or(true)
252 }
253
254 fn reserve(&mut self, additional: usize);
256
257 fn shrink_to_fit(&mut self);
259}
260
261impl MutableArray for Box<dyn MutableArray> {
262 fn len(&self) -> usize {
263 self.as_ref().len()
264 }
265
266 fn validity(&self) -> Option<&MutableBitmap> {
267 self.as_ref().validity()
268 }
269
270 fn as_box(&mut self) -> Box<dyn Array> {
271 self.as_mut().as_box()
272 }
273
274 fn as_arc(&mut self) -> Arc<dyn Array> {
275 self.as_mut().as_arc()
276 }
277
278 fn dtype(&self) -> &ArrowDataType {
279 self.as_ref().dtype()
280 }
281
282 fn as_any(&self) -> &dyn std::any::Any {
283 self.as_ref().as_any()
284 }
285
286 fn as_mut_any(&mut self) -> &mut dyn std::any::Any {
287 self.as_mut().as_mut_any()
288 }
289
290 #[inline]
291 fn push_null(&mut self) {
292 self.as_mut().push_null()
293 }
294
295 fn shrink_to_fit(&mut self) {
296 self.as_mut().shrink_to_fit();
297 }
298
299 fn reserve(&mut self, additional: usize) {
300 self.as_mut().reserve(additional);
301 }
302}
303
// Downcasts `$array` (anything exposing `as_any()`) to `&$ty` and applies `$f` to it.
//
// Panics if the value behind `$array` is not a `$ty`.
macro_rules! general_dyn {
    ($array:expr, $ty:ty, $f:expr) => {{
        let concrete = $array.as_any().downcast_ref::<$ty>().unwrap();
        ($f)(concrete)
    }};
}
310
// Downcasts `$array` to `&$ty` via `general_dyn!` and calls its `fmt` with the formatter `$f`.
macro_rules! fmt_dyn {
    ($array:expr, $ty:ty, $f:expr) => {{
        let mut write_concrete = |concrete: &$ty| concrete.fmt($f);
        general_dyn!($array, $ty, write_concrete)
    }};
}
317
318impl std::fmt::Debug for dyn Array + '_ {
319 fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result {
320 use crate::datatypes::PhysicalType::*;
321 match self.dtype().to_physical_type() {
322 Null => fmt_dyn!(self, NullArray, f),
323 Boolean => fmt_dyn!(self, BooleanArray, f),
324 Primitive(primitive) => with_match_primitive_type_full!(primitive, |$T| {
325 fmt_dyn!(self, PrimitiveArray<$T>, f)
326 }),
327 BinaryView => fmt_dyn!(self, BinaryViewArray, f),
328 Utf8View => fmt_dyn!(self, Utf8ViewArray, f),
329 Binary => fmt_dyn!(self, BinaryArray<i32>, f),
330 LargeBinary => fmt_dyn!(self, BinaryArray<i64>, f),
331 FixedSizeBinary => fmt_dyn!(self, FixedSizeBinaryArray, f),
332 Utf8 => fmt_dyn!(self, Utf8Array::<i32>, f),
333 LargeUtf8 => fmt_dyn!(self, Utf8Array::<i64>, f),
334 List => fmt_dyn!(self, ListArray::<i32>, f),
335 LargeList => fmt_dyn!(self, ListArray::<i64>, f),
336 FixedSizeList => fmt_dyn!(self, FixedSizeListArray, f),
337 Struct => fmt_dyn!(self, StructArray, f),
338 Union => fmt_dyn!(self, UnionArray, f),
339 Dictionary(key_type) => {
340 match_integer_type!(key_type, |$T| {
341 fmt_dyn!(self, DictionaryArray::<$T>, f)
342 })
343 },
344 Map => fmt_dyn!(self, MapArray, f),
345 }
346 }
347}
348
349pub fn new_empty_array(dtype: ArrowDataType) -> Box<dyn Array> {
351 use crate::datatypes::PhysicalType::*;
352 match dtype.to_physical_type() {
353 Null => Box::new(NullArray::new_empty(dtype)),
354 Boolean => Box::new(BooleanArray::new_empty(dtype)),
355 Primitive(primitive) => with_match_primitive_type_full!(primitive, |$T| {
356 Box::new(PrimitiveArray::<$T>::new_empty(dtype))
357 }),
358 Binary => Box::new(BinaryArray::<i32>::new_empty(dtype)),
359 LargeBinary => Box::new(BinaryArray::<i64>::new_empty(dtype)),
360 FixedSizeBinary => Box::new(FixedSizeBinaryArray::new_empty(dtype)),
361 Utf8 => Box::new(Utf8Array::<i32>::new_empty(dtype)),
362 LargeUtf8 => Box::new(Utf8Array::<i64>::new_empty(dtype)),
363 List => Box::new(ListArray::<i32>::new_empty(dtype)),
364 LargeList => Box::new(ListArray::<i64>::new_empty(dtype)),
365 FixedSizeList => Box::new(FixedSizeListArray::new_empty(dtype)),
366 Struct => Box::new(StructArray::new_empty(dtype)),
367 Union => Box::new(UnionArray::new_empty(dtype)),
368 Map => Box::new(MapArray::new_empty(dtype)),
369 Utf8View => Box::new(Utf8ViewArray::new_empty(dtype)),
370 BinaryView => Box::new(BinaryViewArray::new_empty(dtype)),
371 Dictionary(key_type) => {
372 match_integer_type!(key_type, |$T| {
373 Box::new(DictionaryArray::<$T>::new_empty(dtype))
374 })
375 },
376 }
377}
378
379pub fn new_null_array(dtype: ArrowDataType, length: usize) -> Box<dyn Array> {
384 use crate::datatypes::PhysicalType::*;
385 match dtype.to_physical_type() {
386 Null => Box::new(NullArray::new_null(dtype, length)),
387 Boolean => Box::new(BooleanArray::new_null(dtype, length)),
388 Primitive(primitive) => with_match_primitive_type_full!(primitive, |$T| {
389 Box::new(PrimitiveArray::<$T>::new_null(dtype, length))
390 }),
391 Binary => Box::new(BinaryArray::<i32>::new_null(dtype, length)),
392 LargeBinary => Box::new(BinaryArray::<i64>::new_null(dtype, length)),
393 FixedSizeBinary => Box::new(FixedSizeBinaryArray::new_null(dtype, length)),
394 Utf8 => Box::new(Utf8Array::<i32>::new_null(dtype, length)),
395 LargeUtf8 => Box::new(Utf8Array::<i64>::new_null(dtype, length)),
396 List => Box::new(ListArray::<i32>::new_null(dtype, length)),
397 LargeList => Box::new(ListArray::<i64>::new_null(dtype, length)),
398 FixedSizeList => Box::new(FixedSizeListArray::new_null(dtype, length)),
399 Struct => Box::new(StructArray::new_null(dtype, length)),
400 Union => Box::new(UnionArray::new_null(dtype, length)),
401 Map => Box::new(MapArray::new_null(dtype, length)),
402 BinaryView => Box::new(BinaryViewArray::new_null(dtype, length)),
403 Utf8View => Box::new(Utf8ViewArray::new_null(dtype, length)),
404 Dictionary(key_type) => {
405 match_integer_type!(key_type, |$T| {
406 Box::new(DictionaryArray::<$T>::new_null(dtype, length))
407 })
408 },
409 }
410}
411
// Downcasts `$array` to `&$ty` via `general_dyn!` and returns a boxed clone of it.
macro_rules! clone_dyn {
    ($array:expr, $ty:ty) => {{
        let duplicate = |concrete: &$ty| Box::new(concrete.clone());
        general_dyn!($array, $ty, duplicate)
    }};
}
418
// Generates the by-value `sliced` / `sliced_unchecked` methods for a type that already
// provides `len(&self)` and `unsafe fn slice_unchecked(&mut self, offset, length)`.
macro_rules! impl_sliced {
    () => {
        /// Returns this array restricted to `length` elements starting at `offset`.
        ///
        /// # Panics
        ///
        /// Panics if `offset + length` overflows `usize` or exceeds `self.len()`.
        #[inline]
        #[must_use]
        pub fn sliced(self, offset: usize, length: usize) -> Self {
            // `checked_add` keeps a wrapping `offset + length` (in builds without overflow
            // checks) from slipping past the bound and reaching the unchecked slice.
            assert!(
                offset
                    .checked_add(length)
                    .is_some_and(|end| end <= self.len()),
                "the offset of the new Buffer cannot exceed the existing length"
            );
            // SAFETY: `offset + length <= self.len()` was verified above without overflow.
            unsafe { Self::sliced_unchecked(self, offset, length) }
        }

        /// Returns this array restricted to `length` elements starting at `offset`, without
        /// checking the range.
        ///
        /// # Safety
        ///
        /// The caller must ensure that `offset + length <= self.len()`.
        #[inline]
        #[must_use]
        pub unsafe fn sliced_unchecked(mut self, offset: usize, length: usize) -> Self {
            Self::slice_unchecked(&mut self, offset, length);
            self
        }
    };
}
451
// Generates validity accessors for an array type that has a `validity: Option<Bitmap>` field
// and a `len(&self)` method.
macro_rules! impl_mut_validity {
    () => {
        /// Returns this array with its validity replaced by `validity`.
        ///
        /// # Panics
        ///
        /// Panics if `validity` is `Some` and its length differs from the array's length.
        #[must_use]
        #[inline]
        pub fn with_validity(mut self, validity: Option<Bitmap>) -> Self {
            self.set_validity(validity);
            self
        }

        /// Replaces the validity of this array with `validity`.
        ///
        /// # Panics
        ///
        /// Panics if `validity` is `Some` and its length differs from the array's length.
        #[inline]
        pub fn set_validity(&mut self, validity: Option<Bitmap>) {
            if let Some(bitmap) = &validity {
                if bitmap.len() != self.len() {
                    panic!("validity must be equal to the array's length")
                }
            }
            self.validity = validity;
        }

        /// Removes and returns the current validity, leaving `None` in its place.
        #[inline]
        pub fn take_validity(&mut self) -> Option<Bitmap> {
            std::mem::take(&mut self.validity)
        }
    };
}
483
// Generates validity accessors for a mutable array type that has a
// `validity: Option<MutableBitmap>` field and a `len(&self)` method.
macro_rules! impl_mutable_array_mut_validity {
    () => {
        /// Returns this array with its validity replaced by `validity`.
        ///
        /// # Panics
        ///
        /// Panics if `validity` is `Some` and its length differs from the array's length.
        #[must_use]
        #[inline]
        pub fn with_validity(mut self, validity: Option<MutableBitmap>) -> Self {
            self.set_validity(validity);
            self
        }

        /// Replaces the validity of this array with `validity`.
        ///
        /// # Panics
        ///
        /// Panics if `validity` is `Some` and its length differs from the array's length.
        #[inline]
        pub fn set_validity(&mut self, validity: Option<MutableBitmap>) {
            if let Some(bitmap) = &validity {
                if bitmap.len() != self.len() {
                    panic!("validity must be equal to the array's length")
                }
            }
            self.validity = validity;
        }

        /// Passes the current validity through `f` and stores the result.
        ///
        /// Does nothing (and never calls `f`) when the array has no validity.
        ///
        /// # Panics
        ///
        /// Panics if the bitmap returned by `f` has a length different from the array's.
        #[inline]
        pub fn apply_validity<F: FnOnce(MutableBitmap) -> MutableBitmap>(&mut self, f: F) {
            if let Some(current) = self.validity.take() {
                let updated = f(current);
                self.set_validity(Some(updated))
            }
        }
    };
}
522
// Generates by-value conversions of a concrete array type into `dyn Array` smart pointers.
macro_rules! impl_into_array {
    () => {
        /// Moves this array into a `Box<dyn Array>`.
        pub fn boxed(self) -> Box<dyn Array> {
            Box::new(self) as Box<dyn Array>
        }

        /// Moves this array into an `Arc<dyn Array>`.
        pub fn arced(self) -> std::sync::Arc<dyn Array> {
            std::sync::Arc::new(self) as std::sync::Arc<dyn Array>
        }
    };
}
537
// Generates the `Array` trait methods that look the same for every concrete array type.
//
// To be expanded inside an `impl Array for T` block. `T` is expected to have a `dtype` field,
// inherent `len`, `slice` and `slice_unchecked` methods, and to implement `Clone` and
// `Splitable`.
macro_rules! impl_common_array {
    () => {
        #[inline]
        fn as_any(&self) -> &dyn std::any::Any {
            self
        }

        #[inline]
        fn as_any_mut(&mut self) -> &mut dyn std::any::Any {
            self
        }

        // The calls to `len`, `slice` and `slice_unchecked` below are meant to resolve to the
        // type's inherent methods, not back into these trait methods.
        #[inline]
        fn len(&self) -> usize {
            self.len()
        }

        #[inline]
        fn dtype(&self) -> &ArrowDataType {
            &self.dtype
        }

        #[inline]
        fn split_at_boxed(&self, offset: usize) -> (Box<dyn Array>, Box<dyn Array>) {
            let (left, right) = $crate::array::Splitable::split_at(self, offset);
            (Box::new(left), Box::new(right))
        }

        #[inline]
        unsafe fn split_at_boxed_unchecked(
            &self,
            offset: usize,
        ) -> (Box<dyn Array>, Box<dyn Array>) {
            // SAFETY: the bound on `offset` is guaranteed by the caller.
            let (left, right) =
                unsafe { $crate::array::Splitable::split_at_unchecked(self, offset) };
            (Box::new(left), Box::new(right))
        }

        #[inline]
        fn slice(&mut self, offset: usize, length: usize) {
            self.slice(offset, length);
        }

        #[inline]
        unsafe fn slice_unchecked(&mut self, offset: usize, length: usize) {
            self.slice_unchecked(offset, length);
        }

        #[inline]
        fn to_boxed(&self) -> Box<dyn Array> {
            Box::new(self.clone())
        }
    };
}
592
593pub fn clone(array: &dyn Array) -> Box<dyn Array> {
598 use crate::datatypes::PhysicalType::*;
599 match array.dtype().to_physical_type() {
600 Null => clone_dyn!(array, NullArray),
601 Boolean => clone_dyn!(array, BooleanArray),
602 Primitive(primitive) => with_match_primitive_type_full!(primitive, |$T| {
603 clone_dyn!(array, PrimitiveArray<$T>)
604 }),
605 Binary => clone_dyn!(array, BinaryArray<i32>),
606 LargeBinary => clone_dyn!(array, BinaryArray<i64>),
607 FixedSizeBinary => clone_dyn!(array, FixedSizeBinaryArray),
608 Utf8 => clone_dyn!(array, Utf8Array::<i32>),
609 LargeUtf8 => clone_dyn!(array, Utf8Array::<i64>),
610 List => clone_dyn!(array, ListArray::<i32>),
611 LargeList => clone_dyn!(array, ListArray::<i64>),
612 FixedSizeList => clone_dyn!(array, FixedSizeListArray),
613 Struct => clone_dyn!(array, StructArray),
614 Union => clone_dyn!(array, UnionArray),
615 Map => clone_dyn!(array, MapArray),
616 BinaryView => clone_dyn!(array, BinaryViewArray),
617 Utf8View => clone_dyn!(array, Utf8ViewArray),
618 Dictionary(key_type) => {
619 match_integer_type!(key_type, |$T| {
620 clone_dyn!(array, DictionaryArray::<$T>)
621 })
622 },
623 }
624}
625
626impl<'a> AsRef<(dyn Array + 'a)> for dyn Array {
629 fn as_ref(&self) -> &(dyn Array + 'a) {
630 self
631 }
632}
633
634mod binary;
635mod boolean;
636mod dictionary;
637mod fixed_size_binary;
638mod fixed_size_list;
639mod list;
640mod map;
641mod null;
642mod primitive;
643pub mod specification;
644mod static_array;
645mod static_array_collect;
646mod struct_;
647mod total_ord;
648mod union;
649mod utf8;
650
651mod equal;
652mod ffi;
653mod fmt;
654#[doc(hidden)]
655pub mod indexable;
656pub mod iterator;
657
658mod binview;
659pub mod growable;
660mod values;
661
662pub use binary::{BinaryArray, BinaryValueIter, MutableBinaryArray, MutableBinaryValuesArray};
663pub use binview::{
664 validate_utf8_view, BinaryViewArray, BinaryViewArrayGeneric, MutableBinaryViewArray,
665 MutablePlBinary, MutablePlString, Utf8ViewArray, View, ViewType,
666};
667pub use boolean::{BooleanArray, MutableBooleanArray};
668pub use dictionary::{DictionaryArray, DictionaryKey, MutableDictionaryArray};
669pub use equal::equal;
670pub use fixed_size_binary::{FixedSizeBinaryArray, MutableFixedSizeBinaryArray};
671pub use fixed_size_list::{FixedSizeListArray, MutableFixedSizeListArray};
672pub use fmt::{get_display, get_value_display};
673pub(crate) use iterator::ArrayAccessor;
674pub use iterator::ArrayValuesIter;
675pub use list::{ListArray, ListValuesIter, MutableListArray};
676pub use map::MapArray;
677pub use null::{MutableNullArray, NullArray};
678use polars_error::PolarsResult;
679pub use primitive::*;
680pub use static_array::{ParameterFreeDtypeStaticArray, StaticArray};
681pub use static_array_collect::{ArrayCollectIterExt, ArrayFromIter, ArrayFromIterDtype};
682pub use struct_::StructArray;
683pub use union::UnionArray;
684pub use utf8::{MutableUtf8Array, MutableUtf8ValuesArray, Utf8Array, Utf8ValuesIter};
685pub use values::ValueSize;
686
687pub(crate) use self::ffi::{offset_buffers_children_dictionary, FromFfi, ToFfi};
688use crate::{match_integer_type, with_match_primitive_type_full};
689
690pub trait TryExtend<A> {
693 fn try_extend<I: IntoIterator<Item = A>>(&mut self, iter: I) -> PolarsResult<()>;
695}
696
697pub trait TryPush<A> {
699 fn try_push(&mut self, item: A) -> PolarsResult<()>;
701}
702
/// A container onto which an item can be pushed through an `unsafe` fast path.
pub trait PushUnchecked<A> {
    /// Pushes `item` onto `self` without the checks a safe push would perform.
    ///
    /// # Safety
    ///
    /// NOTE(review): the precise precondition is defined by each implementor and is not
    /// visible here (presumably that enough capacity has already been reserved) — confirm.
    unsafe fn push_unchecked(&mut self, item: A);
}
712
713pub trait TryExtendFromSelf {
716 fn try_extend_from_self(&mut self, other: &Self) -> PolarsResult<()>;
718}
719
720pub unsafe trait GenericBinaryArray<O: crate::offset::Offset>: Array {
727 fn values(&self) -> &[u8];
729 fn offsets(&self) -> &[O];
731}
732
733pub type ArrayRef = Box<dyn Array>;
734
735impl Splitable for Option<Bitmap> {
736 #[inline(always)]
737 fn check_bound(&self, offset: usize) -> bool {
738 self.as_ref().is_none_or(|v| offset <= v.len())
739 }
740
741 unsafe fn _split_at_unchecked(&self, offset: usize) -> (Self, Self) {
742 self.as_ref().map_or((None, None), |bm| {
743 let (lhs, rhs) = unsafe { bm.split_at_unchecked(offset) };
744 (
745 (lhs.unset_bits() > 0).then_some(lhs),
746 (rhs.unset_bits() > 0).then_some(rhs),
747 )
748 })
749 }
750}