1use std::iter::Map;
3use std::sync::Arc;
4
5use arrow::array::*;
6use arrow::bitmap::Bitmap;
7use polars_compute::filter::filter_with_bitmap;
8
9use crate::prelude::*;
10
11pub mod ops;
12#[macro_use]
13pub mod arithmetic;
14pub mod builder;
15pub mod cast;
16pub mod collect;
17pub mod comparison;
18pub mod flags;
19pub mod float;
20pub mod iterator;
21#[cfg(feature = "ndarray")]
22pub(crate) mod ndarray;
23
24#[cfg(feature = "dtype-array")]
25pub(crate) mod array;
26mod binary;
27mod bitwise;
28#[cfg(feature = "object")]
29mod drop;
30mod from;
31mod from_iterator;
32pub mod from_iterator_par;
33pub(crate) mod list;
34pub(crate) mod logical;
35#[cfg(feature = "object")]
36pub mod object;
37#[cfg(feature = "random")]
38mod random;
39#[cfg(feature = "dtype-struct")]
40mod struct_;
41#[cfg(any(
42 feature = "temporal",
43 feature = "dtype-datetime",
44 feature = "dtype-date"
45))]
46pub mod temporal;
47mod to_vec;
48mod trusted_len;
49
50use std::mem;
51use std::slice::Iter;
52
53use arrow::legacy::kernels::concatenate::concatenate_owned_unchecked;
54use arrow::legacy::prelude::*;
55#[cfg(feature = "dtype-struct")]
56pub use struct_::StructChunked;
57
58use self::flags::{StatisticsFlags, StatisticsFlagsIM};
59use crate::series::IsSorted;
60use crate::utils::{first_non_null, last_non_null};
61
/// Stub type so code can still name `RevMapping` when the
/// "dtype-categorical" feature (which provides the real implementation)
/// is disabled.
#[cfg(not(feature = "dtype-categorical"))]
pub struct RevMapping {}
64
65pub type ChunkLenIter<'a> = std::iter::Map<std::slice::Iter<'a, ArrayRef>, fn(&ArrayRef) -> usize>;
66
/// A typed column of data backed by one or more Arrow arrays ("chunks").
pub struct ChunkedArray<T: PolarsDataType> {
    /// Name and logical dtype of this column.
    pub(crate) field: Arc<Field>,
    /// The Arrow arrays holding the actual data. Invariant elsewhere in this
    /// module assumes there is always at least one chunk.
    pub(crate) chunks: Vec<ArrayRef>,

    /// Cached statistics flags (sortedness, fast-explode) — see `flags` module.
    pub(crate) flags: StatisticsFlagsIM,

    // Cached totals over all chunks; kept in sync by the constructors.
    length: usize,
    null_count: usize,
    // Carries the `T` type parameter without storing a `T`.
    _pd: std::marker::PhantomData<T>,
}
151
impl<T: PolarsDataType> ChunkedArray<T> {
    /// Heuristic: a rechunk pays off when there is more than one chunk and the
    /// chunks average fewer than ~3 elements each.
    fn should_rechunk(&self) -> bool {
        self.chunks.len() > 1 && self.chunks.len() > self.len() / 3
    }

    /// Rechunk into a single chunk only when [`Self::should_rechunk`] advises it.
    fn optional_rechunk(self) -> Self {
        if self.should_rechunk() {
            self.rechunk()
        } else {
            self
        }
    }

    /// View `self` as `&dyn Any`, used for concrete-type downcasts.
    pub(crate) fn as_any(&self) -> &dyn std::any::Any {
        self
    }

    /// Borrow `series` as a `&ChunkedArray<T>` after verifying its logical
    /// dtype matches `self.dtype()`.
    ///
    /// # Errors
    /// Returns a `SchemaMismatch` error when the dtypes differ.
    pub fn unpack_series_matching_type<'a>(
        &self,
        series: &'a Series,
    ) -> PolarsResult<&'a ChunkedArray<T>> {
        match self.dtype() {
            #[cfg(feature = "dtype-decimal")]
            DataType::Decimal(_, _) => {
                let logical = series.decimal()?;

                let ca = logical.physical();
                // The decimal's physical array is a ChunkedArray<T> here, so
                // the downcast cannot fail after `series.decimal()` succeeded.
                Ok(ca.as_any().downcast_ref::<ChunkedArray<T>>().unwrap())
            },
            dt => {
                polars_ensure!(
                    dt == series.dtype(),
                    SchemaMismatch: "cannot unpack series of type `{}` into `{}`",
                    series.dtype(),
                    dt,
                );

                // SAFETY: logical dtypes were checked equal just above.
                Ok(unsafe { self.unpack_series_matching_physical_type(series) })
            },
        }
    }

    /// Create a ChunkedArray and derive `length`/`null_count` from `chunks`.
    fn new_with_compute_len(field: Arc<Field>, chunks: Vec<ArrayRef>) -> Self {
        // SAFETY: the placeholder dims (0, 0) are corrected by `compute_len`
        // before the value escapes this function.
        unsafe {
            let mut chunked_arr = Self::new_with_dims(field, chunks, 0, 0);
            chunked_arr.compute_len();
            chunked_arr
        }
    }

    /// Create a ChunkedArray with caller-supplied cached dimensions.
    ///
    /// # Safety
    /// `length` and `null_count` must match the data in `chunks`; they are
    /// cached and trusted, not re-validated.
    pub unsafe fn new_with_dims(
        field: Arc<Field>,
        chunks: Vec<ArrayRef>,
        length: usize,
        null_count: usize,
    ) -> Self {
        Self {
            field,
            chunks,
            flags: StatisticsFlagsIM::empty(),

            _pd: Default::default(),
            length,
            null_count,
        }
    }

    /// True when the sorted-ascending flag is set.
    pub(crate) fn is_sorted_ascending_flag(&self) -> bool {
        self.get_flags().is_sorted_ascending()
    }

    /// True when the sorted-descending flag is set.
    pub(crate) fn is_sorted_descending_flag(&self) -> bool {
        self.get_flags().is_sorted_descending()
    }

    /// Whether the array is flagged sorted in either direction. A cleared flag
    /// does not imply the data is unsorted — only that it is unknown.
    pub(crate) fn is_sorted_any(&self) -> bool {
        self.get_flags().is_sorted_any()
    }

    /// Clear the fast-explode-list flag.
    pub fn unset_fast_explode_list(&mut self) {
        self.set_fast_explode_list(false)
    }

    /// Set or clear the fast-explode-list flag.
    pub fn set_fast_explode_list(&mut self, value: bool) {
        let mut flags = self.flags.get_mut();
        flags.set(StatisticsFlags::CAN_FAST_EXPLODE_LIST, value);
        self.flags.set_mut(flags);
    }

    /// Whether the fast-explode-list flag is set.
    pub fn get_fast_explode_list(&self) -> bool {
        self.get_flags().can_fast_explode_list()
    }

    /// Read the current statistics flags.
    pub fn get_flags(&self) -> StatisticsFlags {
        self.flags.get()
    }

    /// Replace all statistics flags.
    pub(crate) fn set_flags(&mut self, flags: StatisticsFlags) {
        self.flags = StatisticsFlagsIM::new(flags);
    }

    /// The sortedness state recorded in the flags.
    pub fn is_sorted_flag(&self) -> IsSorted {
        self.get_flags().is_sorted()
    }

    /// Copy the subset `retain_flags` of `from`'s flags onto `self`.
    pub fn retain_flags_from<U: PolarsDataType>(
        &mut self,
        from: &ChunkedArray<U>,
        retain_flags: StatisticsFlags,
    ) {
        let flags = from.flags.get();
        // Avoid the mutation (and its cost) when no flags are set anyway.
        if !flags.is_empty() {
            self.set_flags(flags & retain_flags)
        }
    }

    /// Record sortedness in the flags.
    pub fn set_sorted_flag(&mut self, sorted: IsSorted) {
        let mut flags = self.flags.get_mut();
        flags.set_sorted(sorted);
        self.flags.set_mut(flags);
    }

    /// A clone of `self` with the given sortedness recorded.
    pub fn with_sorted_flag(&self, sorted: IsSorted) -> Self {
        let mut out = self.clone();
        out.set_sorted_flag(sorted);
        out
    }

    /// Index of the first non-null value, or `None` when all values are null.
    pub fn first_non_null(&self) -> Option<usize> {
        if self.null_count() == self.len() {
            None
        } else if self.null_count() == 0 {
            Some(0)
        } else if self.is_sorted_any() {
            // Sorted data groups its nulls at one end: if the very first value
            // is null they are all at the front, otherwise at the back.
            let out = if unsafe { self.downcast_get_unchecked(0).is_null_unchecked(0) } {
                self.null_count()
            } else {
                0
            };

            debug_assert!(
                // SAFETY: `out` is in bounds per the branch logic above.
                unsafe { self.get_unchecked(out) }.is_some(),
                "incorrect sorted flag"
            );

            Some(out)
        } else {
            first_non_null(self.iter_validities())
        }
    }

    /// Index of the last non-null value, or `None` when all values are null.
    pub fn last_non_null(&self) -> Option<usize> {
        if self.null_count() == self.len() {
            None
        } else if self.null_count() == 0 {
            Some(self.len() - 1)
        } else if self.is_sorted_any() {
            // Mirror of `first_non_null`: nulls at the front mean the last
            // element is valid; nulls at the back mean it is at len - nulls - 1.
            let out = if unsafe { self.downcast_get_unchecked(0).is_null_unchecked(0) } {
                self.len() - 1
            } else {
                self.len() - self.null_count() - 1
            };

            debug_assert!(
                // SAFETY: `out` is in bounds per the branch logic above.
                unsafe { self.get_unchecked(out) }.is_some(),
                "incorrect sorted flag"
            );

            Some(out)
        } else {
            last_non_null(self.iter_validities(), self.len())
        }
    }

    /// A copy of `self` with all null rows removed.
    pub fn drop_nulls(&self) -> Self {
        if self.null_count() == 0 {
            self.clone()
        } else {
            let chunks = self
                .downcast_iter()
                .map(|arr| {
                    if arr.null_count() == 0 {
                        arr.to_boxed()
                    } else {
                        // Keep exactly the rows whose validity bit is set.
                        filter_with_bitmap(arr, arr.validity().unwrap())
                    }
                })
                .collect();
            // SAFETY: filtering by validity leaves len - null_count rows and
            // no nulls, matching the dims passed here.
            unsafe {
                Self::new_with_dims(
                    self.field.clone(),
                    chunks,
                    self.len() - self.null_count(),
                    0,
                )
            }
        }
    }

    /// Iterator over each chunk's validity bitmap (`None` = chunk has no nulls).
    #[inline]
    #[allow(clippy::type_complexity)]
    pub fn iter_validities(&self) -> Map<Iter<'_, ArrayRef>, fn(&ArrayRef) -> Option<&Bitmap>> {
        // Named fn (not a closure) so the iterator type is nameable.
        fn to_validity(arr: &ArrayRef) -> Option<&Bitmap> {
            arr.validity()
        }
        self.chunks.iter().map(to_validity)
    }

    /// Whether the array contains at least one null value.
    #[inline]
    pub fn has_nulls(&self) -> bool {
        self.null_count > 0
    }

    /// Concatenate all chunks into one contiguous chunk, in place.
    pub fn shrink_to_fit(&mut self) {
        self.chunks = vec![concatenate_owned_unchecked(self.chunks.as_slice()).unwrap()];
    }

    /// An empty array with the same name and dtype as `self`.
    pub fn clear(&self) -> Self {
        // SAFETY: a single empty chunk of the same arrow dtype is a valid layout.
        let mut ca = unsafe {
            self.copy_with_chunks(vec![new_empty_array(
                self.chunks.first().unwrap().dtype().clone(),
            )])
        };

        use StatisticsFlags as F;
        // Sortedness and fast-explode trivially still hold for an empty array.
        ca.retain_flags_from(self, F::IS_SORTED_ANY | F::CAN_FAST_EXPLODE_LIST);
        ca
    }

    /// Reinterpret `series` as `&ChunkedArray<T>` when the *physical*
    /// representations match (e.g. Datetime/Duration over Int64, Date over Int32).
    ///
    /// # Safety
    /// The caller must guarantee the physical layouts really are identical;
    /// the pointer cast below is unchecked beyond the dtype match.
    ///
    /// # Panics
    /// Panics when the dtypes are not physically compatible.
    pub(crate) unsafe fn unpack_series_matching_physical_type<'a>(
        &self,
        series: &'a Series,
    ) -> &'a ChunkedArray<T> {
        let series_trait = &**series;
        if self.dtype() == series.dtype() {
            &*(series_trait as *const dyn SeriesTrait as *const ChunkedArray<T>)
        } else {
            use DataType::*;
            match (self.dtype(), series.dtype()) {
                (Int64, Datetime(_, _)) | (Int64, Duration(_)) | (Int32, Date) => {
                    &*(series_trait as *const dyn SeriesTrait as *const ChunkedArray<T>)
                },
                _ => panic!(
                    "cannot unpack series {:?} into matching type {:?}",
                    series,
                    self.dtype()
                ),
            }
        }
    }

    /// Iterator over the length of every chunk.
    pub fn chunk_lengths(&self) -> ChunkLenIter {
        self.chunks.iter().map(|chunk| chunk.len())
    }

    /// Borrow the underlying chunks.
    #[inline]
    pub fn chunks(&self) -> &Vec<ArrayRef> {
        &self.chunks
    }

    /// Mutably borrow the underlying chunks.
    ///
    /// # Safety
    /// Mutating the chunks can invalidate the cached `length`/`null_count`
    /// and the dtype invariant; the caller must keep them consistent.
    #[inline]
    pub unsafe fn chunks_mut(&mut self) -> &mut Vec<ArrayRef> {
        &mut self.chunks
    }

    /// True when the data is a single chunk with no nulls.
    pub fn is_optimal_aligned(&self) -> bool {
        self.chunks.len() == 1 && self.null_count() == 0
    }

    /// Clone `self` but with the given chunks, recomputing length/null count.
    ///
    /// # Safety
    /// `chunks` must have the same arrow dtype as this array's field.
    unsafe fn copy_with_chunks(&self, chunks: Vec<ArrayRef>) -> Self {
        Self::new_with_compute_len(self.field.clone(), chunks)
    }

    /// The logical data type of this array.
    pub fn dtype(&self) -> &DataType {
        self.field.dtype()
    }

    /// Overwrite the logical dtype.
    ///
    /// # Safety
    /// The new dtype must be compatible with the existing physical data.
    pub(crate) unsafe fn set_dtype(&mut self, dtype: DataType) {
        self.field = Arc::new(Field::new(self.name().clone(), dtype))
    }

    /// The name of this array.
    pub fn name(&self) -> &PlSmallStr {
        self.field.name()
    }

    /// Borrow the full field (name + dtype).
    pub fn ref_field(&self) -> &Field {
        &self.field
    }

    /// Rename this array in place.
    pub fn rename(&mut self, name: PlSmallStr) {
        self.field = Arc::new(Field::new(name, self.field.dtype().clone()));
    }

    /// Consume `self` and return it renamed.
    pub fn with_name(mut self, name: PlSmallStr) -> Self {
        self.rename(name);
        self
    }
}
505
506impl<T> ChunkedArray<T>
507where
508 T: PolarsDataType,
509{
510 #[inline]
516 pub fn get(&self, idx: usize) -> Option<T::Physical<'_>> {
517 let (chunk_idx, arr_idx) = self.index_to_chunked_index(idx);
518 assert!(
519 chunk_idx < self.chunks().len(),
520 "index: {} out of bounds for len: {}",
521 idx,
522 self.len()
523 );
524 unsafe {
525 let arr = self.downcast_get_unchecked(chunk_idx);
526 assert!(
527 arr_idx < arr.len(),
528 "index: {} out of bounds for len: {}",
529 idx,
530 self.len()
531 );
532 arr.get_unchecked(arr_idx)
533 }
534 }
535
536 #[inline]
542 pub unsafe fn get_unchecked(&self, idx: usize) -> Option<T::Physical<'_>> {
543 let (chunk_idx, arr_idx) = self.index_to_chunked_index(idx);
544
545 unsafe {
546 self.downcast_get_unchecked(chunk_idx)
548 .get_unchecked(arr_idx)
549 }
550 }
551
552 #[inline]
558 pub unsafe fn value_unchecked(&self, idx: usize) -> T::Physical<'_> {
559 let (chunk_idx, arr_idx) = self.index_to_chunked_index(idx);
560
561 unsafe {
562 self.downcast_get_unchecked(chunk_idx)
564 .value_unchecked(arr_idx)
565 }
566 }
567
568 #[inline]
569 pub fn first(&self) -> Option<T::Physical<'_>> {
570 unsafe {
571 let arr = self.downcast_get_unchecked(0);
572 arr.get_unchecked(0)
573 }
574 }
575
576 #[inline]
577 pub fn last(&self) -> Option<T::Physical<'_>> {
578 unsafe {
579 let arr = self.downcast_get_unchecked(self.chunks.len().checked_sub(1)?);
580 arr.get_unchecked(arr.len().checked_sub(1)?)
581 }
582 }
583}
584
585impl ListChunked {
586 #[inline]
587 pub fn get_as_series(&self, idx: usize) -> Option<Series> {
588 unsafe {
589 Some(Series::from_chunks_and_dtype_unchecked(
590 self.name().clone(),
591 vec![self.get(idx)?],
592 &self.inner_dtype().to_physical(),
593 ))
594 }
595 }
596}
597
#[cfg(feature = "dtype-array")]
impl ArrayChunked {
    /// Materialize the fixed-size array at `idx` as its own `Series`, or
    /// `None` when `idx` is out of bounds or the element is null.
    #[inline]
    pub fn get_as_series(&self, idx: usize) -> Option<Series> {
        let element = self.get(idx)?;
        // SAFETY: `element` was taken from this array, so it matches the
        // (physical) inner dtype passed here.
        let series = unsafe {
            Series::from_chunks_and_dtype_unchecked(
                self.name().clone(),
                vec![element],
                &self.inner_dtype().to_physical(),
            )
        };
        Some(series)
    }
}
611
612impl<T> ChunkedArray<T>
613where
614 T: PolarsDataType,
615{
616 pub(crate) fn match_chunks<I>(&self, chunk_id: I) -> Self
620 where
621 I: Iterator<Item = usize>,
622 {
623 debug_assert!(self.chunks.len() == 1);
624 let slice = |ca: &Self| {
626 let array = &ca.chunks[0];
627
628 let mut offset = 0;
629 let chunks = chunk_id
630 .map(|len| {
631 debug_assert!((offset + len) <= array.len());
633 let out = unsafe { array.sliced_unchecked(offset, len) };
634 offset += len;
635 out
636 })
637 .collect();
638
639 debug_assert_eq!(offset, array.len());
640
641 unsafe {
643 Self::from_chunks_and_dtype(self.name().clone(), chunks, self.dtype().clone())
644 }
645 };
646
647 if self.chunks.len() != 1 {
648 let out = self.rechunk();
649 slice(&out)
650 } else {
651 slice(self)
652 }
653 }
654}
655
impl<T: PolarsDataType> AsRefDataType for ChunkedArray<T> {
    /// Forward to [`ChunkedArray::dtype`].
    fn as_ref_dtype(&self) -> &DataType {
        self.dtype()
    }
}
661
/// Obtain a raw pointer (as `usize`) to a single contiguous data buffer.
/// The default implementation errors: only numeric arrays support this.
pub(crate) trait AsSinglePtr: AsRefDataType {
    fn as_single_ptr(&mut self) -> PolarsResult<usize> {
        polars_bail!(opq = as_single_ptr, self.as_ref_dtype());
    }
}
668
impl<T> AsSinglePtr for ChunkedArray<T>
where
    T: PolarsNumericType,
{
    /// Rechunk into one contiguous buffer (installed back into `self`) and
    /// return that buffer's data pointer as a `usize`.
    fn as_single_ptr(&mut self) -> PolarsResult<usize> {
        let mut ca = self.rechunk();
        // Swap the contiguous version into `self` so the returned pointer
        // stays valid for as long as `self` is unchanged.
        mem::swap(&mut ca, self);
        let a = self.data_views().next().unwrap();
        let ptr = a.as_ptr();
        Ok(ptr as usize)
    }
}
681
// Non-numeric arrays keep the erroring default implementation of
// `as_single_ptr`: their data is not a single flat numeric buffer.
impl AsSinglePtr for BooleanChunked {}
impl AsSinglePtr for ListChunked {}
#[cfg(feature = "dtype-array")]
impl AsSinglePtr for ArrayChunked {}
impl AsSinglePtr for StringChunked {}
impl AsSinglePtr for BinaryChunked {}
#[cfg(feature = "object")]
impl<T: PolarsObject> AsSinglePtr for ObjectChunked<T> {}
690
/// Classification of an array's physical layout, used to pick fast paths.
pub enum ChunkedArrayLayout<'a, T: PolarsDataType> {
    /// One chunk, no nulls.
    SingleNoNull(&'a T::Array),
    /// One chunk, may contain nulls.
    Single(&'a T::Array),
    /// Multiple chunks, none of which contain nulls.
    MultiNoNull(&'a ChunkedArray<T>),
    /// Multiple chunks, at least one containing nulls.
    Multi(&'a ChunkedArray<T>),
}
697
698impl<T> ChunkedArray<T>
699where
700 T: PolarsDataType,
701{
702 pub fn layout(&self) -> ChunkedArrayLayout<'_, T> {
703 if self.chunks.len() == 1 {
704 let arr = self.downcast_iter().next().unwrap();
705 return if arr.null_count() == 0 {
706 ChunkedArrayLayout::SingleNoNull(arr)
707 } else {
708 ChunkedArrayLayout::Single(arr)
709 };
710 }
711
712 if self.downcast_iter().all(|a| a.null_count() == 0) {
713 ChunkedArrayLayout::MultiNoNull(self)
714 } else {
715 ChunkedArrayLayout::Multi(self)
716 }
717 }
718}
719
impl<T> ChunkedArray<T>
where
    T: PolarsNumericType,
{
    /// Borrow the values as one contiguous slice.
    ///
    /// # Errors
    /// `ComputeError` unless the array is a single chunk with no nulls.
    pub fn cont_slice(&self) -> PolarsResult<&[T::Native]> {
        polars_ensure!(
            self.chunks.len() == 1 && self.chunks[0].null_count() == 0,
            ComputeError: "chunked array is not contiguous"
        );
        Ok(self.downcast_iter().next().map(|arr| arr.values()).unwrap())
    }

    /// Mutably borrow the values as one contiguous slice, or `None` when the
    /// array is multi-chunk, has nulls, or the buffer is not uniquely owned
    /// (per `get_mut_values`).
    pub(crate) fn cont_slice_mut(&mut self) -> Option<&mut [T::Native]> {
        if self.chunks.len() == 1 && self.chunks[0].null_count() == 0 {
            // SAFETY: invariants (length/null_count) are not altered by
            // mutating values in place — TODO(review): confirm contract of
            // `downcast_iter_mut`.
            let arr = unsafe { self.downcast_iter_mut().next().unwrap() };
            arr.get_mut_values()
        } else {
            None
        }
    }

    /// Iterator over each chunk's raw value buffer (validity is ignored).
    pub fn data_views(&self) -> impl DoubleEndedIterator<Item = &[T::Native]> {
        self.downcast_iter().map(|arr| arr.values().as_slice())
    }

    /// Iterator over all values, assuming the array contains no nulls
    /// (null slots would yield their undefined underlying values).
    #[allow(clippy::wrong_self_convention)]
    pub fn into_no_null_iter(
        &self,
    ) -> impl '_ + Send + Sync + ExactSizeIterator<Item = T::Native> + DoubleEndedIterator + TrustedLen
    {
        #[allow(clippy::map_clone)]
        // SAFETY: the flattened value buffers contain exactly `self.len()`
        // elements in total.
        unsafe {
            self.data_views()
                .flatten()
                .map(|v| *v)
                .trust_my_length(self.len())
        }
    }
}
767
768impl<T: PolarsDataType> Clone for ChunkedArray<T> {
769 fn clone(&self) -> Self {
770 ChunkedArray {
771 field: self.field.clone(),
772 chunks: self.chunks.clone(),
773 flags: self.flags.clone(),
774
775 _pd: Default::default(),
776 length: self.length,
777 null_count: self.null_count,
778 }
779 }
780}
781
impl<T: PolarsDataType> AsRef<ChunkedArray<T>> for ChunkedArray<T> {
    /// Identity `AsRef`, so generic code can take `impl AsRef<ChunkedArray<T>>`.
    fn as_ref(&self) -> &ChunkedArray<T> {
        self
    }
}
787
788impl ValueSize for ListChunked {
789 fn get_values_size(&self) -> usize {
790 self.chunks
791 .iter()
792 .fold(0usize, |acc, arr| acc + arr.get_values_size())
793 }
794}
795
#[cfg(feature = "dtype-array")]
impl ValueSize for ArrayChunked {
    /// Total values size, summed over every chunk.
    fn get_values_size(&self) -> usize {
        self.chunks.iter().map(|arr| arr.get_values_size()).sum()
    }
}
804impl ValueSize for StringChunked {
805 fn get_values_size(&self) -> usize {
806 self.chunks
807 .iter()
808 .fold(0usize, |acc, arr| acc + arr.get_values_size())
809 }
810}
811
812impl ValueSize for BinaryOffsetChunked {
813 fn get_values_size(&self) -> usize {
814 self.chunks
815 .iter()
816 .fold(0usize, |acc, arr| acc + arr.get_values_size())
817 }
818}
819
/// Build an arrow `PrimitiveArray` from owned values and an optional validity
/// bitmap, using `T`'s arrow dtype.
pub(crate) fn to_primitive<T: PolarsNumericType>(
    values: Vec<T::Native>,
    validity: Option<Bitmap>,
) -> PrimitiveArray<T::Native> {
    PrimitiveArray::new(
        T::get_dtype().to_arrow(CompatLevel::newest()),
        values.into(),
        validity,
    )
}
830
/// Like [`to_primitive`], but boxed as a type-erased `ArrayRef`.
pub(crate) fn to_array<T: PolarsNumericType>(
    values: Vec<T::Native>,
    validity: Option<Bitmap>,
) -> ArrayRef {
    Box::new(to_primitive::<T>(values, validity))
}
837
impl<T: PolarsDataType> Default for ChunkedArray<T> {
    /// An empty, unnamed array of `T`'s dtype.
    fn default() -> Self {
        let dtype = T::get_dtype();
        // The stored chunk uses the *physical* arrow dtype, while the field
        // keeps the logical dtype.
        let arrow_dtype = dtype.to_physical().to_arrow(CompatLevel::newest());
        ChunkedArray {
            field: Arc::new(Field::new(PlSmallStr::EMPTY, dtype)),
            // Invariant: even an empty ChunkedArray holds one (empty) chunk.
            chunks: vec![new_empty_array(arrow_dtype)],
            flags: StatisticsFlagsIM::empty(),

            _pd: Default::default(),
            length: 0,
            null_count: 0,
        }
    }
}
854
#[cfg(test)]
pub(crate) mod test {
    use crate::prelude::*;

    /// Shared fixture: a 3-element Int32 array named "a".
    pub(crate) fn get_chunked_array() -> Int32Chunked {
        ChunkedArray::new(PlSmallStr::from_static("a"), &[1, 2, 3])
    }

    #[test]
    fn test_sort() {
        let a = Int32Chunked::new(PlSmallStr::from_static("a"), &[1, 9, 3, 2]);
        let b = a
            .sort(false)
            .into_iter()
            .map(|opt| opt.unwrap())
            .collect::<Vec<_>>();
        assert_eq!(b, [1, 2, 3, 9]);
        let a = StringChunked::new(PlSmallStr::from_static("a"), &["b", "a", "c"]);
        let a = a.sort(false);
        let b = a.into_iter().collect::<Vec<_>>();
        assert_eq!(b, [Some("a"), Some("b"), Some("c")]);
        // Sorting should also set the ascending-sorted statistics flag.
        assert!(a.is_sorted_ascending_flag());
    }

    // Smoke test: the arithmetic ops must not panic on simple inputs.
    #[test]
    fn arithmetic() {
        let a = &Int32Chunked::new(PlSmallStr::from_static("a"), &[1, 100, 6, 40]);
        let b = &Int32Chunked::new(PlSmallStr::from_static("b"), &[-1, 2, 3, 4]);

        println!("{:?}", a + b);
        println!("{:?}", a - b);
        println!("{:?}", a * b);
        println!("{:?}", a / b);
    }

    #[test]
    fn iter() {
        let s1 = get_chunked_array();
        // 1 + 2 + 3 == 6
        assert_eq!(s1.into_iter().fold(0, |acc, val| { acc + val.unwrap() }), 6)
    }

    #[test]
    fn limit() {
        let a = get_chunked_array();
        let b = a.limit(2);
        println!("{:?}", b);
        assert_eq!(b.len(), 2)
    }

    #[test]
    fn filter() {
        let a = get_chunked_array();
        let b = a
            .filter(&BooleanChunked::new(
                PlSmallStr::from_static("filter"),
                &[true, false, false],
            ))
            .unwrap();
        assert_eq!(b.len(), 1);
        assert_eq!(b.into_iter().next(), Some(Some(1)));
    }

    #[test]
    fn aggregates() {
        let a = &Int32Chunked::new(PlSmallStr::from_static("a"), &[1, 100, 10, 9]);
        assert_eq!(a.max(), Some(100));
        assert_eq!(a.min(), Some(1));
        assert_eq!(a.sum(), Some(120))
    }

    #[test]
    fn take() {
        let a = get_chunked_array();
        let new = a.take(&[0 as IdxSize, 1]).unwrap();
        assert_eq!(new.len(), 2)
    }

    #[test]
    fn cast() {
        let a = get_chunked_array();
        let b = a.cast(&DataType::Int64).unwrap();
        assert_eq!(b.dtype(), &DataType::Int64)
    }

    /// Helper: assert a numeric array's non-null values equal `eq`.
    fn assert_slice_equal<T>(ca: &ChunkedArray<T>, eq: &[T::Native])
    where
        T: PolarsNumericType,
    {
        assert_eq!(ca.iter().map(|opt| opt.unwrap()).collect::<Vec<_>>(), eq)
    }

    #[test]
    fn slice() {
        // Append creates a two-chunk array, so slices may cross chunk borders.
        let mut first = UInt32Chunked::new(PlSmallStr::from_static("first"), &[0, 1, 2]);
        let second = UInt32Chunked::new(PlSmallStr::from_static("second"), &[3, 4, 5]);
        first.append(&second).unwrap();
        assert_slice_equal(&first.slice(0, 3), &[0, 1, 2]);
        assert_slice_equal(&first.slice(0, 4), &[0, 1, 2, 3]);
        assert_slice_equal(&first.slice(1, 4), &[1, 2, 3, 4]);
        assert_slice_equal(&first.slice(3, 2), &[3, 4]);
        assert_slice_equal(&first.slice(3, 3), &[3, 4, 5]);
        // Negative offsets count from the end.
        assert_slice_equal(&first.slice(-3, 3), &[3, 4, 5]);
        assert_slice_equal(&first.slice(-6, 6), &[0, 1, 2, 3, 4, 5]);

        // Out-of-range offsets/lengths are clamped, not errors.
        assert_eq!(first.slice(-7, 2).len(), 1);
        assert_eq!(first.slice(-3, 4).len(), 3);
        assert_eq!(first.slice(3, 4).len(), 3);
        assert_eq!(first.slice(10, 4).len(), 0);
    }

    #[test]
    fn sorting() {
        let s = UInt32Chunked::new(PlSmallStr::EMPTY, &[9, 2, 4]);
        let sorted = s.sort(false);
        assert_slice_equal(&sorted, &[2, 4, 9]);
        let sorted = s.sort(true);
        assert_slice_equal(&sorted, &[9, 4, 2]);

        let s: StringChunked = ["b", "a", "z"].iter().collect();
        let sorted = s.sort(false);
        assert_eq!(
            sorted.into_iter().collect::<Vec<_>>(),
            &[Some("a"), Some("b"), Some("z")]
        );
        let sorted = s.sort(true);
        assert_eq!(
            sorted.into_iter().collect::<Vec<_>>(),
            &[Some("z"), Some("b"), Some("a")]
        );
        // Nulls sort first in ascending order.
        let s: StringChunked = [Some("b"), None, Some("z")].iter().copied().collect();
        let sorted = s.sort(false);
        assert_eq!(
            sorted.into_iter().collect::<Vec<_>>(),
            &[None, Some("b"), Some("z")]
        );
    }

    #[test]
    fn reverse() {
        let s = UInt32Chunked::new(PlSmallStr::EMPTY, &[1, 2, 3]);
        assert_slice_equal(&s.reverse(), &[3, 2, 1]);
        // Null positions must be reversed along with the values.
        let s = UInt32Chunked::new(PlSmallStr::EMPTY, &[Some(1), None, Some(3)]);
        assert_eq!(Vec::from(&s.reverse()), &[Some(3), None, Some(1)]);
        let s = BooleanChunked::new(PlSmallStr::EMPTY, &[true, false]);
        assert_eq!(Vec::from(&s.reverse()), &[Some(false), Some(true)]);

        let s = StringChunked::new(PlSmallStr::EMPTY, &["a", "b", "c"]);
        assert_eq!(Vec::from(&s.reverse()), &[Some("c"), Some("b"), Some("a")]);

        let s = StringChunked::new(PlSmallStr::EMPTY, &[Some("a"), None, Some("c")]);
        assert_eq!(Vec::from(&s.reverse()), &[Some("c"), None, Some("a")]);
    }

    #[test]
    #[cfg(feature = "dtype-categorical")]
    fn test_iter_categorical() {
        use crate::{disable_string_cache, SINGLE_LOCK};
        // Serialize against other tests that touch the global string cache.
        let _lock = SINGLE_LOCK.lock();
        disable_string_cache();
        let ca = StringChunked::new(
            PlSmallStr::EMPTY,
            &[Some("foo"), None, Some("bar"), Some("ham")],
        );
        let ca = ca
            .cast(&DataType::Categorical(None, Default::default()))
            .unwrap();
        let ca = ca.categorical().unwrap();
        let v: Vec<_> = ca.physical().into_iter().collect();
        // Categories are numbered in first-appearance order; nulls stay null.
        assert_eq!(v, &[Some(0), None, Some(1), Some(2)]);
    }

    #[test]
    #[ignore]
    fn test_shrink_to_fit() {
        // Over-allocate via a large builder capacity, then check that
        // shrink_to_fit reduces the estimated byte size.
        let mut builder = StringChunkedBuilder::new(PlSmallStr::from_static("foo"), 2048);
        builder.append_value("foo");
        let mut arr = builder.finish();
        let before = arr
            .chunks()
            .iter()
            .map(|arr| arrow::compute::aggregate::estimated_bytes_size(arr.as_ref()))
            .sum::<usize>();
        arr.shrink_to_fit();
        let after = arr
            .chunks()
            .iter()
            .map(|arr| arrow::compute::aggregate::estimated_bytes_size(arr.as_ref()))
            .sum::<usize>();
        assert!(before > after);
    }
}
1050}