polars_core/frame/column/mod.rs

use std::borrow::Cow;

use arrow::bitmap::BitmapBuilder;
use arrow::trusted_len::TrustMyLength;
use num_traits::{Num, NumCast};
use polars_error::PolarsResult;
use polars_utils::index::check_bounds;
use polars_utils::pl_str::PlSmallStr;
pub use scalar::ScalarColumn;

use self::compare_inner::{TotalEqInner, TotalOrdInner};
use self::gather::check_bounds_ca;
use self::partitioned::PartitionedColumn;
use self::series::SeriesColumn;
use crate::chunked_array::cast::CastOptions;
use crate::chunked_array::flags::StatisticsFlags;
use crate::datatypes::ReshapeDimension;
use crate::prelude::*;
use crate::series::{BitRepr, IsSorted, SeriesPhysIter};
use crate::utils::{slice_offsets, Container};
use crate::{HEAD_DEFAULT_LENGTH, TAIL_DEFAULT_LENGTH};

mod arithmetic;
mod compare;
mod partitioned;
mod scalar;
mod series;

/// A column within a [`DataFrame`].
///
/// This is lazily initialized to a [`Series`] with methods like
/// [`as_materialized_series`][Column::as_materialized_series] and
/// [`take_materialized_series`][Column::take_materialized_series].
///
/// Currently, there are three ways to represent a [`Column`].
/// 1. A [`Series`] of values
/// 2. A [`PartitionedColumn`] where contiguous ranges of rows share a value
/// 3. A [`ScalarColumn`] that repeats a single [`Scalar`]
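///
/// # Example
///
/// A minimal sketch of constructing the two common representations (assuming the crate
/// prelude is in scope):
///
/// ```ignore
/// // A value-backed column.
/// let values = Column::new("a".into(), &[1i32, 2, 3]);
/// // A scalar-backed column that repeats `0i32` three times.
/// let repeated = Column::new_scalar("b".into(), Scalar::new(DataType::Int32, AnyValue::Int32(0)), 3);
/// assert_eq!(values.len(), repeated.len());
/// ```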
#[derive(Debug, Clone)]
#[cfg_attr(feature = "serde", derive(serde::Deserialize, serde::Serialize))]
pub enum Column {
    Series(SeriesColumn),
    Partitioned(PartitionedColumn),
    Scalar(ScalarColumn),
}

/// Convert `Self` into a [`Column`]
pub trait IntoColumn: Sized {
    fn into_column(self) -> Column;
}

impl Column {
    #[inline]
    #[track_caller]
    pub fn new<T, Phantom>(name: PlSmallStr, values: T) -> Self
    where
        Phantom: ?Sized,
        Series: NamedFrom<T, Phantom>,
    {
        Self::Series(SeriesColumn::new(NamedFrom::new(name, values)))
    }

    #[inline]
    pub fn new_empty(name: PlSmallStr, dtype: &DataType) -> Self {
        Self::new_scalar(name, Scalar::new(dtype.clone(), AnyValue::Null), 0)
    }

    #[inline]
    pub fn new_scalar(name: PlSmallStr, scalar: Scalar, length: usize) -> Self {
        Self::Scalar(ScalarColumn::new(name, scalar, length))
    }

    #[inline]
    pub fn new_partitioned(name: PlSmallStr, scalar: Scalar, length: usize) -> Self {
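        // @partition-opt: this currently falls back to the scalar representation.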
        Self::Scalar(ScalarColumn::new(name, scalar, length))
    }

    // # Materialize
    /// Get a reference to a [`Series`] for this [`Column`]
    ///
    /// This may need to materialize the [`Series`] on the first invocation for a specific column.
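    ///
    /// A minimal sketch of the materialization behavior (assuming the crate prelude is in
    /// scope):
    ///
    /// ```ignore
    /// let c = Column::new_scalar("x".into(), Scalar::new(DataType::Int64, AnyValue::Int64(1)), 4);
    /// // The first call materializes a `Series` of length 4; later calls reuse it.
    /// let s: &Series = c.as_materialized_series();
    /// assert_eq!(s.len(), 4);
    /// ```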
    #[inline]
    pub fn as_materialized_series(&self) -> &Series {
        match self {
            Column::Series(s) => s,
            Column::Partitioned(s) => s.as_materialized_series(),
            Column::Scalar(s) => s.as_materialized_series(),
        }
    }
    /// Turn [`Column`] into a [`Column::Series`].
    ///
    /// This may need to materialize the [`Series`] on the first invocation for a specific column.
    #[inline]
    pub fn into_materialized_series(&mut self) -> &mut Series {
        match self {
            Column::Series(s) => s,
            Column::Partitioned(s) => {
                let series = std::mem::replace(
                    s,
                    PartitionedColumn::new_empty(PlSmallStr::EMPTY, DataType::Null),
                )
                .take_materialized_series();
                *self = Column::Series(series.into());
                let Column::Series(s) = self else {
                    unreachable!();
                };
                s
            },
            Column::Scalar(s) => {
                let series = std::mem::replace(
                    s,
                    ScalarColumn::new_empty(PlSmallStr::EMPTY, DataType::Null),
                )
                .take_materialized_series();
                *self = Column::Series(series.into());
                let Column::Series(s) = self else {
                    unreachable!();
                };
                s
            },
        }
    }
    /// Take [`Series`] from a [`Column`]
    ///
    /// This may need to materialize the [`Series`] on the first invocation for a specific column.
    #[inline]
    pub fn take_materialized_series(self) -> Series {
        match self {
            Column::Series(s) => s.take(),
            Column::Partitioned(s) => s.take_materialized_series(),
            Column::Scalar(s) => s.take_materialized_series(),
        }
    }

    #[inline]
    pub fn dtype(&self) -> &DataType {
        match self {
            Column::Series(s) => s.dtype(),
            Column::Partitioned(s) => s.dtype(),
            Column::Scalar(s) => s.dtype(),
        }
    }

    #[inline]
    pub fn field(&self) -> Cow<Field> {
        match self {
            Column::Series(s) => s.field(),
            Column::Partitioned(s) => s.field(),
            Column::Scalar(s) => match s.lazy_as_materialized_series() {
                None => Cow::Owned(Field::new(s.name().clone(), s.dtype().clone())),
                Some(s) => s.field(),
            },
        }
    }

    #[inline]
    pub fn name(&self) -> &PlSmallStr {
        match self {
            Column::Series(s) => s.name(),
            Column::Partitioned(s) => s.name(),
            Column::Scalar(s) => s.name(),
        }
    }

    #[inline]
    pub fn len(&self) -> usize {
        match self {
            Column::Series(s) => s.len(),
            Column::Partitioned(s) => s.len(),
            Column::Scalar(s) => s.len(),
        }
    }

    #[inline]
    pub fn with_name(mut self, name: PlSmallStr) -> Column {
        self.rename(name);
        self
    }

    #[inline]
    pub fn rename(&mut self, name: PlSmallStr) {
        match self {
            Column::Series(s) => _ = s.rename(name),
            Column::Partitioned(s) => _ = s.rename(name),
            Column::Scalar(s) => _ = s.rename(name),
        }
    }

    // # Downcasting
    #[inline]
    pub fn as_series(&self) -> Option<&Series> {
        match self {
            Column::Series(s) => Some(s),
            _ => None,
        }
    }
    #[inline]
    pub fn as_partitioned_column(&self) -> Option<&PartitionedColumn> {
        match self {
            Column::Partitioned(s) => Some(s),
            _ => None,
        }
    }
    #[inline]
    pub fn as_scalar_column(&self) -> Option<&ScalarColumn> {
        match self {
            Column::Scalar(s) => Some(s),
            _ => None,
        }
    }

    // # Try to Chunked Arrays
    pub fn try_bool(&self) -> Option<&BooleanChunked> {
        self.as_materialized_series().try_bool()
    }
    pub fn try_i8(&self) -> Option<&Int8Chunked> {
        self.as_materialized_series().try_i8()
    }
    pub fn try_i16(&self) -> Option<&Int16Chunked> {
        self.as_materialized_series().try_i16()
    }
    pub fn try_i32(&self) -> Option<&Int32Chunked> {
        self.as_materialized_series().try_i32()
    }
    pub fn try_i64(&self) -> Option<&Int64Chunked> {
        self.as_materialized_series().try_i64()
    }
    pub fn try_u8(&self) -> Option<&UInt8Chunked> {
        self.as_materialized_series().try_u8()
    }
    pub fn try_u16(&self) -> Option<&UInt16Chunked> {
        self.as_materialized_series().try_u16()
    }
    pub fn try_u32(&self) -> Option<&UInt32Chunked> {
        self.as_materialized_series().try_u32()
    }
    pub fn try_u64(&self) -> Option<&UInt64Chunked> {
        self.as_materialized_series().try_u64()
    }
    pub fn try_f32(&self) -> Option<&Float32Chunked> {
        self.as_materialized_series().try_f32()
    }
    pub fn try_f64(&self) -> Option<&Float64Chunked> {
        self.as_materialized_series().try_f64()
    }
    pub fn try_str(&self) -> Option<&StringChunked> {
        self.as_materialized_series().try_str()
    }
    pub fn try_list(&self) -> Option<&ListChunked> {
        self.as_materialized_series().try_list()
    }
    pub fn try_binary(&self) -> Option<&BinaryChunked> {
        self.as_materialized_series().try_binary()
    }
    pub fn try_idx(&self) -> Option<&IdxCa> {
        self.as_materialized_series().try_idx()
    }
    pub fn try_binary_offset(&self) -> Option<&BinaryOffsetChunked> {
        self.as_materialized_series().try_binary_offset()
    }
    #[cfg(feature = "dtype-datetime")]
    pub fn try_datetime(&self) -> Option<&DatetimeChunked> {
        self.as_materialized_series().try_datetime()
    }
    #[cfg(feature = "dtype-struct")]
    pub fn try_struct(&self) -> Option<&StructChunked> {
        self.as_materialized_series().try_struct()
    }
    #[cfg(feature = "dtype-decimal")]
    pub fn try_decimal(&self) -> Option<&DecimalChunked> {
        self.as_materialized_series().try_decimal()
    }
    #[cfg(feature = "dtype-array")]
    pub fn try_array(&self) -> Option<&ArrayChunked> {
        self.as_materialized_series().try_array()
    }
    #[cfg(feature = "dtype-categorical")]
    pub fn try_categorical(&self) -> Option<&CategoricalChunked> {
        self.as_materialized_series().try_categorical()
    }
    #[cfg(feature = "dtype-date")]
    pub fn try_date(&self) -> Option<&DateChunked> {
        self.as_materialized_series().try_date()
    }
    #[cfg(feature = "dtype-duration")]
    pub fn try_duration(&self) -> Option<&DurationChunked> {
        self.as_materialized_series().try_duration()
    }

    // # To Chunked Arrays
    pub fn bool(&self) -> PolarsResult<&BooleanChunked> {
        self.as_materialized_series().bool()
    }
    pub fn i8(&self) -> PolarsResult<&Int8Chunked> {
        self.as_materialized_series().i8()
    }
    pub fn i16(&self) -> PolarsResult<&Int16Chunked> {
        self.as_materialized_series().i16()
    }
    pub fn i32(&self) -> PolarsResult<&Int32Chunked> {
        self.as_materialized_series().i32()
    }
    pub fn i64(&self) -> PolarsResult<&Int64Chunked> {
        self.as_materialized_series().i64()
    }
    #[cfg(feature = "dtype-i128")]
    pub fn i128(&self) -> PolarsResult<&Int128Chunked> {
        self.as_materialized_series().i128()
    }
    pub fn u8(&self) -> PolarsResult<&UInt8Chunked> {
        self.as_materialized_series().u8()
    }
    pub fn u16(&self) -> PolarsResult<&UInt16Chunked> {
        self.as_materialized_series().u16()
    }
    pub fn u32(&self) -> PolarsResult<&UInt32Chunked> {
        self.as_materialized_series().u32()
    }
    pub fn u64(&self) -> PolarsResult<&UInt64Chunked> {
        self.as_materialized_series().u64()
    }
    pub fn f32(&self) -> PolarsResult<&Float32Chunked> {
        self.as_materialized_series().f32()
    }
    pub fn f64(&self) -> PolarsResult<&Float64Chunked> {
        self.as_materialized_series().f64()
    }
    pub fn str(&self) -> PolarsResult<&StringChunked> {
        self.as_materialized_series().str()
    }
    pub fn list(&self) -> PolarsResult<&ListChunked> {
        self.as_materialized_series().list()
    }
    pub fn binary(&self) -> PolarsResult<&BinaryChunked> {
        self.as_materialized_series().binary()
    }
    pub fn idx(&self) -> PolarsResult<&IdxCa> {
        self.as_materialized_series().idx()
    }
    pub fn binary_offset(&self) -> PolarsResult<&BinaryOffsetChunked> {
        self.as_materialized_series().binary_offset()
    }
    #[cfg(feature = "dtype-datetime")]
    pub fn datetime(&self) -> PolarsResult<&DatetimeChunked> {
        self.as_materialized_series().datetime()
    }
    #[cfg(feature = "dtype-struct")]
    pub fn struct_(&self) -> PolarsResult<&StructChunked> {
        self.as_materialized_series().struct_()
    }
    #[cfg(feature = "dtype-decimal")]
    pub fn decimal(&self) -> PolarsResult<&DecimalChunked> {
        self.as_materialized_series().decimal()
    }
    #[cfg(feature = "dtype-array")]
    pub fn array(&self) -> PolarsResult<&ArrayChunked> {
        self.as_materialized_series().array()
    }
    #[cfg(feature = "dtype-categorical")]
    pub fn categorical(&self) -> PolarsResult<&CategoricalChunked> {
        self.as_materialized_series().categorical()
    }
    #[cfg(feature = "dtype-date")]
    pub fn date(&self) -> PolarsResult<&DateChunked> {
        self.as_materialized_series().date()
    }
    #[cfg(feature = "dtype-duration")]
    pub fn duration(&self) -> PolarsResult<&DurationChunked> {
        self.as_materialized_series().duration()
    }

    // # Casting
    pub fn cast_with_options(&self, dtype: &DataType, options: CastOptions) -> PolarsResult<Self> {
        match self {
            Column::Series(s) => s.cast_with_options(dtype, options).map(Column::from),
            Column::Partitioned(s) => s.cast_with_options(dtype, options).map(Column::from),
            Column::Scalar(s) => s.cast_with_options(dtype, options).map(Column::from),
        }
    }
    pub fn strict_cast(&self, dtype: &DataType) -> PolarsResult<Self> {
        match self {
            Column::Series(s) => s.strict_cast(dtype).map(Column::from),
            Column::Partitioned(s) => s.strict_cast(dtype).map(Column::from),
            Column::Scalar(s) => s.strict_cast(dtype).map(Column::from),
        }
    }
    pub fn cast(&self, dtype: &DataType) -> PolarsResult<Column> {
        match self {
            Column::Series(s) => s.cast(dtype).map(Column::from),
            Column::Partitioned(s) => s.cast(dtype).map(Column::from),
            Column::Scalar(s) => s.cast(dtype).map(Column::from),
        }
    }
    /// # Safety
    ///
    /// This can lead to invalid memory access in downstream code.
    pub unsafe fn cast_unchecked(&self, dtype: &DataType) -> PolarsResult<Column> {
        match self {
            Column::Series(s) => unsafe { s.cast_unchecked(dtype) }.map(Column::from),
            Column::Partitioned(s) => unsafe { s.cast_unchecked(dtype) }.map(Column::from),
            Column::Scalar(s) => unsafe { s.cast_unchecked(dtype) }.map(Column::from),
        }
    }

    pub fn clear(&self) -> Self {
        match self {
            Column::Series(s) => s.clear().into(),
            Column::Partitioned(s) => s.clear().into(),
            Column::Scalar(s) => s.resize(0).into(),
        }
    }

    #[inline]
    pub fn shrink_to_fit(&mut self) {
        match self {
            Column::Series(s) => s.shrink_to_fit(),
            // @partition-opt
            Column::Partitioned(_) => {},
            Column::Scalar(_) => {},
        }
    }

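    /// Create a new [`Column`] of `length` repetitions of the value at `index`. If `index` is
    /// out of bounds for `self`, a full-null column of `length` is returned instead.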
    #[inline]
    pub fn new_from_index(&self, index: usize, length: usize) -> Self {
        if index >= self.len() {
            return Self::full_null(self.name().clone(), length, self.dtype());
        }

        match self {
            Column::Series(s) => {
                // SAFETY: Bounds check done before.
                let av = unsafe { s.get_unchecked(index) };
                let scalar = Scalar::new(self.dtype().clone(), av.into_static());
                Self::new_scalar(self.name().clone(), scalar, length)
            },
            Column::Partitioned(s) => {
                // SAFETY: Bounds check done before.
                let av = unsafe { s.get_unchecked(index) };
                let scalar = Scalar::new(self.dtype().clone(), av.into_static());
                Self::new_scalar(self.name().clone(), scalar, length)
            },
            Column::Scalar(s) => s.resize(length).into(),
        }
    }

    #[inline]
    pub fn has_nulls(&self) -> bool {
        match self {
            Self::Series(s) => s.has_nulls(),
            // @partition-opt
            Self::Partitioned(s) => s.as_materialized_series().has_nulls(),
            Self::Scalar(s) => s.has_nulls(),
        }
    }

    #[inline]
    pub fn is_null(&self) -> BooleanChunked {
        match self {
            Self::Series(s) => s.is_null(),
            // @partition-opt
            Self::Partitioned(s) => s.as_materialized_series().is_null(),
            Self::Scalar(s) => {
                BooleanChunked::full(s.name().clone(), s.scalar().is_null(), s.len())
            },
        }
    }
    #[inline]
    pub fn is_not_null(&self) -> BooleanChunked {
        match self {
            Self::Series(s) => s.is_not_null(),
            // @partition-opt
            Self::Partitioned(s) => s.as_materialized_series().is_not_null(),
            Self::Scalar(s) => {
                BooleanChunked::full(s.name().clone(), !s.scalar().is_null(), s.len())
            },
        }
    }

    pub fn to_physical_repr(&self) -> Column {
        // @scalar-opt
        self.as_materialized_series()
            .to_physical_repr()
            .into_owned()
            .into()
    }
    /// # Safety
    ///
    /// This can lead to invalid memory access in downstream code.
    pub unsafe fn from_physical_unchecked(&self, dtype: &DataType) -> PolarsResult<Column> {
        // @scalar-opt
        self.as_materialized_series()
            .from_physical_unchecked(dtype)
            .map(Column::from)
    }

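    /// Return at most the first `length` rows (default [`HEAD_DEFAULT_LENGTH`]).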
    pub fn head(&self, length: Option<usize>) -> Column {
        let len = length.unwrap_or(HEAD_DEFAULT_LENGTH);
        let len = usize::min(len, self.len());
        self.slice(0, len)
    }
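    /// Return at most the last `length` rows (default [`TAIL_DEFAULT_LENGTH`]).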
    pub fn tail(&self, length: Option<usize>) -> Column {
        let len = length.unwrap_or(TAIL_DEFAULT_LENGTH);
        let len = usize::min(len, self.len());
        debug_assert!(len <= i64::MAX as usize);
        self.slice(-(len as i64), len)
    }
    pub fn slice(&self, offset: i64, length: usize) -> Column {
        match self {
            Column::Series(s) => s.slice(offset, length).into(),
            // @partition-opt
            Column::Partitioned(s) => s.as_materialized_series().slice(offset, length).into(),
            Column::Scalar(s) => {
                let (_, length) = slice_offsets(offset, length, s.len());
                s.resize(length).into()
            },
        }
    }

    pub fn split_at(&self, offset: i64) -> (Column, Column) {
        // @scalar-opt
        let (l, r) = self.as_materialized_series().split_at(offset);
        (l.into(), r.into())
    }

    #[inline]
    pub fn null_count(&self) -> usize {
        match self {
            Self::Series(s) => s.null_count(),
            Self::Partitioned(s) => s.null_count(),
            Self::Scalar(s) if s.scalar().is_null() => s.len(),
            Self::Scalar(_) => 0,
        }
    }

    pub fn take(&self, indices: &IdxCa) -> PolarsResult<Column> {
        check_bounds_ca(indices, self.len() as IdxSize)?;
        Ok(unsafe { self.take_unchecked(indices) })
    }
    pub fn take_slice(&self, indices: &[IdxSize]) -> PolarsResult<Column> {
        check_bounds(indices, self.len() as IdxSize)?;
        Ok(unsafe { self.take_slice_unchecked(indices) })
    }
    /// # Safety
    ///
    /// No bounds checks on the indices are performed.
    pub unsafe fn take_unchecked(&self, indices: &IdxCa) -> Column {
        debug_assert!(check_bounds_ca(indices, self.len() as IdxSize).is_ok());

        match self {
            Self::Series(s) => unsafe { s.take_unchecked(indices) }.into(),
            Self::Partitioned(s) => {
                let s = s.as_materialized_series();
                unsafe { s.take_unchecked(indices) }.into()
            },
            Self::Scalar(s) => {
                let idxs_length = indices.len();
                let idxs_null_count = indices.null_count();

                let scalar = ScalarColumn::from_single_value_series(
                    s.as_single_value_series().take_unchecked(&IdxCa::new(
                        indices.name().clone(),
                        &[0][..s.len().min(1)],
                    )),
                    idxs_length,
                );

                // We need to make sure that null values in `idx` become null values in the result
                if idxs_null_count == 0 || scalar.has_nulls() {
                    scalar.into_column()
                } else if idxs_null_count == idxs_length {
                    scalar.into_nulls().into_column()
                } else {
                    let validity = indices.rechunk_validity();
                    let series = scalar.take_materialized_series();
                    let name = series.name().clone();
                    let dtype = series.dtype().clone();
                    let mut chunks = series.into_chunks();
                    assert_eq!(chunks.len(), 1);
                    chunks[0] = chunks[0].with_validity(validity);
                    unsafe { Series::from_chunks_and_dtype_unchecked(name, chunks, &dtype) }
                        .into_column()
                }
            },
        }
    }
    /// # Safety
    ///
    /// No bounds checks on the indices are performed.
    pub unsafe fn take_slice_unchecked(&self, indices: &[IdxSize]) -> Column {
        debug_assert!(check_bounds(indices, self.len() as IdxSize).is_ok());

        match self {
            Self::Series(s) => unsafe { s.take_slice_unchecked(indices) }.into(),
            Self::Partitioned(s) => {
                let s = s.as_materialized_series();
                unsafe { s.take_slice_unchecked(indices) }.into()
            },
            Self::Scalar(s) => ScalarColumn::from_single_value_series(
                s.as_single_value_series()
                    .take_slice_unchecked(&[0][..s.len().min(1)]),
                indices.len(),
            )
            .into(),
        }
    }

    /// General implementation for aggregation where a non-missing scalar would map to itself.
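    ///
    /// For example, `min` over a scalar column that repeats `5` maps every non-empty group to
    /// `5`, while empty groups become `null`.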
    #[inline(always)]
    #[cfg(any(feature = "algorithm_group_by", feature = "bitwise"))]
    fn agg_with_unit_scalar(
        &self,
        groups: &GroupsType,
        series_agg: impl Fn(&Series, &GroupsType) -> Series,
    ) -> Column {
        match self {
            Column::Series(s) => series_agg(s, groups).into_column(),
            // @partition-opt
            Column::Partitioned(s) => series_agg(s.as_materialized_series(), groups).into_column(),
            Column::Scalar(s) => {
                if s.is_empty() {
                    return series_agg(s.as_materialized_series(), groups).into_column();
                }

                // We utilize the aggregation on Series to see:
                // 1. the output datatype of the aggregation
                // 2. whether this aggregation is even defined
                let series_aggregation = series_agg(
                    &s.as_single_value_series(),
                    &GroupsType::Slice {
                        // @NOTE: this group is always valid since s is non-empty.
                        groups: vec![[0, 1]],
                        rolling: false,
                    },
                );

                // If the aggregation is not defined, just return all nulls.
                if series_aggregation.has_nulls() {
                    return Self::new_scalar(
                        series_aggregation.name().clone(),
                        Scalar::new(series_aggregation.dtype().clone(), AnyValue::Null),
                        groups.len(),
                    );
                }

                let mut scalar_col = s.resize(groups.len());
                // The aggregation might change the type (e.g. mean changes int -> float), so we do
                // a cast here to the output type.
                if series_aggregation.dtype() != s.dtype() {
                    scalar_col = scalar_col.cast(series_aggregation.dtype()).unwrap();
                }

                let Some(first_empty_idx) = groups.iter().position(|g| g.is_empty()) else {
                    // Fast path: no empty groups. Keep the scalar intact.
                    return scalar_col.into_column();
                };

                // All empty groups produce a *missing* or `null` value.
                let mut validity = BitmapBuilder::with_capacity(groups.len());
                validity.extend_constant(first_empty_idx, true);
                // SAFETY: We trust the length of this iterator.
                let iter = unsafe {
                    TrustMyLength::new(
                        groups.iter().skip(first_empty_idx).map(|g| !g.is_empty()),
                        groups.len() - first_empty_idx,
                    )
                };
                validity.extend_trusted_len_iter(iter);

                let mut s = scalar_col.take_materialized_series().rechunk();
                // SAFETY: We perform a compute_len afterwards.
                let chunks = unsafe { s.chunks_mut() };
                let arr = &mut chunks[0];
                *arr = arr.with_validity(validity.into_opt_validity());
                s.compute_len();

                s.into_column()
            },
        }
    }

    /// # Safety
    ///
    /// Does no bounds checks, groups must be correct.
    #[cfg(feature = "algorithm_group_by")]
    pub unsafe fn agg_min(&self, groups: &GroupsType) -> Self {
        self.agg_with_unit_scalar(groups, |s, g| unsafe { s.agg_min(g) })
    }

    /// # Safety
    ///
    /// Does no bounds checks, groups must be correct.
    #[cfg(feature = "algorithm_group_by")]
    pub unsafe fn agg_max(&self, groups: &GroupsType) -> Self {
        self.agg_with_unit_scalar(groups, |s, g| unsafe { s.agg_max(g) })
    }

    /// # Safety
    ///
    /// Does no bounds checks, groups must be correct.
    #[cfg(feature = "algorithm_group_by")]
    pub unsafe fn agg_mean(&self, groups: &GroupsType) -> Self {
        self.agg_with_unit_scalar(groups, |s, g| unsafe { s.agg_mean(g) })
    }

    /// # Safety
    ///
    /// Does no bounds checks, groups must be correct.
    #[cfg(feature = "algorithm_group_by")]
    pub unsafe fn agg_sum(&self, groups: &GroupsType) -> Self {
        // @scalar-opt
        unsafe { self.as_materialized_series().agg_sum(groups) }.into()
    }

    /// # Safety
    ///
    /// Does no bounds checks, groups must be correct.
    #[cfg(feature = "algorithm_group_by")]
    pub unsafe fn agg_first(&self, groups: &GroupsType) -> Self {
        self.agg_with_unit_scalar(groups, |s, g| unsafe { s.agg_first(g) })
    }

    /// # Safety
    ///
    /// Does no bounds checks, groups must be correct.
    #[cfg(feature = "algorithm_group_by")]
    pub unsafe fn agg_last(&self, groups: &GroupsType) -> Self {
        self.agg_with_unit_scalar(groups, |s, g| unsafe { s.agg_last(g) })
    }

    /// # Safety
    ///
    /// Does no bounds checks, groups must be correct.
    #[cfg(feature = "algorithm_group_by")]
    pub unsafe fn agg_n_unique(&self, groups: &GroupsType) -> Self {
        // @scalar-opt
        unsafe { self.as_materialized_series().agg_n_unique(groups) }.into()
    }

    /// # Safety
    ///
    /// Does no bounds checks, groups must be correct.
    #[cfg(feature = "algorithm_group_by")]
    pub unsafe fn agg_quantile(
        &self,
        groups: &GroupsType,
        quantile: f64,
        method: QuantileMethod,
    ) -> Self {
        // @scalar-opt
        unsafe {
            self.as_materialized_series()
                .agg_quantile(groups, quantile, method)
        }
        .into()
    }

    /// # Safety
    ///
    /// Does no bounds checks, groups must be correct.
    #[cfg(feature = "algorithm_group_by")]
    pub unsafe fn agg_median(&self, groups: &GroupsType) -> Self {
        self.agg_with_unit_scalar(groups, |s, g| unsafe { s.agg_median(g) })
    }

    /// # Safety
    ///
    /// Does no bounds checks, groups must be correct.
    #[cfg(feature = "algorithm_group_by")]
    pub unsafe fn agg_var(&self, groups: &GroupsType, ddof: u8) -> Self {
        // @scalar-opt
        unsafe { self.as_materialized_series().agg_var(groups, ddof) }.into()
    }

    /// # Safety
    ///
    /// Does no bounds checks, groups must be correct.
    #[cfg(feature = "algorithm_group_by")]
    pub unsafe fn agg_std(&self, groups: &GroupsType, ddof: u8) -> Self {
        // @scalar-opt
        unsafe { self.as_materialized_series().agg_std(groups, ddof) }.into()
    }

    /// # Safety
    ///
    /// Does no bounds checks, groups must be correct.
    #[cfg(feature = "algorithm_group_by")]
    pub unsafe fn agg_list(&self, groups: &GroupsType) -> Self {
        // @scalar-opt
        unsafe { self.as_materialized_series().agg_list(groups) }.into()
    }

    /// # Safety
    ///
    /// Does no bounds checks, groups must be correct.
    #[cfg(feature = "algorithm_group_by")]
    pub fn agg_valid_count(&self, groups: &GroupsType) -> Self {
        // @partition-opt
        // @scalar-opt
        unsafe { self.as_materialized_series().agg_valid_count(groups) }.into()
    }

    /// # Safety
    ///
    /// Does no bounds checks, groups must be correct.
    #[cfg(feature = "bitwise")]
    pub fn agg_and(&self, groups: &GroupsType) -> Self {
        self.agg_with_unit_scalar(groups, |s, g| unsafe { s.agg_and(g) })
    }
    /// # Safety
    ///
    /// Does no bounds checks, groups must be correct.
    #[cfg(feature = "bitwise")]
    pub fn agg_or(&self, groups: &GroupsType) -> Self {
        self.agg_with_unit_scalar(groups, |s, g| unsafe { s.agg_or(g) })
    }
    /// # Safety
    ///
    /// Does no bounds checks, groups must be correct.
    #[cfg(feature = "bitwise")]
    pub fn agg_xor(&self, groups: &GroupsType) -> Self {
        // @partition-opt
        // @scalar-opt
        unsafe { self.as_materialized_series().agg_xor(groups) }.into()
    }

    pub fn full_null(name: PlSmallStr, size: usize, dtype: &DataType) -> Self {
        Self::new_scalar(name, Scalar::new(dtype.clone(), AnyValue::Null), size)
    }

    pub fn is_empty(&self) -> bool {
        self.len() == 0
    }

    pub fn reverse(&self) -> Column {
        match self {
            Column::Series(s) => s.reverse().into(),
            Column::Partitioned(s) => s.reverse().into(),
            Column::Scalar(_) => self.clone(),
        }
    }

    pub fn equals(&self, other: &Column) -> bool {
        // @scalar-opt
        self.as_materialized_series()
            .equals(other.as_materialized_series())
    }

    pub fn equals_missing(&self, other: &Column) -> bool {
        // @scalar-opt
        self.as_materialized_series()
            .equals_missing(other.as_materialized_series())
    }

    pub fn set_sorted_flag(&mut self, sorted: IsSorted) {
        // @scalar-opt
        match self {
            Column::Series(s) => s.set_sorted_flag(sorted),
            Column::Partitioned(s) => s.set_sorted_flag(sorted),
            Column::Scalar(_) => {},
        }
    }

    pub fn get_flags(&self) -> StatisticsFlags {
        match self {
            Column::Series(s) => s.get_flags(),
            // @partition-opt
            Column::Partitioned(_) => StatisticsFlags::empty(),
            Column::Scalar(_) => {
                StatisticsFlags::IS_SORTED_ASC | StatisticsFlags::CAN_FAST_EXPLODE_LIST
            },
        }
    }

    /// Returns whether the flags were set
    pub fn set_flags(&mut self, flags: StatisticsFlags) -> bool {
        match self {
            Column::Series(s) => {
                s.set_flags(flags);
                true
            },
            // @partition-opt
            Column::Partitioned(_) => false,
            Column::Scalar(_) => false,
        }
    }

    pub fn vec_hash(&self, build_hasher: PlRandomState, buf: &mut Vec<u64>) -> PolarsResult<()> {
        // @scalar-opt?
        self.as_materialized_series().vec_hash(build_hasher, buf)
    }

    pub fn vec_hash_combine(
        &self,
        build_hasher: PlRandomState,
        hashes: &mut [u64],
    ) -> PolarsResult<()> {
        // @scalar-opt?
        self.as_materialized_series()
            .vec_hash_combine(build_hasher, hashes)
    }

    pub fn append(&mut self, other: &Column) -> PolarsResult<&mut Self> {
        // @scalar-opt
        self.into_materialized_series()
            .append(other.as_materialized_series())?;
        Ok(self)
    }
    pub fn append_owned(&mut self, other: Column) -> PolarsResult<&mut Self> {
        self.into_materialized_series()
            .append_owned(other.take_materialized_series())?;
        Ok(self)
    }

    pub fn arg_sort(&self, options: SortOptions) -> IdxCa {
        if self.is_empty() {
            return IdxCa::from_vec(self.name().clone(), Vec::new());
        }

        if self.null_count() == self.len() {
            // We might need to maintain order so just respect the descending parameter.
            let values = if options.descending {
                (0..self.len() as IdxSize).rev().collect()
            } else {
                (0..self.len() as IdxSize).collect()
            };

            return IdxCa::from_vec(self.name().clone(), values);
        }

        let is_sorted = Some(self.is_sorted_flag());
        let Some(is_sorted) = is_sorted.filter(|v| !matches!(v, IsSorted::Not)) else {
            return self.as_materialized_series().arg_sort(options);
        };

        // Fast path: the data is sorted.
        let is_sorted_dsc = matches!(is_sorted, IsSorted::Descending);
        let invert = options.descending != is_sorted_dsc;
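        // For example, ascending data with a descending sort requested means the non-null
        // index range is emitted in reverse; with `maintain_order` set, runs of duplicates
        // keep their original order within that reversal.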

        let mut values = Vec::with_capacity(self.len());

        #[inline(never)]
        fn extend(
            start: IdxSize,
            end: IdxSize,
            slf: &Column,
            values: &mut Vec<IdxSize>,
            is_only_nulls: bool,
            invert: bool,
            maintain_order: bool,
        ) {
            debug_assert!(start <= end);
            debug_assert!(start as usize <= slf.len());
            debug_assert!(end as usize <= slf.len());

            if !invert || is_only_nulls {
                values.extend(start..end);
                return;
            }

            // If we don't have to maintain order but do have to invert, just flip it around.
            if !maintain_order {
                values.extend((start..end).rev());
                return;
            }

            // If we want to maintain order but also need to invert, we need to invert
            // per group of items.
            //
            // @NOTE: Since the column is sorted, arg_unique can also take a fast path and
            // just do a single traversal.
            let arg_unique = slf
                .slice(start as i64, (end - start) as usize)
                .arg_unique()
                .unwrap();

            assert!(!arg_unique.has_nulls());

            let num_unique = arg_unique.len();

            // Fast path: all items are unique.
            if num_unique == (end - start) as usize {
                values.extend((start..end).rev());
                return;
            }

            if num_unique == 1 {
                values.extend(start..end);
                return;
            }

            let mut prev_idx = end - start;
            for chunk in arg_unique.downcast_iter() {
                for &idx in chunk.values().as_slice().iter().rev() {
                    values.extend(start + idx..start + prev_idx);
                    prev_idx = idx;
                }
            }
        }
        macro_rules! extend {
            ($start:expr, $end:expr) => {
                extend!($start, $end, is_only_nulls = false);
            };
            ($start:expr, $end:expr, is_only_nulls = $is_only_nulls:expr) => {
                extend(
                    $start,
                    $end,
                    self,
                    &mut values,
                    $is_only_nulls,
                    invert,
                    options.maintain_order,
                );
            };
        }

        let length = self.len() as IdxSize;
        let null_count = self.null_count() as IdxSize;

        if null_count == 0 {
            extend!(0, length);
        } else {
            let has_nulls_last = self.get(self.len() - 1).unwrap().is_null();
            match (options.nulls_last, has_nulls_last) {
                (true, true) => {
                    // Current: Nulls last, Wanted: Nulls last
                    extend!(0, length - null_count);
                    extend!(length - null_count, length, is_only_nulls = true);
                },
                (true, false) => {
                    // Current: Nulls first, Wanted: Nulls last
                    extend!(null_count, length);
                    extend!(0, null_count, is_only_nulls = true);
                },
                (false, true) => {
                    // Current: Nulls last, Wanted: Nulls first
                    extend!(length - null_count, length, is_only_nulls = true);
                    extend!(0, length - null_count);
                },
                (false, false) => {
                    // Current: Nulls first, Wanted: Nulls first
                    extend!(0, null_count, is_only_nulls = true);
                    extend!(null_count, length);
                },
            }
        }

        // @NOTE: This could theoretically be pushed into the previous operation, but it is
        // probably not worth it.
        if let Some(limit) = options.limit {
            let limit = limit.min(length);
            values.truncate(limit as usize);
        }

        IdxCa::from_vec(self.name().clone(), values)
    }

    pub fn arg_sort_multiple(
        &self,
        by: &[Column],
        options: &SortMultipleOptions,
    ) -> PolarsResult<IdxCa> {
        // @scalar-opt
        self.as_materialized_series().arg_sort_multiple(by, options)
    }

    pub fn arg_unique(&self) -> PolarsResult<IdxCa> {
        match self {
            Column::Scalar(s) => Ok(IdxCa::new_vec(s.name().clone(), vec![0])),
            _ => self.as_materialized_series().arg_unique(),
        }
    }

    pub fn bit_repr(&self) -> Option<BitRepr> {
        // @scalar-opt
        self.as_materialized_series().bit_repr()
    }

    pub fn into_frame(self) -> DataFrame {
        // SAFETY: A single-column dataframe cannot have length mismatches or duplicate names
        unsafe { DataFrame::new_no_checks(self.len(), vec![self]) }
    }

    pub fn extend(&mut self, other: &Column) -> PolarsResult<&mut Self> {
        // @scalar-opt
        self.into_materialized_series()
            .extend(other.as_materialized_series())?;
        Ok(self)
    }

    pub fn rechunk(&self) -> Column {
        match self {
            Column::Series(s) => s.rechunk().into(),
            Column::Partitioned(s) => {
                if let Some(s) = s.lazy_as_materialized_series() {
                    // This should always hold for partitioned.
                    debug_assert_eq!(s.n_chunks(), 1)
                }
                self.clone()
            },
            Column::Scalar(s) => {
                if s.lazy_as_materialized_series()
                    .filter(|x| x.n_chunks() > 1)
                    .is_some()
                {
                    Column::Scalar(ScalarColumn::new(
                        s.name().clone(),
                        s.scalar().clone(),
                        s.len(),
                    ))
                } else {
                    self.clone()
                }
            },
        }
    }

    pub fn explode(&self) -> PolarsResult<Column> {
        self.as_materialized_series().explode().map(Column::from)
    }
    pub fn implode(&self) -> PolarsResult<ListChunked> {
        self.as_materialized_series().implode()
    }

    pub fn fill_null(&self, strategy: FillNullStrategy) -> PolarsResult<Self> {
        // @scalar-opt
        self.as_materialized_series()
            .fill_null(strategy)
            .map(Column::from)
    }

    pub fn divide(&self, rhs: &Column) -> PolarsResult<Self> {
        // @scalar-opt
        self.as_materialized_series()
            .divide(rhs.as_materialized_series())
            .map(Column::from)
    }

    pub fn shift(&self, periods: i64) -> Column {
        // @scalar-opt
        self.as_materialized_series().shift(periods).into()
    }

    #[cfg(feature = "zip_with")]
    pub fn zip_with(&self, mask: &BooleanChunked, other: &Self) -> PolarsResult<Self> {
        // @scalar-opt
        self.as_materialized_series()
            .zip_with(mask, other.as_materialized_series())
            .map(Self::from)
    }

    #[cfg(feature = "zip_with")]
    pub fn zip_with_same_type(
        &self,
        mask: &ChunkedArray<BooleanType>,
        other: &Column,
    ) -> PolarsResult<Column> {
        // @scalar-opt
        self.as_materialized_series()
            .zip_with_same_type(mask, other.as_materialized_series())
            .map(Column::from)
    }

    pub fn drop_nulls(&self) -> Column {
        match self {
            Column::Series(s) => s.drop_nulls().into_column(),
            // @partition-opt
            Column::Partitioned(s) => s.as_materialized_series().drop_nulls().into_column(),
            Column::Scalar(s) => s.drop_nulls().into_column(),
        }
    }

    /// Packs every element into a list.
    pub fn as_list(&self) -> ListChunked {
        // @scalar-opt
        // @partition-opt
        self.as_materialized_series().as_list()
    }

    pub fn is_sorted_flag(&self) -> IsSorted {
        match self {
            Column::Series(s) => s.is_sorted_flag(),
            Column::Partitioned(s) => s.partitions().is_sorted_flag(),
            Column::Scalar(_) => IsSorted::Ascending,
        }
    }

    pub fn unique(&self) -> PolarsResult<Column> {
        match self {
            Column::Series(s) => s.unique().map(Column::from),
            // @partition-opt
            Column::Partitioned(s) => s.as_materialized_series().unique().map(Column::from),
            Column::Scalar(s) => {
                _ = s.as_single_value_series().unique()?;
                if s.is_empty() {
                    return Ok(s.clone().into_column());
                }

                Ok(s.resize(1).into_column())
            },
        }
    }
    pub fn unique_stable(&self) -> PolarsResult<Column> {
        match self {
            Column::Series(s) => s.unique_stable().map(Column::from),
            // @partition-opt
            Column::Partitioned(s) => s.as_materialized_series().unique_stable().map(Column::from),
            Column::Scalar(s) => {
                _ = s.as_single_value_series().unique_stable()?;
                if s.is_empty() {
                    return Ok(s.clone().into_column());
                }

                Ok(s.resize(1).into_column())
            },
        }
    }

    pub fn reshape_list(&self, dimensions: &[ReshapeDimension]) -> PolarsResult<Self> {
        // @scalar-opt
        self.as_materialized_series()
            .reshape_list(dimensions)
            .map(Self::from)
    }

    #[cfg(feature = "dtype-array")]
    pub fn reshape_array(&self, dimensions: &[ReshapeDimension]) -> PolarsResult<Self> {
        // @scalar-opt
        self.as_materialized_series()
            .reshape_array(dimensions)
            .map(Self::from)
    }

    pub fn sort(&self, sort_options: SortOptions) -> PolarsResult<Self> {
        // @scalar-opt
        self.as_materialized_series()
            .sort(sort_options)
            .map(Self::from)
    }

    pub fn filter(&self, filter: &BooleanChunked) -> PolarsResult<Self> {
        match self {
            Column::Series(s) => s.filter(filter).map(Column::from),
            Column::Partitioned(s) => s.as_materialized_series().filter(filter).map(Column::from),
            Column::Scalar(s) => {
                if s.is_empty() {
                    return Ok(s.clone().into_column());
                }

                // Broadcasting
                if filter.len() == 1 {
                    return match filter.get(0) {
                        Some(true) => Ok(s.clone().into_column()),
                        _ => Ok(s.resize(0).into_column()),
                    };
                }

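                // Full-length mask: keep one row per `true`; nulls in the mask drop rows.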
                Ok(s.resize(filter.sum().unwrap() as usize).into_column())
            },
        }
    }

    #[cfg(feature = "random")]
    pub fn shuffle(&self, seed: Option<u64>) -> Self {
        // @scalar-opt
        self.as_materialized_series().shuffle(seed).into()
    }

    #[cfg(feature = "random")]
    pub fn sample_frac(
        &self,
        frac: f64,
        with_replacement: bool,
        shuffle: bool,
        seed: Option<u64>,
    ) -> PolarsResult<Self> {
        self.as_materialized_series()
            .sample_frac(frac, with_replacement, shuffle, seed)
            .map(Self::from)
    }

    #[cfg(feature = "random")]
    pub fn sample_n(
        &self,
        n: usize,
        with_replacement: bool,
        shuffle: bool,
        seed: Option<u64>,
    ) -> PolarsResult<Self> {
        self.as_materialized_series()
            .sample_n(n, with_replacement, shuffle, seed)
            .map(Self::from)
    }

    pub fn gather_every(&self, n: usize, offset: usize) -> Column {
        if self.len().saturating_sub(offset) == 0 {
            return self.clear();
        }

        match self {
            Column::Series(s) => s.gather_every(n, offset).into(),
            Column::Partitioned(s) => s.as_materialized_series().gather_every(n, offset).into(),
            Column::Scalar(s) => {
                let total = s.len() - offset;
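                // `1 + (total - 1) / n` is ceil(total / n): the number of gathered elements.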
                s.resize(1 + (total - 1) / n).into()
            },
        }
    }

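    /// Extend this [`Column`] with `n` copies of `value`.
    ///
    /// A scalar column whose scalar equals `value` is cheaply resized; a scalar column with a
    /// different value is materialized first. An empty column becomes a new scalar column.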
    pub fn extend_constant(&self, value: AnyValue, n: usize) -> PolarsResult<Self> {
        if self.is_empty() {
            return Ok(Self::new_scalar(
                self.name().clone(),
                Scalar::new(self.dtype().clone(), value.into_static()),
                n,
            ));
        }

        match self {
            Column::Series(s) => s.extend_constant(value, n).map(Column::from),
            Column::Partitioned(s) => s.extend_constant(value, n).map(Column::from),
            Column::Scalar(s) => {
                if s.scalar().as_any_value() == value {
                    Ok(s.resize(s.len() + n).into())
                } else {
                    s.as_materialized_series()
                        .extend_constant(value, n)
                        .map(Column::from)
                }
            },
        }
    }

    pub fn is_finite(&self) -> PolarsResult<BooleanChunked> {
        self.try_map_unary_elementwise_to_bool(|s| s.is_finite())
    }
    pub fn is_infinite(&self) -> PolarsResult<BooleanChunked> {
        self.try_map_unary_elementwise_to_bool(|s| s.is_infinite())
    }
    pub fn is_nan(&self) -> PolarsResult<BooleanChunked> {
        self.try_map_unary_elementwise_to_bool(|s| s.is_nan())
    }
    pub fn is_not_nan(&self) -> PolarsResult<BooleanChunked> {
        self.try_map_unary_elementwise_to_bool(|s| s.is_not_nan())
    }

    pub fn wrapping_trunc_div_scalar<T>(&self, rhs: T) -> Self
    where
        T: Num + NumCast,
    {
        // @scalar-opt
        self.as_materialized_series()
            .wrapping_trunc_div_scalar(rhs)
            .into()
    }

    pub fn product(&self) -> PolarsResult<Scalar> {
        // @scalar-opt
        self.as_materialized_series().product()
    }

    pub fn phys_iter(&self) -> SeriesPhysIter<'_> {
        // @scalar-opt
        self.as_materialized_series().phys_iter()
    }

    #[inline]
    pub fn get(&self, index: usize) -> PolarsResult<AnyValue> {
        polars_ensure!(index < self.len(), oob = index, self.len());

        // SAFETY: Bounds check done just before.
        Ok(unsafe { self.get_unchecked(index) })
    }
    /// # Safety
    ///
    /// Does not perform bounds check on `index`
    #[inline(always)]
    pub unsafe fn get_unchecked(&self, index: usize) -> AnyValue {
        debug_assert!(index < self.len());

        match self {
            Column::Series(s) => unsafe { s.get_unchecked(index) },
            Column::Partitioned(s) => unsafe { s.get_unchecked(index) },
            Column::Scalar(s) => s.scalar().as_any_value(),
        }
    }

    #[cfg(feature = "object")]
    pub fn get_object(
        &self,
        index: usize,
    ) -> Option<&dyn crate::chunked_array::object::PolarsObjectSafe> {
        self.as_materialized_series().get_object(index)
    }

    pub fn bitand(&self, rhs: &Self) -> PolarsResult<Self> {
        self.try_apply_broadcasting_binary_elementwise(rhs, |l, r| l & r)
    }
    pub fn bitor(&self, rhs: &Self) -> PolarsResult<Self> {
        self.try_apply_broadcasting_binary_elementwise(rhs, |l, r| l | r)
    }
    pub fn bitxor(&self, rhs: &Self) -> PolarsResult<Self> {
        self.try_apply_broadcasting_binary_elementwise(rhs, |l, r| l ^ r)
    }

    pub fn try_add_owned(self, other: Self) -> PolarsResult<Self> {
        match (self, other) {
            (Column::Series(lhs), Column::Series(rhs)) => {
                lhs.take().try_add_owned(rhs.take()).map(Column::from)
            },
            (lhs, rhs) => lhs + rhs,
        }
    }
    pub fn try_sub_owned(self, other: Self) -> PolarsResult<Self> {
        match (self, other) {
            (Column::Series(lhs), Column::Series(rhs)) => {
                lhs.take().try_sub_owned(rhs.take()).map(Column::from)
            },
            (lhs, rhs) => lhs - rhs,
        }
    }
    pub fn try_mul_owned(self, other: Self) -> PolarsResult<Self> {
        match (self, other) {
            (Column::Series(lhs), Column::Series(rhs)) => {
                lhs.take().try_mul_owned(rhs.take()).map(Column::from)
            },
            (lhs, rhs) => lhs * rhs,
        }
    }

    pub(crate) fn str_value(&self, index: usize) -> PolarsResult<Cow<str>> {
        Ok(self.get(index)?.str_value())
    }

    pub fn min_reduce(&self) -> PolarsResult<Scalar> {
        match self {
            Column::Series(s) => s.min_reduce(),
            Column::Partitioned(s) => s.min_reduce(),
            Column::Scalar(s) => {
                // We don't really want to deal with the full semantics here, so we just
                // cast to a single-value series. This is a tiny bit wasteful, but probably fine.
                s.as_single_value_series().min_reduce()
            },
        }
    }
    pub fn max_reduce(&self) -> PolarsResult<Scalar> {
        match self {
            Column::Series(s) => s.max_reduce(),
            Column::Partitioned(s) => s.max_reduce(),
            Column::Scalar(s) => {
                // We don't really want to deal with the full semantics here, so we just
                // cast to a single-value series. This is a tiny bit wasteful, but probably fine.
                s.as_single_value_series().max_reduce()
            },
        }
    }
    pub fn median_reduce(&self) -> PolarsResult<Scalar> {
        match self {
            Column::Series(s) => s.median_reduce(),
            Column::Partitioned(s) => s.as_materialized_series().median_reduce(),
            Column::Scalar(s) => {
                // We don't really want to deal with the full semantics here, so we just
                // cast to a single-value series. This is a tiny bit wasteful, but probably fine.
                s.as_single_value_series().median_reduce()
            },
        }
    }
    pub fn mean_reduce(&self) -> Scalar {
        match self {
            Column::Series(s) => s.mean_reduce(),
            Column::Partitioned(s) => s.as_materialized_series().mean_reduce(),
            Column::Scalar(s) => {
                // We don't really want to deal with the full semantics here, so we just
                // cast to a single-value series. This is a tiny bit wasteful, but probably fine.
                s.as_single_value_series().mean_reduce()
            },
        }
    }
    pub fn std_reduce(&self, ddof: u8) -> PolarsResult<Scalar> {
        match self {
            Column::Series(s) => s.std_reduce(ddof),
            Column::Partitioned(s) => s.as_materialized_series().std_reduce(ddof),
            Column::Scalar(s) => {
                // We don't really want to deal with the full semantics here, so we just
                // cast to a single-value series. This is a tiny bit wasteful, but probably fine.
                s.as_single_value_series().std_reduce(ddof)
            },
        }
    }
    pub fn var_reduce(&self, ddof: u8) -> PolarsResult<Scalar> {
        match self {
            Column::Series(s) => s.var_reduce(ddof),
            Column::Partitioned(s) => s.as_materialized_series().var_reduce(ddof),
            Column::Scalar(s) => {
1493                // We don't really want to deal with handling the full semantics here so we just
1494                // cast to a single value series. This is a tiny bit wasteful, but probably fine.
1495                s.as_single_value_series().var_reduce(ddof)
1496            },
1497        }
1498    }
    pub fn sum_reduce(&self) -> PolarsResult<Scalar> {
        // @partition-opt
        // @scalar-opt
        self.as_materialized_series().sum_reduce()
    }
    pub fn and_reduce(&self) -> PolarsResult<Scalar> {
        match self {
            Column::Series(s) => s.and_reduce(),
            Column::Partitioned(s) => s.and_reduce(),
            Column::Scalar(s) => {
                // We don't really want to handle the full semantics here, so we just
                // cast to a single-value series. This is a tiny bit wasteful, but probably fine.
                s.as_single_value_series().and_reduce()
            },
        }
    }
    pub fn or_reduce(&self) -> PolarsResult<Scalar> {
        match self {
            Column::Series(s) => s.or_reduce(),
            Column::Partitioned(s) => s.or_reduce(),
            Column::Scalar(s) => {
                // We don't really want to handle the full semantics here, so we just
                // cast to a single-value series. This is a tiny bit wasteful, but probably fine.
                s.as_single_value_series().or_reduce()
            },
        }
    }
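    /// Fold this column with XOR.
    ///
    /// The scalar arm below exploits parity: XOR over an even number of copies of
    /// a value is 0, while over an odd number it is the value itself. Illustrative
    /// sketch (marked `ignore`):
    ///
    /// ```ignore
    /// let s = Scalar::new(DataType::Int32, AnyValue::Int32(5));
    /// let c = Column::new_scalar("x".into(), s, 3);
    /// // 5 ^ 5 ^ 5 == 5: an odd number of elements yields the value.
    /// assert_eq!(c.xor_reduce()?.value(), &AnyValue::Int32(5));
    /// ```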
    pub fn xor_reduce(&self) -> PolarsResult<Scalar> {
        match self {
            Column::Series(s) => s.xor_reduce(),
            // @partition-opt
            Column::Partitioned(s) => s.as_materialized_series().xor_reduce(),
            Column::Scalar(s) => {
                // We don't really want to handle the full semantics here, so we just
                // cast to a small series. This is a tiny bit wasteful, but probably fine.
                //
                // We have to deal with the fact that XOR over an even number of elements
                // is 0 and over an odd number of elements is the value itself. If there
                // are zero elements, the result should be `null`.
                s.as_n_values_series(2 - s.len() % 2).xor_reduce()
            },
        }
    }
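    /// Count the unique values in this column.
    ///
    /// A scalar column is resolved from a single value rather than by scanning
    /// all `len()` elements. Illustrative sketch (marked `ignore`):
    ///
    /// ```ignore
    /// let s = Scalar::new(DataType::Int32, AnyValue::Int32(7));
    /// let c = Column::new_scalar("x".into(), s, 1_000);
    /// assert_eq!(c.n_unique()?, 1); // one value, repeated 1000 times
    /// ```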
    pub fn n_unique(&self) -> PolarsResult<usize> {
        match self {
            Column::Series(s) => s.n_unique(),
            Column::Partitioned(s) => s.partitions().n_unique(),
            Column::Scalar(s) => s.as_single_value_series().n_unique(),
        }
    }
    pub fn quantile_reduce(&self, quantile: f64, method: QuantileMethod) -> PolarsResult<Scalar> {
        self.as_materialized_series()
            .quantile_reduce(quantile, method)
    }

    pub(crate) fn estimated_size(&self) -> usize {
        // @scalar-opt
        self.as_materialized_series().estimated_size()
    }

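    /// Sort this column with the given options.
    ///
    /// Sorting a scalar column is a no-op, but the unit-length sort is still
    /// attempted so the same errors as [`Series::sort_with`] surface. Illustrative
    /// sketch (marked `ignore`; the builder call is an assumption about
    /// `SortOptions`):
    ///
    /// ```ignore
    /// let c = Column::new("x".into(), [3i32, 1, 2]);
    /// let sorted = c.sort_with(SortOptions::default().with_order_descending(true))?;
    /// ```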
    pub fn sort_with(&self, options: SortOptions) -> PolarsResult<Self> {
        match self {
            Column::Series(s) => s.sort_with(options).map(Self::from),
            // @partition-opt
            Column::Partitioned(s) => s
                .as_materialized_series()
                .sort_with(options)
                .map(Self::from),
            Column::Scalar(s) => {
                // This makes this function return the same errors as Series::sort_with.
                _ = s.as_single_value_series().sort_with(options)?;

                Ok(self.clone())
            },
        }
    }

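    /// Map an elementwise `Series -> BooleanChunked` function over this column.
    ///
    /// For a scalar column, `f` runs on a unit-length series and the result is
    /// broadcast back to `len()`. Illustrative sketch (marked `ignore`):
    ///
    /// ```ignore
    /// let c = Column::new("x".into(), [Some(1i32), None, Some(3)]);
    /// let mask = c.map_unary_elementwise_to_bool(|s| s.is_null());
    /// ```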
    pub fn map_unary_elementwise_to_bool(
        &self,
        f: impl Fn(&Series) -> BooleanChunked,
    ) -> BooleanChunked {
        self.try_map_unary_elementwise_to_bool(|s| Ok(f(s)))
            .unwrap()
    }
    pub fn try_map_unary_elementwise_to_bool(
        &self,
        f: impl Fn(&Series) -> PolarsResult<BooleanChunked>,
    ) -> PolarsResult<BooleanChunked> {
        match self {
            Column::Series(s) => f(s),
            Column::Partitioned(s) => f(s.as_materialized_series()),
            Column::Scalar(s) => Ok(f(&s.as_single_value_series())?.new_from_index(0, s.len())),
        }
    }

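    /// Apply an elementwise `Series -> Series` function over this column.
    ///
    /// The representation is preserved where possible: for a scalar column, `f`
    /// is evaluated once on a unit-length series and re-broadcast. Illustrative
    /// sketch (marked `ignore`; assumes `&Series * i32` is available):
    ///
    /// ```ignore
    /// let s = Scalar::new(DataType::Int32, AnyValue::Int32(2));
    /// let c = Column::new_scalar("x".into(), s, 1_000_000);
    /// // `f` runs on one element instead of a million.
    /// let doubled = c.apply_unary_elementwise(|s| s * 2);
    /// ```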
    pub fn apply_unary_elementwise(&self, f: impl Fn(&Series) -> Series) -> Column {
        self.try_apply_unary_elementwise(|s| Ok(f(s))).unwrap()
    }
    pub fn try_apply_unary_elementwise(
        &self,
        f: impl Fn(&Series) -> PolarsResult<Series>,
    ) -> PolarsResult<Column> {
        match self {
            Column::Series(s) => f(s).map(Column::from),
            Column::Partitioned(s) => s.try_apply_unary_elementwise(f).map(Self::from),
            Column::Scalar(s) => Ok(ScalarColumn::from_single_value_series(
                f(&s.as_single_value_series())?,
                s.len(),
            )
            .into()),
        }
    }

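    /// Apply a binary elementwise function, broadcasting unit-length inputs.
    ///
    /// The lengths must be equal, or one side must have length 1, in which case
    /// it is broadcast to the other side's length. Illustrative sketch (marked
    /// `ignore`; assumes `&Series + &Series` yields `PolarsResult<Series>` as
    /// elsewhere in this module):
    ///
    /// ```ignore
    /// let xs = Column::new("x".into(), [1i32, 2, 3]);
    /// let one = Column::new("one".into(), [10i32]); // length 1: broadcasts
    /// let sums = xs.try_apply_broadcasting_binary_elementwise(&one, |l, r| l + r)?;
    /// ```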
    pub fn apply_broadcasting_binary_elementwise(
        &self,
        other: &Self,
        op: impl Fn(&Series, &Series) -> Series,
    ) -> PolarsResult<Column> {
        self.try_apply_broadcasting_binary_elementwise(other, |lhs, rhs| Ok(op(lhs, rhs)))
    }
    pub fn try_apply_broadcasting_binary_elementwise(
        &self,
        other: &Self,
        op: impl Fn(&Series, &Series) -> PolarsResult<Series>,
    ) -> PolarsResult<Column> {
        fn output_length(a: &Column, b: &Column) -> PolarsResult<usize> {
            match (a.len(), b.len()) {
                // broadcasting
                (1, o) | (o, 1) => Ok(o),
                // equal
                (a, b) if a == b => Ok(a),
                // unequal
                (a, b) => {
                    polars_bail!(InvalidOperation: "cannot do a binary operation on columns of different lengths: got {} and {}", a, b)
                },
            }
        }

        // Here we rely on the underlying broadcast operations.
        let length = output_length(self, other)?;
        match (self, other) {
            (Column::Series(lhs), Column::Series(rhs)) => op(lhs, rhs).map(Column::from),
            (Column::Series(lhs), Column::Scalar(rhs)) => {
                op(lhs, &rhs.as_single_value_series()).map(Column::from)
            },
            (Column::Scalar(lhs), Column::Series(rhs)) => {
                op(&lhs.as_single_value_series(), rhs).map(Column::from)
            },
            (Column::Scalar(lhs), Column::Scalar(rhs)) => {
                let lhs = lhs.as_single_value_series();
                let rhs = rhs.as_single_value_series();

                Ok(ScalarColumn::from_single_value_series(op(&lhs, &rhs)?, length).into_column())
            },
            // @partition-opt
            (lhs, rhs) => {
                op(lhs.as_materialized_series(), rhs.as_materialized_series()).map(Column::from)
            },
        }
    }

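    /// Apply a binary elementwise function to two equal-length columns, with
    /// dedicated paths for the scalar cases: `f` handles series/series, `f_lb` a
    /// scalar on the left, and `f_rb` a scalar on the right. For scalar/scalar,
    /// `f` is evaluated once on unit-length series and the result re-broadcast.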
    pub fn apply_binary_elementwise(
        &self,
        other: &Self,
        f: impl Fn(&Series, &Series) -> Series,
        f_lb: impl Fn(&Scalar, &Series) -> Series,
        f_rb: impl Fn(&Series, &Scalar) -> Series,
    ) -> Column {
        self.try_apply_binary_elementwise(
            other,
            |lhs, rhs| Ok(f(lhs, rhs)),
            |lhs, rhs| Ok(f_lb(lhs, rhs)),
            |lhs, rhs| Ok(f_rb(lhs, rhs)),
        )
        .unwrap()
    }
    pub fn try_apply_binary_elementwise(
        &self,
        other: &Self,
        f: impl Fn(&Series, &Series) -> PolarsResult<Series>,
        f_lb: impl Fn(&Scalar, &Series) -> PolarsResult<Series>,
        f_rb: impl Fn(&Series, &Scalar) -> PolarsResult<Series>,
    ) -> PolarsResult<Column> {
        debug_assert_eq!(self.len(), other.len());

        match (self, other) {
            (Column::Series(lhs), Column::Series(rhs)) => f(lhs, rhs).map(Column::from),
            (Column::Series(lhs), Column::Scalar(rhs)) => f_rb(lhs, rhs.scalar()).map(Column::from),
            (Column::Scalar(lhs), Column::Series(rhs)) => f_lb(lhs.scalar(), rhs).map(Column::from),
            (Column::Scalar(lhs), Column::Scalar(rhs)) => {
                let lhs = lhs.as_single_value_series();
                let rhs = rhs.as_single_value_series();

                Ok(
                    ScalarColumn::from_single_value_series(f(&lhs, &rhs)?, self.len())
                        .into_column(),
                )
            },
            // @partition-opt
            (lhs, rhs) => {
                f(lhs.as_materialized_series(), rhs.as_materialized_series()).map(Column::from)
            },
        }
    }

    #[cfg(feature = "approx_unique")]
    pub fn approx_n_unique(&self) -> PolarsResult<IdxSize> {
        match self {
            Column::Series(s) => s.approx_n_unique(),
            // @partition-opt
            Column::Partitioned(s) => s.as_materialized_series().approx_n_unique(),
            Column::Scalar(s) => {
                // @NOTE: We do this for the error handling.
                s.as_single_value_series().approx_n_unique()?;
                Ok(1)
            },
        }
    }

    pub fn n_chunks(&self) -> usize {
        match self {
            Column::Series(s) => s.n_chunks(),
            Column::Scalar(s) => s.lazy_as_materialized_series().map_or(1, |x| x.n_chunks()),
            Column::Partitioned(s) => {
                if let Some(s) = s.lazy_as_materialized_series() {
                    // This should always hold for partitioned.
                    debug_assert_eq!(s.n_chunks(), 1)
                }
                1
            },
        }
    }

    #[expect(clippy::wrong_self_convention)]
    pub(crate) fn into_total_ord_inner<'a>(&'a self) -> Box<dyn TotalOrdInner + 'a> {
        // @scalar-opt
        self.as_materialized_series().into_total_ord_inner()
    }
    #[expect(unused, clippy::wrong_self_convention)]
    pub(crate) fn into_total_eq_inner<'a>(&'a self) -> Box<dyn TotalEqInner + 'a> {
        // @scalar-opt
        self.as_materialized_series().into_total_eq_inner()
    }
}

impl Default for Column {
    fn default() -> Self {
        Self::new_scalar(
            PlSmallStr::EMPTY,
            Scalar::new(DataType::Int64, AnyValue::Null),
            0,
        )
    }
}

impl PartialEq for Column {
    fn eq(&self, other: &Self) -> bool {
        // @scalar-opt
        self.as_materialized_series()
            .eq(other.as_materialized_series())
    }
}

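/// A unit-length [`Series`] converts into a [`Column::Scalar`]; any other length
/// becomes a [`Column::Series`]. Illustrative sketch (marked `ignore`):
///
/// ```ignore
/// let c: Column = Series::new("x".into(), [1i32]).into();
/// assert!(matches!(c, Column::Scalar(_)));
/// ```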
impl From<Series> for Column {
    #[inline]
    fn from(series: Series) -> Self {
        // We instantiate a scalar column if the Series has length 1. This makes it
        // possible for future operations to be faster.
        if series.len() == 1 {
            return Self::Scalar(ScalarColumn::unit_scalar_from_series(series));
        }

        Self::Series(SeriesColumn::new(series))
    }
}

impl<T: IntoSeries> IntoColumn for T {
    #[inline]
    fn into_column(self) -> Column {
        self.into_series().into()
    }
}

impl IntoColumn for Column {
    #[inline(always)]
    fn into_column(self) -> Column {
        self
    }
}

/// We don't want to serialize the scalar columns, so this wrapper lets us pretend
/// that columns are always materialized, without implementing `From<Column>` for
/// [`Series`]: those casts should stay explicit.
#[derive(Clone)]
#[cfg_attr(feature = "serde", derive(serde::Serialize))]
#[cfg_attr(feature = "serde", serde(into = "Series"))]
struct _SerdeSeries(Series);

impl From<Column> for _SerdeSeries {
    #[inline]
    fn from(value: Column) -> Self {
        Self(value.take_materialized_series())
    }
}

impl From<_SerdeSeries> for Series {
    #[inline]
    fn from(value: _SerdeSeries) -> Self {
        value.0
    }
}