polars_core/chunked_array/list/
mod.rs

1//! Special list utility methods
2pub(super) mod iterator;
3
4use std::borrow::Cow;
5
6use crate::prelude::*;
7
8impl ListChunked {
9    /// Get the inner data type of the list.
10    pub fn inner_dtype(&self) -> &DataType {
11        match self.dtype() {
12            DataType::List(dt) => dt.as_ref(),
13            _ => unreachable!(),
14        }
15    }
16
17    pub fn set_inner_dtype(&mut self, dtype: DataType) {
18        assert_eq!(dtype.to_physical(), self.inner_dtype().to_physical());
19        let field = Arc::make_mut(&mut self.field);
20        field.coerce(DataType::List(Box::new(dtype)));
21    }
22
23    pub fn set_fast_explode(&mut self) {
24        self.set_fast_explode_list(true)
25    }
26
27    pub fn _can_fast_explode(&self) -> bool {
28        self.get_fast_explode_list()
29    }
30
31    /// Set the logical type of the [`ListChunked`].
32    ///
33    /// # Safety
34    /// The caller must ensure that the logical type given fits the physical type of the array.
35    pub unsafe fn to_logical(&mut self, inner_dtype: DataType) {
36        debug_assert_eq!(&inner_dtype.to_physical(), self.inner_dtype());
37        let fld = Arc::make_mut(&mut self.field);
38        fld.coerce(DataType::List(Box::new(inner_dtype)))
39    }
40
41    /// Convert the datatype of the list into the physical datatype.
42    pub fn to_physical_repr(&self) -> Cow<ListChunked> {
43        let Cow::Owned(physical_repr) = self.get_inner().to_physical_repr() else {
44            return Cow::Borrowed(self);
45        };
46
47        assert_eq!(self.chunks().len(), physical_repr.chunks().len());
48
49        let chunks: Vec<_> = self
50            .downcast_iter()
51            .zip(physical_repr.into_chunks())
52            .map(|(chunk, values)| {
53                LargeListArray::new(
54                    ArrowDataType::LargeList(Box::new(ArrowField::new(
55                        PlSmallStr::from_static("item"),
56                        values.dtype().clone(),
57                        true,
58                    ))),
59                    chunk.offsets().clone(),
60                    values,
61                    chunk.validity().cloned(),
62                )
63                .to_boxed()
64            })
65            .collect();
66
67        let name = self.name().clone();
68        let dtype = DataType::List(Box::new(self.inner_dtype().to_physical()));
69        Cow::Owned(unsafe { ListChunked::from_chunks_and_dtype_unchecked(name, chunks, dtype) })
70    }
71
72    /// Convert a non-logical [`ListChunked`] back into a logical [`ListChunked`] without casting.
73    ///
74    /// # Safety
75    ///
76    /// This can lead to invalid memory access in downstream code.
77    pub unsafe fn from_physical_unchecked(
78        &self,
79        to_inner_dtype: DataType,
80    ) -> PolarsResult<ListChunked> {
81        debug_assert!(!self.inner_dtype().is_logical());
82
83        let inner_chunks = self
84            .downcast_iter()
85            .map(|chunk| chunk.values())
86            .cloned()
87            .collect();
88
89        let inner = unsafe {
90            Series::from_chunks_and_dtype_unchecked(
91                PlSmallStr::EMPTY,
92                inner_chunks,
93                self.inner_dtype(),
94            )
95        };
96        let inner = unsafe { inner.from_physical_unchecked(&to_inner_dtype) }?;
97
98        let chunks: Vec<_> = self
99            .downcast_iter()
100            .zip(inner.into_chunks())
101            .map(|(chunk, values)| {
102                LargeListArray::new(
103                    ArrowDataType::LargeList(Box::new(ArrowField::new(
104                        PlSmallStr::from_static("item"),
105                        values.dtype().clone(),
106                        true,
107                    ))),
108                    chunk.offsets().clone(),
109                    values,
110                    chunk.validity().cloned(),
111                )
112                .to_boxed()
113            })
114            .collect();
115
116        let name = self.name().clone();
117        let dtype = DataType::List(Box::new(to_inner_dtype));
118        Ok(unsafe { ListChunked::from_chunks_and_dtype_unchecked(name, chunks, dtype) })
119    }
120
121    /// Get the inner values as [`Series`], ignoring the list offsets.
122    pub fn get_inner(&self) -> Series {
123        let chunks: Vec<_> = self.downcast_iter().map(|c| c.values().clone()).collect();
124
125        // SAFETY: Data type of arrays matches because they are chunks from the same array.
126        unsafe {
127            Series::from_chunks_and_dtype_unchecked(self.name().clone(), chunks, self.inner_dtype())
128        }
129    }
130
131    /// Ignore the list indices and apply `func` to the inner type as [`Series`].
132    pub fn apply_to_inner(
133        &self,
134        func: &dyn Fn(Series) -> PolarsResult<Series>,
135    ) -> PolarsResult<ListChunked> {
136        // generated Series will have wrong length otherwise.
137        let ca = self.rechunk();
138        let arr = ca.downcast_iter().next().unwrap();
139
140        // SAFETY:
141        // Inner dtype is passed correctly
142        let elements = unsafe {
143            Series::from_chunks_and_dtype_unchecked(
144                self.name().clone(),
145                vec![arr.values().clone()],
146                ca.inner_dtype(),
147            )
148        };
149
150        let expected_len = elements.len();
151        let out: Series = func(elements)?;
152        polars_ensure!(
153            out.len() == expected_len,
154            ComputeError: "the function should apply element-wise, it removed elements instead"
155        );
156        let out = out.rechunk();
157        let values = out.chunks()[0].clone();
158
159        let inner_dtype = LargeListArray::default_datatype(values.dtype().clone());
160        let arr = LargeListArray::new(
161            inner_dtype,
162            (*arr.offsets()).clone(),
163            values,
164            arr.validity().cloned(),
165        );
166
167        // SAFETY: arr's inner dtype is derived from out dtype.
168        Ok(unsafe {
169            ListChunked::from_chunks_and_dtype_unchecked(
170                ca.name().clone(),
171                vec![Box::new(arr)],
172                DataType::List(Box::new(out.dtype().clone())),
173            )
174        })
175    }
176
177    pub fn rechunk_and_trim_to_normalized_offsets(&self) -> Self {
178        Self::with_chunk(
179            self.name().clone(),
180            self.rechunk()
181                .downcast_get(0)
182                .unwrap()
183                .trim_to_normalized_offsets_recursive(),
184        )
185    }
186}