// polars_core/chunked_array/from.rs
use super::*;
2
3#[allow(clippy::all)]
4fn from_chunks_list_dtype(chunks: &mut Vec<ArrayRef>, dtype: DataType) -> DataType {
5 let dtype = if let Some(arr) = chunks.get(0) {
7 DataType::from_arrow_dtype(arr.dtype())
8 } else {
9 dtype
10 };
11
12 match dtype {
13 #[cfg(feature = "dtype-categorical")]
14 DataType::List(inner)
18 if matches!(
19 *inner,
20 DataType::Categorical(None, _) | DataType::Enum(None, _)
21 ) =>
22 {
23 let array = concatenate_owned_unchecked(chunks).unwrap();
24 let list_arr = array.as_any().downcast_ref::<ListArray<i64>>().unwrap();
25 let values_arr = list_arr.values();
26 let cat = unsafe {
27 Series::_try_from_arrow_unchecked(
28 PlSmallStr::EMPTY,
29 vec![values_arr.clone()],
30 values_arr.dtype(),
31 )
32 .unwrap()
33 };
34
35 let arrow_dtype = ListArray::<i64>::default_datatype(ArrowDataType::UInt32);
38 let new_array = ListArray::new(
39 arrow_dtype,
40 list_arr.offsets().clone(),
41 cat.array_ref(0).clone(),
42 list_arr.validity().cloned(),
43 );
44 chunks.clear();
45 chunks.push(Box::new(new_array));
46 DataType::List(Box::new(cat.dtype().clone()))
47 },
48 #[cfg(all(feature = "dtype-array", feature = "dtype-categorical"))]
49 DataType::Array(inner, width)
50 if matches!(
51 *inner,
52 DataType::Categorical(None, _) | DataType::Enum(None, _)
53 ) =>
54 {
55 let array = concatenate_owned_unchecked(chunks).unwrap();
56 let list_arr = array.as_any().downcast_ref::<FixedSizeListArray>().unwrap();
57 let values_arr = list_arr.values();
58 let cat = unsafe {
59 Series::_try_from_arrow_unchecked(
60 PlSmallStr::EMPTY,
61 vec![values_arr.clone()],
62 values_arr.dtype(),
63 )
64 .unwrap()
65 };
66
67 let arrow_dtype = FixedSizeListArray::default_datatype(ArrowDataType::UInt32, width);
70 let new_array = FixedSizeListArray::new(
71 arrow_dtype,
72 values_arr.len(),
73 cat.array_ref(0).clone(),
74 list_arr.validity().cloned(),
75 );
76 chunks.clear();
77 chunks.push(Box::new(new_array));
78 DataType::Array(Box::new(cat.dtype().clone()), width)
79 },
80 _ => dtype,
81 }
82}
83
84impl<T, A> From<A> for ChunkedArray<T>
85where
86 T: PolarsDataType<Array = A>,
87 A: Array,
88{
89 fn from(arr: A) -> Self {
90 Self::with_chunk(PlSmallStr::EMPTY, arr)
91 }
92}
93
94impl<T> ChunkedArray<T>
95where
96 T: PolarsDataType,
97{
98 pub fn with_chunk<A>(name: PlSmallStr, arr: A) -> Self
99 where
100 A: Array,
101 T: PolarsDataType<Array = A>,
102 {
103 unsafe { Self::from_chunks(name, vec![Box::new(arr)]) }
104 }
105
106 pub fn with_chunk_like<A>(ca: &Self, arr: A) -> Self
107 where
108 A: Array,
109 T: PolarsDataType<Array = A>,
110 {
111 Self::from_chunk_iter_like(ca, std::iter::once(arr))
112 }
113
114 pub fn from_chunk_iter<I>(name: PlSmallStr, iter: I) -> Self
115 where
116 I: IntoIterator,
117 T: PolarsDataType<Array = <I as IntoIterator>::Item>,
118 <I as IntoIterator>::Item: Array,
119 {
120 let chunks = iter
121 .into_iter()
122 .map(|x| Box::new(x) as Box<dyn Array>)
123 .collect();
124 unsafe { Self::from_chunks(name, chunks) }
125 }
126
127 pub fn from_chunk_iter_like<I>(ca: &Self, iter: I) -> Self
128 where
129 I: IntoIterator,
130 T: PolarsDataType<Array = <I as IntoIterator>::Item>,
131 <I as IntoIterator>::Item: Array,
132 {
133 let chunks = iter
134 .into_iter()
135 .map(|x| Box::new(x) as Box<dyn Array>)
136 .collect();
137 unsafe {
138 Self::from_chunks_and_dtype_unchecked(ca.name().clone(), chunks, ca.dtype().clone())
139 }
140 }
141
142 pub fn try_from_chunk_iter<I, A, E>(name: PlSmallStr, iter: I) -> Result<Self, E>
143 where
144 I: IntoIterator<Item = Result<A, E>>,
145 T: PolarsDataType<Array = A>,
146 A: Array,
147 {
148 let chunks: Result<_, _> = iter
149 .into_iter()
150 .map(|x| Ok(Box::new(x?) as Box<dyn Array>))
151 .collect();
152 unsafe { Ok(Self::from_chunks(name, chunks?)) }
153 }
154
155 pub(crate) fn from_chunk_iter_and_field<I>(field: Arc<Field>, chunks: I) -> Self
156 where
157 I: IntoIterator,
158 T: PolarsDataType<Array = <I as IntoIterator>::Item>,
159 <I as IntoIterator>::Item: Array,
160 {
161 assert_eq!(
162 std::mem::discriminant(&T::get_dtype()),
163 std::mem::discriminant(&field.dtype)
164 );
165
166 let mut length = 0;
167 let mut null_count = 0;
168 let chunks = chunks
169 .into_iter()
170 .map(|x| {
171 length += x.len();
172 null_count += x.null_count();
173 Box::new(x) as Box<dyn Array>
174 })
175 .collect();
176
177 unsafe { ChunkedArray::new_with_dims(field, chunks, length, null_count) }
178 }
179
180 pub unsafe fn from_chunks(name: PlSmallStr, mut chunks: Vec<ArrayRef>) -> Self {
185 let dtype = match T::get_dtype() {
186 dtype @ DataType::List(_) => from_chunks_list_dtype(&mut chunks, dtype),
187 #[cfg(feature = "dtype-array")]
188 dtype @ DataType::Array(_, _) => from_chunks_list_dtype(&mut chunks, dtype),
189 #[cfg(feature = "dtype-struct")]
190 dtype @ DataType::Struct(_) => from_chunks_list_dtype(&mut chunks, dtype),
191 dt => dt,
192 };
193 Self::from_chunks_and_dtype(name, chunks, dtype)
194 }
195
196 pub unsafe fn with_chunks(&self, chunks: Vec<ArrayRef>) -> Self {
199 ChunkedArray::new_with_compute_len(self.field.clone(), chunks)
200 }
201
202 pub unsafe fn from_chunks_and_dtype(
208 name: PlSmallStr,
209 chunks: Vec<ArrayRef>,
210 dtype: DataType,
211 ) -> Self {
212 #[cfg(debug_assertions)]
215 {
216 if !chunks.is_empty() && !chunks[0].is_empty() && dtype.is_primitive() {
217 assert_eq!(chunks[0].dtype(), &dtype.to_arrow(CompatLevel::newest()))
218 }
219 }
220
221 Self::from_chunks_and_dtype_unchecked(name, chunks, dtype)
222 }
223
224 pub(crate) unsafe fn from_chunks_and_dtype_unchecked(
230 name: PlSmallStr,
231 chunks: Vec<ArrayRef>,
232 dtype: DataType,
233 ) -> Self {
234 let field = Arc::new(Field::new(name, dtype));
235 ChunkedArray::new_with_compute_len(field, chunks)
236 }
237
238 pub fn full_null_like(ca: &Self, length: usize) -> Self {
239 let chunks = std::iter::once(T::Array::full_null(
240 length,
241 ca.dtype().to_arrow(CompatLevel::newest()),
242 ));
243 Self::from_chunk_iter_like(ca, chunks)
244 }
245}
246
247impl<T> ChunkedArray<T>
248where
249 T: PolarsNumericType,
250{
251 pub fn from_vec(name: PlSmallStr, v: Vec<T::Native>) -> Self {
253 Self::with_chunk(name, to_primitive::<T>(v, None))
254 }
255
256 pub fn from_vec_validity(
258 name: PlSmallStr,
259 values: Vec<T::Native>,
260 buffer: Option<Bitmap>,
261 ) -> Self {
262 let arr = to_array::<T>(values, buffer);
263 ChunkedArray::new_with_compute_len(Arc::new(Field::new(name, T::get_dtype())), vec![arr])
264 }
265
266 pub unsafe fn mmap_slice(name: PlSmallStr, values: &[T::Native]) -> Self {
272 Self::with_chunk(name, arrow::ffi::mmap::slice(values))
273 }
274}
275
276impl BooleanChunked {
277 pub unsafe fn mmap_slice(name: PlSmallStr, values: &[u8], offset: usize, len: usize) -> Self {
283 let arr = arrow::ffi::mmap::bitmap(values, offset, len).unwrap();
284 Self::with_chunk(name, arr)
285 }
286}
287
288impl<'a, T> From<&'a ChunkedArray<T>> for Vec<Option<T::Physical<'a>>>
289where
290 T: PolarsDataType,
291{
292 fn from(ca: &'a ChunkedArray<T>) -> Self {
293 let mut out = Vec::with_capacity(ca.len());
294 for arr in ca.downcast_iter() {
295 out.extend(arr.iter())
296 }
297 out
298 }
299}
300impl From<StringChunked> for Vec<Option<String>> {
301 fn from(ca: StringChunked) -> Self {
302 ca.iter().map(|opt| opt.map(|s| s.to_string())).collect()
303 }
304}
305
306impl From<BooleanChunked> for Vec<Option<bool>> {
307 fn from(ca: BooleanChunked) -> Self {
308 let mut out = Vec::with_capacity(ca.len());
309 for arr in ca.downcast_iter() {
310 out.extend(arr.iter())
311 }
312 out
313 }
314}