polars_core/series/
any_value.rs

1use std::fmt::Write;
2
3use arrow::bitmap::MutableBitmap;
4
5use crate::chunked_array::builder::{get_list_builder, AnonymousOwnedListBuilder};
6#[cfg(feature = "object")]
7use crate::chunked_array::object::registry::ObjectRegistry;
8use crate::prelude::*;
9use crate::utils::any_values_to_supertype;
10
11impl<'a, T: AsRef<[AnyValue<'a>]>> NamedFrom<T, [AnyValue<'a>]> for Series {
12    /// Construct a new [`Series`] from a collection of [`AnyValue`].
13    ///
14    /// # Panics
15    ///
16    /// Panics if the values do not all share the same data type (with the exception
17    /// of [`DataType::Null`], which is always allowed).
18    ///
19    /// [`AnyValue`]: crate::datatypes::AnyValue
20    fn new(name: PlSmallStr, values: T) -> Self {
21        let values = values.as_ref();
22        Series::from_any_values(name, values, true).expect("data types of values should match")
23    }
24}
25
26impl Series {
27    /// Construct a new [`Series`] from a slice of AnyValues.
28    ///
29    /// The data type of the resulting Series is determined by the `values`
30    /// and the `strict` parameter:
31    /// - If `strict` is `true`, the data type is equal to the data type of the
32    ///   first non-null value. If any other non-null values do not match this
33    ///   data type, an error is raised.
34    /// - If `strict` is `false`, the data type is the supertype of the `values`.
35    ///   An error is returned if no supertype can be determined.
36    ///   **WARNING**: A full pass over the values is required to determine the supertype.
37    /// - If no values were passed, the resulting data type is `Null`.
38    pub fn from_any_values(
39        name: PlSmallStr,
40        values: &[AnyValue],
41        strict: bool,
42    ) -> PolarsResult<Self> {
43        fn get_first_non_null_dtype(values: &[AnyValue]) -> DataType {
44            let mut all_flat_null = true;
45            let first_non_null = values.iter().find(|av| {
46                if !av.is_null() {
47                    all_flat_null = false
48                };
49                !av.is_nested_null()
50            });
51            match first_non_null {
52                Some(av) => av.dtype(),
53                None => {
54                    if all_flat_null {
55                        DataType::Null
56                    } else {
57                        // Second pass to check for the nested null value that
58                        // toggled `all_flat_null` to false, e.g. a List(Null).
59                        let first_nested_null = values.iter().find(|av| !av.is_null()).unwrap();
60                        first_nested_null.dtype()
61                    }
62                },
63            }
64        }
65        let dtype = if strict {
66            get_first_non_null_dtype(values)
67        } else {
68            // Currently does not work correctly for Decimal because equality is not implemented.
69            any_values_to_supertype(values)?
70        };
71
72        // TODO: Remove this when Decimal data type equality is implemented.
73        #[cfg(feature = "dtype-decimal")]
74        if dtype.is_decimal() {
75            let dtype = DataType::Decimal(None, None);
76            return Self::from_any_values_and_dtype(name, values, &dtype, strict);
77        }
78
79        Self::from_any_values_and_dtype(name, values, &dtype, strict)
80    }
81
82    /// Construct a new [`Series`] with the given `dtype` from a slice of AnyValues.
83    ///
84    /// If `strict` is `true`, an error is returned if the values do not match the given
85    /// data type. If `strict` is `false`, values that do not match the given data type
86    /// are cast. If casting is not possible, the values are set to null instead.
87    pub fn from_any_values_and_dtype(
88        name: PlSmallStr,
89        values: &[AnyValue],
90        dtype: &DataType,
91        strict: bool,
92    ) -> PolarsResult<Self> {
93        if values.is_empty() {
94            return Ok(Self::new_empty(name, dtype));
95        }
96
97        let mut s = match dtype {
98            #[cfg(feature = "dtype-i8")]
99            DataType::Int8 => any_values_to_integer::<Int8Type>(values, strict)?.into_series(),
100            #[cfg(feature = "dtype-i16")]
101            DataType::Int16 => any_values_to_integer::<Int16Type>(values, strict)?.into_series(),
102            DataType::Int32 => any_values_to_integer::<Int32Type>(values, strict)?.into_series(),
103            DataType::Int64 => any_values_to_integer::<Int64Type>(values, strict)?.into_series(),
104            #[cfg(feature = "dtype-i128")]
105            DataType::Int128 => any_values_to_integer::<Int128Type>(values, strict)?.into_series(),
106            #[cfg(feature = "dtype-u8")]
107            DataType::UInt8 => any_values_to_integer::<UInt8Type>(values, strict)?.into_series(),
108            #[cfg(feature = "dtype-u16")]
109            DataType::UInt16 => any_values_to_integer::<UInt16Type>(values, strict)?.into_series(),
110            DataType::UInt32 => any_values_to_integer::<UInt32Type>(values, strict)?.into_series(),
111            DataType::UInt64 => any_values_to_integer::<UInt64Type>(values, strict)?.into_series(),
112            DataType::Float32 => any_values_to_f32(values, strict)?.into_series(),
113            DataType::Float64 => any_values_to_f64(values, strict)?.into_series(),
114            DataType::Boolean => any_values_to_bool(values, strict)?.into_series(),
115            DataType::String => any_values_to_string(values, strict)?.into_series(),
116            DataType::Binary => any_values_to_binary(values, strict)?.into_series(),
117            #[cfg(feature = "dtype-date")]
118            DataType::Date => any_values_to_date(values, strict)?.into_series(),
119            #[cfg(feature = "dtype-time")]
120            DataType::Time => any_values_to_time(values, strict)?.into_series(),
121            #[cfg(feature = "dtype-datetime")]
122            DataType::Datetime(tu, tz) => {
123                any_values_to_datetime(values, *tu, (*tz).clone(), strict)?.into_series()
124            },
125            #[cfg(feature = "dtype-duration")]
126            DataType::Duration(tu) => any_values_to_duration(values, *tu, strict)?.into_series(),
127            #[cfg(feature = "dtype-categorical")]
128            dt @ DataType::Categorical(_, _) => any_values_to_categorical(values, dt, strict)?,
129            #[cfg(feature = "dtype-categorical")]
130            dt @ DataType::Enum(_, _) => any_values_to_enum(values, dt, strict)?,
131            #[cfg(feature = "dtype-decimal")]
132            DataType::Decimal(precision, scale) => {
133                any_values_to_decimal(values, *precision, *scale, strict)?.into_series()
134            },
135            DataType::List(inner) => any_values_to_list(values, inner, strict)?.into_series(),
136            #[cfg(feature = "dtype-array")]
137            DataType::Array(inner, size) => any_values_to_array(values, inner, strict, *size)?
138                .into_series()
139                .cast(&DataType::Array(inner.clone(), *size))?,
140            #[cfg(feature = "dtype-struct")]
141            DataType::Struct(fields) => any_values_to_struct(values, fields, strict)?,
142            #[cfg(feature = "object")]
143            DataType::Object(_, registry) => any_values_to_object(values, registry)?,
144            DataType::Null => Series::new_null(PlSmallStr::EMPTY, values.len()),
145            dt => {
146                polars_bail!(
147                    InvalidOperation:
148                    "constructing a Series with data type {dt:?} from AnyValues is not supported"
149                )
150            },
151        };
152        s.rename(name);
153        Ok(s)
154    }
155}
156
157fn any_values_to_primitive_nonstrict<T: PolarsNumericType>(values: &[AnyValue]) -> ChunkedArray<T> {
158    values
159        .iter()
160        .map(|av| av.extract::<T::Native>())
161        .collect_trusted()
162}
163
164fn any_values_to_integer<T: PolarsIntegerType>(
165    values: &[AnyValue],
166    strict: bool,
167) -> PolarsResult<ChunkedArray<T>> {
168    fn any_values_to_integer_strict<T: PolarsIntegerType>(
169        values: &[AnyValue],
170    ) -> PolarsResult<ChunkedArray<T>> {
171        let mut builder = PrimitiveChunkedBuilder::<T>::new(PlSmallStr::EMPTY, values.len());
172        for av in values {
173            match &av {
174                av if av.is_integer() => {
175                    let opt_val = av.extract::<T::Native>();
176                    let val = match opt_val {
177                        Some(v) => v,
178                        None => return Err(invalid_value_error(&T::get_dtype(), av)),
179                    };
180                    builder.append_value(val)
181                },
182                AnyValue::Null => builder.append_null(),
183                av => return Err(invalid_value_error(&T::get_dtype(), av)),
184            }
185        }
186        Ok(builder.finish())
187    }
188
189    if strict {
190        any_values_to_integer_strict::<T>(values)
191    } else {
192        Ok(any_values_to_primitive_nonstrict::<T>(values))
193    }
194}
195
196fn any_values_to_f32(values: &[AnyValue], strict: bool) -> PolarsResult<Float32Chunked> {
197    fn any_values_to_f32_strict(values: &[AnyValue]) -> PolarsResult<Float32Chunked> {
198        let mut builder =
199            PrimitiveChunkedBuilder::<Float32Type>::new(PlSmallStr::EMPTY, values.len());
200        for av in values {
201            match av {
202                AnyValue::Float32(i) => builder.append_value(*i),
203                AnyValue::Null => builder.append_null(),
204                av => return Err(invalid_value_error(&DataType::Float32, av)),
205            }
206        }
207        Ok(builder.finish())
208    }
209    if strict {
210        any_values_to_f32_strict(values)
211    } else {
212        Ok(any_values_to_primitive_nonstrict::<Float32Type>(values))
213    }
214}
215fn any_values_to_f64(values: &[AnyValue], strict: bool) -> PolarsResult<Float64Chunked> {
216    fn any_values_to_f64_strict(values: &[AnyValue]) -> PolarsResult<Float64Chunked> {
217        let mut builder =
218            PrimitiveChunkedBuilder::<Float64Type>::new(PlSmallStr::EMPTY, values.len());
219        for av in values {
220            match av {
221                AnyValue::Float64(i) => builder.append_value(*i),
222                AnyValue::Float32(i) => builder.append_value(*i as f64),
223                AnyValue::Null => builder.append_null(),
224                av => return Err(invalid_value_error(&DataType::Float64, av)),
225            }
226        }
227        Ok(builder.finish())
228    }
229    if strict {
230        any_values_to_f64_strict(values)
231    } else {
232        Ok(any_values_to_primitive_nonstrict::<Float64Type>(values))
233    }
234}
235
236fn any_values_to_bool(values: &[AnyValue], strict: bool) -> PolarsResult<BooleanChunked> {
237    let mut builder = BooleanChunkedBuilder::new(PlSmallStr::EMPTY, values.len());
238    for av in values {
239        match av {
240            AnyValue::Boolean(b) => builder.append_value(*b),
241            AnyValue::Null => builder.append_null(),
242            av => {
243                if strict {
244                    return Err(invalid_value_error(&DataType::Boolean, av));
245                }
246                match av.cast(&DataType::Boolean) {
247                    AnyValue::Boolean(b) => builder.append_value(b),
248                    _ => builder.append_null(),
249                }
250            },
251        }
252    }
253    Ok(builder.finish())
254}
255
256fn any_values_to_string(values: &[AnyValue], strict: bool) -> PolarsResult<StringChunked> {
257    fn any_values_to_string_strict(values: &[AnyValue]) -> PolarsResult<StringChunked> {
258        let mut builder = StringChunkedBuilder::new(PlSmallStr::EMPTY, values.len());
259        for av in values {
260            match av {
261                AnyValue::String(s) => builder.append_value(s),
262                AnyValue::StringOwned(s) => builder.append_value(s),
263                AnyValue::Null => builder.append_null(),
264                av => return Err(invalid_value_error(&DataType::String, av)),
265            }
266        }
267        Ok(builder.finish())
268    }
269    fn any_values_to_string_nonstrict(values: &[AnyValue]) -> StringChunked {
270        let mut builder = StringChunkedBuilder::new(PlSmallStr::EMPTY, values.len());
271        let mut owned = String::new(); // Amortize allocations.
272        for av in values {
273            match av {
274                AnyValue::String(s) => builder.append_value(s),
275                AnyValue::StringOwned(s) => builder.append_value(s),
276                AnyValue::Null => builder.append_null(),
277                AnyValue::Binary(_) | AnyValue::BinaryOwned(_) => builder.append_null(),
278                av => {
279                    owned.clear();
280                    write!(owned, "{av}").unwrap();
281                    builder.append_value(&owned);
282                },
283            }
284        }
285        builder.finish()
286    }
287    if strict {
288        any_values_to_string_strict(values)
289    } else {
290        Ok(any_values_to_string_nonstrict(values))
291    }
292}
293
294fn any_values_to_binary(values: &[AnyValue], strict: bool) -> PolarsResult<BinaryChunked> {
295    fn any_values_to_binary_strict(values: &[AnyValue]) -> PolarsResult<BinaryChunked> {
296        let mut builder = BinaryChunkedBuilder::new(PlSmallStr::EMPTY, values.len());
297        for av in values {
298            match av {
299                AnyValue::Binary(s) => builder.append_value(*s),
300                AnyValue::BinaryOwned(s) => builder.append_value(&**s),
301                AnyValue::Null => builder.append_null(),
302                av => return Err(invalid_value_error(&DataType::Binary, av)),
303            }
304        }
305        Ok(builder.finish())
306    }
307    fn any_values_to_binary_nonstrict(values: &[AnyValue]) -> BinaryChunked {
308        values
309            .iter()
310            .map(|av| match av {
311                AnyValue::Binary(b) => Some(*b),
312                AnyValue::BinaryOwned(b) => Some(&**b),
313                AnyValue::String(s) => Some(s.as_bytes()),
314                AnyValue::StringOwned(s) => Some(s.as_str().as_bytes()),
315                _ => None,
316            })
317            .collect_trusted()
318    }
319    if strict {
320        any_values_to_binary_strict(values)
321    } else {
322        Ok(any_values_to_binary_nonstrict(values))
323    }
324}
325
/// Builds a [`DateChunked`] from `values`.
///
/// `Date` values and nulls are taken as they are. Any other value is an error in strict
/// mode; in non-strict mode it is cast to `Date`, and becomes null when the cast does
/// not produce a date.
#[cfg(feature = "dtype-date")]
fn any_values_to_date(values: &[AnyValue], strict: bool) -> PolarsResult<DateChunked> {
    // The physical values are collected as Int32 and converted at the end.
    let mut physical = PrimitiveChunkedBuilder::<Int32Type>::new(PlSmallStr::EMPTY, values.len());
    for value in values {
        match value {
            AnyValue::Null => physical.append_null(),
            AnyValue::Date(raw) => physical.append_value(*raw),
            other if strict => return Err(invalid_value_error(&DataType::Date, other)),
            other => match other.cast(&DataType::Date) {
                AnyValue::Date(raw) => physical.append_value(raw),
                _ => physical.append_null(),
            },
        }
    }
    Ok(physical.finish().into())
}
346
/// Builds a [`TimeChunked`] from `values`.
///
/// `Time` values and nulls are taken as they are. Any other value is an error in strict
/// mode; in non-strict mode it is cast to `Time`, and becomes null when the cast does
/// not produce a time.
#[cfg(feature = "dtype-time")]
fn any_values_to_time(values: &[AnyValue], strict: bool) -> PolarsResult<TimeChunked> {
    // The physical values are collected as Int64 and converted at the end.
    let mut physical = PrimitiveChunkedBuilder::<Int64Type>::new(PlSmallStr::EMPTY, values.len());
    for value in values {
        match value {
            AnyValue::Null => physical.append_null(),
            AnyValue::Time(raw) => physical.append_value(*raw),
            other if strict => return Err(invalid_value_error(&DataType::Time, other)),
            other => match other.cast(&DataType::Time) {
                AnyValue::Time(raw) => physical.append_value(raw),
                _ => physical.append_null(),
            },
        }
    }
    Ok(physical.finish().into())
}
367
/// Builds a [`DatetimeChunked`] with the given `time_unit` and `time_zone` from `values`.
///
/// Datetime values whose time unit equals `time_unit` and nulls are taken as they are
/// (the time zone of the value is not inspected). Any other value is an error in strict
/// mode; in non-strict mode it is cast to the target data type, and becomes null when
/// the cast does not produce a datetime.
#[cfg(feature = "dtype-datetime")]
fn any_values_to_datetime(
    values: &[AnyValue],
    time_unit: TimeUnit,
    time_zone: Option<TimeZone>,
    strict: bool,
) -> PolarsResult<DatetimeChunked> {
    let target_dtype = DataType::Datetime(time_unit, time_zone.clone());
    // The physical values are collected as Int64 and converted at the end.
    let mut physical = PrimitiveChunkedBuilder::<Int64Type>::new(PlSmallStr::EMPTY, values.len());
    for value in values {
        match value {
            AnyValue::Null => physical.append_null(),
            AnyValue::Datetime(raw, tu, _) | AnyValue::DatetimeOwned(raw, tu, _)
                if *tu == time_unit =>
            {
                physical.append_value(*raw)
            },
            other if strict => return Err(invalid_value_error(&target_dtype, other)),
            other => match other.cast(&target_dtype) {
                AnyValue::Datetime(raw, _, _) | AnyValue::DatetimeOwned(raw, _, _) => {
                    physical.append_value(raw)
                },
                _ => physical.append_null(),
            },
        }
    }
    Ok(physical.finish().into_datetime(time_unit, time_zone))
}
396
/// Builds a [`DurationChunked`] with the given `time_unit` from `values`.
///
/// Duration values whose time unit equals `time_unit` and nulls are taken as they are.
/// Any other value is an error in strict mode; in non-strict mode it is cast to the
/// target data type, and becomes null when the cast does not produce a duration.
#[cfg(feature = "dtype-duration")]
fn any_values_to_duration(
    values: &[AnyValue],
    time_unit: TimeUnit,
    strict: bool,
) -> PolarsResult<DurationChunked> {
    let target_dtype = DataType::Duration(time_unit);
    // The physical values are collected as Int64 and converted at the end.
    let mut physical = PrimitiveChunkedBuilder::<Int64Type>::new(PlSmallStr::EMPTY, values.len());
    for value in values {
        match value {
            AnyValue::Null => physical.append_null(),
            AnyValue::Duration(raw, tu) if *tu == time_unit => physical.append_value(*raw),
            other if strict => return Err(invalid_value_error(&target_dtype, other)),
            other => match other.cast(&target_dtype) {
                AnyValue::Duration(raw, _) => physical.append_value(raw),
                _ => physical.append_null(),
            },
        }
    }
    Ok(physical.finish().into_duration(time_unit))
}
422
/// Builds a categorical [`Series`] from `values`.
///
/// String values are appended directly, and enum/categorical values are appended via
/// the entry their rev-map returns for the stored index. Nulls stay null. For every
/// other value:
/// - strict mode returns an error naming the categorical `dtype` being built;
/// - non-strict mode maps binary values to null and appends all remaining values
///   through their `Display` implementation.
///
/// # Panics
///
/// Panics if `dtype` is not [`DataType::Categorical`].
#[cfg(feature = "dtype-categorical")]
fn any_values_to_categorical(
    values: &[AnyValue],
    dtype: &DataType,
    strict: bool,
) -> PolarsResult<Series> {
    let ordering = match dtype {
        DataType::Categorical(_, ordering) => ordering,
        _ => panic!("any_values_to_categorical with dtype={dtype:?}"),
    };

    let mut builder = CategoricalChunkedBuilder::new(PlSmallStr::EMPTY, values.len(), *ordering);

    let mut owned = String::new(); // Amortize allocations.
    for av in values {
        match av {
            AnyValue::String(s) => builder.append_value(s),
            AnyValue::StringOwned(s) => builder.append_value(s),

            AnyValue::Enum(s, rev, _) => builder.append_value(rev.get(*s)),
            AnyValue::EnumOwned(s, rev, _) => builder.append_value(rev.get(*s)),

            AnyValue::Categorical(s, rev, _) => builder.append_value(rev.get(*s)),
            AnyValue::CategoricalOwned(s, rev, _) => builder.append_value(rev.get(*s)),

            AnyValue::Binary(_) | AnyValue::BinaryOwned(_) if !strict => builder.append_null(),
            AnyValue::Null => builder.append_null(),

            av => {
                if strict {
                    // Report the data type that is actually being built, not `String`.
                    return Err(invalid_value_error(dtype, av));
                }

                owned.clear();
                write!(owned, "{av}").unwrap();
                builder.append_value(&owned);
            },
        }
    }

    let ca = builder.finish();

    Ok(ca.into_series())
}
467
/// Builds an enum [`Series`] from `values`.
///
/// String, enum and categorical values are handed to the `EnumChunkedBuilder`, which is
/// created with the rev-map of `dtype` and the `strict` flag; errors reported by the
/// builder are propagated. Nulls stay null. For every other value:
/// - strict mode returns an error naming the enum `dtype` being built;
/// - non-strict mode maps binary values to null and appends all remaining values
///   through their `Display` implementation.
///
/// # Errors
///
/// Also fails with a "not yet implemented" error when `dtype` carries no rev-map.
///
/// # Panics
///
/// Panics if `dtype` is not [`DataType::Enum`].
#[cfg(feature = "dtype-categorical")]
fn any_values_to_enum(values: &[AnyValue], dtype: &DataType, strict: bool) -> PolarsResult<Series> {
    use self::enum_::EnumChunkedBuilder;

    let (rev, ordering) = match dtype {
        DataType::Enum(rev, ordering) => (rev.clone(), ordering),
        _ => panic!("any_values_to_enum with dtype={dtype:?}"),
    };

    let Some(rev) = rev else {
        polars_bail!(nyi = "Not yet possible to create enum series without a rev-map");
    };

    let mut builder =
        EnumChunkedBuilder::new(PlSmallStr::EMPTY, values.len(), rev, *ordering, strict);

    let mut owned = String::new(); // Amortize allocations.
    for av in values {
        match av {
            AnyValue::String(s) => builder.append_str(s)?,
            AnyValue::StringOwned(s) => builder.append_str(s)?,

            AnyValue::Enum(s, rev, _) => builder.append_enum(*s, rev)?,
            AnyValue::EnumOwned(s, rev, _) => builder.append_enum(*s, rev)?,

            AnyValue::Categorical(s, rev, _) => builder.append_str(rev.get(*s))?,
            AnyValue::CategoricalOwned(s, rev, _) => builder.append_str(rev.get(*s))?,

            AnyValue::Binary(_) | AnyValue::BinaryOwned(_) if !strict => builder.append_null(),
            AnyValue::Null => builder.append_null(),

            av => {
                if strict {
                    // Report the data type that is actually being built, not `String`.
                    return Err(invalid_value_error(dtype, av));
                }

                owned.clear();
                write!(owned, "{av}").unwrap();
                builder.append_str(&owned)?
            },
        };
    }

    let ca = builder.finish();

    Ok(ca.into_series())
}
515
/// Builds a [`DecimalChunked`] from `values`.
///
/// When `scale` is `None` it is inferred as the largest scale among the decimal values.
/// Decimal values with a scale equal to or smaller than the target scale are accepted
/// in both modes (smaller scales are rescaled with `strict_cast`). Any other non-null
/// value is an error in strict mode; in non-strict mode it goes through `strict_cast`
/// and becomes null when that does not produce a decimal.
#[cfg(feature = "dtype-decimal")]
fn any_values_to_decimal(
    values: &[AnyValue],
    precision: Option<usize>,
    scale: Option<usize>, // If None, we're inferring the scale.
    strict: bool,
) -> PolarsResult<DecimalChunked> {
    /// Returns the largest scale found among the decimal values (0 if there are none).
    ///
    /// Nulls are skipped; other non-decimal values are skipped in non-strict mode and
    /// reported as an error in strict mode.
    fn infer_scale(
        values: &[AnyValue],
        precision: Option<usize>,
        strict: bool,
    ) -> PolarsResult<usize> {
        let mut widest = 0usize;
        for value in values {
            match value {
                AnyValue::Decimal(_, value_scale) => widest = widest.max(*value_scale),
                AnyValue::Null => {},
                other if strict => {
                    let target_dtype = DataType::Decimal(precision, None);
                    return Err(invalid_value_error(&target_dtype, other));
                },
                _ => {},
            }
        }
        Ok(widest)
    }

    let scale = match scale {
        Some(given) => given,
        None => infer_scale(values, precision, strict)?,
    };
    let target_dtype = DataType::Decimal(precision, Some(scale));

    let mut physical = PrimitiveChunkedBuilder::<Int128Type>::new(PlSmallStr::EMPTY, values.len());
    for value in values {
        match value {
            AnyValue::Null => physical.append_null(),
            // Same scale: the stored integer can be used directly.
            AnyValue::Decimal(raw, s) if *s == scale => physical.append_value(*raw),
            // Smaller scale: rescale. We do want to support different scales even in
            // 'strict' mode.
            AnyValue::Decimal(_, s) if *s < scale => match value.strict_cast(&target_dtype) {
                Some(AnyValue::Decimal(rescaled, _)) => physical.append_value(rescaled),
                _ => physical.append_null(),
            },
            other if strict => return Err(invalid_value_error(&target_dtype, other)),
            // TODO: Precision check, else set to null
            other => match other.strict_cast(&target_dtype) {
                Some(AnyValue::Decimal(converted, _)) => physical.append_value(converted),
                _ => physical.append_null(),
            },
        }
    }

    // Build the array and do a precision check if needed.
    physical.finish().into_decimal(precision, scale)
}
583
584fn any_values_to_list(
585    avs: &[AnyValue],
586    inner_type: &DataType,
587    strict: bool,
588) -> PolarsResult<ListChunked> {
589    // GB:
590    // Lord forgive for the sins I have committed in this function. The amount of strange
591    // exceptions that need to happen for this to work are insane and I feel like I am going crazy.
592    //
593    // This function is essentially a copy of the `<ListChunked as FromIterator>` where it does not
594    // sample the datatype from the first element and instead we give it explicitly. This allows
595    // this function to properly assign a datatype if `avs` starts with a `null` value. Previously,
596    // this was solved by assigning the `dtype` again afterwards, but why? We should not link the
597    // implementation of these functions. We still need to assign the dtype of the ListArray and
598    // such, anyways.
599    //
600    // Then, `collect_ca_with_dtype` does not possess the necessary exceptions shown in this
601    // function to use that. I have tried adding the exceptions there and it broke other things. I
602    // really do feel like this is the simplest solution.
603
604    let mut valid = true;
605    let capacity = avs.len();
606
607    let ca = match inner_type {
608        // AnyValues with empty lists in python can create
609        // Series of an unknown dtype.
610        // We use the anonymousbuilder without a dtype
611        // the empty arrays is then not added (we add an extra offset instead)
612        // the next non-empty series then must have the correct dtype.
613        DataType::Null => {
614            let mut builder = AnonymousOwnedListBuilder::new(PlSmallStr::EMPTY, capacity, None);
615            for av in avs {
616                match av {
617                    AnyValue::List(b) => builder.append_series(b)?,
618                    AnyValue::Null => builder.append_null(),
619                    _ => {
620                        valid = false;
621                        builder.append_null();
622                    },
623                }
624            }
625            builder.finish()
626        },
627
628        #[cfg(feature = "object")]
629        DataType::Object(_, _) => polars_bail!(nyi = "Nested object types"),
630
631        _ => {
632            let list_inner_type = match inner_type {
633                // Categoricals may not have a revmap yet. We just give them an empty one here and
634                // the list builder takes care of the rest.
635                #[cfg(feature = "dtype-categorical")]
636                DataType::Categorical(None, ordering) => {
637                    DataType::Categorical(Some(Arc::new(RevMapping::default())), *ordering)
638                },
639
640                _ => inner_type.clone(),
641            };
642
643            let mut builder =
644                get_list_builder(&list_inner_type, capacity * 5, capacity, PlSmallStr::EMPTY);
645
646            for av in avs {
647                match av {
648                    AnyValue::List(b) => match b.cast(inner_type) {
649                        Ok(casted) => {
650                            if casted.null_count() != b.null_count() {
651                                valid = !strict;
652                            }
653                            builder.append_series(&casted)?;
654                        },
655                        Err(_) => {
656                            valid = false;
657                            for _ in 0..b.len() {
658                                builder.append_null();
659                            }
660                        },
661                    },
662                    AnyValue::Null => builder.append_null(),
663                    _ => {
664                        valid = false;
665                        builder.append_null()
666                    },
667                }
668            }
669
670            builder.finish()
671        },
672    };
673
674    if strict && !valid {
675        polars_bail!(SchemaMismatch: "unexpected value while building Series of type {:?}", DataType::List(Box::new(inner_type.clone())));
676    }
677
678    Ok(ca)
679}
680
/// Builds an [`ArrayChunked`] of the given `width` and `inner_type` from `avs`.
///
/// List and array values contribute their single (rechunked) chunk; when `inner_type`
/// is not `Null` and differs from the value's dtype, the value is cast first and replaced
/// by an all-null series of the same length if the cast fails. Nulls stay null. Any
/// other value becomes null and, in strict mode, makes the call fail.
///
/// # Errors
///
/// Returns a `SchemaMismatch` error for unexpected values in strict mode, or when the
/// width of the collected array differs from `width`.
#[cfg(feature = "dtype-array")]
fn any_values_to_array(
    avs: &[AnyValue],
    inner_type: &DataType,
    strict: bool,
    width: usize,
) -> PolarsResult<ArrayChunked> {
    /// First chunk of `s`, after merging the chunks when there is more than one.
    fn to_arr(s: &Series) -> Option<ArrayRef> {
        let chunk = if s.chunks().len() > 1 {
            s.rechunk().chunks()[0].clone()
        } else {
            s.chunks()[0].clone()
        };
        Some(chunk)
    }

    let target_dtype = DataType::Array(Box::new(inner_type.clone()), width);

    // A `Null` inner type is handled downstream: the builder will choose the first
    // non null type, so values are passed through without casting in that case.
    let inner_is_null = inner_type == &DataType::Null;

    let mut valid = true;
    #[allow(unused_mut)]
    let mut out: ArrayChunked = avs
        .iter()
        .map(|av| match av {
            AnyValue::List(b) | AnyValue::Array(b, _) => {
                if inner_is_null || b.dtype() == inner_type {
                    to_arr(b)
                } else {
                    // Make sure that wrongly inferred AnyValues don't deviate from the
                    // datatype.
                    let casted = b
                        .cast(inner_type)
                        .unwrap_or_else(|_| Series::full_null(b.name().clone(), b.len(), inner_type));
                    to_arr(&casted)
                }
            },
            AnyValue::Null => None,
            _ => {
                valid = false;
                None
            },
        })
        .collect_ca_with_dtype(PlSmallStr::EMPTY, target_dtype.clone());

    if strict && !valid {
        polars_bail!(SchemaMismatch: "unexpected value while building Series of type {:?}", target_dtype);
    }
    polars_ensure!(
        out.width() == width,
        SchemaMismatch: "got mixed size array widths where width {} was expected", width
    );

    // Ensure the logical type is correct for nested types.
    #[cfg(feature = "dtype-struct")]
    if !matches!(inner_type, DataType::Null) && out.inner_dtype().is_nested() {
        // `out` was collected with this very `target_dtype` above.
        unsafe {
            out.set_dtype(target_dtype.clone());
        };
    }

    Ok(out)
}
756
/// Picks the value for `field` out of one struct value and pushes it onto `field_avs`.
///
/// `av_fields`/`av_values` describe the struct value at hand, `fields` is the target
/// schema and `field_index` the position of `field` in that schema. Exactly one value
/// is pushed per call; `AnyValue::Null` is pushed when the struct value has no value
/// for `field`.
#[cfg(feature = "dtype-struct")]
fn _any_values_to_struct<'a>(
    av_fields: &[Field],
    av_values: &[AnyValue<'a>],
    field_index: usize,
    field: &Field,
    fields: &[Field],
    field_avs: &mut Vec<AnyValue<'a>>,
) {
    // TODO: Optimize.

    // The value's fields line up with the target schema only if both have the same
    // length and every pair of fields is equal.
    let same_layout = fields.len() == av_fields.len()
        && fields.iter().zip(av_fields.iter()).all(|(l, r)| l == r);

    let picked = if same_layout {
        // All fields are available in this single value.
        // We can use the index to get value.
        av_values
            .get(field_index)
            .cloned()
            .unwrap_or(AnyValue::Null)
    } else {
        // Not all fields are available (or they differ), we search the proper field
        // by name.
        av_fields
            .iter()
            .position(|av_fld| av_fld.name == field.name)
            .map_or(AnyValue::Null, |i| av_values[i].clone())
    };

    field_avs.push(picked);
}
799
/// Build a struct [`Series`] from `values`, producing one field series per entry
/// in `fields`.
///
/// For every field, the matching value is collected from each struct value
/// (`AnyValue::Struct` / `AnyValue::StructOwned`); any other value contributes
/// `AnyValue::Null` for that field. Fields whose dtype is `DataType::Null` are
/// built through `Series::from_any_values` (dtype inference), all others through
/// `Series::from_any_values_and_dtype` with the field's dtype. `strict` is
/// forwarded to those constructors.
///
/// If at least one non-struct value was seen, an outer validity mask is set on the
/// result in which the rows holding `AnyValue::Null` are marked as invalid.
///
/// # Errors
///
/// Propagates any error returned while building a field series or while
/// assembling the `StructChunked`.
#[cfg(feature = "dtype-struct")]
fn any_values_to_struct(
    values: &[AnyValue],
    fields: &[Field],
    strict: bool,
) -> PolarsResult<Series> {
    let n_rows = values.len();

    // Fast path for structs with no fields.
    if fields.is_empty() {
        let empty = StructChunked::from_series(PlSmallStr::EMPTY, n_rows, [].iter())?;
        return Ok(empty.into_series());
    }

    let mut has_outer_validity = false;
    // Scratch buffer holding the values of the field currently being built;
    // it is reused across fields.
    let mut column_avs = Vec::with_capacity(n_rows);
    // The physical series fields of the struct.
    let mut columns = Vec::with_capacity(fields.len());

    for (field_idx, field) in fields.iter().enumerate() {
        column_avs.clear();

        for value in values {
            match value {
                AnyValue::StructOwned(payload) => _any_values_to_struct(
                    &payload.1,
                    &payload.0,
                    field_idx,
                    field,
                    fields,
                    &mut column_avs,
                ),
                AnyValue::Struct(_, _, av_fields) => {
                    let row: Vec<_> = value._iter_struct_av().collect();
                    _any_values_to_struct(
                        av_fields,
                        &row,
                        field_idx,
                        field,
                        fields,
                        &mut column_avs,
                    );
                },
                _ => {
                    // Not a struct value: this row gets a null in every field.
                    has_outer_validity = true;
                    column_avs.push(AnyValue::Null);
                },
            }
        }

        let name = field.name().clone();
        let column = match &field.dtype {
            // If the inferred dtype is null, we let auto inference work.
            DataType::Null => Series::from_any_values(name, &column_avs, strict)?,
            dtype => Series::from_any_values_and_dtype(name, &column_avs, dtype, strict)?,
        };
        columns.push(column);
    }

    let mut out = StructChunked::from_series(PlSmallStr::EMPTY, n_rows, columns.iter())?;

    if has_outer_validity {
        let mut validity = MutableBitmap::new();
        validity.extend_constant(n_rows, true);

        let null_rows = values
            .iter()
            .enumerate()
            .filter(|(_, v)| matches!(v, AnyValue::Null))
            .map(|(row, _)| row);
        for row in null_rows {
            // SAFETY: `row` is an index into `values`, and the bitmap was just
            // extended with `values.len()` bits.
            unsafe { validity.set_unchecked(row, false) }
        }
        out.set_outer_validity(Some(validity.freeze()));
    }

    Ok(out.into_series())
}
865
/// Build an object [`Series`] from `values`.
///
/// - With a `registry`, the builder comes from the registry's
///   `builder_constructor`. Only `AnyValue::Object`, `AnyValue::ObjectOwned` and
///   `AnyValue::Null` are accepted; any other value yields a `SchemaMismatch`
///   error.
/// - Without a `registry`, the globally registered object builder and converter
///   are used. `AnyValue::Object` values are appended directly, `AnyValue::Null`
///   becomes a null, and every other value is first passed through the global
///   converter.
#[cfg(feature = "object")]
fn any_values_to_object(
    values: &[AnyValue],
    registry: &Option<Arc<ObjectRegistry>>,
) -> PolarsResult<Series> {
    let n_values = values.len();

    let mut builder = if let Some(local_registry) = registry {
        let mut object_builder =
            (*local_registry.builder_constructor)(PlSmallStr::EMPTY, n_values);
        for value in values {
            match value {
                AnyValue::Object(obj) => object_builder.append_value(obj.as_any()),
                AnyValue::ObjectOwned(owned) => object_builder.append_value(owned.0.as_any()),
                AnyValue::Null => object_builder.append_null(),
                _ => polars_bail!(SchemaMismatch: "expected object"),
            }
        }
        object_builder
    } else {
        use crate::chunked_array::object::registry as global_registry;

        let convert = global_registry::get_object_converter();
        let mut object_builder = global_registry::get_object_builder(PlSmallStr::EMPTY, n_values);
        for value in values {
            match value {
                AnyValue::Object(obj) => object_builder.append_value(obj.as_any()),
                AnyValue::Null => object_builder.append_null(),
                other => {
                    // This is needed because in Python users can send mixed types.
                    // This only works if you set a global converter.
                    let converted = convert(other.as_borrowed());
                    object_builder.append_value(&*converted)
                },
            }
        }
        object_builder
    };

    Ok(builder.to_series())
}
908
909fn invalid_value_error(dtype: &DataType, value: &AnyValue) -> PolarsError {
910    polars_err!(
911        SchemaMismatch:
912        "unexpected value while building Series of type {:?}; found value of type {:?}: {}",
913        dtype,
914        value.dtype(),
915        value
916    )
917}