polars_compute/cast/
binview_to.rs

1use arrow::array::*;
2#[cfg(feature = "dtype-decimal")]
3use arrow::compute::decimal::deserialize_decimal;
4use arrow::datatypes::{ArrowDataType, TimeUnit};
5use arrow::offset::Offset;
6use arrow::types::NativeType;
7use chrono::Datelike;
8use polars_error::PolarsResult;
9
10use super::binary_to::Parse;
11use super::temporal::EPOCH_DAYS_FROM_CE;
12use super::CastOptionsImpl;
13
/// Format string for timestamps written as `YYYY-MM-DDTHH:MM:SS`, followed by an optional
/// fractional-second part (`%.f`) and a `+hh:mm`-style UTC offset (`%:z`).
///
/// Within this file it is the format handed to
/// `super::temporal::utf8view_to_naive_timestamp` by [`utf8view_to_naive_timestamp`].
// NOTE(review): the specifiers look like chrono's strftime syntax — confirm in `super::temporal`.
14pub(super) const RFC3339: &str = "%Y-%m-%dT%H:%M:%S%.f%:z";
15
16/// Cast [`BinaryViewArray`] to [`DictionaryArray`], also known as packing.
17/// # Errors
18/// This function errors if the maximum key is smaller than the number of distinct elements
19/// in the array.
20pub(super) fn binview_to_dictionary<K: DictionaryKey>(
21    from: &BinaryViewArray,
22) -> PolarsResult<DictionaryArray<K>> {
23    let mut array = MutableDictionaryArray::<K, MutableBinaryViewArray<[u8]>>::new();
24    array.reserve(from.len());
25    array.try_extend(from.iter())?;
26
27    Ok(array.into())
28}
29
30pub(super) fn utf8view_to_dictionary<K: DictionaryKey>(
31    from: &Utf8ViewArray,
32) -> PolarsResult<DictionaryArray<K>> {
33    let mut array = MutableDictionaryArray::<K, MutableBinaryViewArray<str>>::new();
34    array.reserve(from.len());
35    array.try_extend(from.iter())?;
36
37    Ok(array.into())
38}
39
40pub(super) fn view_to_binary<O: Offset>(array: &BinaryViewArray) -> BinaryArray<O> {
41    let len: usize = Array::len(array);
42    let mut mutable = MutableBinaryValuesArray::<O>::with_capacities(len, array.total_bytes_len());
43    for slice in array.values_iter() {
44        mutable.push(slice)
45    }
46    let out: BinaryArray<O> = mutable.into();
47    out.with_validity(array.validity().cloned())
48}
49
50pub fn utf8view_to_utf8<O: Offset>(array: &Utf8ViewArray) -> Utf8Array<O> {
51    let array = array.to_binview();
52    let out = view_to_binary::<O>(&array);
53
54    let dtype = Utf8Array::<O>::default_dtype();
55    unsafe {
56        Utf8Array::new_unchecked(
57            dtype,
58            out.offsets().clone(),
59            out.values().clone(),
60            out.validity().cloned(),
61        )
62    }
63}
64/// Casts a [`BinaryArray`] to a [`PrimitiveArray`], making any uncastable value a Null.
65pub(super) fn binview_to_primitive<T>(
66    from: &BinaryViewArray,
67    to: &ArrowDataType,
68) -> PrimitiveArray<T>
69where
70    T: NativeType + Parse,
71{
72    let iter = from.iter().map(|x| x.and_then::<T, _>(|x| T::parse(x)));
73
74    PrimitiveArray::<T>::from_trusted_len_iter(iter).to(to.clone())
75}
76
77pub(super) fn binview_to_primitive_dyn<T>(
78    from: &dyn Array,
79    to: &ArrowDataType,
80    options: CastOptionsImpl,
81) -> PolarsResult<Box<dyn Array>>
82where
83    T: NativeType + Parse,
84{
85    let from = from.as_any().downcast_ref().unwrap();
86    if options.partial {
87        unimplemented!()
88    } else {
89        Ok(Box::new(binview_to_primitive::<T>(from, to)))
90    }
91}
92
/// Parses each value of a [`BinaryViewArray`] into a 128-bit decimal.
///
/// Null slots stay null, and values rejected by `deserialize_decimal` become null. The
/// output data type is `Decimal(precision, scale)`, where a missing precision defaults to 38.
// NOTE(review): `precision` and `scale` are narrowed with `as u8`, which silently truncates
// values above 255 — confirm callers never pass such values.
#[cfg(feature = "dtype-decimal")]
pub fn binview_to_decimal(
    array: &BinaryViewArray,
    precision: Option<usize>,
    scale: usize,
) -> PrimitiveArray<i128> {
    let precision = precision.map(|p| p as u8);
    let narrow_scale = scale as u8;

    let parsed = array.iter().map(|slot| {
        slot.and_then(|bytes| deserialize_decimal(bytes, precision, narrow_scale))
    });

    let dtype = ArrowDataType::Decimal(precision.unwrap_or(38).into(), scale);
    PrimitiveArray::<i128>::from_trusted_len_iter(parsed).to(dtype)
}
110
111pub(super) fn utf8view_to_naive_timestamp_dyn(
112    from: &dyn Array,
113    time_unit: TimeUnit,
114) -> PolarsResult<Box<dyn Array>> {
115    let from = from.as_any().downcast_ref().unwrap();
116    Ok(Box::new(utf8view_to_naive_timestamp(from, time_unit)))
117}
118
119/// [`super::temporal::utf8view_to_timestamp`] applied for RFC3339 formatting
120pub fn utf8view_to_naive_timestamp(
121    from: &Utf8ViewArray,
122    time_unit: TimeUnit,
123) -> PrimitiveArray<i64> {
124    super::temporal::utf8view_to_naive_timestamp(from, RFC3339, time_unit)
125}
126
127pub(super) fn utf8view_to_date32(from: &Utf8ViewArray) -> PrimitiveArray<i32> {
128    let iter = from.iter().map(|x| {
129        x.and_then(|x| {
130            x.parse::<chrono::NaiveDate>()
131                .ok()
132                .map(|x| x.num_days_from_ce() - EPOCH_DAYS_FROM_CE)
133        })
134    });
135    PrimitiveArray::<i32>::from_trusted_len_iter(iter).to(ArrowDataType::Date32)
136}
137
138pub(super) fn utf8view_to_date32_dyn(from: &dyn Array) -> PolarsResult<Box<dyn Array>> {
139    let from = from.as_any().downcast_ref().unwrap();
140    Ok(Box::new(utf8view_to_date32(from)))
141}