polars_compute/cast/
primitive_to.rs

1use std::hash::Hash;
2
3use arrow::array::*;
4use arrow::bitmap::Bitmap;
5use arrow::compute::arity::unary;
6use arrow::datatypes::{ArrowDataType, TimeUnit};
7use arrow::offset::{Offset, Offsets};
8use arrow::types::{f16, NativeType};
9use num_traits::{AsPrimitive, Float, ToPrimitive};
10use polars_error::PolarsResult;
11use polars_utils::pl_str::PlSmallStr;
12
13use super::temporal::*;
14use super::CastOptionsImpl;
15
/// Text serialization of a primitive value into a byte buffer.
///
/// The implementations in this file append the formatted value to the end of
/// the buffer and return the number of bytes they appended.
pub trait SerPrimitive {
    /// Writes the textual form of `val` into `f`, returning a byte count that
    /// callers use to advance their running offset.
    fn write(f: &mut Vec<u8>, val: Self) -> usize
    where
        Self: Sized;
}
21
22macro_rules! impl_ser_primitive {
23    ($ptype:ident) => {
24        impl SerPrimitive for $ptype {
25            fn write(f: &mut Vec<u8>, val: Self) -> usize
26            where
27                Self: Sized,
28            {
29                let mut buffer = itoa::Buffer::new();
30                let value = buffer.format(val);
31                f.extend_from_slice(value.as_bytes());
32                value.len()
33            }
34        }
35    };
36}
37
38impl_ser_primitive!(i8);
39impl_ser_primitive!(i16);
40impl_ser_primitive!(i32);
41impl_ser_primitive!(i64);
42impl_ser_primitive!(u8);
43impl_ser_primitive!(u16);
44impl_ser_primitive!(u32);
45impl_ser_primitive!(u64);
46
47impl SerPrimitive for f32 {
48    fn write(f: &mut Vec<u8>, val: Self) -> usize
49    where
50        Self: Sized,
51    {
52        let mut buffer = ryu::Buffer::new();
53        let value = buffer.format(val);
54        f.extend_from_slice(value.as_bytes());
55        value.len()
56    }
57}
58
59impl SerPrimitive for f64 {
60    fn write(f: &mut Vec<u8>, val: Self) -> usize
61    where
62        Self: Sized,
63    {
64        let mut buffer = ryu::Buffer::new();
65        let value = buffer.format(val);
66        f.extend_from_slice(value.as_bytes());
67        value.len()
68    }
69}
70
71fn primitive_to_values_and_offsets<T: NativeType + SerPrimitive, O: Offset>(
72    from: &PrimitiveArray<T>,
73) -> (Vec<u8>, Offsets<O>) {
74    let mut values: Vec<u8> = Vec::with_capacity(from.len());
75    let mut offsets: Vec<O> = Vec::with_capacity(from.len() + 1);
76    offsets.push(O::default());
77
78    let mut offset: usize = 0;
79
80    unsafe {
81        for &x in from.values().iter() {
82            let len = T::write(&mut values, x);
83
84            offset += len;
85            offsets.push(O::from_as_usize(offset));
86        }
87        values.set_len(offset);
88        values.shrink_to_fit();
89        // SAFETY: offsets _are_ monotonically increasing
90        let offsets = Offsets::new_unchecked(offsets);
91
92        (values, offsets)
93    }
94}
95
96/// Returns a [`BooleanArray`] where every element is different from zero.
97/// Validity is preserved.
98pub fn primitive_to_boolean<T: NativeType>(
99    from: &PrimitiveArray<T>,
100    to_type: ArrowDataType,
101) -> BooleanArray {
102    let iter = from.values().iter().map(|v| *v != T::default());
103    let values = Bitmap::from_trusted_len_iter(iter);
104
105    BooleanArray::new(to_type, values, from.validity().cloned())
106}
107
108pub(super) fn primitive_to_boolean_dyn<T>(
109    from: &dyn Array,
110    to_type: ArrowDataType,
111) -> PolarsResult<Box<dyn Array>>
112where
113    T: NativeType,
114{
115    let from = from.as_any().downcast_ref().unwrap();
116    Ok(Box::new(primitive_to_boolean::<T>(from, to_type)))
117}
118
119/// Returns a [`Utf8Array`] where every element is the utf8 representation of the number.
120pub(super) fn primitive_to_utf8<T: NativeType + SerPrimitive, O: Offset>(
121    from: &PrimitiveArray<T>,
122) -> Utf8Array<O> {
123    let (values, offsets) = primitive_to_values_and_offsets(from);
124    unsafe {
125        Utf8Array::<O>::new_unchecked(
126            Utf8Array::<O>::default_dtype(),
127            offsets.into(),
128            values.into(),
129            from.validity().cloned(),
130        )
131    }
132}
133
134pub(super) fn primitive_to_utf8_dyn<T, O>(from: &dyn Array) -> PolarsResult<Box<dyn Array>>
135where
136    O: Offset,
137    T: NativeType + SerPrimitive,
138{
139    let from = from.as_any().downcast_ref().unwrap();
140    Ok(Box::new(primitive_to_utf8::<T, O>(from)))
141}
142
143pub(super) fn primitive_to_primitive_dyn<I, O>(
144    from: &dyn Array,
145    to_type: &ArrowDataType,
146    options: CastOptionsImpl,
147) -> PolarsResult<Box<dyn Array>>
148where
149    I: NativeType + num_traits::NumCast + num_traits::AsPrimitive<O>,
150    O: NativeType + num_traits::NumCast,
151{
152    let from = from.as_any().downcast_ref::<PrimitiveArray<I>>().unwrap();
153    if options.wrapped {
154        Ok(Box::new(primitive_as_primitive::<I, O>(from, to_type)))
155    } else {
156        Ok(Box::new(primitive_to_primitive::<I, O>(from, to_type)))
157    }
158}
159
160/// Cast [`PrimitiveArray`] to a [`PrimitiveArray`] of another physical type via numeric conversion.
161pub fn primitive_to_primitive<I, O>(
162    from: &PrimitiveArray<I>,
163    to_type: &ArrowDataType,
164) -> PrimitiveArray<O>
165where
166    I: NativeType + num_traits::NumCast,
167    O: NativeType + num_traits::NumCast,
168{
169    let iter = from
170        .iter()
171        .map(|v| v.and_then(|x| num_traits::cast::cast::<I, O>(*x)));
172    PrimitiveArray::<O>::from_trusted_len_iter(iter).to(to_type.clone())
173}
174
175/// Returns a [`PrimitiveArray<i128>`] with the cast values. Values are `None` on overflow
176pub fn integer_to_decimal<T: NativeType + AsPrimitive<i128>>(
177    from: &PrimitiveArray<T>,
178    to_precision: usize,
179    to_scale: usize,
180) -> PrimitiveArray<i128> {
181    let multiplier = 10_i128.pow(to_scale as u32);
182
183    let min_for_precision = 9_i128
184        .saturating_pow(1 + to_precision as u32)
185        .saturating_neg();
186    let max_for_precision = 9_i128.saturating_pow(1 + to_precision as u32);
187
188    let values = from.iter().map(|x| {
189        x.and_then(|x| {
190            x.as_().checked_mul(multiplier).and_then(|x| {
191                if x > max_for_precision || x < min_for_precision {
192                    None
193                } else {
194                    Some(x)
195                }
196            })
197        })
198    });
199
200    PrimitiveArray::<i128>::from_trusted_len_iter(values)
201        .to(ArrowDataType::Decimal(to_precision, to_scale))
202}
203
204pub(super) fn integer_to_decimal_dyn<T>(
205    from: &dyn Array,
206    precision: usize,
207    scale: usize,
208) -> PolarsResult<Box<dyn Array>>
209where
210    T: NativeType + AsPrimitive<i128>,
211{
212    let from = from.as_any().downcast_ref().unwrap();
213    Ok(Box::new(integer_to_decimal::<T>(from, precision, scale)))
214}
215
216/// Returns a [`PrimitiveArray<i128>`] with the cast values. Values are `None` on overflow
217pub fn float_to_decimal<T>(
218    from: &PrimitiveArray<T>,
219    to_precision: usize,
220    to_scale: usize,
221) -> PrimitiveArray<i128>
222where
223    T: NativeType + Float + ToPrimitive,
224    f64: AsPrimitive<T>,
225{
226    // 1.2 => 12
227    let multiplier: T = (10_f64).powi(to_scale as i32).as_();
228
229    let min_for_precision = 9_i128
230        .saturating_pow(1 + to_precision as u32)
231        .saturating_neg();
232    let max_for_precision = 9_i128.saturating_pow(1 + to_precision as u32);
233
234    let values = from.iter().map(|x| {
235        x.and_then(|x| {
236            let x = (*x * multiplier).to_i128()?;
237            if x > max_for_precision || x < min_for_precision {
238                None
239            } else {
240                Some(x)
241            }
242        })
243    });
244
245    PrimitiveArray::<i128>::from_trusted_len_iter(values)
246        .to(ArrowDataType::Decimal(to_precision, to_scale))
247}
248
249pub(super) fn float_to_decimal_dyn<T>(
250    from: &dyn Array,
251    precision: usize,
252    scale: usize,
253) -> PolarsResult<Box<dyn Array>>
254where
255    T: NativeType + Float + ToPrimitive,
256    f64: AsPrimitive<T>,
257{
258    let from = from.as_any().downcast_ref().unwrap();
259    Ok(Box::new(float_to_decimal::<T>(from, precision, scale)))
260}
261
262/// Cast [`PrimitiveArray`] as a [`PrimitiveArray`]
263/// Same as `number as to_number_type` in rust
264pub fn primitive_as_primitive<I, O>(
265    from: &PrimitiveArray<I>,
266    to_type: &ArrowDataType,
267) -> PrimitiveArray<O>
268where
269    I: NativeType + num_traits::AsPrimitive<O>,
270    O: NativeType,
271{
272    unary(from, num_traits::AsPrimitive::<O>::as_, to_type.clone())
273}
274
275/// Cast [`PrimitiveArray`] to a [`PrimitiveArray`] of the same physical type.
276/// This is O(1).
277pub fn primitive_to_same_primitive<T>(
278    from: &PrimitiveArray<T>,
279    to_type: &ArrowDataType,
280) -> PrimitiveArray<T>
281where
282    T: NativeType,
283{
284    PrimitiveArray::<T>::new(
285        to_type.clone(),
286        from.values().clone(),
287        from.validity().cloned(),
288    )
289}
290
291/// Cast [`PrimitiveArray`] to a [`PrimitiveArray`] of the same physical type.
292/// This is O(1).
293pub(super) fn primitive_to_same_primitive_dyn<T>(
294    from: &dyn Array,
295    to_type: &ArrowDataType,
296) -> PolarsResult<Box<dyn Array>>
297where
298    T: NativeType,
299{
300    let from = from.as_any().downcast_ref().unwrap();
301    Ok(Box::new(primitive_to_same_primitive::<T>(from, to_type)))
302}
303
304pub(super) fn primitive_to_dictionary_dyn<T: NativeType + Eq + Hash, K: DictionaryKey>(
305    from: &dyn Array,
306) -> PolarsResult<Box<dyn Array>> {
307    let from = from.as_any().downcast_ref().unwrap();
308    primitive_to_dictionary::<T, K>(from).map(|x| Box::new(x) as Box<dyn Array>)
309}
310
311/// Cast [`PrimitiveArray`] to [`DictionaryArray`]. Also known as packing.
312/// # Errors
313/// This function errors if the maximum key is smaller than the number of distinct elements
314/// in the array.
315pub fn primitive_to_dictionary<T: NativeType + Eq + Hash, K: DictionaryKey>(
316    from: &PrimitiveArray<T>,
317) -> PolarsResult<DictionaryArray<K>> {
318    let iter = from.iter().map(|x| x.copied());
319    let mut array = MutableDictionaryArray::<K, _>::try_empty(MutablePrimitiveArray::<T>::from(
320        from.dtype().clone(),
321    ))?;
322    array.reserve(from.len());
323    array.try_extend(iter)?;
324
325    Ok(array.into())
326}
327
328/// # Safety
329///
330/// `dtype` should be valid for primitive.
331pub unsafe fn primitive_map_is_valid<T: NativeType>(
332    from: &PrimitiveArray<T>,
333    f: impl Fn(T) -> bool,
334    dtype: ArrowDataType,
335) -> PrimitiveArray<T> {
336    let values = from.values().clone();
337
338    let validity: Bitmap = values.iter().map(|&v| f(v)).collect();
339
340    let validity = if validity.unset_bits() > 0 {
341        let new_validity = match from.validity() {
342            None => validity,
343            Some(v) => v & &validity,
344        };
345
346        Some(new_validity)
347    } else {
348        from.validity().cloned()
349    };
350
351    // SAFETY:
352    // - Validity did not change length
353    // - dtype should be valid
354    unsafe { PrimitiveArray::new_unchecked(dtype, values, validity) }
355}
356
357/// Conversion of `Int32` to `Time32(TimeUnit::Second)`
358pub fn int32_to_time32s(from: &PrimitiveArray<i32>) -> PrimitiveArray<i32> {
359    // SAFETY: Time32(TimeUnit::Second) is valid for Int32
360    unsafe {
361        primitive_map_is_valid(
362            from,
363            |v| (0..SECONDS_IN_DAY as i32).contains(&v),
364            ArrowDataType::Time32(TimeUnit::Second),
365        )
366    }
367}
368
369/// Conversion of `Int32` to `Time32(TimeUnit::Millisecond)`
370pub fn int32_to_time32ms(from: &PrimitiveArray<i32>) -> PrimitiveArray<i32> {
371    // SAFETY: Time32(TimeUnit::Millisecond) is valid for Int32
372    unsafe {
373        primitive_map_is_valid(
374            from,
375            |v| (0..MILLISECONDS_IN_DAY as i32).contains(&v),
376            ArrowDataType::Time32(TimeUnit::Millisecond),
377        )
378    }
379}
380
381/// Conversion of `Int64` to `Time32(TimeUnit::Microsecond)`
382pub fn int64_to_time64us(from: &PrimitiveArray<i64>) -> PrimitiveArray<i64> {
383    // SAFETY: Time64(TimeUnit::Microsecond) is valid for Int64
384    unsafe {
385        primitive_map_is_valid(
386            from,
387            |v| (0..MICROSECONDS_IN_DAY).contains(&v),
388            ArrowDataType::Time32(TimeUnit::Microsecond),
389        )
390    }
391}
392
393/// Conversion of `Int64` to `Time32(TimeUnit::Nanosecond)`
394pub fn int64_to_time64ns(from: &PrimitiveArray<i64>) -> PrimitiveArray<i64> {
395    // SAFETY: Time64(TimeUnit::Nanosecond) is valid for Int64
396    unsafe {
397        primitive_map_is_valid(
398            from,
399            |v| (0..NANOSECONDS_IN_DAY).contains(&v),
400            ArrowDataType::Time64(TimeUnit::Nanosecond),
401        )
402    }
403}
404
405/// Conversion of dates
406pub fn date32_to_date64(from: &PrimitiveArray<i32>) -> PrimitiveArray<i64> {
407    unary(
408        from,
409        |x| x as i64 * MILLISECONDS_IN_DAY,
410        ArrowDataType::Date64,
411    )
412}
413
414/// Conversion of dates
415pub fn date64_to_date32(from: &PrimitiveArray<i64>) -> PrimitiveArray<i32> {
416    unary(
417        from,
418        |x| (x / MILLISECONDS_IN_DAY) as i32,
419        ArrowDataType::Date32,
420    )
421}
422
423/// Conversion of times
424pub fn time32s_to_time32ms(from: &PrimitiveArray<i32>) -> PrimitiveArray<i32> {
425    unary(
426        from,
427        |x| x * 1000,
428        ArrowDataType::Time32(TimeUnit::Millisecond),
429    )
430}
431
432/// Conversion of times
433pub fn time32ms_to_time32s(from: &PrimitiveArray<i32>) -> PrimitiveArray<i32> {
434    unary(from, |x| x / 1000, ArrowDataType::Time32(TimeUnit::Second))
435}
436
437/// Conversion of times
438pub fn time64us_to_time64ns(from: &PrimitiveArray<i64>) -> PrimitiveArray<i64> {
439    unary(
440        from,
441        |x| x * 1000,
442        ArrowDataType::Time64(TimeUnit::Nanosecond),
443    )
444}
445
446/// Conversion of times
447pub fn time64ns_to_time64us(from: &PrimitiveArray<i64>) -> PrimitiveArray<i64> {
448    unary(
449        from,
450        |x| x / 1000,
451        ArrowDataType::Time64(TimeUnit::Microsecond),
452    )
453}
454
455/// Conversion of timestamp
456pub fn timestamp_to_date64(from: &PrimitiveArray<i64>, from_unit: TimeUnit) -> PrimitiveArray<i64> {
457    let from_size = time_unit_multiple(from_unit);
458    let to_size = MILLISECONDS;
459    let to_type = ArrowDataType::Date64;
460
461    // Scale time_array by (to_size / from_size) using a
462    // single integer operation, but need to avoid integer
463    // math rounding down to zero
464
465    match to_size.cmp(&from_size) {
466        std::cmp::Ordering::Less => unary(from, |x| (x / (from_size / to_size)), to_type),
467        std::cmp::Ordering::Equal => primitive_to_same_primitive(from, &to_type),
468        std::cmp::Ordering::Greater => unary(from, |x| (x * (to_size / from_size)), to_type),
469    }
470}
471
472/// Conversion of timestamp
473pub fn timestamp_to_date32(from: &PrimitiveArray<i64>, from_unit: TimeUnit) -> PrimitiveArray<i32> {
474    let from_size = time_unit_multiple(from_unit) * SECONDS_IN_DAY;
475    unary(from, |x| (x / from_size) as i32, ArrowDataType::Date32)
476}
477
478/// Conversion of time
479pub fn time32_to_time64(
480    from: &PrimitiveArray<i32>,
481    from_unit: TimeUnit,
482    to_unit: TimeUnit,
483) -> PrimitiveArray<i64> {
484    let from_size = time_unit_multiple(from_unit);
485    let to_size = time_unit_multiple(to_unit);
486    let divisor = to_size / from_size;
487    unary(
488        from,
489        |x| (x as i64 * divisor),
490        ArrowDataType::Time64(to_unit),
491    )
492}
493
494/// Conversion of time
495pub fn time64_to_time32(
496    from: &PrimitiveArray<i64>,
497    from_unit: TimeUnit,
498    to_unit: TimeUnit,
499) -> PrimitiveArray<i32> {
500    let from_size = time_unit_multiple(from_unit);
501    let to_size = time_unit_multiple(to_unit);
502    let divisor = from_size / to_size;
503    unary(
504        from,
505        |x| (x / divisor) as i32,
506        ArrowDataType::Time32(to_unit),
507    )
508}
509
510/// Conversion of timestamp
511pub fn timestamp_to_timestamp(
512    from: &PrimitiveArray<i64>,
513    from_unit: TimeUnit,
514    to_unit: TimeUnit,
515    tz: &Option<PlSmallStr>,
516) -> PrimitiveArray<i64> {
517    let from_size = time_unit_multiple(from_unit);
518    let to_size = time_unit_multiple(to_unit);
519    let to_type = ArrowDataType::Timestamp(to_unit, tz.clone());
520    // we either divide or multiply, depending on size of each unit
521    if from_size >= to_size {
522        unary(from, |x| (x / (from_size / to_size)), to_type)
523    } else {
524        unary(from, |x| (x * (to_size / from_size)), to_type)
525    }
526}
527
528/// Casts f16 into f32
529pub fn f16_to_f32(from: &PrimitiveArray<f16>) -> PrimitiveArray<f32> {
530    unary(from, |x| x.to_f32(), ArrowDataType::Float32)
531}
532
533/// Returns a [`Utf8Array`] where every element is the utf8 representation of the number.
534pub(super) fn primitive_to_binview<T: NativeType + SerPrimitive>(
535    from: &PrimitiveArray<T>,
536) -> BinaryViewArray {
537    let mut mutable = MutableBinaryViewArray::with_capacity(from.len());
538
539    let mut scratch = vec![];
540    for &x in from.values().iter() {
541        unsafe { scratch.set_len(0) };
542        T::write(&mut scratch, x);
543        mutable.push_value_ignore_validity(&scratch)
544    }
545
546    mutable.freeze().with_validity(from.validity().cloned())
547}
548
549pub(super) fn primitive_to_binview_dyn<T>(from: &dyn Array) -> BinaryViewArray
550where
551    T: NativeType + SerPrimitive,
552{
553    let from = from.as_any().downcast_ref().unwrap();
554    primitive_to_binview::<T>(from)
555}