polars_compute/cast/
binary_to.rs1use std::sync::Arc;
2
3use arrow::array::*;
4use arrow::buffer::Buffer;
5use arrow::datatypes::ArrowDataType;
6use arrow::offset::{Offset, Offsets};
7use arrow::types::NativeType;
8use polars_error::PolarsResult;
9
10use super::CastOptionsImpl;
11
12pub(super) trait Parse {
13 fn parse(val: &[u8]) -> Option<Self>
14 where
15 Self: Sized;
16}
17
18macro_rules! impl_parse {
19 ($primitive_type:ident) => {
20 impl Parse for $primitive_type {
21 fn parse(val: &[u8]) -> Option<Self> {
22 atoi_simd::parse_skipped(val).ok()
23 }
24 }
25 };
26}
27impl_parse!(i8);
28impl_parse!(i16);
29impl_parse!(i32);
30impl_parse!(i64);
31
32impl_parse!(u8);
33impl_parse!(u16);
34impl_parse!(u32);
35impl_parse!(u64);
36
37#[cfg(feature = "dtype-i128")]
38impl_parse!(i128);
39
40impl Parse for f32 {
41 fn parse(val: &[u8]) -> Option<Self>
42 where
43 Self: Sized,
44 {
45 fast_float2::parse(val).ok()
46 }
47}
48impl Parse for f64 {
49 fn parse(val: &[u8]) -> Option<Self>
50 where
51 Self: Sized,
52 {
53 fast_float2::parse(val).ok()
54 }
55}
56
57pub fn binary_to_large_binary(
59 from: &BinaryArray<i32>,
60 to_dtype: ArrowDataType,
61) -> BinaryArray<i64> {
62 let values = from.values().clone();
63 BinaryArray::<i64>::new(
64 to_dtype,
65 from.offsets().into(),
66 values,
67 from.validity().cloned(),
68 )
69}
70
71pub fn binary_large_to_binary(
73 from: &BinaryArray<i64>,
74 to_dtype: ArrowDataType,
75) -> PolarsResult<BinaryArray<i32>> {
76 let values = from.values().clone();
77 let offsets = from.offsets().try_into()?;
78 Ok(BinaryArray::<i32>::new(
79 to_dtype,
80 offsets,
81 values,
82 from.validity().cloned(),
83 ))
84}
85
86pub fn binary_to_utf8<O: Offset>(
88 from: &BinaryArray<O>,
89 to_dtype: ArrowDataType,
90) -> PolarsResult<Utf8Array<O>> {
91 Utf8Array::<O>::try_new(
92 to_dtype,
93 from.offsets().clone(),
94 from.values().clone(),
95 from.validity().cloned(),
96 )
97}
98
99pub(super) fn binary_to_primitive<O: Offset, T>(
101 from: &BinaryArray<O>,
102 to: &ArrowDataType,
103) -> PrimitiveArray<T>
104where
105 T: NativeType + Parse,
106{
107 let iter = from.iter().map(|x| x.and_then::<T, _>(|x| T::parse(x)));
108
109 PrimitiveArray::<T>::from_trusted_len_iter(iter).to(to.clone())
110}
111
112pub(super) fn binary_to_primitive_dyn<O: Offset, T>(
113 from: &dyn Array,
114 to: &ArrowDataType,
115 options: CastOptionsImpl,
116) -> PolarsResult<Box<dyn Array>>
117where
118 T: NativeType + Parse,
119{
120 let from = from.as_any().downcast_ref().unwrap();
121 if options.partial {
122 unimplemented!()
123 } else {
124 Ok(Box::new(binary_to_primitive::<O, T>(from, to)))
125 }
126}
127
128pub fn binary_to_dictionary<O: Offset, K: DictionaryKey>(
133 from: &BinaryArray<O>,
134) -> PolarsResult<DictionaryArray<K>> {
135 let mut array = MutableDictionaryArray::<K, MutableBinaryArray<O>>::new();
136 array.reserve(from.len());
137 array.try_extend(from.iter())?;
138
139 Ok(array.into())
140}
141
142pub(super) fn binary_to_dictionary_dyn<O: Offset, K: DictionaryKey>(
143 from: &dyn Array,
144) -> PolarsResult<Box<dyn Array>> {
145 let values = from.as_any().downcast_ref().unwrap();
146 binary_to_dictionary::<O, K>(values).map(|x| Box::new(x) as Box<dyn Array>)
147}
148
149fn fixed_size_to_offsets<O: Offset>(values_len: usize, fixed_size: usize) -> Offsets<O> {
150 let offsets = (0..(values_len + 1))
151 .step_by(fixed_size)
152 .map(|v| O::from_as_usize(v))
153 .collect();
154 unsafe { Offsets::new_unchecked(offsets) }
158}
159
160pub fn fixed_size_binary_binary<O: Offset>(
162 from: &FixedSizeBinaryArray,
163 to_dtype: ArrowDataType,
164) -> BinaryArray<O> {
165 let values = from.values().clone();
166 let offsets = fixed_size_to_offsets(values.len(), from.size());
167 BinaryArray::<O>::new(to_dtype, offsets.into(), values, from.validity().cloned())
168}
169
170pub fn fixed_size_binary_to_binview(from: &FixedSizeBinaryArray) -> BinaryViewArray {
171 let datatype = <[u8] as ViewType>::DATA_TYPE;
172
173 if from.size() <= View::MAX_INLINE_SIZE as usize {
175 let mut views = Vec::new();
185 View::extend_with_inlinable_strided(
186 &mut views,
187 from.values().as_slice(),
188 from.size() as u8,
189 );
190 let views = Buffer::from(views);
191 return BinaryViewArray::try_new(datatype, views, Arc::default(), from.validity().cloned())
192 .unwrap();
193 }
194
195 const MAX_BYTES_PER_BUFFER: usize = u32::MAX as usize;
196
197 let size = from.size();
198 let num_bytes = from.len() * size;
199 let num_buffers = num_bytes.div_ceil(MAX_BYTES_PER_BUFFER);
200 assert!(num_buffers < u32::MAX as usize);
201
202 let num_elements_per_buffer = MAX_BYTES_PER_BUFFER / size;
203 let split_point = num_elements_per_buffer * size;
205
206 let mut buffer = from.values().clone();
208 let mut buffers = Vec::with_capacity(num_buffers);
209
210 if let Some(num_buffers) = num_buffers.checked_sub(1) {
211 for _ in 0..num_buffers {
212 let slice;
213 (slice, buffer) = buffer.split_at(split_point);
214 buffers.push(slice);
215 }
216 buffers.push(buffer);
217 }
218
219 let mut iter = from.values_iter();
220 let iter = iter.by_ref();
221 let mut views = Vec::with_capacity(from.len());
222 for buffer_idx in 0..num_buffers {
223 views.extend(
224 iter.take(num_elements_per_buffer)
225 .enumerate()
226 .map(|(i, slice)| {
227 unsafe {
229 View::new_noninline_unchecked(slice, buffer_idx as u32, (i * size) as u32)
230 }
231 }),
232 );
233 }
234 let views = views.into();
235
236 BinaryViewArray::try_new(datatype, views, buffers.into(), from.validity().cloned()).unwrap()
237}
238
239pub fn binary_to_list<O: Offset>(from: &BinaryArray<O>, to_dtype: ArrowDataType) -> ListArray<O> {
241 let values = from.values().clone();
242 let values = PrimitiveArray::new(ArrowDataType::UInt8, values, None);
243 ListArray::<O>::new(
244 to_dtype,
245 from.offsets().clone(),
246 values.boxed(),
247 from.validity().cloned(),
248 )
249}