polars_arrow/array/fixed_size_binary/
mod.rs

1use super::{Array, Splitable};
2use crate::bitmap::Bitmap;
3use crate::buffer::Buffer;
4use crate::datatypes::ArrowDataType;
5
6mod ffi;
7pub(super) mod fmt;
8mod iterator;
9mod mutable;
10pub use mutable::*;
11use polars_error::{polars_bail, polars_ensure, PolarsResult};
12
13/// The Arrow's equivalent to an immutable `Vec<Option<[u8; size]>>`.
14/// Cloning and slicing this struct is `O(1)`.
15#[derive(Clone)]
16pub struct FixedSizeBinaryArray {
17    size: usize, // this is redundant with `dtype`, but useful to not have to deconstruct the dtype.
18    dtype: ArrowDataType,
19    values: Buffer<u8>,
20    validity: Option<Bitmap>,
21}
22
23impl FixedSizeBinaryArray {
24    /// Creates a new [`FixedSizeBinaryArray`].
25    ///
26    /// # Errors
27    /// This function returns an error iff:
28    /// * The `dtype`'s physical type is not [`crate::datatypes::PhysicalType::FixedSizeBinary`]
29    /// * The length of `values` is not a multiple of `size` in `dtype`
30    /// * the validity's length is not equal to `values.len() / size`.
31    pub fn try_new(
32        dtype: ArrowDataType,
33        values: Buffer<u8>,
34        validity: Option<Bitmap>,
35    ) -> PolarsResult<Self> {
36        let size = Self::maybe_get_size(&dtype)?;
37
38        if values.len() % size != 0 {
39            polars_bail!(ComputeError:
40                "values (of len {}) must be a multiple of size ({}) in FixedSizeBinaryArray.",
41                values.len(),
42                size
43            )
44        }
45        let len = values.len() / size;
46
47        if validity
48            .as_ref()
49            .is_some_and(|validity| validity.len() != len)
50        {
51            polars_bail!(ComputeError: "validity mask length must be equal to the number of values divided by size")
52        }
53
54        Ok(Self {
55            size,
56            dtype,
57            values,
58            validity,
59        })
60    }
61
62    /// Creates a new [`FixedSizeBinaryArray`].
63    /// # Panics
64    /// This function panics iff:
65    /// * The `dtype`'s physical type is not [`crate::datatypes::PhysicalType::FixedSizeBinary`]
66    /// * The length of `values` is not a multiple of `size` in `dtype`
67    /// * the validity's length is not equal to `values.len() / size`.
68    pub fn new(dtype: ArrowDataType, values: Buffer<u8>, validity: Option<Bitmap>) -> Self {
69        Self::try_new(dtype, values, validity).unwrap()
70    }
71
72    /// Returns a new empty [`FixedSizeBinaryArray`].
73    pub fn new_empty(dtype: ArrowDataType) -> Self {
74        Self::new(dtype, Buffer::new(), None)
75    }
76
77    /// Returns a new null [`FixedSizeBinaryArray`].
78    pub fn new_null(dtype: ArrowDataType, length: usize) -> Self {
79        let size = Self::maybe_get_size(&dtype).unwrap();
80        Self::new(
81            dtype,
82            vec![0u8; length * size].into(),
83            Some(Bitmap::new_zeroed(length)),
84        )
85    }
86}
87
88// must use
89impl FixedSizeBinaryArray {
90    /// Slices this [`FixedSizeBinaryArray`].
91    /// # Implementation
92    /// This operation is `O(1)`.
93    /// # Panics
94    /// panics iff `offset + length > self.len()`
95    pub fn slice(&mut self, offset: usize, length: usize) {
96        assert!(
97            offset + length <= self.len(),
98            "the offset of the new Buffer cannot exceed the existing length"
99        );
100        unsafe { self.slice_unchecked(offset, length) }
101    }
102
103    /// Slices this [`FixedSizeBinaryArray`].
104    /// # Implementation
105    /// This operation is `O(1)`.
106    ///
107    /// # Safety
108    /// The caller must ensure that `offset + length <= self.len()`.
109    pub unsafe fn slice_unchecked(&mut self, offset: usize, length: usize) {
110        self.validity = self
111            .validity
112            .take()
113            .map(|bitmap| bitmap.sliced_unchecked(offset, length))
114            .filter(|bitmap| bitmap.unset_bits() > 0);
115        self.values
116            .slice_unchecked(offset * self.size, length * self.size);
117    }
118
119    impl_sliced!();
120    impl_mut_validity!();
121    impl_into_array!();
122}
123
124// accessors
125impl FixedSizeBinaryArray {
126    /// Returns the length of this array
127    #[inline]
128    pub fn len(&self) -> usize {
129        self.values.len() / self.size
130    }
131
132    /// The optional validity.
133    #[inline]
134    pub fn validity(&self) -> Option<&Bitmap> {
135        self.validity.as_ref()
136    }
137
138    /// Returns the values allocated on this [`FixedSizeBinaryArray`].
139    pub fn values(&self) -> &Buffer<u8> {
140        &self.values
141    }
142
143    /// Returns value at position `i`.
144    /// # Panic
145    /// Panics iff `i >= self.len()`.
146    #[inline]
147    pub fn value(&self, i: usize) -> &[u8] {
148        assert!(i < self.len());
149        unsafe { self.value_unchecked(i) }
150    }
151
152    /// Returns the element at index `i` as &str
153    ///
154    /// # Safety
155    /// Assumes that the `i < self.len`.
156    #[inline]
157    pub unsafe fn value_unchecked(&self, i: usize) -> &[u8] {
158        // soundness: invariant of the function.
159        self.values
160            .get_unchecked(i * self.size..(i + 1) * self.size)
161    }
162
163    /// Returns the element at index `i` or `None` if it is null
164    /// # Panics
165    /// iff `i >= self.len()`
166    #[inline]
167    pub fn get(&self, i: usize) -> Option<&[u8]> {
168        if !self.is_null(i) {
169            // soundness: Array::is_null panics if i >= self.len
170            unsafe { Some(self.value_unchecked(i)) }
171        } else {
172            None
173        }
174    }
175
176    /// Returns a new [`FixedSizeBinaryArray`] with a different logical type.
177    /// This is `O(1)`.
178    /// # Panics
179    /// Panics iff the dtype is not supported for the physical type.
180    #[inline]
181    pub fn to(self, dtype: ArrowDataType) -> Self {
182        match (dtype.to_logical_type(), self.dtype().to_logical_type()) {
183            (ArrowDataType::FixedSizeBinary(size_a), ArrowDataType::FixedSizeBinary(size_b))
184                if size_a == size_b => {},
185            _ => panic!("Wrong DataType"),
186        }
187
188        Self {
189            size: self.size,
190            dtype,
191            values: self.values,
192            validity: self.validity,
193        }
194    }
195
196    /// Returns the size
197    pub fn size(&self) -> usize {
198        self.size
199    }
200}
201
202impl FixedSizeBinaryArray {
203    pub(crate) fn maybe_get_size(dtype: &ArrowDataType) -> PolarsResult<usize> {
204        match dtype.to_logical_type() {
205            ArrowDataType::FixedSizeBinary(size) => {
206                polars_ensure!(*size != 0, ComputeError: "FixedSizeBinaryArray expects a positive size");
207                Ok(*size)
208            },
209            other => {
210                polars_bail!(ComputeError: "FixedSizeBinaryArray expects DataType::FixedSizeBinary. found {other:?}")
211            },
212        }
213    }
214
215    pub fn get_size(dtype: &ArrowDataType) -> usize {
216        Self::maybe_get_size(dtype).unwrap()
217    }
218}
219
220impl Array for FixedSizeBinaryArray {
221    impl_common_array!();
222
223    fn validity(&self) -> Option<&Bitmap> {
224        self.validity.as_ref()
225    }
226
227    #[inline]
228    fn with_validity(&self, validity: Option<Bitmap>) -> Box<dyn Array> {
229        Box::new(self.clone().with_validity(validity))
230    }
231}
232
233impl Splitable for FixedSizeBinaryArray {
234    fn check_bound(&self, offset: usize) -> bool {
235        offset < self.len()
236    }
237
238    unsafe fn _split_at_unchecked(&self, offset: usize) -> (Self, Self) {
239        let (lhs_values, rhs_values) = unsafe { self.values.split_at_unchecked(offset) };
240        let (lhs_validity, rhs_validity) = unsafe { self.validity.split_at_unchecked(offset) };
241
242        let size = self.size;
243
244        (
245            Self {
246                dtype: self.dtype.clone(),
247                values: lhs_values,
248                validity: lhs_validity,
249                size,
250            },
251            Self {
252                dtype: self.dtype.clone(),
253                values: rhs_values,
254                validity: rhs_validity,
255                size,
256            },
257        )
258    }
259}
260
261impl FixedSizeBinaryArray {
262    /// Creates a [`FixedSizeBinaryArray`] from an fallible iterator of optional `[u8]`.
263    pub fn try_from_iter<P: AsRef<[u8]>, I: IntoIterator<Item = Option<P>>>(
264        iter: I,
265        size: usize,
266    ) -> PolarsResult<Self> {
267        MutableFixedSizeBinaryArray::try_from_iter(iter, size).map(|x| x.into())
268    }
269
270    /// Creates a [`FixedSizeBinaryArray`] from an iterator of optional `[u8]`.
271    pub fn from_iter<P: AsRef<[u8]>, I: IntoIterator<Item = Option<P>>>(
272        iter: I,
273        size: usize,
274    ) -> Self {
275        MutableFixedSizeBinaryArray::try_from_iter(iter, size)
276            .unwrap()
277            .into()
278    }
279
280    /// Creates a [`FixedSizeBinaryArray`] from a slice of arrays of bytes
281    pub fn from_slice<const N: usize, P: AsRef<[[u8; N]]>>(a: P) -> Self {
282        let values = a.as_ref().iter().flatten().copied().collect::<Vec<_>>();
283        Self::new(ArrowDataType::FixedSizeBinary(N), values.into(), None)
284    }
285
286    /// Creates a new [`FixedSizeBinaryArray`] from a slice of optional `[u8]`.
287    // Note: this can't be `impl From` because Rust does not allow double `AsRef` on it.
288    pub fn from<const N: usize, P: AsRef<[Option<[u8; N]>]>>(slice: P) -> Self {
289        MutableFixedSizeBinaryArray::from(slice).into()
290    }
291}