polars_arrow/array/primitive/
mutable.rs

1use std::sync::Arc;
2
3use polars_error::PolarsResult;
4
5use super::{check, PrimitiveArray};
6use crate::array::physical_binary::extend_validity;
7use crate::array::{Array, MutableArray, TryExtend, TryExtendFromSelf, TryPush};
8use crate::bitmap::{Bitmap, MutableBitmap};
9use crate::datatypes::ArrowDataType;
10use crate::trusted_len::TrustedLen;
11use crate::types::NativeType;
12
13/// The Arrow's equivalent to `Vec<Option<T>>` where `T` is byte-size (e.g. `i32`).
14/// Converting a [`MutablePrimitiveArray`] into a [`PrimitiveArray`] is `O(1)`.
15#[derive(Debug, Clone)]
16pub struct MutablePrimitiveArray<T: NativeType> {
17    dtype: ArrowDataType,
18    values: Vec<T>,
19    validity: Option<MutableBitmap>,
20}
21
22impl<T: NativeType> From<MutablePrimitiveArray<T>> for PrimitiveArray<T> {
23    fn from(other: MutablePrimitiveArray<T>) -> Self {
24        let validity = other.validity.and_then(|x| {
25            let bitmap: Bitmap = x.into();
26            if bitmap.unset_bits() == 0 {
27                None
28            } else {
29                Some(bitmap)
30            }
31        });
32
33        PrimitiveArray::<T>::new(other.dtype, other.values.into(), validity)
34    }
35}
36
37impl<T: NativeType, P: AsRef<[Option<T>]>> From<P> for MutablePrimitiveArray<T> {
38    fn from(slice: P) -> Self {
39        Self::from_trusted_len_iter(slice.as_ref().iter().map(|x| x.as_ref()))
40    }
41}
42
43impl<T: NativeType> MutablePrimitiveArray<T> {
44    /// Creates a new empty [`MutablePrimitiveArray`].
45    pub fn new() -> Self {
46        Self::with_capacity(0)
47    }
48
49    /// Creates a new [`MutablePrimitiveArray`] with a capacity.
50    pub fn with_capacity(capacity: usize) -> Self {
51        Self::with_capacity_from(capacity, T::PRIMITIVE.into())
52    }
53
54    /// The canonical method to create a [`MutablePrimitiveArray`] out of its internal components.
55    /// # Implementation
56    /// This function is `O(1)`.
57    ///
58    /// # Errors
59    /// This function errors iff:
60    /// * The validity is not `None` and its length is different from `values`'s length
61    /// * The `dtype`'s [`crate::datatypes::PhysicalType`] is not equal to [`crate::datatypes::PhysicalType::Primitive(T::PRIMITIVE)`]
62    pub fn try_new(
63        dtype: ArrowDataType,
64        values: Vec<T>,
65        validity: Option<MutableBitmap>,
66    ) -> PolarsResult<Self> {
67        check(&dtype, &values, validity.as_ref().map(|x| x.len()))?;
68        Ok(Self {
69            dtype,
70            values,
71            validity,
72        })
73    }
74
75    /// Extract the low-end APIs from the [`MutablePrimitiveArray`].
76    pub fn into_inner(self) -> (ArrowDataType, Vec<T>, Option<MutableBitmap>) {
77        (self.dtype, self.values, self.validity)
78    }
79
80    /// Applies a function `f` to the values of this array, cloning the values
81    /// iff they are being shared with others
82    ///
83    /// This is an API to use clone-on-write
84    /// # Implementation
85    /// This function is `O(f)` if the data is not being shared, and `O(N) + O(f)`
86    /// if it is being shared (since it results in a `O(N)` memcopy).
87    /// # Panics
88    /// This function panics iff `f` panics
89    pub fn apply_values<F: Fn(&mut [T])>(&mut self, f: F) {
90        f(&mut self.values);
91    }
92}
93
94impl<T: NativeType> Default for MutablePrimitiveArray<T> {
95    fn default() -> Self {
96        Self::new()
97    }
98}
99
100impl<T: NativeType> From<ArrowDataType> for MutablePrimitiveArray<T> {
101    fn from(dtype: ArrowDataType) -> Self {
102        assert!(dtype.to_physical_type().eq_primitive(T::PRIMITIVE));
103        Self {
104            dtype,
105            values: Vec::<T>::new(),
106            validity: None,
107        }
108    }
109}
110
111impl<T: NativeType> MutablePrimitiveArray<T> {
112    /// Creates a new [`MutablePrimitiveArray`] from a capacity and [`ArrowDataType`].
113    pub fn with_capacity_from(capacity: usize, dtype: ArrowDataType) -> Self {
114        assert!(dtype.to_physical_type().eq_primitive(T::PRIMITIVE));
115        Self {
116            dtype,
117            values: Vec::<T>::with_capacity(capacity),
118            validity: None,
119        }
120    }
121
122    /// Reserves `additional` entries.
123    pub fn reserve(&mut self, additional: usize) {
124        self.values.reserve(additional);
125        if let Some(x) = self.validity.as_mut() {
126            x.reserve(additional)
127        }
128    }
129
130    #[inline]
131    pub fn push_value(&mut self, value: T) {
132        self.values.push(value);
133        if let Some(validity) = &mut self.validity {
134            validity.push(true)
135        }
136    }
137
138    /// Adds a new value to the array.
139    #[inline]
140    pub fn push(&mut self, value: Option<T>) {
141        match value {
142            Some(value) => self.push_value(value),
143            None => {
144                self.values.push(T::default());
145                match &mut self.validity {
146                    Some(validity) => validity.push(false),
147                    None => {
148                        self.init_validity();
149                    },
150                }
151            },
152        }
153    }
154
155    /// Pop a value from the array.
156    /// Note if the values is empty, this method will return None.
157    pub fn pop(&mut self) -> Option<T> {
158        let value = self.values.pop()?;
159        self.validity
160            .as_mut()
161            .map(|x| x.pop()?.then(|| value))
162            .unwrap_or_else(|| Some(value))
163    }
164
165    /// Extends the [`MutablePrimitiveArray`] with a constant
166    #[inline]
167    pub fn extend_constant(&mut self, additional: usize, value: Option<T>) {
168        if let Some(value) = value {
169            self.values.resize(self.values.len() + additional, value);
170            if let Some(validity) = &mut self.validity {
171                validity.extend_constant(additional, true)
172            }
173        } else {
174            if let Some(validity) = &mut self.validity {
175                validity.extend_constant(additional, false)
176            } else {
177                let mut validity = MutableBitmap::with_capacity(self.values.capacity());
178                validity.extend_constant(self.len(), true);
179                validity.extend_constant(additional, false);
180                self.validity = Some(validity)
181            }
182            self.values
183                .resize(self.values.len() + additional, T::default());
184        }
185    }
186
187    /// Extends the [`MutablePrimitiveArray`] from an iterator of trusted len.
188    #[inline]
189    pub fn extend_trusted_len<P, I>(&mut self, iterator: I)
190    where
191        P: std::borrow::Borrow<T>,
192        I: TrustedLen<Item = Option<P>>,
193    {
194        unsafe { self.extend_trusted_len_unchecked(iterator) }
195    }
196
197    /// Extends the [`MutablePrimitiveArray`] from an iterator of trusted len.
198    ///
199    /// # Safety
200    /// The iterator must be trusted len.
201    #[inline]
202    pub unsafe fn extend_trusted_len_unchecked<P, I>(&mut self, iterator: I)
203    where
204        P: std::borrow::Borrow<T>,
205        I: Iterator<Item = Option<P>>,
206    {
207        if let Some(validity) = self.validity.as_mut() {
208            extend_trusted_len_unzip(iterator, validity, &mut self.values)
209        } else {
210            let mut validity = MutableBitmap::new();
211            validity.extend_constant(self.len(), true);
212            extend_trusted_len_unzip(iterator, &mut validity, &mut self.values);
213            self.validity = Some(validity);
214        }
215    }
216    /// Extends the [`MutablePrimitiveArray`] from an iterator of values of trusted len.
217    /// This differs from `extend_trusted_len` which accepts in iterator of optional values.
218    #[inline]
219    pub fn extend_trusted_len_values<I>(&mut self, iterator: I)
220    where
221        I: TrustedLen<Item = T>,
222    {
223        unsafe { self.extend_values(iterator) }
224    }
225
226    /// Extends the [`MutablePrimitiveArray`] from an iterator of values of trusted len.
227    /// This differs from `extend_trusted_len_unchecked` which accepts in iterator of optional values.
228    ///
229    /// # Safety
230    /// The iterator must be trusted len.
231    #[inline]
232    pub fn extend_values<I>(&mut self, iterator: I)
233    where
234        I: Iterator<Item = T>,
235    {
236        self.values.extend(iterator);
237        self.update_all_valid();
238    }
239
240    #[inline]
241    /// Extends the [`MutablePrimitiveArray`] from a slice
242    pub fn extend_from_slice(&mut self, items: &[T]) {
243        self.values.extend_from_slice(items);
244        self.update_all_valid();
245    }
246
247    fn update_all_valid(&mut self) {
248        // get len before mutable borrow
249        let len = self.len();
250        if let Some(validity) = self.validity.as_mut() {
251            validity.extend_constant(len - validity.len(), true);
252        }
253    }
254
255    fn init_validity(&mut self) {
256        let mut validity = MutableBitmap::with_capacity(self.values.capacity());
257        validity.extend_constant(self.len(), true);
258        validity.set(self.len() - 1, false);
259        self.validity = Some(validity)
260    }
261
262    /// Changes the arrays' [`ArrowDataType`], returning a new [`MutablePrimitiveArray`].
263    /// Use to change the logical type without changing the corresponding physical Type.
264    /// # Implementation
265    /// This operation is `O(1)`.
266    #[inline]
267    pub fn to(self, dtype: ArrowDataType) -> Self {
268        Self::try_new(dtype, self.values, self.validity).unwrap()
269    }
270
271    /// Converts itself into an [`Array`].
272    pub fn into_arc(self) -> Arc<dyn Array> {
273        let a: PrimitiveArray<T> = self.into();
274        Arc::new(a)
275    }
276
277    /// Shrinks the capacity of the [`MutablePrimitiveArray`] to fit its current length.
278    pub fn shrink_to_fit(&mut self) {
279        self.values.shrink_to_fit();
280        if let Some(validity) = &mut self.validity {
281            validity.shrink_to_fit()
282        }
283    }
284
285    /// Returns the capacity of this [`MutablePrimitiveArray`].
286    pub fn capacity(&self) -> usize {
287        self.values.capacity()
288    }
289
290    pub fn freeze(self) -> PrimitiveArray<T> {
291        self.into()
292    }
293
294    /// Clears the array, removing all values.
295    ///
296    /// Note that this method has no effect on the allocated capacity
297    /// of the array.
298    pub fn clear(&mut self) {
299        self.values.clear();
300        self.validity = None;
301    }
302
303    /// Apply a function that temporarily freezes this `MutableArray` into a `PrimitiveArray`.
304    pub fn with_freeze<K, F: FnOnce(&PrimitiveArray<T>) -> K>(&mut self, f: F) -> K {
305        let mutable = std::mem::take(self);
306        let arr = mutable.freeze();
307        let out = f(&arr);
308        *self = arr.into_mut().right().unwrap();
309        out
310    }
311}
312
313/// Accessors
314impl<T: NativeType> MutablePrimitiveArray<T> {
315    /// Returns its values.
316    pub fn values(&self) -> &Vec<T> {
317        &self.values
318    }
319
320    /// Returns a mutable slice of values.
321    pub fn values_mut_slice(&mut self) -> &mut [T] {
322        self.values.as_mut_slice()
323    }
324}
325
326/// Setters
327impl<T: NativeType> MutablePrimitiveArray<T> {
328    /// Sets position `index` to `value`.
329    /// Note that if it is the first time a null appears in this array,
330    /// this initializes the validity bitmap (`O(N)`).
331    /// # Panic
332    /// Panics iff `index >= self.len()`.
333    pub fn set(&mut self, index: usize, value: Option<T>) {
334        assert!(index < self.len());
335        // SAFETY:
336        // we just checked bounds
337        unsafe { self.set_unchecked(index, value) }
338    }
339
340    /// Sets position `index` to `value`.
341    /// Note that if it is the first time a null appears in this array,
342    /// this initializes the validity bitmap (`O(N)`).
343    ///
344    /// # Safety
345    /// Caller must ensure `index < self.len()`
346    pub unsafe fn set_unchecked(&mut self, index: usize, value: Option<T>) {
347        *self.values.get_unchecked_mut(index) = value.unwrap_or_default();
348
349        if value.is_none() && self.validity.is_none() {
350            // When the validity is None, all elements so far are valid. When one of the elements is set of null,
351            // the validity must be initialized.
352            let mut validity = MutableBitmap::new();
353            validity.extend_constant(self.len(), true);
354            self.validity = Some(validity);
355        }
356        if let Some(x) = self.validity.as_mut() {
357            x.set_unchecked(index, value.is_some())
358        }
359    }
360
361    /// Sets the validity.
362    /// # Panic
363    /// Panics iff the validity's len is not equal to the existing values' length.
364    pub fn set_validity(&mut self, validity: Option<MutableBitmap>) {
365        if let Some(validity) = &validity {
366            assert_eq!(self.values.len(), validity.len())
367        }
368        self.validity = validity;
369    }
370
371    /// Sets values.
372    /// # Panic
373    /// Panics iff the values' length is not equal to the existing values' len.
374    pub fn set_values(&mut self, values: Vec<T>) {
375        assert_eq!(values.len(), self.values.len());
376        self.values = values;
377    }
378}
379
380impl<T: NativeType> Extend<Option<T>> for MutablePrimitiveArray<T> {
381    fn extend<I: IntoIterator<Item = Option<T>>>(&mut self, iter: I) {
382        let iter = iter.into_iter();
383        self.reserve(iter.size_hint().0);
384        iter.for_each(|x| self.push(x))
385    }
386}
387
388impl<T: NativeType> TryExtend<Option<T>> for MutablePrimitiveArray<T> {
389    /// This is infallible and is implemented for consistency with all other types
390    fn try_extend<I: IntoIterator<Item = Option<T>>>(&mut self, iter: I) -> PolarsResult<()> {
391        self.extend(iter);
392        Ok(())
393    }
394}
395
396impl<T: NativeType> TryPush<Option<T>> for MutablePrimitiveArray<T> {
397    /// This is infalible and is implemented for consistency with all other types
398    #[inline]
399    fn try_push(&mut self, item: Option<T>) -> PolarsResult<()> {
400        self.push(item);
401        Ok(())
402    }
403}
404
405impl<T: NativeType> MutableArray for MutablePrimitiveArray<T> {
406    fn len(&self) -> usize {
407        self.values.len()
408    }
409
410    fn validity(&self) -> Option<&MutableBitmap> {
411        self.validity.as_ref()
412    }
413
414    fn as_box(&mut self) -> Box<dyn Array> {
415        PrimitiveArray::new(
416            self.dtype.clone(),
417            std::mem::take(&mut self.values).into(),
418            std::mem::take(&mut self.validity).map(|x| x.into()),
419        )
420        .boxed()
421    }
422
423    fn as_arc(&mut self) -> Arc<dyn Array> {
424        PrimitiveArray::new(
425            self.dtype.clone(),
426            std::mem::take(&mut self.values).into(),
427            std::mem::take(&mut self.validity).map(|x| x.into()),
428        )
429        .arced()
430    }
431
432    fn dtype(&self) -> &ArrowDataType {
433        &self.dtype
434    }
435
436    fn as_any(&self) -> &dyn std::any::Any {
437        self
438    }
439
440    fn as_mut_any(&mut self) -> &mut dyn std::any::Any {
441        self
442    }
443
444    fn push_null(&mut self) {
445        self.push(None)
446    }
447
448    fn reserve(&mut self, additional: usize) {
449        self.reserve(additional)
450    }
451
452    fn shrink_to_fit(&mut self) {
453        self.shrink_to_fit()
454    }
455}
456
457impl<T: NativeType> MutablePrimitiveArray<T> {
458    /// Creates a [`MutablePrimitiveArray`] from a slice of values.
459    pub fn from_slice<P: AsRef<[T]>>(slice: P) -> Self {
460        Self::from_trusted_len_values_iter(slice.as_ref().iter().copied())
461    }
462
463    /// Creates a [`MutablePrimitiveArray`] from an iterator of trusted length.
464    ///
465    /// # Safety
466    /// The iterator must be [`TrustedLen`](https://doc.rust-lang.org/std/iter/trait.TrustedLen.html).
467    /// I.e. `size_hint().1` correctly reports its length.
468    #[inline]
469    pub unsafe fn from_trusted_len_iter_unchecked<I, P>(iterator: I) -> Self
470    where
471        P: std::borrow::Borrow<T>,
472        I: Iterator<Item = Option<P>>,
473    {
474        let (validity, values) = trusted_len_unzip(iterator);
475
476        Self {
477            dtype: T::PRIMITIVE.into(),
478            values,
479            validity,
480        }
481    }
482
483    /// Creates a [`MutablePrimitiveArray`] from a [`TrustedLen`].
484    #[inline]
485    pub fn from_trusted_len_iter<I, P>(iterator: I) -> Self
486    where
487        P: std::borrow::Borrow<T>,
488        I: TrustedLen<Item = Option<P>>,
489    {
490        unsafe { Self::from_trusted_len_iter_unchecked(iterator) }
491    }
492
493    /// Creates a [`MutablePrimitiveArray`] from an fallible iterator of trusted length.
494    ///
495    /// # Safety
496    /// The iterator must be [`TrustedLen`](https://doc.rust-lang.org/std/iter/trait.TrustedLen.html).
497    /// I.e. that `size_hint().1` correctly reports its length.
498    #[inline]
499    pub unsafe fn try_from_trusted_len_iter_unchecked<E, I, P>(
500        iter: I,
501    ) -> std::result::Result<Self, E>
502    where
503        P: std::borrow::Borrow<T>,
504        I: IntoIterator<Item = std::result::Result<Option<P>, E>>,
505    {
506        let iterator = iter.into_iter();
507
508        let (validity, values) = try_trusted_len_unzip(iterator)?;
509
510        Ok(Self {
511            dtype: T::PRIMITIVE.into(),
512            values,
513            validity,
514        })
515    }
516
517    /// Creates a [`MutablePrimitiveArray`] from an fallible iterator of trusted length.
518    #[inline]
519    pub fn try_from_trusted_len_iter<E, I, P>(iterator: I) -> std::result::Result<Self, E>
520    where
521        P: std::borrow::Borrow<T>,
522        I: TrustedLen<Item = std::result::Result<Option<P>, E>>,
523    {
524        unsafe { Self::try_from_trusted_len_iter_unchecked(iterator) }
525    }
526
527    /// Creates a new [`MutablePrimitiveArray`] out an iterator over values
528    pub fn from_trusted_len_values_iter<I: TrustedLen<Item = T>>(iter: I) -> Self {
529        Self {
530            dtype: T::PRIMITIVE.into(),
531            values: iter.collect(),
532            validity: None,
533        }
534    }
535
536    /// Creates a (non-null) [`MutablePrimitiveArray`] from a vector of values.
537    /// This does not have memcopy and is the fastest way to create a [`PrimitiveArray`].
538    pub fn from_vec(values: Vec<T>) -> Self {
539        Self::try_new(T::PRIMITIVE.into(), values, None).unwrap()
540    }
541
542    /// Creates a new [`MutablePrimitiveArray`] from an iterator over values
543    ///
544    /// # Safety
545    /// The iterator must be [`TrustedLen`](https://doc.rust-lang.org/std/iter/trait.TrustedLen.html).
546    /// I.e. that `size_hint().1` correctly reports its length.
547    pub unsafe fn from_trusted_len_values_iter_unchecked<I: Iterator<Item = T>>(iter: I) -> Self {
548        Self {
549            dtype: T::PRIMITIVE.into(),
550            values: iter.collect(),
551            validity: None,
552        }
553    }
554}
555
556impl<T: NativeType, Ptr: std::borrow::Borrow<Option<T>>> FromIterator<Ptr>
557    for MutablePrimitiveArray<T>
558{
559    fn from_iter<I: IntoIterator<Item = Ptr>>(iter: I) -> Self {
560        let iter = iter.into_iter();
561        let (lower, _) = iter.size_hint();
562
563        let mut validity = MutableBitmap::with_capacity(lower);
564
565        let values: Vec<T> = iter
566            .map(|item| {
567                if let Some(a) = item.borrow() {
568                    validity.push(true);
569                    *a
570                } else {
571                    validity.push(false);
572                    T::default()
573                }
574            })
575            .collect();
576
577        let validity = Some(validity);
578
579        Self {
580            dtype: T::PRIMITIVE.into(),
581            values,
582            validity,
583        }
584    }
585}
586
587/// Extends a [`MutableBitmap`] and a [`Vec`] from an iterator of `Option`.
588/// The first buffer corresponds to a bitmap buffer, the second one
589/// corresponds to a values buffer.
590/// # Safety
591/// The caller must ensure that `iterator` is `TrustedLen`.
592#[inline]
593pub(crate) unsafe fn extend_trusted_len_unzip<I, P, T>(
594    iterator: I,
595    validity: &mut MutableBitmap,
596    buffer: &mut Vec<T>,
597) where
598    T: NativeType,
599    P: std::borrow::Borrow<T>,
600    I: Iterator<Item = Option<P>>,
601{
602    let (_, upper) = iterator.size_hint();
603    let additional = upper.expect("trusted_len_unzip requires an upper limit");
604
605    validity.reserve(additional);
606    let values = iterator.map(|item| {
607        if let Some(item) = item {
608            validity.push_unchecked(true);
609            *item.borrow()
610        } else {
611            validity.push_unchecked(false);
612            T::default()
613        }
614    });
615    buffer.extend(values);
616}
617
618/// Creates a [`MutableBitmap`] and a [`Vec`] from an iterator of `Option`.
619/// The first buffer corresponds to a bitmap buffer, the second one
620/// corresponds to a values buffer.
621/// # Safety
622/// The caller must ensure that `iterator` is `TrustedLen`.
623#[inline]
624pub(crate) unsafe fn trusted_len_unzip<I, P, T>(iterator: I) -> (Option<MutableBitmap>, Vec<T>)
625where
626    T: NativeType,
627    P: std::borrow::Borrow<T>,
628    I: Iterator<Item = Option<P>>,
629{
630    let mut validity = MutableBitmap::new();
631    let mut buffer = Vec::<T>::new();
632
633    extend_trusted_len_unzip(iterator, &mut validity, &mut buffer);
634
635    let validity = Some(validity);
636
637    (validity, buffer)
638}
639
640/// # Safety
641/// The caller must ensure that `iterator` is `TrustedLen`.
642#[inline]
643pub(crate) unsafe fn try_trusted_len_unzip<E, I, P, T>(
644    iterator: I,
645) -> std::result::Result<(Option<MutableBitmap>, Vec<T>), E>
646where
647    T: NativeType,
648    P: std::borrow::Borrow<T>,
649    I: Iterator<Item = std::result::Result<Option<P>, E>>,
650{
651    let (_, upper) = iterator.size_hint();
652    let len = upper.expect("trusted_len_unzip requires an upper limit");
653
654    let mut null = MutableBitmap::with_capacity(len);
655    let mut buffer = Vec::<T>::with_capacity(len);
656
657    let mut dst = buffer.as_mut_ptr();
658    for item in iterator {
659        let item = if let Some(item) = item? {
660            null.push(true);
661            *item.borrow()
662        } else {
663            null.push(false);
664            T::default()
665        };
666        std::ptr::write(dst, item);
667        dst = dst.add(1);
668    }
669    assert_eq!(
670        dst.offset_from(buffer.as_ptr()) as usize,
671        len,
672        "Trusted iterator length was not accurately reported"
673    );
674    buffer.set_len(len);
675    null.set_len(len);
676
677    let validity = Some(null);
678
679    Ok((validity, buffer))
680}
681
682impl<T: NativeType> PartialEq for MutablePrimitiveArray<T> {
683    fn eq(&self, other: &Self) -> bool {
684        self.iter().eq(other.iter())
685    }
686}
687
688impl<T: NativeType> TryExtendFromSelf for MutablePrimitiveArray<T> {
689    fn try_extend_from_self(&mut self, other: &Self) -> PolarsResult<()> {
690        extend_validity(self.len(), &mut self.validity, &other.validity);
691
692        let slice = other.values.as_slice();
693        self.values.extend_from_slice(slice);
694        Ok(())
695    }
696}