polars_arrow/array/binview/
mutable.rs

1use std::any::Any;
2use std::fmt::{Debug, Formatter};
3use std::ops::Deref;
4use std::sync::Arc;
5
6use hashbrown::hash_map::Entry;
7use polars_error::PolarsResult;
8use polars_utils::aliases::{InitHashMaps, PlHashMap};
9
10use crate::array::binview::iterator::MutableBinaryViewValueIter;
11use crate::array::binview::view::validate_utf8_only;
12use crate::array::binview::{BinaryViewArrayGeneric, ViewType};
13use crate::array::{Array, MutableArray, TryExtend, TryPush, View};
14use crate::bitmap::MutableBitmap;
15use crate::buffer::Buffer;
16use crate::datatypes::ArrowDataType;
17use crate::legacy::trusted_len::TrustedLenPush;
18use crate::trusted_len::TrustedLen;
19
/// Initial allocation size for a fresh payload buffer.
const DEFAULT_BLOCK_SIZE: usize = 8 * 1024;
/// Upper bound for the doubling growth of payload buffers (see
/// `push_value_ignore_validity`), so a single huge value does not force
/// all subsequent buffers to be huge.
const MAX_EXP_BLOCK_SIZE: usize = 16 * 1024 * 1024;
22
// Invariants:
//
// - Each view must point to a valid slice of a buffer
// - `total_buffer_len` must be equal to `completed_buffers.iter().map(Vec::len).sum()`
// - `total_bytes_len` must be equal to `views.iter().map(View::len).sum()`
pub struct MutableBinaryViewArray<T: ViewType + ?Sized> {
    /// One 16-byte view per element; payloads of 12 bytes or fewer are
    /// inlined in the view itself.
    pub(crate) views: Vec<View>,
    /// Frozen buffers holding the payloads of long (non-inline) values.
    pub(crate) completed_buffers: Vec<Buffer<u8>>,
    /// Growable buffer long values are appended to; flushed into
    /// `completed_buffers` by `finish_in_progress`.
    pub(crate) in_progress_buffer: Vec<u8>,
    /// Validity bitmap; `None` means no nulls have been pushed yet.
    pub(crate) validity: Option<MutableBitmap>,
    pub(crate) phantom: std::marker::PhantomData<T>,
    /// Total bytes length if we would concatenate them all.
    pub(crate) total_bytes_len: usize,
    /// Total bytes in the buffer (excluding remaining capacity)
    pub(crate) total_buffer_len: usize,
    /// Mapping from `Buffer::deref()` to index in `completed_buffers`.
    /// Used in `push_view()`.
    pub(crate) stolen_buffers: PlHashMap<usize, u32>,
}
42
impl<T: ViewType + ?Sized> Clone for MutableBinaryViewArray<T> {
    fn clone(&self) -> Self {
        Self {
            views: self.views.clone(),
            completed_buffers: self.completed_buffers.clone(),
            in_progress_buffer: self.in_progress_buffer.clone(),
            validity: self.validity.clone(),
            phantom: Default::default(),
            total_bytes_len: self.total_bytes_len,
            total_buffer_len: self.total_buffer_len,
            // Deliberately start with an empty dedup map. At worst the clone
            // re-adds an already-present buffer on a later `push_view`, which
            // wastes a little memory but keeps every view valid.
            stolen_buffers: PlHashMap::new(),
        }
    }
}
57
impl<T: ViewType + ?Sized> Debug for MutableBinaryViewArray<T> {
    fn fmt(&self, f: &mut Formatter<'_>) -> std::fmt::Result {
        // Only the logical type is rendered; element data is omitted.
        write!(f, "mutable-binview{:?}", T::DATA_TYPE)
    }
}
63
impl<T: ViewType + ?Sized> Default for MutableBinaryViewArray<T> {
    /// An empty array with no preallocated views.
    fn default() -> Self {
        Self::with_capacity(0)
    }
}
69
impl<T: ViewType + ?Sized> From<MutableBinaryViewArray<T>> for BinaryViewArrayGeneric<T> {
    fn from(mut value: MutableBinaryViewArray<T>) -> Self {
        // Flush pending bytes so every view points into `completed_buffers`.
        value.finish_in_progress();
        // SAFETY: the builder maintains the invariants documented at the top
        // of this file (views in bounds, length statistics consistent).
        unsafe {
            Self::new_unchecked(
                T::DATA_TYPE,
                value.views.into(),
                Arc::from(value.completed_buffers),
                value.validity.map(|b| b.into()),
                value.total_bytes_len,
                value.total_buffer_len,
            )
        }
    }
}
85
86impl<T: ViewType + ?Sized> MutableBinaryViewArray<T> {
87    pub fn new() -> Self {
88        Self::default()
89    }
90
91    pub fn with_capacity(capacity: usize) -> Self {
92        Self {
93            views: Vec::with_capacity(capacity),
94            completed_buffers: vec![],
95            in_progress_buffer: vec![],
96            validity: None,
97            phantom: Default::default(),
98            total_buffer_len: 0,
99            total_bytes_len: 0,
100            stolen_buffers: PlHashMap::new(),
101        }
102    }
103
    /// Get a mutable reference to the [`Vec`] of [`View`]s in this [`MutableBinaryViewArray`].
    ///
    /// # Safety
    ///
    /// This is safe as long as any mutation of the [`Vec`] does not break any invariants of the
    /// [`MutableBinaryViewArray`] before it is read again.
    #[inline]
    pub unsafe fn views_mut(&mut self) -> &mut Vec<View> {
        &mut self.views
    }
114
    /// Set the `total_bytes_len` of the [`MutableBinaryViewArray`]
    ///
    /// # Safety
    ///
    /// This should not break invariants of the [`MutableBinaryViewArray`]
    #[inline]
    pub unsafe fn set_total_bytes_len(&mut self, value: usize) {
        // In debug builds verify the caller's value against the documented
        // invariant: total_bytes_len == sum of all view lengths.
        #[cfg(debug_assertions)]
        {
            let actual_length: usize = self.views().iter().map(|v| v.length as usize).sum();
            assert_eq!(value, actual_length);
        }

        self.total_bytes_len = value;
    }
130
    /// Total length in bytes of all values as if they were concatenated.
    pub fn total_bytes_len(&self) -> usize {
        self.total_bytes_len
    }

    /// Total bytes held in payload buffers (excluding unused capacity).
    pub fn total_buffer_len(&self) -> usize {
        self.total_buffer_len
    }

    /// The raw views of this array, one per element.
    #[inline]
    pub fn views(&self) -> &[View] {
        &self.views
    }

    /// The payload buffers frozen so far (excludes the in-progress buffer).
    #[inline]
    pub fn completed_buffers(&self) -> &[Buffer<u8>] {
        &self.completed_buffers
    }

    /// Mutable access to the validity bitmap, if any nulls were pushed.
    pub fn validity(&mut self) -> Option<&mut MutableBitmap> {
        self.validity.as_mut()
    }
152
    /// Reserves capacity for `additional` more views.
    ///
    /// Note: only the view vector is grown here; payload buffer space is
    /// allocated lazily as values are pushed.
    pub fn reserve(&mut self, additional: usize) {
        self.views.reserve(additional);
    }
157
    /// Number of elements (views) in the array.
    #[inline]
    pub fn len(&self) -> usize {
        self.views.len()
    }

    /// Number of elements that can be held without reallocating the views.
    #[inline]
    pub fn capacity(&self) -> usize {
        self.views.capacity()
    }
167
168    fn init_validity(&mut self, unset_last: bool) {
169        let mut validity = MutableBitmap::with_capacity(self.views.capacity());
170        validity.extend_constant(self.len(), true);
171        if unset_last {
172            validity.set(self.len() - 1, false);
173        }
174        self.validity = Some(validity);
175    }
176
177    /// # Safety
178    /// - caller must allocate enough capacity
179    /// - caller must ensure the view and buffers match.
180    /// - The array must not have validity.
181    pub(crate) unsafe fn push_view_unchecked(&mut self, v: View, buffers: &[Buffer<u8>]) {
182        let len = v.length;
183        self.total_bytes_len += len as usize;
184        if len <= 12 {
185            debug_assert!(self.views.capacity() > self.views.len());
186            self.views.push_unchecked(v)
187        } else {
188            self.total_buffer_len += len as usize;
189            let data = buffers.get_unchecked(v.buffer_idx as usize);
190            let offset = v.offset as usize;
191            let bytes = data.get_unchecked(offset..offset + len as usize);
192            let t = T::from_bytes_unchecked(bytes);
193            self.push_value_ignore_validity(t)
194        }
195    }
196
    /// Push a view, stealing (zero-copy sharing) the backing buffer of long
    /// values and deduplicating buffers by their data pointer.
    ///
    /// # Safety
    /// - caller must allocate enough capacity
    /// - caller must ensure the view and buffers match.
    /// - The array must not have validity.
    /// - caller must not mix use this function with other push functions.
    pub unsafe fn push_view_unchecked_dedupe(&mut self, mut v: View, buffers: &[Buffer<u8>]) {
        let len = v.length;
        self.total_bytes_len += len as usize;
        if len <= 12 {
            // Payload is inlined in the view; no buffer bookkeeping needed.
            self.views.push_unchecked(v);
        } else {
            let buffer = buffers.get_unchecked(v.buffer_idx as usize);
            // Key on the buffer's data pointer: the same allocation is only
            // appended to `completed_buffers` once.
            let idx = match self.stolen_buffers.entry(buffer.deref().as_ptr() as usize) {
                Entry::Occupied(entry) => *entry.get(),
                Entry::Vacant(entry) => {
                    let idx = self.completed_buffers.len() as u32;
                    entry.insert(idx);
                    self.completed_buffers.push(buffer.clone());
                    // The whole stolen buffer counts toward the buffer length,
                    // not just the referenced slice.
                    self.total_buffer_len += buffer.len();
                    idx
                },
            };
            // Re-point the view at our own copy of the buffer.
            v.buffer_idx = idx;
            self.views.push_unchecked(v);
        }
    }
223
    /// Push a view whose payload lives in one of `buffers`, stealing
    /// (zero-copy sharing) that buffer and deduplicating by data pointer.
    pub fn push_view(&mut self, mut v: View, buffers: &[Buffer<u8>]) {
        let len = v.length;
        self.total_bytes_len += len as usize;
        if len <= 12 {
            // Payload is inlined in the view itself.
            self.views.push(v);
        } else {
            // Do no mix use of push_view and push_value_ignore_validity -
            // it causes fragmentation.
            self.finish_in_progress();

            let buffer = &buffers[v.buffer_idx as usize];
            let idx = match self.stolen_buffers.entry(buffer.deref().as_ptr() as usize) {
                Entry::Occupied(entry) => {
                    // Buffer already stolen; reuse its index.
                    let idx = *entry.get();
                    let target_buffer = &self.completed_buffers[idx as usize];
                    debug_assert_eq!(buffer, target_buffer);
                    idx
                },
                Entry::Vacant(entry) => {
                    // First time we see this allocation: share it.
                    let idx = self.completed_buffers.len() as u32;
                    entry.insert(idx);
                    self.completed_buffers.push(buffer.clone());
                    self.total_buffer_len += buffer.len();
                    idx
                },
            };
            // Re-point the view at our own copy of the buffer.
            v.buffer_idx = idx;
            self.views.push(v);
        }
        if let Some(validity) = &mut self.validity {
            validity.push(true)
        }
    }
257
    /// Push a value without touching the validity bitmap, copying long
    /// payloads into the in-progress buffer.
    #[inline]
    pub fn push_value_ignore_validity<V: AsRef<T>>(&mut self, value: V) {
        let bytes = value.as_ref().to_bytes();
        self.total_bytes_len += bytes.len();

        // A string can only be maximum of 4GB in size.
        let len = u32::try_from(bytes.len()).unwrap();

        let view = if len <= View::MAX_INLINE_SIZE {
            View::new_inline(bytes)
        } else {
            self.total_buffer_len += bytes.len();

            // We want to make sure that we never have to memcopy between buffers. So if the
            // current buffer is not large enough, create a new buffer that is large enough and try
            // to anticipate the larger size.
            let required_capacity = self.in_progress_buffer.len() + bytes.len();
            let does_not_fit_in_buffer = self.in_progress_buffer.capacity() < required_capacity;

            // We can only save offsets that are below u32::MAX
            let offset_will_not_fit = self.in_progress_buffer.len() > u32::MAX as usize;

            if does_not_fit_in_buffer || offset_will_not_fit {
                // Allocate a new buffer and flush the old buffer
                // Growth doubles but is clamped between DEFAULT_BLOCK_SIZE and
                // MAX_EXP_BLOCK_SIZE; a single oversized value still fits via max().
                let new_capacity = (self.in_progress_buffer.capacity() * 2)
                    .clamp(DEFAULT_BLOCK_SIZE, MAX_EXP_BLOCK_SIZE)
                    .max(bytes.len());
                let in_progress = Vec::with_capacity(new_capacity);
                let flushed = std::mem::replace(&mut self.in_progress_buffer, in_progress);
                if !flushed.is_empty() {
                    self.completed_buffers.push(flushed.into())
                }
            }

            let offset = self.in_progress_buffer.len() as u32;
            self.in_progress_buffer.extend_from_slice(bytes);

            // The in-progress buffer's future index is one past the completed
            // buffers; `value_from_view_unchecked` relies on this convention.
            let buffer_idx = u32::try_from(self.completed_buffers.len()).unwrap();

            View::new_from_bytes(bytes, buffer_idx, offset)
        };

        self.views.push(view);
    }
302
303    #[inline]
304    pub fn push_buffer(&mut self, buffer: Buffer<u8>) -> u32 {
305        self.finish_in_progress();
306
307        let buffer_idx = self.completed_buffers.len();
308        self.total_buffer_len += buffer.len();
309        self.completed_buffers.push(buffer);
310        buffer_idx as u32
311    }
312
313    #[inline]
314    pub fn push_value<V: AsRef<T>>(&mut self, value: V) {
315        if let Some(validity) = &mut self.validity {
316            validity.push(true)
317        }
318        self.push_value_ignore_validity(value)
319    }
320
321    #[inline]
322    pub fn push<V: AsRef<T>>(&mut self, value: Option<V>) {
323        if let Some(value) = value {
324            self.push_value(value)
325        } else {
326            self.push_null()
327        }
328    }
329
330    #[inline]
331    pub fn push_null(&mut self) {
332        self.views.push(View::default());
333        match &mut self.validity {
334            Some(validity) => validity.push(false),
335            None => self.init_validity(true),
336        }
337    }
338
339    pub fn extend_null(&mut self, additional: usize) {
340        if self.validity.is_none() && additional > 0 {
341            self.init_validity(false);
342        }
343        self.views
344            .extend(std::iter::repeat(View::default()).take(additional));
345        if let Some(validity) = &mut self.validity {
346            validity.extend_constant(additional, false);
347        }
348    }
349
    /// Append `additional` copies of `value` (`None` appends nulls).
    pub fn extend_constant<V: AsRef<T>>(&mut self, additional: usize, value: Option<V>) {
        if value.is_none() && self.validity.is_none() {
            self.init_validity(false);
        }

        if let Some(validity) = &mut self.validity {
            validity.extend_constant(additional, value.is_some())
        }

        // Push and pop to get the properly encoded value.
        // For long string this leads to a dictionary encoding,
        // as we push the string only once in the buffers
        let view_value = value
            .map(|v| {
                self.push_value_ignore_validity(v);
                // NOTE: `total_bytes_len` was bumped once by the push above;
                // the repeated views below account for the remaining copies
                // only through the views themselves, so stats rely on the
                // popped view being re-pushed `additional` times.
                self.views.pop().unwrap()
            })
            .unwrap_or_default();
        self.views
            .extend(std::iter::repeat(view_value).take(additional));
    }
371
372    impl_mutable_array_mut_validity!();
373
374    #[inline]
375    pub fn extend_values<I, P>(&mut self, iterator: I)
376    where
377        I: Iterator<Item = P>,
378        P: AsRef<T>,
379    {
380        self.reserve(iterator.size_hint().0);
381        for v in iterator {
382            self.push_value(v)
383        }
384    }
385
    /// Same as [`Self::extend_values`]; the trusted-length guarantee is not
    /// currently exploited beyond the size hint.
    #[inline]
    pub fn extend_trusted_len_values<I, P>(&mut self, iterator: I)
    where
        I: TrustedLen<Item = P>,
        P: AsRef<T>,
    {
        self.extend_values(iterator)
    }
394
395    #[inline]
396    pub fn extend<I, P>(&mut self, iterator: I)
397    where
398        I: Iterator<Item = Option<P>>,
399        P: AsRef<T>,
400    {
401        self.reserve(iterator.size_hint().0);
402        for p in iterator {
403            self.push(p)
404        }
405    }
406
    /// Same as [`Self::extend`]; the trusted-length guarantee is not
    /// currently exploited beyond the size hint.
    #[inline]
    pub fn extend_trusted_len<I, P>(&mut self, iterator: I)
    where
        I: TrustedLen<Item = Option<P>>,
        P: AsRef<T>,
    {
        self.extend(iterator)
    }
415
416    #[inline]
417    pub fn extend_views<I>(&mut self, iterator: I, buffers: &[Buffer<u8>])
418    where
419        I: Iterator<Item = Option<View>>,
420    {
421        self.reserve(iterator.size_hint().0);
422        for p in iterator {
423            match p {
424                Some(v) => self.push_view(v, buffers),
425                None => self.push_null(),
426            }
427        }
428    }
429
    /// Same as [`Self::extend_views`]; the trusted-length guarantee is not
    /// currently exploited beyond the size hint.
    #[inline]
    pub fn extend_views_trusted_len<I>(&mut self, iterator: I, buffers: &[Buffer<u8>])
    where
        I: TrustedLen<Item = Option<View>>,
    {
        self.extend_views(iterator, buffers);
    }
437
438    #[inline]
439    pub fn extend_non_null_views<I>(&mut self, iterator: I, buffers: &[Buffer<u8>])
440    where
441        I: Iterator<Item = View>,
442    {
443        self.reserve(iterator.size_hint().0);
444        for v in iterator {
445            self.push_view(v, buffers);
446        }
447    }
448
    /// Same as [`Self::extend_non_null_views`]; the trusted-length guarantee
    /// is not currently exploited beyond the size hint.
    #[inline]
    pub fn extend_non_null_views_trusted_len<I>(&mut self, iterator: I, buffers: &[Buffer<u8>])
    where
        I: TrustedLen<Item = View>,
    {
        self.extend_non_null_views(iterator, buffers);
    }
456
    /// Append views without bounds checks, copying long payloads.
    ///
    /// # Safety
    /// Same as `push_view_unchecked()`.
    #[inline]
    pub unsafe fn extend_non_null_views_unchecked<I>(&mut self, iterator: I, buffers: &[Buffer<u8>])
    where
        I: Iterator<Item = View>,
    {
        // The reserve makes `push_view_unchecked`'s capacity requirement hold
        // for at least the size-hint lower bound.
        self.reserve(iterator.size_hint().0);
        for v in iterator {
            self.push_view_unchecked(v, buffers);
        }
    }
469
    /// Append views without bounds checks, stealing and deduplicating the
    /// backing buffers of long values.
    ///
    /// # Safety
    /// Same as `push_view_unchecked()`.
    #[inline]
    pub unsafe fn extend_non_null_views_unchecked_dedupe<I>(
        &mut self,
        iterator: I,
        buffers: &[Buffer<u8>],
    ) where
        I: Iterator<Item = View>,
    {
        self.reserve(iterator.size_hint().0);
        for v in iterator {
            self.push_view_unchecked_dedupe(v, buffers);
        }
    }
485
486    #[inline]
487    pub fn from_iterator<I, P>(iterator: I) -> Self
488    where
489        I: Iterator<Item = Option<P>>,
490        P: AsRef<T>,
491    {
492        let mut mutable = Self::with_capacity(iterator.size_hint().0);
493        mutable.extend(iterator);
494        mutable
495    }
496
497    pub fn from_values_iter<I, P>(iterator: I) -> Self
498    where
499        I: Iterator<Item = P>,
500        P: AsRef<T>,
501    {
502        let mut mutable = Self::with_capacity(iterator.size_hint().0);
503        mutable.extend_values(iterator);
504        mutable
505    }
506
    /// Build an array from a slice of optional values.
    pub fn from<S: AsRef<T>, P: AsRef<[Option<S>]>>(slice: P) -> Self {
        Self::from_iterator(slice.as_ref().iter().map(|opt_v| opt_v.as_ref()))
    }
510
511    pub fn finish_in_progress(&mut self) -> bool {
512        if !self.in_progress_buffer.is_empty() {
513            self.completed_buffers
514                .push(std::mem::take(&mut self.in_progress_buffer).into());
515            true
516        } else {
517            false
518        }
519    }
520
    /// Convert into an immutable [`BinaryViewArrayGeneric`], flushing the
    /// in-progress buffer.
    #[inline]
    pub fn freeze(self) -> BinaryViewArrayGeneric<T> {
        self.into()
    }

    /// Like [`Self::freeze`], but overriding the resulting logical dtype
    /// (e.g. for logical types sharing this physical representation).
    #[inline]
    pub fn freeze_with_dtype(self, dtype: ArrowDataType) -> BinaryViewArrayGeneric<T> {
        let mut arr: BinaryViewArrayGeneric<T> = self.into();
        arr.dtype = dtype;
        arr
    }
532
    /// Decompose into the raw views and completed buffers.
    ///
    /// NOTE(review): this drops `validity` and any bytes still in
    /// `in_progress_buffer` — presumably callers invoke `finish_in_progress`
    /// first; confirm at call sites.
    pub fn take(self) -> (Vec<View>, Vec<Buffer<u8>>) {
        (self.views, self.completed_buffers)
    }
536
    /// Returns the element at index `i`, panicking if out of bounds.
    #[inline]
    pub fn value(&self, i: usize) -> &T {
        assert!(i < self.len());
        // SAFETY: bounds checked by the assert above.
        unsafe { self.value_unchecked(i) }
    }

    /// Returns the element at index `i`
    ///
    /// # Safety
    /// Assumes that the `i < self.len`.
    #[inline]
    pub unsafe fn value_unchecked(&self, i: usize) -> &T {
        self.value_from_view_unchecked(self.views.get_unchecked(i))
    }
551
    /// Returns the element indicated by the given view.
    ///
    /// # Safety
    /// Assumes the View belongs to this MutableBinaryViewArray.
    pub unsafe fn value_from_view_unchecked<'a>(&'a self, view: &'a View) -> &'a T {
        // View layout:
        // length: 4 bytes
        // prefix: 4 bytes
        // buffer_index: 4 bytes
        // offset: 4 bytes

        // Inlined layout:
        // length: 4 bytes
        // data: 12 bytes
        let len = view.length;
        let bytes = if len <= 12 {
            // Inline: read the payload directly out of the view struct,
            // skipping the 4-byte length field.
            let ptr = view as *const View as *const u8;
            std::slice::from_raw_parts(ptr.add(4), len as usize)
        } else {
            let buffer_idx = view.buffer_idx as usize;
            let offset = view.offset;

            // A buffer index one past the completed buffers refers to the
            // in-progress buffer (matching `push_value_ignore_validity`).
            let data = if buffer_idx == self.completed_buffers.len() {
                self.in_progress_buffer.as_slice()
            } else {
                self.completed_buffers.get_unchecked(buffer_idx)
            };

            let offset = offset as usize;
            data.get_unchecked(offset..offset + len as usize)
        };
        T::from_bytes_unchecked(bytes)
    }
585
    /// Returns an iterator of `&[u8]` over every element of this array, ignoring the validity
    pub fn values_iter(&self) -> MutableBinaryViewValueIter<T> {
        MutableBinaryViewValueIter::new(self)
    }
590}
591
592impl MutableBinaryViewArray<[u8]> {
    /// Validate that all views from `views_offset` onward reference valid
    /// UTF-8, checking payload buffers from `buffer_offset` onward.
    pub fn validate_utf8(&mut self, buffer_offset: usize, views_offset: usize) -> PolarsResult<()> {
        // Finish the in progress as it might be required for validation.
        let pushed = self.finish_in_progress();
        // views are correct
        unsafe {
            validate_utf8_only(
                &self.views[views_offset..],
                &self.completed_buffers[buffer_offset..],
                &self.completed_buffers,
            )?
        }
        // Restore in-progress buffer as we don't want to get too small buffers
        if pushed {
            if let Some(last) = self.completed_buffers.pop() {
                // NOTE(review): `into_mut().right().unwrap()` assumes the
                // buffer we just pushed is still uniquely owned — true here
                // because nothing cloned it since `finish_in_progress`.
                self.in_progress_buffer = last.into_mut().right().unwrap();
            }
        }
        Ok(())
    }
612
    /// Extend from a `buffer` and `length` of items given some statistics about the lengths.
    ///
    /// This will attempt to dispatch to several optimized implementations.
    ///
    /// # Safety
    ///
    /// This is safe if the statistics are correct.
    pub unsafe fn extend_from_lengths_with_stats(
        &mut self,
        buffer: &[u8],
        lengths_iterator: impl Clone + ExactSizeIterator<Item = usize>,
        min_length: usize,
        max_length: usize,
        sum_length: usize,
    ) {
        let num_items = lengths_iterator.len();

        if num_items == 0 {
            return;
        }

        // In debug builds, recompute the statistics and verify the caller's
        // claims (the `unsafe` contract).
        #[cfg(debug_assertions)]
        {
            let (min, max, sum) = lengths_iterator.clone().map(|v| (v, v, v)).fold(
                (usize::MAX, usize::MIN, 0usize),
                |(cmin, cmax, csum), (emin, emax, esum)| {
                    (cmin.min(emin), cmax.max(emax), csum + esum)
                },
            );

            assert_eq!(min, min_length);
            assert_eq!(max, max_length);
            assert_eq!(sum, sum_length);
        }

        assert!(sum_length <= buffer.len());

        let mut buffer_offset = 0;
        // Fast path 1: every value is non-inlinable, so bulk-copy the whole
        // payload once and emit non-inline views pointing into it.
        // NOTE(review): for `num_items == 1` the offset guard is skipped —
        // presumably a single value cannot overflow the u32 offset in
        // practice; confirm upstream intent.
        if min_length > View::MAX_INLINE_SIZE as usize
            && (num_items == 1 || sum_length + self.in_progress_buffer.len() <= u32::MAX as usize)
        {
            let buffer_idx = self.completed_buffers().len() as u32;
            let in_progress_buffer_offset = self.in_progress_buffer.len();

            self.total_bytes_len += sum_length;
            self.total_buffer_len += sum_length;

            self.in_progress_buffer
                .extend_from_slice(&buffer[..sum_length]);
            self.views.extend(lengths_iterator.map(|length| {
                // SAFETY: We asserted before that the sum of all lengths is smaller or equal to
                // the buffer length.
                let view_buffer =
                    unsafe { buffer.get_unchecked(buffer_offset..buffer_offset + length) };

                // SAFETY: We know that the minimum length > View::MAX_INLINE_SIZE. Therefore, this
                // length is > View::MAX_INLINE_SIZE.
                let view = unsafe {
                    View::new_noninline_unchecked(
                        view_buffer,
                        buffer_idx,
                        (buffer_offset + in_progress_buffer_offset) as u32,
                    )
                };
                buffer_offset += length;
                view
            }));
        // Fast path 2: every value is inlinable, so no buffer bytes are
        // written at all.
        } else if max_length <= View::MAX_INLINE_SIZE as usize {
            self.total_bytes_len += sum_length;

            // If the min and max are the same, we can dispatch to the optimized SIMD
            // implementation.
            if min_length == max_length {
                let length = min_length;
                if length == 0 {
                    self.views
                        .resize(self.views.len() + num_items, View::new_inline(&[]));
                } else {
                    View::extend_with_inlinable_strided(
                        &mut self.views,
                        &buffer[..length * num_items],
                        length as u8,
                    );
                }
            } else {
                self.views.extend(lengths_iterator.map(|length| {
                    // SAFETY: We asserted before that the sum of all lengths is smaller or equal
                    // to the buffer length.
                    let view_buffer =
                        unsafe { buffer.get_unchecked(buffer_offset..buffer_offset + length) };

                    // SAFETY: We know that each view has a length <= View::MAX_INLINE_SIZE because
                    // the maximum length is <= View::MAX_INLINE_SIZE
                    let view = unsafe { View::new_inline_unchecked(view_buffer) };

                    buffer_offset += length;

                    view
                }));
            }
        } else {
            // If all fails, just fall back to a base implementation.
            self.reserve(num_items);
            for length in lengths_iterator {
                let value = &buffer[buffer_offset..buffer_offset + length];
                buffer_offset += length;
                self.push_value(value);
            }
        }
    }
723
724    /// Extend from a `buffer` and `length` of items.
725    ///
726    /// This will attempt to dispatch to several optimized implementations.
727    #[inline]
728    pub fn extend_from_lengths(
729        &mut self,
730        buffer: &[u8],
731        lengths_iterator: impl Clone + ExactSizeIterator<Item = usize>,
732    ) {
733        let (min, max, sum) = lengths_iterator.clone().map(|v| (v, v, v)).fold(
734            (usize::MAX, usize::MIN, 0usize),
735            |(cmin, cmax, csum), (emin, emax, esum)| (cmin.min(emin), cmax.max(emax), csum + esum),
736        );
737
738        // SAFETY: We just collected the right stats.
739        unsafe { self.extend_from_lengths_with_stats(buffer, lengths_iterator, min, max, sum) }
740    }
741}
742
impl<T: ViewType + ?Sized, P: AsRef<T>> Extend<Option<P>> for MutableBinaryViewArray<T> {
    /// Delegates to the inherent `extend`, which reserves from the size hint.
    #[inline]
    fn extend<I: IntoIterator<Item = Option<P>>>(&mut self, iter: I) {
        Self::extend(self, iter.into_iter())
    }
}
749
impl<T: ViewType + ?Sized, P: AsRef<T>> FromIterator<Option<P>> for MutableBinaryViewArray<T> {
    /// Delegates to the inherent `from_iterator`.
    #[inline]
    fn from_iter<I: IntoIterator<Item = Option<P>>>(iter: I) -> Self {
        Self::from_iterator(iter.into_iter())
    }
}
756
impl<T: ViewType + ?Sized> MutableArray for MutableBinaryViewArray<T> {
    fn dtype(&self) -> &ArrowDataType {
        T::dtype()
    }

    fn len(&self) -> usize {
        MutableBinaryViewArray::len(self)
    }

    fn validity(&self) -> Option<&MutableBitmap> {
        self.validity.as_ref()
    }

    fn as_box(&mut self) -> Box<dyn Array> {
        // `take` leaves `self` as a fresh default builder; the taken state is
        // frozen into an immutable array.
        let mutable = std::mem::take(self);
        let arr: BinaryViewArrayGeneric<T> = mutable.into();
        arr.boxed()
    }

    fn as_any(&self) -> &dyn Any {
        self
    }

    fn as_mut_any(&mut self) -> &mut dyn Any {
        self
    }

    fn push_null(&mut self) {
        MutableBinaryViewArray::push_null(self)
    }

    fn reserve(&mut self, additional: usize) {
        MutableBinaryViewArray::reserve(self, additional)
    }

    fn shrink_to_fit(&mut self) {
        // Only the views are shrunk; validity and in-progress buffer keep
        // their capacity.
        self.views.shrink_to_fit()
    }
}
796
impl<T: ViewType + ?Sized, P: AsRef<T>> TryExtend<Option<P>> for MutableBinaryViewArray<T> {
    /// This is infallible and is implemented for consistency with all other types
    #[inline]
    fn try_extend<I: IntoIterator<Item = Option<P>>>(&mut self, iter: I) -> PolarsResult<()> {
        self.extend(iter.into_iter());
        Ok(())
    }
}
805
impl<T: ViewType + ?Sized, P: AsRef<T>> TryPush<Option<P>> for MutableBinaryViewArray<T> {
    /// This is infallible and is implemented for consistency with all other types
    #[inline(always)]
    fn try_push(&mut self, item: Option<P>) -> PolarsResult<()> {
        self.push(item.as_ref().map(|p| p.as_ref()));
        Ok(())
    }
}
814
#[cfg(test)]
mod tests {
    use super::*;

    /// Flatten `values` into one byte buffer plus a length list, rebuild the
    /// array via `extend_from_lengths`, and check the round trip is lossless.
    fn roundtrip(values: &[&[u8]]) -> bool {
        let buffer = values
            .iter()
            .flat_map(|v| v.iter().copied())
            .collect::<Vec<u8>>();
        let lengths = values.iter().map(|v| v.len()).collect::<Vec<usize>>();
        let mut bv = MutableBinaryViewArray::<[u8]>::with_capacity(values.len());

        bv.extend_from_lengths(&buffer[..], lengths.into_iter());

        &bv.values_iter().collect::<Vec<&[u8]>>()[..] == values
    }

    #[test]
    fn extend_with_lengths_basic() {
        // Empty, single inline, single non-inline, and a mixed case.
        assert!(roundtrip(&[]));
        assert!(roundtrip(&[b"abc"]));
        assert!(roundtrip(&[
            b"a_very_very_long_string_that_is_not_inlinable"
        ]));
        assert!(roundtrip(&[
            b"abc",
            b"a_very_very_long_string_that_is_not_inlinable"
        ]));
    }

    #[test]
    fn extend_with_inlinable_fastpath() {
        // All lengths <= MAX_INLINE_SIZE, but unequal: per-view inline path.
        assert!(roundtrip(&[b"abc", b"defg", b"hix"]));
        assert!(roundtrip(&[b"abc", b"defg", b"hix", b"xyza1234abcd"]));
    }

    #[test]
    fn extend_with_inlinable_eq_len_fastpath() {
        // Equal inline lengths: exercises the strided SIMD path.
        assert!(roundtrip(&[b"abc", b"def", b"hix"]));
        assert!(roundtrip(&[b"abc", b"def", b"hix", b"xyz"]));
    }

    #[test]
    fn extend_with_not_inlinable_fastpath() {
        // All values non-inlinable: exercises the bulk-copy path.
        assert!(roundtrip(&[
            b"a_very_long_string123",
            b"a_longer_string_than_the_previous"
        ]));
    }
}