polars_utils/
vec.rs

1use std::mem::MaybeUninit;
2
3use num_traits::Zero;
4
5pub trait IntoRawParts<T> {
6    fn into_raw_parts(self) -> (*mut T, usize, usize);
7
8    // doesn't take ownership
9    fn raw_parts(&self) -> (*mut T, usize, usize);
10}
11
12impl<T> IntoRawParts<T> for Vec<T> {
13    fn into_raw_parts(self) -> (*mut T, usize, usize) {
14        let mut me = std::mem::ManuallyDrop::new(self);
15        (me.as_mut_ptr(), me.len(), me.capacity())
16    }
17
18    fn raw_parts(&self) -> (*mut T, usize, usize) {
19        (self.as_ptr() as *mut T, self.len(), self.capacity())
20    }
21}
22
23/// Fill current allocation if > 0
24/// otherwise realloc
25pub trait ResizeFaster<T: Copy> {
26    fn fill_or_alloc(&mut self, new_len: usize, value: T);
27}
28
29impl<T: Copy + Zero + PartialEq> ResizeFaster<T> for Vec<T> {
30    fn fill_or_alloc(&mut self, new_len: usize, value: T) {
31        if self.capacity() == 0 {
32            // it is faster to allocate zeroed
33            // so if the capacity is 0, we alloc (value might be 0)
34            *self = vec![value; new_len]
35        } else {
36            // first clear then reserve so that the reserve doesn't have
37            // to memcpy in case it needs to realloc.
38            self.clear();
39            self.reserve(new_len);
40
41            // // init the uninit values
42            let spare = &mut self.spare_capacity_mut()[..new_len];
43            let init_value = MaybeUninit::new(value);
44            spare.fill(init_value);
45            unsafe { self.set_len(new_len) }
46        }
47    }
48}
49pub trait PushUnchecked<T> {
50    /// Will push an item and not check if there is enough capacity
51    ///
52    /// # Safety
53    /// Caller must ensure the array has enough capacity to hold `T`.
54    unsafe fn push_unchecked(&mut self, value: T);
55}
56
57impl<T> PushUnchecked<T> for Vec<T> {
58    #[inline]
59    unsafe fn push_unchecked(&mut self, value: T) {
60        debug_assert!(self.capacity() > self.len());
61        let end = self.as_mut_ptr().add(self.len());
62        std::ptr::write(end, value);
63        self.set_len(self.len() + 1);
64    }
65}
66
67pub trait CapacityByFactor {
68    fn with_capacity_by_factor(original_len: usize, factor: f64) -> Self;
69}
70
71impl<T> CapacityByFactor for Vec<T> {
72    fn with_capacity_by_factor(original_len: usize, factor: f64) -> Self {
73        let cap = (original_len as f64 * factor) as usize;
74        Vec::with_capacity(cap)
75    }
76}
77
78// Trait to convert a Vec.
79// The reason for this is to reduce code-generation. Conversion functions that are named
80// functions should only generate the conversion loop once.
81pub trait ConvertVec<Out> {
82    type ItemIn;
83
84    fn convert_owned<F: FnMut(Self::ItemIn) -> Out>(self, f: F) -> Vec<Out>;
85
86    fn convert<F: FnMut(&Self::ItemIn) -> Out>(&self, f: F) -> Vec<Out>;
87}
88
89impl<T, Out> ConvertVec<Out> for Vec<T> {
90    type ItemIn = T;
91
92    fn convert_owned<F: FnMut(Self::ItemIn) -> Out>(self, f: F) -> Vec<Out> {
93        self.into_iter().map(f).collect()
94    }
95
96    fn convert<F: FnMut(&Self::ItemIn) -> Out>(&self, f: F) -> Vec<Out> {
97        self.iter().map(f).collect()
98    }
99}
100
101/// Perform an in-place `Iterator::filter_map` over two vectors at the same time.
102pub fn inplace_zip_filtermap<T, U>(
103    x: &mut Vec<T>,
104    y: &mut Vec<U>,
105    mut f: impl FnMut(T, U) -> Option<(T, U)>,
106) {
107    assert_eq!(x.len(), y.len());
108
109    let length = x.len();
110
111    struct OwnedBuffer<T> {
112        end: *mut T,
113        length: usize,
114    }
115
116    impl<T> Drop for OwnedBuffer<T> {
117        fn drop(&mut self) {
118            for i in 0..self.length {
119                unsafe { self.end.wrapping_sub(i + 1).read() };
120            }
121        }
122    }
123
124    let x_ptr = x.as_mut_ptr();
125    let y_ptr = y.as_mut_ptr();
126
127    let mut x_buf = OwnedBuffer {
128        end: x_ptr.wrapping_add(length),
129        length,
130    };
131    let mut y_buf = OwnedBuffer {
132        end: y_ptr.wrapping_add(length),
133        length,
134    };
135
136    // SAFETY: All items are now owned by `x_buf` and `y_buf`. Since we know that `x_buf` and
137    // `y_buf` will be dropped before the vecs representing `x` and `y`, this is safe.
138    unsafe {
139        x.set_len(0);
140        y.set_len(0);
141    }
142
143    // SAFETY:
144    //
145    // We know we have a exclusive reference to x and y.
146    //
147    // We know that `i` is always smaller than `x.len()` and `y.len()`. Furthermore, we also know
148    // that `i - num_deleted > 0`.
149    //
150    // Items are dropped exactly once, even if `f` panics.
151    for i in 0..length {
152        let xi = unsafe { x_ptr.wrapping_add(i).read() };
153        let yi = unsafe { y_ptr.wrapping_add(i).read() };
154
155        x_buf.length -= 1;
156        y_buf.length -= 1;
157
158        // We hold the invariant here that all items that are not yet deleted are either in
159        // - `xi` or `yi`
160        // - `x_buf` or `y_buf`
161        // ` `x` or `y`
162        //
163        // This way if `f` ever panics, we are sure that all items are dropped exactly once.
164        // Deleted items will be dropped when they are deleted.
165        let result = f(xi, yi);
166
167        if let Some((xi, yi)) = result {
168            x.push(xi);
169            y.push(yi);
170        }
171    }
172
173    debug_assert_eq!(x_buf.length, 0);
174    debug_assert_eq!(y_buf.length, 0);
175
176    // We are safe to forget `x_buf` and `y_buf` here since they will not deallocate anything
177    // anymore.
178    std::mem::forget(x_buf);
179    std::mem::forget(y_buf);
180}