polars_arrow/bitmap/
bitmap_ops.rs

1use std::ops::{BitAnd, BitOr, BitXor, Not};
2
3use super::utils::{BitChunk, BitChunkIterExact, BitChunksExact};
4use super::Bitmap;
5use crate::bitmap::MutableBitmap;
6use crate::trusted_len::TrustedLen;
7
8#[inline(always)]
9pub(crate) fn push_bitchunk<T: BitChunk>(buffer: &mut Vec<u8>, value: T) {
10    buffer.extend(value.to_ne_bytes())
11}
12
13/// Creates a [`Vec<u8>`] from a [`TrustedLen`] of [`BitChunk`].
14pub fn chunk_iter_to_vec<T: BitChunk, I: TrustedLen<Item = T>>(iter: I) -> Vec<u8> {
15    let cap = iter.size_hint().0 * size_of::<T>();
16    let mut buffer = Vec::with_capacity(cap);
17    for v in iter {
18        push_bitchunk(&mut buffer, v)
19    }
20    buffer
21}
22
23fn chunk_iter_to_vec_and_remainder<T: BitChunk, I: TrustedLen<Item = T>>(
24    iter: I,
25    remainder: T,
26) -> Vec<u8> {
27    let cap = (iter.size_hint().0 + 1) * size_of::<T>();
28    let mut buffer = Vec::with_capacity(cap);
29    for v in iter {
30        push_bitchunk(&mut buffer, v)
31    }
32    push_bitchunk(&mut buffer, remainder);
33    debug_assert_eq!(buffer.len(), cap);
34    buffer
35}
36
37/// Apply a bitwise operation `op` to four inputs and return the result as a [`Bitmap`].
38pub fn quaternary<F>(a1: &Bitmap, a2: &Bitmap, a3: &Bitmap, a4: &Bitmap, op: F) -> Bitmap
39where
40    F: Fn(u64, u64, u64, u64) -> u64,
41{
42    assert_eq!(a1.len(), a2.len());
43    assert_eq!(a1.len(), a3.len());
44    assert_eq!(a1.len(), a4.len());
45    let a1_chunks = a1.chunks();
46    let a2_chunks = a2.chunks();
47    let a3_chunks = a3.chunks();
48    let a4_chunks = a4.chunks();
49
50    let rem_a1 = a1_chunks.remainder();
51    let rem_a2 = a2_chunks.remainder();
52    let rem_a3 = a3_chunks.remainder();
53    let rem_a4 = a4_chunks.remainder();
54
55    let chunks = a1_chunks
56        .zip(a2_chunks)
57        .zip(a3_chunks)
58        .zip(a4_chunks)
59        .map(|(((a1, a2), a3), a4)| op(a1, a2, a3, a4));
60
61    let buffer = chunk_iter_to_vec_and_remainder(chunks, op(rem_a1, rem_a2, rem_a3, rem_a4));
62    let length = a1.len();
63
64    Bitmap::from_u8_vec(buffer, length)
65}
66
67/// Apply a bitwise operation `op` to three inputs and return the result as a [`Bitmap`].
68pub fn ternary<F>(a1: &Bitmap, a2: &Bitmap, a3: &Bitmap, op: F) -> Bitmap
69where
70    F: Fn(u64, u64, u64) -> u64,
71{
72    assert_eq!(a1.len(), a2.len());
73    assert_eq!(a1.len(), a3.len());
74    let a1_chunks = a1.chunks();
75    let a2_chunks = a2.chunks();
76    let a3_chunks = a3.chunks();
77
78    let rem_a1 = a1_chunks.remainder();
79    let rem_a2 = a2_chunks.remainder();
80    let rem_a3 = a3_chunks.remainder();
81
82    let chunks = a1_chunks
83        .zip(a2_chunks)
84        .zip(a3_chunks)
85        .map(|((a1, a2), a3)| op(a1, a2, a3));
86
87    let buffer = chunk_iter_to_vec_and_remainder(chunks, op(rem_a1, rem_a2, rem_a3));
88    let length = a1.len();
89
90    Bitmap::from_u8_vec(buffer, length)
91}
92
93/// Apply a bitwise operation `op` to two inputs and return the result as a [`Bitmap`].
94pub fn binary<F>(lhs: &Bitmap, rhs: &Bitmap, op: F) -> Bitmap
95where
96    F: Fn(u64, u64) -> u64,
97{
98    assert_eq!(lhs.len(), rhs.len());
99    let lhs_chunks = lhs.chunks();
100    let rhs_chunks = rhs.chunks();
101    let rem_lhs = lhs_chunks.remainder();
102    let rem_rhs = rhs_chunks.remainder();
103
104    let chunks = lhs_chunks
105        .zip(rhs_chunks)
106        .map(|(left, right)| op(left, right));
107
108    let buffer = chunk_iter_to_vec_and_remainder(chunks, op(rem_lhs, rem_rhs));
109    let length = lhs.len();
110
111    Bitmap::from_u8_vec(buffer, length)
112}
113
114/// Apply a bitwise operation `op` to two inputs and fold the result.
115pub fn binary_fold<B, F, R>(lhs: &Bitmap, rhs: &Bitmap, op: F, init: B, fold: R) -> B
116where
117    F: Fn(u64, u64) -> B,
118    R: Fn(B, B) -> B,
119{
120    assert_eq!(lhs.len(), rhs.len());
121    let lhs_chunks = lhs.chunks();
122    let rhs_chunks = rhs.chunks();
123    let rem_lhs = lhs_chunks.remainder();
124    let rem_rhs = rhs_chunks.remainder();
125
126    let result = lhs_chunks
127        .zip(rhs_chunks)
128        .fold(init, |prev, (left, right)| fold(prev, op(left, right)));
129
130    fold(result, op(rem_lhs, rem_rhs))
131}
132
133/// Apply a bitwise operation `op` to two inputs and fold the result.
134pub fn binary_fold_mut<B, F, R>(
135    lhs: &MutableBitmap,
136    rhs: &MutableBitmap,
137    op: F,
138    init: B,
139    fold: R,
140) -> B
141where
142    F: Fn(u64, u64) -> B,
143    R: Fn(B, B) -> B,
144{
145    assert_eq!(lhs.len(), rhs.len());
146    let lhs_chunks = lhs.chunks();
147    let rhs_chunks = rhs.chunks();
148    let rem_lhs = lhs_chunks.remainder();
149    let rem_rhs = rhs_chunks.remainder();
150
151    let result = lhs_chunks
152        .zip(rhs_chunks)
153        .fold(init, |prev, (left, right)| fold(prev, op(left, right)));
154
155    fold(result, op(rem_lhs, rem_rhs))
156}
157
158fn unary_impl<F, I>(iter: I, op: F, length: usize) -> Bitmap
159where
160    I: BitChunkIterExact<u64>,
161    F: Fn(u64) -> u64,
162{
163    let rem = op(iter.remainder());
164    let buffer = chunk_iter_to_vec_and_remainder(iter.map(op), rem);
165
166    Bitmap::from_u8_vec(buffer, length)
167}
168
169/// Apply a bitwise operation `op` to one input and return the result as a [`Bitmap`].
170pub fn unary<F>(lhs: &Bitmap, op: F) -> Bitmap
171where
172    F: Fn(u64) -> u64,
173{
174    let (slice, offset, length) = lhs.as_slice();
175    if offset == 0 {
176        let iter = BitChunksExact::<u64>::new(slice, length);
177        unary_impl(iter, op, lhs.len())
178    } else {
179        let iter = lhs.chunks::<u64>();
180        unary_impl(iter, op, lhs.len())
181    }
182}
183
184// create a new [`Bitmap`] semantically equal to ``bitmap`` but with an offset equal to ``offset``
185pub(crate) fn align(bitmap: &Bitmap, new_offset: usize) -> Bitmap {
186    let length = bitmap.len();
187
188    let bitmap: Bitmap = std::iter::repeat(false)
189        .take(new_offset)
190        .chain(bitmap.iter())
191        .collect();
192
193    bitmap.sliced(new_offset, length)
194}
195
196/// Compute bitwise A AND B operation.
197pub fn and(lhs: &Bitmap, rhs: &Bitmap) -> Bitmap {
198    if lhs.unset_bits() == lhs.len() || rhs.unset_bits() == rhs.len() {
199        assert_eq!(lhs.len(), rhs.len());
200        Bitmap::new_zeroed(lhs.len())
201    } else {
202        binary(lhs, rhs, |x, y| x & y)
203    }
204}
205
206/// Compute bitwise A AND NOT B operation.
207pub fn and_not(lhs: &Bitmap, rhs: &Bitmap) -> Bitmap {
208    binary(lhs, rhs, |x, y| x & !y)
209}
210
211/// Compute bitwise A OR B operation.
212pub fn or(lhs: &Bitmap, rhs: &Bitmap) -> Bitmap {
213    if lhs.unset_bits() == 0 || rhs.unset_bits() == 0 {
214        assert_eq!(lhs.len(), rhs.len());
215        let mut mutable = MutableBitmap::with_capacity(lhs.len());
216        mutable.extend_constant(lhs.len(), true);
217        mutable.into()
218    } else {
219        binary(lhs, rhs, |x, y| x | y)
220    }
221}
222
223/// Compute bitwise A OR NOT B operation.
224pub fn or_not(lhs: &Bitmap, rhs: &Bitmap) -> Bitmap {
225    binary(lhs, rhs, |x, y| x | !y)
226}
227
228/// Compute bitwise XOR operation.
229pub fn xor(lhs: &Bitmap, rhs: &Bitmap) -> Bitmap {
230    let lhs_nulls = lhs.unset_bits();
231    let rhs_nulls = rhs.unset_bits();
232
233    // all false or all true
234    if lhs_nulls == rhs_nulls && rhs_nulls == rhs.len() || lhs_nulls == 0 && rhs_nulls == 0 {
235        assert_eq!(lhs.len(), rhs.len());
236        Bitmap::new_zeroed(rhs.len())
237    }
238    // all false and all true or vice versa
239    else if (lhs_nulls == 0 && rhs_nulls == rhs.len())
240        || (lhs_nulls == lhs.len() && rhs_nulls == 0)
241    {
242        assert_eq!(lhs.len(), rhs.len());
243        let mut mutable = MutableBitmap::with_capacity(lhs.len());
244        mutable.extend_constant(lhs.len(), true);
245        mutable.into()
246    } else {
247        binary(lhs, rhs, |x, y| x ^ y)
248    }
249}
250
251/// Compute bitwise equality (not XOR) operation.
252fn eq(lhs: &Bitmap, rhs: &Bitmap) -> bool {
253    if lhs.len() != rhs.len() {
254        return false;
255    }
256
257    let mut lhs_chunks = lhs.chunks::<u64>();
258    let mut rhs_chunks = rhs.chunks::<u64>();
259
260    let equal_chunks = lhs_chunks
261        .by_ref()
262        .zip(rhs_chunks.by_ref())
263        .all(|(left, right)| left == right);
264
265    if !equal_chunks {
266        return false;
267    }
268    let lhs_remainder = lhs_chunks.remainder_iter();
269    let rhs_remainder = rhs_chunks.remainder_iter();
270    lhs_remainder.zip(rhs_remainder).all(|(x, y)| x == y)
271}
272
273pub fn num_intersections_with(lhs: &Bitmap, rhs: &Bitmap) -> usize {
274    binary_fold(
275        lhs,
276        rhs,
277        |lhs, rhs| (lhs & rhs).count_ones() as usize,
278        0,
279        |lhs, rhs| lhs + rhs,
280    )
281}
282
283pub fn intersects_with(lhs: &Bitmap, rhs: &Bitmap) -> bool {
284    binary_fold(
285        lhs,
286        rhs,
287        |lhs, rhs| lhs & rhs != 0,
288        false,
289        |lhs, rhs| lhs || rhs,
290    )
291}
292
293pub fn intersects_with_mut(lhs: &MutableBitmap, rhs: &MutableBitmap) -> bool {
294    binary_fold_mut(
295        lhs,
296        rhs,
297        |lhs, rhs| lhs & rhs != 0,
298        false,
299        |lhs, rhs| lhs || rhs,
300    )
301}
302
303pub fn num_edges(lhs: &Bitmap) -> usize {
304    if lhs.is_empty() {
305        return 0;
306    }
307
308    // @TODO: If is probably quite inefficient to do it like this because now either one is not
309    // aligned. Maybe, we can implement a smarter way to do this.
310    binary_fold(
311        &unsafe { lhs.clone().sliced_unchecked(0, lhs.len() - 1) },
312        &unsafe { lhs.clone().sliced_unchecked(1, lhs.len() - 1) },
313        |l, r| (l ^ r).count_ones() as usize,
314        0,
315        |acc, v| acc + v,
316    )
317}
318
319/// Compute `out[i] = if selector[i] { truthy[i] } else { falsy }`.
320pub fn select_constant(selector: &Bitmap, truthy: &Bitmap, falsy: bool) -> Bitmap {
321    let falsy_mask: u64 = if falsy {
322        0xFFFF_FFFF_FFFF_FFFF
323    } else {
324        0x0000_0000_0000_0000
325    };
326
327    binary(selector, truthy, |s, t| (s & t) | (!s & falsy_mask))
328}
329
330/// Compute `out[i] = if selector[i] { truthy[i] } else { falsy[i] }`.
331pub fn select(selector: &Bitmap, truthy: &Bitmap, falsy: &Bitmap) -> Bitmap {
332    ternary(selector, truthy, falsy, |s, t, f| (s & t) | (!s & f))
333}
334
335impl PartialEq for Bitmap {
336    fn eq(&self, other: &Self) -> bool {
337        eq(self, other)
338    }
339}
340
341impl<'b> BitOr<&'b Bitmap> for &Bitmap {
342    type Output = Bitmap;
343
344    fn bitor(self, rhs: &'b Bitmap) -> Bitmap {
345        or(self, rhs)
346    }
347}
348
349impl<'b> BitAnd<&'b Bitmap> for &Bitmap {
350    type Output = Bitmap;
351
352    fn bitand(self, rhs: &'b Bitmap) -> Bitmap {
353        and(self, rhs)
354    }
355}
356
357impl<'b> BitXor<&'b Bitmap> for &Bitmap {
358    type Output = Bitmap;
359
360    fn bitxor(self, rhs: &'b Bitmap) -> Bitmap {
361        xor(self, rhs)
362    }
363}
364
365impl Not for &Bitmap {
366    type Output = Bitmap;
367
368    fn not(self) -> Bitmap {
369        unary(self, |a| !a)
370    }
371}