polars_arrow/legacy/
bit_util.rs

1/// Forked from Arrow until their API stabilizes.
2///
3/// Note that the bound checks are optimized away.
4///
5use crate::bitmap::utils::{BitChunkIterExact, BitChunks, BitChunksExact};
6use crate::bitmap::Bitmap;
7
8fn first_set_bit_impl<I>(mut mask_chunks: I) -> usize
9where
10    I: BitChunkIterExact<u64>,
11{
12    let mut total = 0usize;
13    const SIZE: u32 = 64;
14    for chunk in &mut mask_chunks {
15        let pos = chunk.trailing_zeros();
16        if pos != SIZE {
17            return total + pos as usize;
18        } else {
19            total += SIZE as usize
20        }
21    }
22    if let Some(pos) = mask_chunks.remainder_iter().position(|v| v) {
23        total += pos;
24        return total;
25    }
26    // all null, return the first
27    0
28}
29
30pub fn first_set_bit(mask: &Bitmap) -> usize {
31    if mask.unset_bits() == 0 || mask.unset_bits() == mask.len() {
32        return 0;
33    }
34    let (slice, offset, length) = mask.as_slice();
35    if offset == 0 {
36        let mask_chunks = BitChunksExact::<u64>::new(slice, length);
37        first_set_bit_impl(mask_chunks)
38    } else {
39        let mask_chunks = mask.chunks::<u64>();
40        first_set_bit_impl(mask_chunks)
41    }
42}
43
44fn first_unset_bit_impl<I>(mut mask_chunks: I) -> usize
45where
46    I: BitChunkIterExact<u64>,
47{
48    let mut total = 0usize;
49    const SIZE: u32 = 64;
50    for chunk in &mut mask_chunks {
51        let pos = chunk.trailing_ones();
52        if pos != SIZE {
53            return total + pos as usize;
54        } else {
55            total += SIZE as usize
56        }
57    }
58    if let Some(pos) = mask_chunks.remainder_iter().position(|v| !v) {
59        total += pos;
60        return total;
61    }
62    // all null, return the first
63    0
64}
65
66pub fn first_unset_bit(mask: &Bitmap) -> usize {
67    if mask.unset_bits() == 0 || mask.unset_bits() == mask.len() {
68        return 0;
69    }
70    let (slice, offset, length) = mask.as_slice();
71    if offset == 0 {
72        let mask_chunks = BitChunksExact::<u64>::new(slice, length);
73        first_unset_bit_impl(mask_chunks)
74    } else {
75        let mask_chunks = mask.chunks::<u64>();
76        first_unset_bit_impl(mask_chunks)
77    }
78}
79
80pub fn find_first_true_false_null(
81    mut bit_chunks: BitChunks<u64>,
82    mut validity_chunks: BitChunks<u64>,
83) -> (Option<usize>, Option<usize>, Option<usize>) {
84    let (mut true_index, mut false_index, mut null_index) = (None, None, None);
85    let (mut true_not_found_mask, mut false_not_found_mask, mut null_not_found_mask) =
86        (!0u64, !0u64, !0u64); // All ones while not found.
87    let mut offset: usize = 0;
88    let mut all_found = false;
89    for (truth_mask, null_mask) in (&mut bit_chunks).zip(&mut validity_chunks) {
90        let mask = null_mask & truth_mask & true_not_found_mask;
91        if mask > 0 {
92            true_index = Some(offset + mask.trailing_zeros() as usize);
93            true_not_found_mask = 0;
94        }
95        let mask = null_mask & !truth_mask & false_not_found_mask;
96        if mask > 0 {
97            false_index = Some(offset + mask.trailing_zeros() as usize);
98            false_not_found_mask = 0;
99        }
100        if !null_mask & null_not_found_mask > 0 {
101            null_index = Some(offset + null_mask.trailing_ones() as usize);
102            null_not_found_mask = 0;
103        }
104        if null_not_found_mask | true_not_found_mask | false_not_found_mask == 0 {
105            all_found = true;
106            break;
107        }
108        offset += 64;
109    }
110    if !all_found {
111        for (val, not_null) in bit_chunks
112            .remainder_iter()
113            .zip(validity_chunks.remainder_iter())
114        {
115            if true_index.is_none() && not_null && val {
116                true_index = Some(offset);
117            } else if false_index.is_none() && not_null && !val {
118                false_index = Some(offset);
119            } else if null_index.is_none() && !not_null {
120                null_index = Some(offset);
121            }
122            offset += 1;
123        }
124    }
125    (true_index, false_index, null_index)
126}
127
128pub fn find_first_true_false_no_null(
129    mut bit_chunks: BitChunks<u64>,
130) -> (Option<usize>, Option<usize>) {
131    let (mut true_index, mut false_index) = (None, None);
132    let (mut true_not_found_mask, mut false_not_found_mask) = (!0u64, !0u64); // All ones while not found.
133    let mut offset: usize = 0;
134    let mut all_found = false;
135    for truth_mask in &mut bit_chunks {
136        let mask = truth_mask & true_not_found_mask;
137        if mask > 0 {
138            true_index = Some(offset + mask.trailing_zeros() as usize);
139            true_not_found_mask = 0;
140        }
141        let mask = !truth_mask & false_not_found_mask;
142        if mask > 0 {
143            false_index = Some(offset + mask.trailing_zeros() as usize);
144            false_not_found_mask = 0;
145        }
146        if true_not_found_mask | false_not_found_mask == 0 {
147            all_found = true;
148            break;
149        }
150        offset += 64;
151    }
152    if !all_found {
153        for val in bit_chunks.remainder_iter() {
154            if true_index.is_none() && val {
155                true_index = Some(offset);
156            } else if false_index.is_none() && !val {
157                false_index = Some(offset);
158            }
159            offset += 1;
160        }
161    }
162    (true_index, false_index)
163}