polars_arrow/bitmap/
iterator.rs1use polars_utils::slice::load_padded_le_u64;
2
3use super::bitmask::BitMask;
4use super::Bitmap;
5use crate::trusted_len::TrustedLen;
6
7fn calc_iters_remaining(length: usize, min_length_for_iter: usize, consume: usize) -> usize {
12 let min_length_for_iter = min_length_for_iter.max(consume);
13 if length < min_length_for_iter {
14 return 0;
15 }
16
17 let obvious_part = length - min_length_for_iter;
18 let obvious_iters = obvious_part / consume;
19 1 + obvious_iters }
27
28#[derive(Clone)]
29pub struct TrueIdxIter<'a> {
30 mask: BitMask<'a>,
31 first_unknown: usize,
32 i: usize,
33 len: usize,
34 remaining: usize,
35}
36
37impl<'a> TrueIdxIter<'a> {
38 #[inline]
39 pub fn new(len: usize, validity: Option<&'a Bitmap>) -> Self {
40 if let Some(bitmap) = validity {
41 assert!(len == bitmap.len());
42 Self {
43 mask: BitMask::from_bitmap(bitmap),
44 first_unknown: 0,
45 i: 0,
46 remaining: bitmap.len() - bitmap.unset_bits(),
47 len,
48 }
49 } else {
50 Self {
51 mask: BitMask::default(),
52 first_unknown: len,
53 i: 0,
54 remaining: len,
55 len,
56 }
57 }
58 }
59}
60
61impl Iterator for TrueIdxIter<'_> {
62 type Item = usize;
63
64 #[inline]
65 fn next(&mut self) -> Option<Self::Item> {
66 if self.i < self.first_unknown {
68 let ret = self.i;
69 self.i += 1;
70 self.remaining -= 1;
71 return Some(ret);
72 }
73
74 while self.i < self.len {
75 let mask = self.mask.get_u32(self.i);
76 let num_null = mask.trailing_zeros();
77 self.i += num_null as usize;
78 if num_null < 32 {
79 self.first_unknown = self.i + (mask >> num_null).trailing_ones() as usize;
80 let ret = self.i;
81 self.i += 1;
82 self.remaining -= 1;
83 return Some(ret);
84 }
85 }
86
87 None
88 }
89
90 #[inline]
91 fn size_hint(&self) -> (usize, Option<usize>) {
92 (self.remaining, Some(self.remaining))
93 }
94}
95
96unsafe impl TrustedLen for TrueIdxIter<'_> {}
97
98pub struct FastU32BitmapIter<'a> {
99 bytes: &'a [u8],
100 shift: u32,
101 bits_left: usize,
102}
103
104impl<'a> FastU32BitmapIter<'a> {
105 pub fn new(bytes: &'a [u8], offset: usize, len: usize) -> Self {
106 assert!(bytes.len() * 8 >= offset + len);
107 let shift = (offset % 8) as u32;
108 let bytes = &bytes[offset / 8..];
109 Self {
110 bytes,
111 shift,
112 bits_left: len,
113 }
114 }
115
116 fn next_remainder(&mut self) -> Option<u32> {
118 if self.bits_left > 0 {
119 let word = load_padded_le_u64(self.bytes);
120 let mask;
121 if self.bits_left >= 32 {
122 mask = u32::MAX;
123 self.bits_left -= 32;
124 self.bytes = unsafe { self.bytes.get_unchecked(4..) };
125 } else {
126 mask = (1 << self.bits_left) - 1;
127 self.bits_left = 0;
128 }
129
130 return Some((word >> self.shift) as u32 & mask);
131 }
132
133 None
134 }
135
136 pub fn remainder(mut self) -> (u64, usize) {
139 let bits_left = self.bits_left;
140 let lo = self.next_remainder().unwrap_or(0);
141 let hi = self.next_remainder().unwrap_or(0);
142 (((hi as u64) << 32) | (lo as u64), bits_left)
143 }
144}
145
146impl Iterator for FastU32BitmapIter<'_> {
147 type Item = u32;
148
149 #[inline]
150 fn next(&mut self) -> Option<Self::Item> {
151 if self.bits_left >= 64 {
153 let chunk;
154 unsafe {
155 chunk = self.bytes.get_unchecked(0..8);
157 self.bytes = self.bytes.get_unchecked(4..);
158 }
159 self.bits_left -= 32;
160 let word = u64::from_le_bytes(chunk.try_into().unwrap());
161 return Some((word >> self.shift) as u32);
162 }
163
164 None
165 }
166
167 #[inline]
168 fn size_hint(&self) -> (usize, Option<usize>) {
169 let hint = calc_iters_remaining(self.bits_left, 64, 32);
170 (hint, Some(hint))
171 }
172}
173
174unsafe impl TrustedLen for FastU32BitmapIter<'_> {}
175
176#[derive(Clone)]
177pub struct FastU56BitmapIter<'a> {
178 bytes: &'a [u8],
179 shift: u32,
180 bits_left: usize,
181}
182
183impl<'a> FastU56BitmapIter<'a> {
184 pub fn new(bytes: &'a [u8], offset: usize, len: usize) -> Self {
185 assert!(bytes.len() * 8 >= offset + len);
186 let shift = (offset % 8) as u32;
187 let bytes = &bytes[offset / 8..];
188 Self {
189 bytes,
190 shift,
191 bits_left: len,
192 }
193 }
194
195 fn next_remainder(&mut self) -> Option<u64> {
197 if self.bits_left > 0 {
198 let word = load_padded_le_u64(self.bytes);
199 let mask;
200 if self.bits_left >= 56 {
201 mask = (1 << 56) - 1;
202 self.bits_left -= 56;
203 self.bytes = unsafe { self.bytes.get_unchecked(7..) };
204 } else {
205 mask = (1 << self.bits_left) - 1;
206 self.bits_left = 0;
207 };
208
209 return Some((word >> self.shift) & mask);
210 }
211
212 None
213 }
214
215 pub fn remainder(mut self) -> (u64, usize) {
219 let bits_left = self.bits_left;
220 let lo = self.next_remainder().unwrap_or(0);
221 let hi = self.next_remainder().unwrap_or(0);
222 ((hi << 56) | lo, bits_left)
223 }
224}
225
226impl Iterator for FastU56BitmapIter<'_> {
227 type Item = u64;
228
229 #[inline]
230 fn next(&mut self) -> Option<Self::Item> {
231 if self.bits_left >= 64 {
233 let chunk;
234 unsafe {
235 chunk = self.bytes.get_unchecked(0..8);
237 self.bytes = self.bytes.get_unchecked(7..);
238 self.bits_left -= 56;
239 }
240
241 let word = u64::from_le_bytes(chunk.try_into().unwrap());
242 let mask = (1 << 56) - 1;
243 return Some((word >> self.shift) & mask);
244 }
245
246 None
247 }
248
249 #[inline]
250 fn size_hint(&self) -> (usize, Option<usize>) {
251 let hint = calc_iters_remaining(self.bits_left, 64, 56);
252 (hint, Some(hint))
253 }
254}
255
256unsafe impl TrustedLen for FastU56BitmapIter<'_> {}
257
258pub struct FastU64BitmapIter<'a> {
259 bytes: &'a [u8],
260 shift: u32,
261 bits_left: usize,
262 next_word: u64,
263}
264
265impl<'a> FastU64BitmapIter<'a> {
266 pub fn new(bytes: &'a [u8], offset: usize, len: usize) -> Self {
267 assert!(bytes.len() * 8 >= offset + len);
268 let shift = (offset % 8) as u32;
269 let bytes = &bytes[offset / 8..];
270 let next_word = load_padded_le_u64(bytes);
271 let bytes = bytes.get(8..).unwrap_or(&[]);
272 Self {
273 bytes,
274 shift,
275 bits_left: len,
276 next_word,
277 }
278 }
279
280 #[inline]
281 fn combine(&self, lo: u64, hi: u64) -> u64 {
282 let wide = ((hi as u128) << 64) | lo as u128;
285 (wide >> (self.shift % 64)) as u64
286 }
287
288 fn next_remainder(&mut self) -> Option<u64> {
290 if self.bits_left > 0 {
291 let lo = self.next_word;
292 let hi = load_padded_le_u64(self.bytes);
293 let mask;
294 if self.bits_left >= 64 {
295 mask = u64::MAX;
296 self.bits_left -= 64;
297 self.bytes = self.bytes.get(8..).unwrap_or(&[]);
298 } else {
299 mask = (1 << self.bits_left) - 1;
300 self.bits_left = 0;
301 };
302 self.next_word = hi;
303
304 return Some(self.combine(lo, hi) & mask);
305 }
306
307 None
308 }
309
310 pub fn remainder(mut self) -> ([u64; 2], usize) {
314 let bits_left = self.bits_left;
315 let lo = self.next_remainder().unwrap_or(0);
316 let hi = self.next_remainder().unwrap_or(0);
317 ([lo, hi], bits_left)
318 }
319}
320
321impl Iterator for FastU64BitmapIter<'_> {
322 type Item = u64;
323
324 #[inline]
325 fn next(&mut self) -> Option<Self::Item> {
326 if self.bits_left >= 128 {
329 let chunk;
330 unsafe {
331 chunk = self.bytes.get_unchecked(0..8);
333 self.bytes = self.bytes.get_unchecked(8..);
334 }
335 let lo = self.next_word;
336 let hi = u64::from_le_bytes(chunk.try_into().unwrap());
337 self.next_word = hi;
338 self.bits_left -= 64;
339
340 return Some(self.combine(lo, hi));
341 }
342
343 None
344 }
345
346 #[inline]
347 fn size_hint(&self) -> (usize, Option<usize>) {
348 let hint = calc_iters_remaining(self.bits_left, 128, 64);
349 (hint, Some(hint))
350 }
351}
352
353unsafe impl TrustedLen for FastU64BitmapIter<'_> {}
354
355#[derive(Debug, Clone)]
357pub struct IntoIter {
358 values: Bitmap,
359 index: usize,
360 end: usize,
361}
362
363impl IntoIter {
364 #[inline]
366 pub fn new(values: Bitmap) -> Self {
367 let end = values.len();
368 Self {
369 values,
370 index: 0,
371 end,
372 }
373 }
374}
375
376impl Iterator for IntoIter {
377 type Item = bool;
378
379 #[inline]
380 fn next(&mut self) -> Option<Self::Item> {
381 if self.index == self.end {
382 return None;
383 }
384 let old = self.index;
385 self.index += 1;
386 Some(unsafe { self.values.get_bit_unchecked(old) })
387 }
388
389 #[inline]
390 fn size_hint(&self) -> (usize, Option<usize>) {
391 (self.end - self.index, Some(self.end - self.index))
392 }
393
394 #[inline]
395 fn nth(&mut self, n: usize) -> Option<Self::Item> {
396 let new_index = self.index + n;
397 if new_index > self.end {
398 self.index = self.end;
399 None
400 } else {
401 self.index = new_index;
402 self.next()
403 }
404 }
405}
406
407impl DoubleEndedIterator for IntoIter {
408 #[inline]
409 fn next_back(&mut self) -> Option<Self::Item> {
410 if self.index == self.end {
411 None
412 } else {
413 self.end -= 1;
414 Some(unsafe { self.values.get_bit_unchecked(self.end) })
415 }
416 }
417}
418
419unsafe impl TrustedLen for IntoIter {}