polars_arrow/bitmap/
builder.rs1use polars_utils::slice::load_padded_le_u64;
2
3use crate::bitmap::{Bitmap, MutableBitmap};
4use crate::storage::SharedStorage;
5use crate::trusted_len::TrustedLen;
6
7#[derive(Default, Clone)]
9pub struct BitmapBuilder {
10 buf: u64, bit_len: usize, bit_cap: usize, set_bits_in_bytes: usize, bytes: Vec<u8>,
15}
16
17impl BitmapBuilder {
18 pub fn new() -> Self {
19 Self::default()
20 }
21
22 #[inline(always)]
23 pub fn len(&self) -> usize {
24 self.bit_len
25 }
26
27 #[inline(always)]
28 pub fn is_empty(&self) -> bool {
29 self.bit_len == 0
30 }
31
32 #[inline(always)]
33 pub fn capacity(&self) -> usize {
34 self.bit_cap
35 }
36
37 #[inline(always)]
38 pub fn set_bits(&self) -> usize {
39 self.set_bits_in_bytes + self.buf.count_ones() as usize
40 }
41
42 #[inline(always)]
43 pub fn unset_bits(&self) -> usize {
44 self.bit_len - self.set_bits()
45 }
46
47 pub fn with_capacity(bits: usize) -> Self {
48 let bytes = Vec::with_capacity(bits.div_ceil(64) * 8);
49 let words_available = bytes.capacity() / 8;
50 Self {
51 buf: 0,
52 bit_len: 0,
53 bit_cap: words_available * 64,
54 set_bits_in_bytes: 0,
55 bytes,
56 }
57 }
58
59 #[inline(always)]
60 pub fn reserve(&mut self, additional: usize) {
61 if self.bit_len + additional > self.bit_cap {
62 self.reserve_slow(additional)
63 }
64 }
65
66 #[cold]
67 #[inline(never)]
68 fn reserve_slow(&mut self, additional: usize) {
69 let bytes_needed = (self.bit_len + additional).div_ceil(64) * 8;
70 self.bytes.reserve(bytes_needed - self.bytes.len());
71 let words_available = self.bytes.capacity() / 8;
72 self.bit_cap = words_available * 64;
73 }
74
75 #[inline(always)]
76 pub fn push(&mut self, x: bool) {
77 self.reserve(1);
78 unsafe { self.push_unchecked(x) }
79 }
80
81 #[inline(always)]
85 unsafe fn flush_word_unchecked(&mut self, w: u64) {
86 let cur_len = self.bytes.len();
87 let p = self.bytes.as_mut_ptr().add(cur_len).cast::<u64>();
88 p.write_unaligned(w.to_le());
89 self.bytes.set_len(cur_len + 8);
90 }
91
92 #[inline(always)]
95 pub unsafe fn push_unchecked(&mut self, x: bool) {
96 debug_assert!(self.bit_len < self.bit_cap);
97 self.buf |= (x as u64) << (self.bit_len % 64);
98 self.bit_len += 1;
99 if self.bit_len % 64 == 0 {
100 self.flush_word_unchecked(self.buf);
101 self.set_bits_in_bytes += self.buf.count_ones() as usize;
102 self.buf = 0;
103 }
104 }
105
106 #[inline(always)]
107 pub fn extend_constant(&mut self, length: usize, value: bool) {
108 let bits_in_buf = self.bit_len % 64;
110 if bits_in_buf + length < 64 {
111 let bit_block = ((value as u64) << length) - (value as u64);
112 self.buf |= bit_block << bits_in_buf;
113 self.bit_len += length;
114 } else {
115 self.extend_constant_slow(length, value);
116 }
117 }
118
119 #[cold]
120 fn extend_constant_slow(&mut self, length: usize, value: bool) {
121 unsafe {
122 let value_spread = if value { u64::MAX } else { 0 }; self.reserve(length);
126 let bits_in_buf = self.bit_len % 64;
127 let ext_buf = self.buf | (value_spread << bits_in_buf);
128 self.flush_word_unchecked(ext_buf);
129 self.set_bits_in_bytes += ext_buf.count_ones() as usize;
130
131 let remaining_bits = length - (64 - bits_in_buf);
133 let remaining_words = remaining_bits / 64;
134 for _ in 0..remaining_words {
135 self.flush_word_unchecked(value_spread);
136 }
137 self.set_bits_in_bytes += (remaining_words * 64) & value_spread as usize;
138
139 self.buf = ((value as u64) << (remaining_bits % 64)) - (value as u64);
141 self.bit_len += length;
142 }
143 }
144
145 pub unsafe fn push_word_with_len_unchecked(&mut self, word: u64, length: usize) {
150 debug_assert!(self.bit_len + length <= self.bit_cap);
151 debug_assert!(length <= 64);
152 debug_assert!(length == 64 || (word >> length) == 0);
153 let bits_in_buf = self.bit_len % 64;
154 self.buf |= word << bits_in_buf;
155 if bits_in_buf + length >= 64 {
156 self.flush_word_unchecked(self.buf);
157 self.set_bits_in_bytes += self.buf.count_ones() as usize;
158 self.buf = if bits_in_buf > 0 {
159 word >> (64 - bits_in_buf)
160 } else {
161 0
162 };
163 }
164 self.bit_len += length;
165 }
166
167 unsafe fn extend_from_slice_unchecked(
171 &mut self,
172 mut slice: &[u8],
173 mut offset: usize,
174 mut length: usize,
175 ) {
176 if length == 0 {
177 return;
178 }
179
180 let slice_bit_offset = offset % 8;
182 if slice_bit_offset > 0 {
183 let bits_in_first_byte = (8 - slice_bit_offset).min(length);
184 let first_byte = *slice.get_unchecked(offset / 8) >> slice_bit_offset;
185 self.push_word_with_len_unchecked(
186 first_byte as u64 & ((1 << bits_in_first_byte) - 1),
187 bits_in_first_byte,
188 );
189 length -= bits_in_first_byte;
190 offset += bits_in_first_byte;
191 }
192 slice = slice.get_unchecked(offset / 8..);
193
194 let bits_in_buf = self.bit_len % 64;
196 if bits_in_buf > 0 {
197 while length >= 64 {
198 let word = u64::from_le_bytes(slice.get_unchecked(0..8).try_into().unwrap());
199 self.buf |= word << bits_in_buf;
200 self.flush_word_unchecked(self.buf);
201 self.set_bits_in_bytes += self.buf.count_ones() as usize;
202 self.buf = word >> (64 - bits_in_buf);
203 self.bit_len += 64;
204 length -= 64;
205 slice = slice.get_unchecked(8..);
206 }
207 } else {
208 while length >= 64 {
209 let word = u64::from_le_bytes(slice.get_unchecked(0..8).try_into().unwrap());
210 self.flush_word_unchecked(word);
211 self.set_bits_in_bytes += word.count_ones() as usize;
212 self.bit_len += 64;
213 length -= 64;
214 slice = slice.get_unchecked(8..);
215 }
216 }
217
218 if length > 0 {
220 let word = load_padded_le_u64(slice);
221 self.push_word_with_len_unchecked(word & ((1 << length) - 1), length);
222 }
223 }
224
225 pub fn extend_from_slice(&mut self, slice: &[u8], offset: usize, length: usize) {
226 assert!(8 * slice.len() >= offset + length);
227 self.reserve(length);
228 unsafe {
229 self.extend_from_slice_unchecked(slice, offset, length);
230 }
231 }
232
233 pub fn extend_from_bitmap(&mut self, bitmap: &Bitmap) {
234 let (slice, offset, length) = bitmap.as_slice();
237 self.extend_from_slice(slice, offset, length);
238 }
239
240 unsafe fn finish(&mut self) {
243 if self.bit_len % 64 != 0 {
244 self.bytes.extend_from_slice(&self.buf.to_le_bytes());
245 self.set_bits_in_bytes += self.buf.count_ones() as usize;
246 self.buf = 0;
247 }
248 }
249
250 pub fn into_mut(mut self) -> MutableBitmap {
252 unsafe {
253 self.finish();
254 MutableBitmap::from_vec(self.bytes, self.bit_len)
255 }
256 }
257
258 pub fn into_opt_mut_validity(mut self) -> Option<MutableBitmap> {
260 unsafe {
261 self.finish();
262 if self.set_bits_in_bytes == self.bit_len {
263 return None;
264 }
265 Some(MutableBitmap::from_vec(self.bytes, self.bit_len))
266 }
267 }
268
269 pub fn freeze(mut self) -> Bitmap {
271 unsafe {
272 self.finish();
273 let storage = SharedStorage::from_vec(self.bytes);
274 Bitmap::from_inner_unchecked(
275 storage,
276 0,
277 self.bit_len,
278 Some(self.bit_len - self.set_bits_in_bytes),
279 )
280 }
281 }
282
283 pub fn into_opt_validity(mut self) -> Option<Bitmap> {
285 unsafe {
286 self.finish();
287 if self.set_bits_in_bytes == self.bit_len {
288 return None;
289 }
290 let storage = SharedStorage::from_vec(self.bytes);
291 let bitmap = Bitmap::from_inner_unchecked(
292 storage,
293 0,
294 self.bit_len,
295 Some(self.bit_len - self.set_bits_in_bytes),
296 );
297 Some(bitmap)
298 }
299 }
300
301 pub fn extend_trusted_len_iter<I>(&mut self, iterator: I)
302 where
303 I: Iterator<Item = bool> + TrustedLen,
304 {
305 self.reserve(iterator.size_hint().1.unwrap());
306 for b in iterator {
307 unsafe {
309 self.push_unchecked(b);
310 }
311 }
312 }
313
314 #[inline]
315 pub fn from_trusted_len_iter<I>(iterator: I) -> Self
316 where
317 I: Iterator<Item = bool> + TrustedLen,
318 {
319 let mut builder = Self::new();
320 builder.extend_trusted_len_iter(iterator);
321 builder
322 }
323}