polars_arrow/bitmap/
aligned.rs

1use std::iter::Copied;
2use std::slice::Iter;
3
4use crate::bitmap::utils::BitChunk;
5
6fn load_chunk_le<T: BitChunk>(src: &[u8]) -> T {
7    if let Ok(chunk) = src.try_into() {
8        return T::from_le_bytes(chunk);
9    }
10
11    let mut chunk = T::Bytes::default();
12    let len = src.len().min(chunk.as_ref().len());
13    chunk.as_mut()[..len].copy_from_slice(&src[..len]);
14    T::from_le_bytes(chunk)
15}
16
17/// Represents a bitmap split in three portions, a prefix, a suffix and an
18/// aligned bulk section in the middle.
19#[derive(Default, Clone, Debug)]
20pub struct AlignedBitmapSlice<'a, T: BitChunk> {
21    prefix: T,
22    prefix_len: u32,
23    bulk: &'a [T],
24    suffix: T,
25    suffix_len: u32,
26}
27
28impl<'a, T: BitChunk> AlignedBitmapSlice<'a, T> {
29    #[inline(always)]
30    pub fn prefix(&self) -> T {
31        self.prefix
32    }
33
34    #[inline(always)]
35    pub fn bulk_iter(&self) -> Copied<Iter<'a, T>> {
36        self.bulk.iter().copied()
37    }
38
39    #[inline(always)]
40    pub fn bulk(&self) -> &'a [T] {
41        self.bulk
42    }
43
44    #[inline(always)]
45    pub fn suffix(&self) -> T {
46        self.suffix
47    }
48
49    /// The length (in bits) of the portion of the bitmap found in prefix.
50    #[inline(always)]
51    pub fn prefix_bitlen(&self) -> usize {
52        self.prefix_len as usize
53    }
54
55    /// The length (in bits) of the portion of the bitmap found in bulk.
56    #[inline(always)]
57    pub fn bulk_bitlen(&self) -> usize {
58        8 * size_of::<T>() * self.bulk.len()
59    }
60
61    /// The length (in bits) of the portion of the bitmap found in suffix.
62    #[inline(always)]
63    pub fn suffix_bitlen(&self) -> usize {
64        self.suffix_len as usize
65    }
66
67    pub fn new(mut bytes: &'a [u8], mut offset: usize, len: usize) -> Self {
68        if len == 0 {
69            return Self::default();
70        }
71
72        assert!(bytes.len() * 8 >= offset + len);
73
74        // Strip off useless bytes from start.
75        let start_byte_idx = offset / 8;
76        bytes = &bytes[start_byte_idx..];
77        offset %= 8;
78
79        // Fast-path: fits entirely in one chunk.
80        let chunk_len = size_of::<T>();
81        let chunk_len_bits = 8 * chunk_len;
82        if offset + len <= chunk_len_bits {
83            let mut prefix = load_chunk_le::<T>(bytes) >> offset;
84            if len < chunk_len_bits {
85                prefix &= (T::one() << len) - T::one();
86            }
87            return Self {
88                prefix,
89                prefix_len: len as u32,
90                ..Self::default()
91            };
92        }
93
94        // Find how many bytes from the start our aligned section would start.
95        let mut align_offset = bytes.as_ptr().align_offset(chunk_len);
96        let mut align_offset_bits = 8 * align_offset;
97
98        // Oops, the original pointer was already aligned, but our offset means
99        // we can't start there, start one chunk later.
100        if offset > align_offset_bits {
101            align_offset_bits += chunk_len_bits;
102            align_offset += chunk_len;
103        }
104
105        // Calculate based on this the lengths of our sections (in bits).
106        let prefix_len = (align_offset_bits - offset).min(len);
107        let rest_len = len - prefix_len;
108        let suffix_len = rest_len % chunk_len_bits;
109        let bulk_len = rest_len - suffix_len;
110        debug_assert!(prefix_len < chunk_len_bits);
111        debug_assert!(bulk_len % chunk_len_bits == 0);
112        debug_assert!(suffix_len < chunk_len_bits);
113
114        // Now we just have to load.
115        let (prefix_bytes, rest_bytes) = bytes.split_at(align_offset);
116        let (bulk_bytes, suffix_bytes) = rest_bytes.split_at(bulk_len / 8);
117        let mut prefix = load_chunk_le::<T>(prefix_bytes) >> offset;
118        let mut suffix = load_chunk_le::<T>(suffix_bytes);
119        prefix &= (T::one() << prefix_len) - T::one();
120        suffix &= (T::one() << suffix_len) - T::one();
121        Self {
122            prefix,
123            bulk: bytemuck::cast_slice(bulk_bytes),
124            suffix,
125            prefix_len: prefix_len as u32,
126            suffix_len: suffix_len as u32,
127        }
128    }
129}