polars_arrow/types/
bit_chunk.rs

1use std::fmt::Binary;
2use std::ops::{BitAndAssign, Not, Shl, ShlAssign, ShrAssign};
3
4use num_traits::PrimInt;
5
6use super::NativeType;
7
8/// A chunk of bits. This is used to create masks of a given length
9/// whose width is `1` bit. In `portable_simd` notation, this corresponds to `m1xY`.
10///
11/// This (sealed) trait is implemented for [`u8`], [`u16`], [`u32`] and [`u64`].
12pub trait BitChunk:
13    super::private::Sealed
14    + PrimInt
15    + NativeType
16    + Binary
17    + ShlAssign
18    + Not<Output = Self>
19    + ShrAssign<usize>
20    + ShlAssign<usize>
21    + Shl<usize, Output = Self>
22    + BitAndAssign
23{
24    /// convert itself into bytes.
25    fn to_ne_bytes(self) -> Self::Bytes;
26    /// convert itself from bytes.
27    fn from_ne_bytes(v: Self::Bytes) -> Self;
28}
29
30macro_rules! bit_chunk {
31    ($ty:ty) => {
32        impl BitChunk for $ty {
33            #[inline(always)]
34            fn to_ne_bytes(self) -> Self::Bytes {
35                self.to_ne_bytes()
36            }
37
38            #[inline(always)]
39            fn from_ne_bytes(v: Self::Bytes) -> Self {
40                Self::from_ne_bytes(v)
41            }
42        }
43    };
44}
45
46bit_chunk!(u8);
47bit_chunk!(u16);
48bit_chunk!(u32);
49bit_chunk!(u64);
50
51/// An [`Iterator<Item=bool>`] over a [`BitChunk`].
52///
53/// This iterator is often compiled to SIMD.
54///
55/// The [LSB](https://en.wikipedia.org/wiki/Bit_numbering#Least_significant_bit) corresponds
56/// to the first slot, as defined by the arrow specification.
57/// # Example
58/// ```
59/// use polars_arrow::types::BitChunkIter;
60/// let a = 0b00010000u8;
61/// let iter = BitChunkIter::new(a, 7);
62/// let r = iter.collect::<Vec<_>>();
63/// assert_eq!(r, vec![false, false, false, false, true, false, false]);
64/// ```
65pub struct BitChunkIter<T: BitChunk> {
66    value: T,
67    mask: T,
68    remaining: usize,
69}
70
71impl<T: BitChunk> BitChunkIter<T> {
72    /// Creates a new [`BitChunkIter`] with `len` bits.
73    #[inline]
74    pub fn new(value: T, len: usize) -> Self {
75        assert!(len <= size_of::<T>() * 8);
76        Self {
77            value,
78            remaining: len,
79            mask: T::one(),
80        }
81    }
82}
83
84impl<T: BitChunk> Iterator for BitChunkIter<T> {
85    type Item = bool;
86
87    #[inline]
88    fn next(&mut self) -> Option<Self::Item> {
89        if self.remaining == 0 {
90            return None;
91        };
92        let result = Some(self.value & self.mask != T::zero());
93        self.remaining -= 1;
94        self.mask <<= 1;
95        result
96    }
97
98    #[inline]
99    fn size_hint(&self) -> (usize, Option<usize>) {
100        (self.remaining, Some(self.remaining))
101    }
102}
103
104// # Safety
105// a mathematical invariant of this iterator
106unsafe impl<T: BitChunk> crate::trusted_len::TrustedLen for BitChunkIter<T> {}
107
108/// An [`Iterator<Item=usize>`] over a [`BitChunk`] returning the index of each bit set in the chunk
109/// See <https://lemire.me/blog/2018/03/08/iterating-over-set-bits-quickly-simd-edition/> for details
110/// # Example
111/// ```
112/// use polars_arrow::types::BitChunkOnes;
113/// let a = 0b00010000u8;
114/// let iter = BitChunkOnes::new(a);
115/// let r = iter.collect::<Vec<_>>();
116/// assert_eq!(r, vec![4]);
117/// ```
118pub struct BitChunkOnes<T: BitChunk> {
119    value: T,
120    remaining: usize,
121}
122
123impl<T: BitChunk> BitChunkOnes<T> {
124    /// Creates a new [`BitChunkOnes`] with `len` bits.
125    #[inline]
126    pub fn new(value: T) -> Self {
127        Self {
128            value,
129            remaining: value.count_ones() as usize,
130        }
131    }
132
133    #[inline]
134    pub fn from_known_count(value: T, remaining: usize) -> Self {
135        Self { value, remaining }
136    }
137}
138
139impl<T: BitChunk> Iterator for BitChunkOnes<T> {
140    type Item = usize;
141
142    #[inline]
143    fn next(&mut self) -> Option<Self::Item> {
144        if self.remaining == 0 {
145            return None;
146        }
147        let v = self.value.trailing_zeros() as usize;
148        self.value &= self.value - T::one();
149
150        self.remaining -= 1;
151        Some(v)
152    }
153
154    #[inline]
155    fn size_hint(&self) -> (usize, Option<usize>) {
156        (self.remaining, Some(self.remaining))
157    }
158}
159
160// # Safety
161// a mathematical invariant of this iterator
162unsafe impl<T: BitChunk> crate::trusted_len::TrustedLen for BitChunkOnes<T> {}