polars_arrow/legacy/kernels/
mod.rs

1use std::iter::Enumerate;
2
3use crate::array::BooleanArray;
4use crate::bitmap::utils::BitChunks;
5pub mod concatenate;
6pub mod ewm;
7pub mod rolling;
8pub mod set;
9pub mod sort_partition;
10#[cfg(feature = "performant")]
11pub mod sorted_join;
12#[cfg(feature = "strings")]
13pub mod string;
14pub mod take_agg;
15mod time;
16
17#[cfg(feature = "timezones")]
18pub use time::{convert_to_naive_local, convert_to_naive_local_opt};
19pub use time::{Ambiguous, NonExistent};
20
21/// Internal state of [SlicesIterator]
22#[derive(Debug, PartialEq)]
23enum State {
24    // it is iterating over bits of a mask (`u64`, steps of size of 1 slot)
25    Bits(u64),
26    // it is iterating over chunks (steps of size of 64 slots)
27    Chunks,
28    // it is iterating over the remaining bits (steps of size of 1 slot)
29    Remainder,
30    // nothing more to iterate.
31    Finish,
32}
33
34/// Forked and modified from arrow crate.
35///
36/// An iterator of `(usize, usize)` each representing an interval `[start,end[` whose
37/// slots of a [BooleanArray] are true. Each interval corresponds to a contiguous region of memory to be
38/// "taken" from an array to be filtered.
39#[derive(Debug)]
40struct MaskedSlicesIterator<'a> {
41    iter: Enumerate<BitChunks<'a, u64>>,
42    state: State,
43    remainder_mask: u64,
44    remainder_len: usize,
45    chunk_len: usize,
46    len: usize,
47    start: usize,
48    on_region: bool,
49    current_chunk: usize,
50    current_bit: usize,
51    total_len: usize,
52}
53
54impl<'a> MaskedSlicesIterator<'a> {
55    pub(crate) fn new(mask: &'a BooleanArray) -> Self {
56        let chunks = mask.values().chunks::<u64>();
57
58        let chunk_len = mask.len() / 64;
59        let remainder_len = chunks.remainder_len();
60        let remainder_mask = chunks.remainder();
61
62        Self {
63            iter: chunks.enumerate(),
64            state: State::Chunks,
65            remainder_len,
66            chunk_len,
67            remainder_mask,
68            len: 0,
69            start: 0,
70            on_region: false,
71            current_chunk: 0,
72            current_bit: 0,
73            total_len: mask.len(),
74        }
75    }
76
77    #[inline]
78    fn current_start(&self) -> usize {
79        self.current_chunk * 64 + self.current_bit
80    }
81
82    #[inline]
83    fn iterate_bits(&mut self, mask: u64, max: usize) -> Option<(usize, usize)> {
84        while self.current_bit < max {
85            if (mask & (1 << self.current_bit)) != 0 {
86                if !self.on_region {
87                    self.start = self.current_start();
88                    self.on_region = true;
89                }
90                self.len += 1;
91            } else if self.on_region {
92                let result = (self.start, self.start + self.len);
93                self.len = 0;
94                self.on_region = false;
95                self.current_bit += 1;
96                return Some(result);
97            }
98            self.current_bit += 1;
99        }
100        self.current_bit = 0;
101        None
102    }
103
104    /// iterates over chunks.
105    #[inline]
106    fn iterate_chunks(&mut self) -> Option<(usize, usize)> {
107        while let Some((i, mask)) = self.iter.next() {
108            self.current_chunk = i;
109            if mask == 0 {
110                if self.on_region {
111                    let result = (self.start, self.start + self.len);
112                    self.len = 0;
113                    self.on_region = false;
114                    return Some(result);
115                }
116            } else if mask == u64::MAX {
117                // = !0u64
118                if !self.on_region {
119                    self.start = self.current_start();
120                    self.on_region = true;
121                }
122                self.len += 64;
123            } else {
124                // there is a chunk that has a non-trivial mask => iterate over bits.
125                self.state = State::Bits(mask);
126                return None;
127            }
128        }
129        // no more chunks => start iterating over the remainder
130        self.current_chunk = self.chunk_len;
131        self.state = State::Remainder;
132        None
133    }
134}
135
136impl Iterator for MaskedSlicesIterator<'_> {
137    type Item = (usize, usize);
138
139    fn next(&mut self) -> Option<Self::Item> {
140        match self.state {
141            State::Chunks => {
142                match self.iterate_chunks() {
143                    None => {
144                        // iterating over chunks does not yield any new slice => continue to the next
145                        self.current_bit = 0;
146                        self.next()
147                    },
148                    other => other,
149                }
150            },
151            State::Bits(mask) => {
152                match self.iterate_bits(mask, 64) {
153                    None => {
154                        // iterating over bits does not yield any new slice => change back
155                        // to chunks and continue to the next
156                        self.state = State::Chunks;
157                        self.next()
158                    },
159                    other => other,
160                }
161            },
162            State::Remainder => match self.iterate_bits(self.remainder_mask, self.remainder_len) {
163                None => {
164                    self.state = State::Finish;
165                    if self.on_region {
166                        Some((self.start, self.start + self.len))
167                    } else {
168                        None
169                    }
170                },
171                other => other,
172            },
173            State::Finish => None,
174        }
175    }
176}
177
178#[derive(Eq, PartialEq, Debug)]
179enum BinaryMaskedState {
180    Start,
181    // Last masks were false values
182    LastFalse,
183    // Last masks were true values
184    LastTrue,
185    Finish,
186}
187
188pub(crate) struct BinaryMaskedSliceIterator<'a> {
189    slice_iter: MaskedSlicesIterator<'a>,
190    filled: usize,
191    low: usize,
192    high: usize,
193    state: BinaryMaskedState,
194}
195
196impl<'a> BinaryMaskedSliceIterator<'a> {
197    pub(crate) fn new(mask: &'a BooleanArray) -> Self {
198        Self {
199            slice_iter: MaskedSlicesIterator::new(mask),
200            filled: 0,
201            low: 0,
202            high: 0,
203            state: BinaryMaskedState::Start,
204        }
205    }
206}
207
208impl Iterator for BinaryMaskedSliceIterator<'_> {
209    type Item = (usize, usize, bool);
210
211    fn next(&mut self) -> Option<Self::Item> {
212        use BinaryMaskedState::*;
213
214        match self.state {
215            Start => {
216                // first iteration
217                if self.low == 0 && self.high == 0 {
218                    match self.slice_iter.next() {
219                        Some((low, high)) => {
220                            self.low = low;
221                            self.high = high;
222
223                            if low > 0 {
224                                // do another start iteration.
225                                Some((0, low, false))
226                            } else {
227                                self.state = LastTrue;
228                                self.filled = high;
229                                Some((low, high, true))
230                            }
231                        },
232                        None => {
233                            self.state = Finish;
234                            Some((self.filled, self.slice_iter.total_len, false))
235                        },
236                    }
237                } else {
238                    self.filled = self.high;
239                    self.state = LastTrue;
240                    Some((self.low, self.high, true))
241                }
242            },
243            LastFalse => {
244                self.state = LastTrue;
245                self.filled = self.high;
246                Some((self.low, self.high, true))
247            },
248            LastTrue => match self.slice_iter.next() {
249                Some((low, high)) => {
250                    self.low = low;
251                    self.high = high;
252                    self.state = LastFalse;
253                    let last_filled = self.filled;
254                    self.filled = low;
255                    Some((last_filled, low, false))
256                },
257                None => {
258                    self.state = Finish;
259                    if self.filled != self.slice_iter.total_len {
260                        Some((self.filled, self.slice_iter.total_len, false))
261                    } else {
262                        None
263                    }
264                },
265            },
266            Finish => None,
267        }
268    }
269}
270
271#[cfg(test)]
272mod test {
273    use super::*;
274
275    #[test]
276    fn test_binary_masked_slice_iter() {
277        let mask = BooleanArray::from_slice([false, false, true, true, true, false, false]);
278
279        let out = BinaryMaskedSliceIterator::new(&mask)
280            .map(|(_, _, b)| b)
281            .collect::<Vec<_>>();
282        assert_eq!(out, &[false, true, false]);
283        let out = BinaryMaskedSliceIterator::new(&mask).collect::<Vec<_>>();
284        assert_eq!(out, &[(0, 2, false), (2, 5, true), (5, 7, false)]);
285        let mask = BooleanArray::from_slice([true, true, false, true]);
286        let out = BinaryMaskedSliceIterator::new(&mask)
287            .map(|(_, _, b)| b)
288            .collect::<Vec<_>>();
289        assert_eq!(out, &[true, false, true]);
290        let mask = BooleanArray::from_slice([true, true, false, true, true]);
291        let out = BinaryMaskedSliceIterator::new(&mask)
292            .map(|(_, _, b)| b)
293            .collect::<Vec<_>>();
294        assert_eq!(out, &[true, false, true]);
295    }
296
297    #[test]
298    fn test_binary_slice_mask_iter_with_false() {
299        let mask = BooleanArray::from_slice([false, false]);
300        let out = BinaryMaskedSliceIterator::new(&mask).collect::<Vec<_>>();
301        assert_eq!(out, &[(0, 2, false)]);
302    }
303}