polars_arrow/legacy/kernels/
mod.rs1use std::iter::Enumerate;
2
3use crate::array::BooleanArray;
4use crate::bitmap::utils::BitChunks;
5pub mod concatenate;
6pub mod ewm;
7pub mod rolling;
8pub mod set;
9pub mod sort_partition;
10#[cfg(feature = "performant")]
11pub mod sorted_join;
12#[cfg(feature = "strings")]
13pub mod string;
14pub mod take_agg;
15mod time;
16
17#[cfg(feature = "timezones")]
18pub use time::{convert_to_naive_local, convert_to_naive_local_opt};
19pub use time::{Ambiguous, NonExistent};
20
21#[derive(Debug, PartialEq)]
23enum State {
24 Bits(u64),
26 Chunks,
28 Remainder,
30 Finish,
32}
33
34#[derive(Debug)]
40struct MaskedSlicesIterator<'a> {
41 iter: Enumerate<BitChunks<'a, u64>>,
42 state: State,
43 remainder_mask: u64,
44 remainder_len: usize,
45 chunk_len: usize,
46 len: usize,
47 start: usize,
48 on_region: bool,
49 current_chunk: usize,
50 current_bit: usize,
51 total_len: usize,
52}
53
54impl<'a> MaskedSlicesIterator<'a> {
55 pub(crate) fn new(mask: &'a BooleanArray) -> Self {
56 let chunks = mask.values().chunks::<u64>();
57
58 let chunk_len = mask.len() / 64;
59 let remainder_len = chunks.remainder_len();
60 let remainder_mask = chunks.remainder();
61
62 Self {
63 iter: chunks.enumerate(),
64 state: State::Chunks,
65 remainder_len,
66 chunk_len,
67 remainder_mask,
68 len: 0,
69 start: 0,
70 on_region: false,
71 current_chunk: 0,
72 current_bit: 0,
73 total_len: mask.len(),
74 }
75 }
76
77 #[inline]
78 fn current_start(&self) -> usize {
79 self.current_chunk * 64 + self.current_bit
80 }
81
82 #[inline]
83 fn iterate_bits(&mut self, mask: u64, max: usize) -> Option<(usize, usize)> {
84 while self.current_bit < max {
85 if (mask & (1 << self.current_bit)) != 0 {
86 if !self.on_region {
87 self.start = self.current_start();
88 self.on_region = true;
89 }
90 self.len += 1;
91 } else if self.on_region {
92 let result = (self.start, self.start + self.len);
93 self.len = 0;
94 self.on_region = false;
95 self.current_bit += 1;
96 return Some(result);
97 }
98 self.current_bit += 1;
99 }
100 self.current_bit = 0;
101 None
102 }
103
104 #[inline]
106 fn iterate_chunks(&mut self) -> Option<(usize, usize)> {
107 while let Some((i, mask)) = self.iter.next() {
108 self.current_chunk = i;
109 if mask == 0 {
110 if self.on_region {
111 let result = (self.start, self.start + self.len);
112 self.len = 0;
113 self.on_region = false;
114 return Some(result);
115 }
116 } else if mask == u64::MAX {
117 if !self.on_region {
119 self.start = self.current_start();
120 self.on_region = true;
121 }
122 self.len += 64;
123 } else {
124 self.state = State::Bits(mask);
126 return None;
127 }
128 }
129 self.current_chunk = self.chunk_len;
131 self.state = State::Remainder;
132 None
133 }
134}
135
136impl Iterator for MaskedSlicesIterator<'_> {
137 type Item = (usize, usize);
138
139 fn next(&mut self) -> Option<Self::Item> {
140 match self.state {
141 State::Chunks => {
142 match self.iterate_chunks() {
143 None => {
144 self.current_bit = 0;
146 self.next()
147 },
148 other => other,
149 }
150 },
151 State::Bits(mask) => {
152 match self.iterate_bits(mask, 64) {
153 None => {
154 self.state = State::Chunks;
157 self.next()
158 },
159 other => other,
160 }
161 },
162 State::Remainder => match self.iterate_bits(self.remainder_mask, self.remainder_len) {
163 None => {
164 self.state = State::Finish;
165 if self.on_region {
166 Some((self.start, self.start + self.len))
167 } else {
168 None
169 }
170 },
171 other => other,
172 },
173 State::Finish => None,
174 }
175 }
176}
177
178#[derive(Eq, PartialEq, Debug)]
179enum BinaryMaskedState {
180 Start,
181 LastFalse,
183 LastTrue,
185 Finish,
186}
187
188pub(crate) struct BinaryMaskedSliceIterator<'a> {
189 slice_iter: MaskedSlicesIterator<'a>,
190 filled: usize,
191 low: usize,
192 high: usize,
193 state: BinaryMaskedState,
194}
195
196impl<'a> BinaryMaskedSliceIterator<'a> {
197 pub(crate) fn new(mask: &'a BooleanArray) -> Self {
198 Self {
199 slice_iter: MaskedSlicesIterator::new(mask),
200 filled: 0,
201 low: 0,
202 high: 0,
203 state: BinaryMaskedState::Start,
204 }
205 }
206}
207
208impl Iterator for BinaryMaskedSliceIterator<'_> {
209 type Item = (usize, usize, bool);
210
211 fn next(&mut self) -> Option<Self::Item> {
212 use BinaryMaskedState::*;
213
214 match self.state {
215 Start => {
216 if self.low == 0 && self.high == 0 {
218 match self.slice_iter.next() {
219 Some((low, high)) => {
220 self.low = low;
221 self.high = high;
222
223 if low > 0 {
224 Some((0, low, false))
226 } else {
227 self.state = LastTrue;
228 self.filled = high;
229 Some((low, high, true))
230 }
231 },
232 None => {
233 self.state = Finish;
234 Some((self.filled, self.slice_iter.total_len, false))
235 },
236 }
237 } else {
238 self.filled = self.high;
239 self.state = LastTrue;
240 Some((self.low, self.high, true))
241 }
242 },
243 LastFalse => {
244 self.state = LastTrue;
245 self.filled = self.high;
246 Some((self.low, self.high, true))
247 },
248 LastTrue => match self.slice_iter.next() {
249 Some((low, high)) => {
250 self.low = low;
251 self.high = high;
252 self.state = LastFalse;
253 let last_filled = self.filled;
254 self.filled = low;
255 Some((last_filled, low, false))
256 },
257 None => {
258 self.state = Finish;
259 if self.filled != self.slice_iter.total_len {
260 Some((self.filled, self.slice_iter.total_len, false))
261 } else {
262 None
263 }
264 },
265 },
266 Finish => None,
267 }
268 }
269}
270
271#[cfg(test)]
272mod test {
273 use super::*;
274
275 #[test]
276 fn test_binary_masked_slice_iter() {
277 let mask = BooleanArray::from_slice([false, false, true, true, true, false, false]);
278
279 let out = BinaryMaskedSliceIterator::new(&mask)
280 .map(|(_, _, b)| b)
281 .collect::<Vec<_>>();
282 assert_eq!(out, &[false, true, false]);
283 let out = BinaryMaskedSliceIterator::new(&mask).collect::<Vec<_>>();
284 assert_eq!(out, &[(0, 2, false), (2, 5, true), (5, 7, false)]);
285 let mask = BooleanArray::from_slice([true, true, false, true]);
286 let out = BinaryMaskedSliceIterator::new(&mask)
287 .map(|(_, _, b)| b)
288 .collect::<Vec<_>>();
289 assert_eq!(out, &[true, false, true]);
290 let mask = BooleanArray::from_slice([true, true, false, true, true]);
291 let out = BinaryMaskedSliceIterator::new(&mask)
292 .map(|(_, _, b)| b)
293 .collect::<Vec<_>>();
294 assert_eq!(out, &[true, false, true]);
295 }
296
297 #[test]
298 fn test_binary_slice_mask_iter_with_false() {
299 let mask = BooleanArray::from_slice([false, false]);
300 let out = BinaryMaskedSliceIterator::new(&mask).collect::<Vec<_>>();
301 assert_eq!(out, &[(0, 2, false)]);
302 }
303}