polars_compute/comparisons/
view.rs

1use arrow::array::{BinaryViewArray, Utf8ViewArray};
2use arrow::bitmap::Bitmap;
3
4use super::TotalEqKernel;
5use crate::comparisons::TotalOrdKernel;
6
7// If s fits in 12 bytes, returns the view encoding it would have in a
8// BinaryViewArray.
9fn small_view_encoding(s: &[u8]) -> Option<u128> {
10    if s.len() > 12 {
11        return None;
12    }
13
14    let mut tmp = [0u8; 16];
15    tmp[0] = s.len() as u8;
16    tmp[4..4 + s.len()].copy_from_slice(s);
17    Some(u128::from_le_bytes(tmp))
18}
19
20// Loads (up to) the first 4 bytes of s as little-endian, padded with zeros.
21fn load_prefix(s: &[u8]) -> u32 {
22    let start = &s[..s.len().min(4)];
23    let mut tmp = [0u8; 4];
24    tmp[..start.len()].copy_from_slice(start);
25    u32::from_le_bytes(tmp)
26}
27
28fn broadcast_inequality(
29    arr: &BinaryViewArray,
30    scalar: &[u8],
31    cmp_prefix: impl Fn(u32, u32) -> bool,
32    cmp_str: impl Fn(&[u8], &[u8]) -> bool,
33) -> Bitmap {
34    let views = arr.views().as_slice();
35    let prefix = load_prefix(scalar);
36    let be_prefix = prefix.to_be();
37    Bitmap::from_trusted_len_iter((0..arr.len()).map(|i| unsafe {
38        let v_prefix = (views.get_unchecked(i).as_u128() >> 32) as u32;
39        if v_prefix != prefix {
40            cmp_prefix(v_prefix.to_be(), be_prefix)
41        } else {
42            cmp_str(arr.value_unchecked(i), scalar)
43        }
44    }))
45}
46
47impl TotalEqKernel for BinaryViewArray {
48    type Scalar = [u8];
49
50    fn tot_eq_kernel(&self, other: &Self) -> Bitmap {
51        debug_assert!(self.len() == other.len());
52
53        let slf_views = self.views().as_slice();
54        let other_views = other.views().as_slice();
55
56        Bitmap::from_trusted_len_iter((0..self.len()).map(|i| unsafe {
57            let av = slf_views.get_unchecked(i).as_u128();
58            let bv = other_views.get_unchecked(i).as_u128();
59
60            // First 64 bits contain length and prefix.
61            let a_len_prefix = av as u64;
62            let b_len_prefix = bv as u64;
63            if a_len_prefix != b_len_prefix {
64                return false;
65            }
66
67            let alen = av as u32;
68            if alen <= 12 {
69                // String is fully inlined, compare top 64 bits. Bottom bits were
70                // tested equal before, which also ensures the lengths are equal.
71                (av >> 64) as u64 == (bv >> 64) as u64
72            } else {
73                self.value_unchecked(i) == other.value_unchecked(i)
74            }
75        }))
76    }
77
78    fn tot_ne_kernel(&self, other: &Self) -> Bitmap {
79        debug_assert!(self.len() == other.len());
80
81        let slf_views = self.views().as_slice();
82        let other_views = other.views().as_slice();
83
84        Bitmap::from_trusted_len_iter((0..self.len()).map(|i| unsafe {
85            let av = slf_views.get_unchecked(i).as_u128();
86            let bv = other_views.get_unchecked(i).as_u128();
87
88            // First 64 bits contain length and prefix.
89            let a_len_prefix = av as u64;
90            let b_len_prefix = bv as u64;
91            if a_len_prefix != b_len_prefix {
92                return true;
93            }
94
95            let alen = av as u32;
96            if alen <= 12 {
97                // String is fully inlined, compare top 64 bits. Bottom bits were
98                // tested equal before, which also ensures the lengths are equal.
99                (av >> 64) as u64 != (bv >> 64) as u64
100            } else {
101                self.value_unchecked(i) != other.value_unchecked(i)
102            }
103        }))
104    }
105
106    fn tot_eq_kernel_broadcast(&self, other: &Self::Scalar) -> Bitmap {
107        if let Some(val) = small_view_encoding(other) {
108            Bitmap::from_trusted_len_iter(self.views().iter().map(|v| v.as_u128() == val))
109        } else {
110            let slf_views = self.views().as_slice();
111            let prefix = u32::from_le_bytes(other[..4].try_into().unwrap());
112            let prefix_len = ((prefix as u64) << 32) | other.len() as u64;
113            Bitmap::from_trusted_len_iter((0..self.len()).map(|i| unsafe {
114                let v_prefix_len = slf_views.get_unchecked(i).as_u128() as u64;
115                if v_prefix_len != prefix_len {
116                    false
117                } else {
118                    self.value_unchecked(i) == other
119                }
120            }))
121        }
122    }
123
124    fn tot_ne_kernel_broadcast(&self, other: &Self::Scalar) -> Bitmap {
125        if let Some(val) = small_view_encoding(other) {
126            Bitmap::from_trusted_len_iter(self.views().iter().map(|v| v.as_u128() != val))
127        } else {
128            let slf_views = self.views().as_slice();
129            let prefix = u32::from_le_bytes(other[..4].try_into().unwrap());
130            let prefix_len = ((prefix as u64) << 32) | other.len() as u64;
131            Bitmap::from_trusted_len_iter((0..self.len()).map(|i| unsafe {
132                let v_prefix_len = slf_views.get_unchecked(i).as_u128() as u64;
133                if v_prefix_len != prefix_len {
134                    true
135                } else {
136                    self.value_unchecked(i) != other
137                }
138            }))
139        }
140    }
141}
142
143impl TotalOrdKernel for BinaryViewArray {
144    type Scalar = [u8];
145
146    fn tot_lt_kernel(&self, other: &Self) -> Bitmap {
147        debug_assert!(self.len() == other.len());
148
149        let slf_views = self.views().as_slice();
150        let other_views = other.views().as_slice();
151
152        Bitmap::from_trusted_len_iter((0..self.len()).map(|i| unsafe {
153            let av = slf_views.get_unchecked(i).as_u128();
154            let bv = other_views.get_unchecked(i).as_u128();
155
156            // First 64 bits contain length and prefix.
157            // Only check prefix.
158            let a_prefix = (av >> 32) as u32;
159            let b_prefix = (bv >> 32) as u32;
160            if a_prefix != b_prefix {
161                a_prefix.to_be() < b_prefix.to_be()
162            } else {
163                self.value_unchecked(i) < other.value_unchecked(i)
164            }
165        }))
166    }
167
168    fn tot_le_kernel(&self, other: &Self) -> Bitmap {
169        debug_assert!(self.len() == other.len());
170
171        let slf_views = self.views().as_slice();
172        let other_views = other.views().as_slice();
173
174        Bitmap::from_trusted_len_iter((0..self.len()).map(|i| unsafe {
175            let av = slf_views.get_unchecked(i).as_u128();
176            let bv = other_views.get_unchecked(i).as_u128();
177
178            // First 64 bits contain length and prefix.
179            // Only check prefix.
180            let a_prefix = (av >> 32) as u32;
181            let b_prefix = (bv >> 32) as u32;
182            if a_prefix != b_prefix {
183                a_prefix.to_be() < b_prefix.to_be()
184            } else {
185                self.value_unchecked(i) <= other.value_unchecked(i)
186            }
187        }))
188    }
189
190    fn tot_lt_kernel_broadcast(&self, other: &Self::Scalar) -> Bitmap {
191        broadcast_inequality(self, other, |a, b| a < b, |a, b| a < b)
192    }
193
194    fn tot_le_kernel_broadcast(&self, other: &Self::Scalar) -> Bitmap {
195        broadcast_inequality(self, other, |a, b| a <= b, |a, b| a <= b)
196    }
197
198    fn tot_gt_kernel_broadcast(&self, other: &Self::Scalar) -> Bitmap {
199        broadcast_inequality(self, other, |a, b| a > b, |a, b| a > b)
200    }
201
202    fn tot_ge_kernel_broadcast(&self, other: &Self::Scalar) -> Bitmap {
203        broadcast_inequality(self, other, |a, b| a >= b, |a, b| a >= b)
204    }
205}
206
207impl TotalEqKernel for Utf8ViewArray {
208    type Scalar = str;
209
210    fn tot_eq_kernel(&self, other: &Self) -> Bitmap {
211        self.to_binview().tot_eq_kernel(&other.to_binview())
212    }
213
214    fn tot_ne_kernel(&self, other: &Self) -> Bitmap {
215        self.to_binview().tot_ne_kernel(&other.to_binview())
216    }
217
218    fn tot_eq_kernel_broadcast(&self, other: &Self::Scalar) -> Bitmap {
219        self.to_binview().tot_eq_kernel_broadcast(other.as_bytes())
220    }
221
222    fn tot_ne_kernel_broadcast(&self, other: &Self::Scalar) -> Bitmap {
223        self.to_binview().tot_ne_kernel_broadcast(other.as_bytes())
224    }
225}
226
227impl TotalOrdKernel for Utf8ViewArray {
228    type Scalar = str;
229
230    fn tot_lt_kernel(&self, other: &Self) -> Bitmap {
231        self.to_binview().tot_lt_kernel(&other.to_binview())
232    }
233
234    fn tot_le_kernel(&self, other: &Self) -> Bitmap {
235        self.to_binview().tot_le_kernel(&other.to_binview())
236    }
237
238    fn tot_lt_kernel_broadcast(&self, other: &Self::Scalar) -> Bitmap {
239        self.to_binview().tot_lt_kernel_broadcast(other.as_bytes())
240    }
241
242    fn tot_le_kernel_broadcast(&self, other: &Self::Scalar) -> Bitmap {
243        self.to_binview().tot_le_kernel_broadcast(other.as_bytes())
244    }
245
246    fn tot_gt_kernel_broadcast(&self, other: &Self::Scalar) -> Bitmap {
247        self.to_binview().tot_gt_kernel_broadcast(other.as_bytes())
248    }
249
250    fn tot_ge_kernel_broadcast(&self, other: &Self::Scalar) -> Bitmap {
251        self.to_binview().tot_ge_kernel_broadcast(other.as_bytes())
252    }
253}