polars_utils/
index.rs

1use std::fmt::{Debug, Formatter};
2
3use polars_error::{polars_ensure, PolarsResult};
4
5use crate::nulls::IsNull;
6
7#[cfg(not(feature = "bigidx"))]
8pub type IdxSize = u32;
9#[cfg(feature = "bigidx")]
10pub type IdxSize = u64;
11
12#[cfg(not(feature = "bigidx"))]
13pub type NonZeroIdxSize = std::num::NonZeroU32;
14#[cfg(feature = "bigidx")]
15pub type NonZeroIdxSize = std::num::NonZeroU64;
16
17#[cfg(not(feature = "bigidx"))]
18pub type AtomicIdxSize = std::sync::atomic::AtomicU32;
19#[cfg(feature = "bigidx")]
20pub type AtomicIdxSize = std::sync::atomic::AtomicU64;
21
22#[derive(Clone, Copy)]
23#[repr(transparent)]
24pub struct NullableIdxSize {
25    pub inner: IdxSize,
26}
27
28impl PartialEq<Self> for NullableIdxSize {
29    fn eq(&self, other: &Self) -> bool {
30        self.inner == other.inner
31    }
32}
33
34impl Eq for NullableIdxSize {}
35
36unsafe impl bytemuck::Zeroable for NullableIdxSize {}
37unsafe impl bytemuck::AnyBitPattern for NullableIdxSize {}
38unsafe impl bytemuck::NoUninit for NullableIdxSize {}
39
40impl Debug for NullableIdxSize {
41    fn fmt(&self, f: &mut Formatter<'_>) -> std::fmt::Result {
42        write!(f, "{:?}", self.inner)
43    }
44}
45
46impl NullableIdxSize {
47    #[inline(always)]
48    pub fn is_null_idx(&self) -> bool {
49        // The left/right join maintain_order algorithms depend on the special value for sorting
50        self.inner == IdxSize::MAX
51    }
52
53    #[inline(always)]
54    pub const fn null() -> Self {
55        Self {
56            inner: IdxSize::MAX,
57        }
58    }
59
60    #[inline(always)]
61    pub fn idx(&self) -> IdxSize {
62        self.inner
63    }
64
65    #[inline(always)]
66    pub fn to_opt(&self) -> Option<IdxSize> {
67        if self.is_null_idx() {
68            None
69        } else {
70            Some(self.idx())
71        }
72    }
73}
74
75impl From<IdxSize> for NullableIdxSize {
76    #[inline(always)]
77    fn from(value: IdxSize) -> Self {
78        Self { inner: value }
79    }
80}
81
82pub trait Bounded {
83    fn len(&self) -> usize;
84
85    fn is_empty(&self) -> bool {
86        self.len() == 0
87    }
88}
89
90pub trait NullCount {
91    fn null_count(&self) -> usize {
92        0
93    }
94}
95
96impl<T: NullCount> NullCount for &T {
97    fn null_count(&self) -> usize {
98        (*self).null_count()
99    }
100}
101
102impl<T> Bounded for &[T] {
103    fn len(&self) -> usize {
104        <[T]>::len(self)
105    }
106}
107
108impl<T> NullCount for &[T] {
109    fn null_count(&self) -> usize {
110        0
111    }
112}
113
114pub trait Indexable {
115    type Item: IsNull;
116
117    fn get(&self, i: usize) -> Self::Item;
118
119    /// # Safety
120    /// Doesn't do any bound checks.
121    unsafe fn get_unchecked(&self, i: usize) -> Self::Item;
122}
123
124impl<T: Copy + IsNull> Indexable for &[T] {
125    type Item = T;
126
127    fn get(&self, i: usize) -> Self::Item {
128        self[i]
129    }
130
131    /// # Safety
132    /// Doesn't do any bound checks.
133    unsafe fn get_unchecked(&self, i: usize) -> Self::Item {
134        *<[T]>::get_unchecked(self, i)
135    }
136}
137
138pub fn check_bounds(idx: &[IdxSize], len: IdxSize) -> PolarsResult<()> {
139    // We iterate in large uninterrupted chunks to help auto-vectorization.
140    let Some(max_idx) = idx.iter().copied().max() else {
141        return Ok(());
142    };
143
144    polars_ensure!(max_idx < len, OutOfBounds: "indices are out of bounds");
145    Ok(())
146}
147
148pub trait ToIdx {
149    fn to_idx(self, len: u64) -> IdxSize;
150}
151
152macro_rules! impl_to_idx {
153    ($ty:ty) => {
154        impl ToIdx for $ty {
155            #[inline]
156            fn to_idx(self, _len: u64) -> IdxSize {
157                self as IdxSize
158            }
159        }
160    };
161    ($ty:ty, $ity:ty) => {
162        impl ToIdx for $ty {
163            #[inline]
164            fn to_idx(self, len: u64) -> IdxSize {
165                let idx = self as $ity;
166                if idx < 0 {
167                    (idx + len as $ity) as IdxSize
168                } else {
169                    idx as IdxSize
170                }
171            }
172        }
173    };
174}
175
176impl_to_idx!(u8);
177impl_to_idx!(u16);
178impl_to_idx!(u32);
179impl_to_idx!(u64);
180impl_to_idx!(i8, i16);
181impl_to_idx!(i16, i32);
182impl_to_idx!(i32, i64);
183impl_to_idx!(i64, i64);
184
185// Allows for 2^24 (~16M) chunks
186// Leaves 2^40 (~1T) rows per chunk
187const DEFAULT_CHUNK_BITS: u64 = 24;
188
189#[derive(Clone, Copy)]
190#[repr(transparent)]
191pub struct ChunkId<const CHUNK_BITS: u64 = DEFAULT_CHUNK_BITS> {
192    swizzled: u64,
193}
194
195impl<const CHUNK_BITS: u64> Debug for ChunkId<CHUNK_BITS> {
196    fn fmt(&self, f: &mut Formatter<'_>) -> std::fmt::Result {
197        if self.is_null() {
198            write!(f, "NULL")
199        } else {
200            let (chunk, row) = self.extract();
201            write!(f, "({chunk}, {row})")
202        }
203    }
204}
205
206impl<const CHUNK_BITS: u64> ChunkId<CHUNK_BITS> {
207    #[inline(always)]
208    pub const fn null() -> Self {
209        Self { swizzled: u64::MAX }
210    }
211
212    #[inline(always)]
213    pub fn is_null(&self) -> bool {
214        self.swizzled == u64::MAX
215    }
216
217    #[inline(always)]
218    #[allow(clippy::unnecessary_cast)]
219    pub fn store(chunk: IdxSize, row: IdxSize) -> Self {
220        debug_assert!(chunk < !(u64::MAX << CHUNK_BITS) as IdxSize);
221        let swizzled = ((row as u64) << CHUNK_BITS) | chunk as u64;
222
223        Self { swizzled }
224    }
225
226    #[inline(always)]
227    #[allow(clippy::unnecessary_cast)]
228    pub fn extract(self) -> (IdxSize, IdxSize) {
229        let row = (self.swizzled >> CHUNK_BITS) as IdxSize;
230        let mask = (1u64 << CHUNK_BITS) - 1;
231        let chunk = (self.swizzled & mask) as IdxSize;
232        (chunk, row)
233    }
234
235    #[inline(always)]
236    pub fn inner_mut(&mut self) -> &mut u64 {
237        &mut self.swizzled
238    }
239
240    pub fn from_inner(inner: u64) -> Self {
241        Self { swizzled: inner }
242    }
243
244    pub fn into_inner(self) -> u64 {
245        self.swizzled
246    }
247}
248
249#[cfg(test)]
250mod test {
251    use super::*;
252
253    #[test]
254    fn test_chunk_idx() {
255        let chunk = 213908;
256        let row = 813457;
257
258        let ci: ChunkId = ChunkId::store(chunk, row);
259        let (c, r) = ci.extract();
260
261        assert_eq!(c, chunk);
262        assert_eq!(r, row);
263    }
264}