polars_arrow/ffi/
mmap.rs

1//! Functionality to mmap in-memory data regions.
2use std::sync::Arc;
3
4use polars_error::{polars_bail, PolarsResult};
5
6use super::{ArrowArray, InternalArrowArray};
7use crate::array::{BooleanArray, FromFfi, PrimitiveArray};
8use crate::datatypes::ArrowDataType;
9use crate::types::NativeType;
10
11#[allow(dead_code)]
12struct PrivateData<T> {
13    // the owner of the pointers' regions
14    data: T,
15    buffers_ptr: Box<[*const std::os::raw::c_void]>,
16    children_ptr: Box<[*mut ArrowArray]>,
17    dictionary_ptr: Option<*mut ArrowArray>,
18}
19
20pub(crate) unsafe fn create_array<
21    T,
22    I: Iterator<Item = Option<*const u8>>,
23    II: Iterator<Item = ArrowArray>,
24>(
25    data: Arc<T>,
26    num_rows: usize,
27    null_count: usize,
28    buffers: I,
29    children: II,
30    dictionary: Option<ArrowArray>,
31    offset: Option<usize>,
32) -> ArrowArray {
33    let buffers_ptr = buffers
34        .map(|maybe_buffer| match maybe_buffer {
35            Some(b) => b as *const std::os::raw::c_void,
36            None => std::ptr::null(),
37        })
38        .collect::<Box<[_]>>();
39    let n_buffers = buffers_ptr.len() as i64;
40
41    let children_ptr = children
42        .map(|child| Box::into_raw(Box::new(child)))
43        .collect::<Box<_>>();
44    let n_children = children_ptr.len() as i64;
45
46    let dictionary_ptr = dictionary.map(|array| Box::into_raw(Box::new(array)));
47
48    let mut private_data = Box::new(PrivateData::<Arc<T>> {
49        data,
50        buffers_ptr,
51        children_ptr,
52        dictionary_ptr,
53    });
54
55    ArrowArray {
56        length: num_rows as i64,
57        null_count: null_count as i64,
58        offset: offset.unwrap_or(0) as i64, // Unwrap: IPC files are by definition not offset
59        n_buffers,
60        n_children,
61        buffers: private_data.buffers_ptr.as_mut_ptr(),
62        children: private_data.children_ptr.as_mut_ptr(),
63        dictionary: private_data.dictionary_ptr.unwrap_or(std::ptr::null_mut()),
64        release: Some(release::<Arc<T>>),
65        private_data: Box::into_raw(private_data) as *mut ::std::os::raw::c_void,
66    }
67}
68
69/// callback used to drop [`ArrowArray`] when it is exported specified for [`PrivateData`].
70unsafe extern "C" fn release<T>(array: *mut ArrowArray) {
71    if array.is_null() {
72        return;
73    }
74    let array = &mut *array;
75
76    // take ownership of `private_data`, therefore dropping it
77    let private = Box::from_raw(array.private_data as *mut PrivateData<T>);
78    for child in private.children_ptr.iter() {
79        let _ = Box::from_raw(*child);
80    }
81
82    if let Some(ptr) = private.dictionary_ptr {
83        let _ = Box::from_raw(ptr);
84    }
85
86    array.release = None;
87}
88
89/// Creates a (non-null) [`PrimitiveArray`] from a slice of values.
90/// This does not have memcopy and is the fastest way to create a [`PrimitiveArray`].
91///
92/// This can be useful if you want to apply arrow kernels on slices without incurring
93/// a memcopy cost.
94///
95/// # Safety
96///
97/// Using this function is not unsafe, but the returned PrimitiveArray's lifetime is bound to the lifetime
98/// of the slice. The returned [`PrimitiveArray`] _must not_ outlive the passed slice.
99pub unsafe fn slice<T: NativeType>(slice: &[T]) -> PrimitiveArray<T> {
100    slice_and_owner(slice, ())
101}
102
103/// Creates a (non-null) [`PrimitiveArray`] from a slice of values.
104/// This does not have memcopy and is the fastest way to create a [`PrimitiveArray`].
105///
106/// This can be useful if you want to apply arrow kernels on slices without incurring
107/// a memcopy cost.
108///
109/// # Safety
110///
111/// The caller must ensure the passed `owner` ensures the data remains alive.
112pub unsafe fn slice_and_owner<T: NativeType, O>(slice: &[T], owner: O) -> PrimitiveArray<T> {
113    let num_rows = slice.len();
114    let null_count = 0;
115    let validity = None;
116
117    let data: &[u8] = bytemuck::cast_slice(slice);
118    let ptr = data.as_ptr();
119    let data = Arc::new(owner);
120
121    // SAFETY: the underlying assumption of this function: the array will not be used
122    // beyond the
123    let array = create_array(
124        data,
125        num_rows,
126        null_count,
127        [validity, Some(ptr)].into_iter(),
128        [].into_iter(),
129        None,
130        None,
131    );
132    let array = InternalArrowArray::new(array, T::PRIMITIVE.into());
133
134    // SAFETY: we just created a valid array
135    unsafe { PrimitiveArray::<T>::try_from_ffi(array) }.unwrap()
136}
137
138/// Creates a (non-null) [`BooleanArray`] from a slice of bits.
139/// This does not have memcopy and is the fastest way to create a [`BooleanArray`].
140///
141/// This can be useful if you want to apply arrow kernels on slices without
142/// incurring a memcopy cost.
143///
144/// The `offset` indicates where the first bit starts in the first byte.
145///
146/// # Safety
147///
148/// Using this function is not unsafe, but the returned BooleanArrays's lifetime
149/// is bound to the lifetime of the slice. The returned [`BooleanArray`] _must
150/// not_ outlive the passed slice.
151pub unsafe fn bitmap(data: &[u8], offset: usize, length: usize) -> PolarsResult<BooleanArray> {
152    bitmap_and_owner(data, offset, length, ())
153}
154
155/// Creates a (non-null) [`BooleanArray`] from a slice of bits.
156/// This does not have memcopy and is the fastest way to create a [`BooleanArray`].
157///
158/// This can be useful if you want to apply arrow kernels on slices without
159/// incurring a memcopy cost.
160///
161/// The `offset` indicates where the first bit starts in the first byte.
162///
163/// # Safety
164///
165/// The caller must ensure the passed `owner` ensures the data remains alive.
166pub unsafe fn bitmap_and_owner<O>(
167    data: &[u8],
168    offset: usize,
169    length: usize,
170    owner: O,
171) -> PolarsResult<BooleanArray> {
172    if offset >= 8 {
173        polars_bail!(InvalidOperation: "offset should be < 8")
174    };
175    if length > data.len() * 8 - offset {
176        polars_bail!(InvalidOperation: "given length is oob")
177    }
178    let null_count = 0;
179    let validity = None;
180
181    let ptr = data.as_ptr();
182    let data = Arc::new(owner);
183
184    // SAFETY: the underlying assumption of this function: the array will not be used
185    // beyond the
186    let array = create_array(
187        data,
188        length,
189        null_count,
190        [validity, Some(ptr)].into_iter(),
191        [].into_iter(),
192        None,
193        Some(offset),
194    );
195    let array = InternalArrowArray::new(array, ArrowDataType::Boolean);
196
197    // SAFETY: we just created a valid array
198    Ok(unsafe { BooleanArray::try_from_ffi(array) }.unwrap())
199}