polars_compute/gather/
generic_binary.rs

1use arrow::array::{GenericBinaryArray, PrimitiveArray};
2use arrow::bitmap::{Bitmap, BitmapBuilder};
3use arrow::buffer::Buffer;
4use arrow::offset::{Offset, Offsets, OffsetsBuffer};
5use polars_utils::vec::{CapacityByFactor, PushUnchecked};
6
7use super::Index;
8
9fn create_offsets<I: Iterator<Item = usize>, O: Offset>(
10    lengths: I,
11    idx_len: usize,
12) -> OffsetsBuffer<O> {
13    let mut length_so_far = O::default();
14    let mut offsets = Vec::with_capacity(idx_len + 1);
15    offsets.push(length_so_far);
16
17    for len in lengths {
18        unsafe {
19            length_so_far += O::from_usize(len).unwrap_unchecked();
20            offsets.push_unchecked(length_so_far)
21        };
22    }
23    unsafe { Offsets::new_unchecked(offsets).into() }
24}
25
26pub(super) unsafe fn take_values<O: Offset>(
27    length: O,
28    starts: &[O],
29    offsets: &OffsetsBuffer<O>,
30    values: &[u8],
31) -> Buffer<u8> {
32    let new_len = length.to_usize();
33    let mut buffer = Vec::with_capacity(new_len);
34    starts
35        .iter()
36        .map(|start| start.to_usize())
37        .zip(offsets.lengths())
38        .for_each(|(start, length)| {
39            let end = start + length;
40            buffer.extend_from_slice(values.get_unchecked(start..end));
41        });
42    buffer.into()
43}
44
45// take implementation when neither values nor indices contain nulls
46pub(super) unsafe fn take_no_validity_unchecked<O: Offset, I: Index>(
47    offsets: &OffsetsBuffer<O>,
48    values: &[u8],
49    indices: &[I],
50) -> (OffsetsBuffer<O>, Buffer<u8>, Option<Bitmap>) {
51    let values_len = offsets.last().to_usize();
52    let fraction_estimate = indices.len() as f64 / offsets.len() as f64 + 0.3;
53    let mut buffer = Vec::<u8>::with_capacity_by_factor(values_len, fraction_estimate);
54
55    let lengths = indices.iter().map(|index| index.to_usize()).map(|index| {
56        let (start, end) = offsets.start_end_unchecked(index);
57        buffer.extend_from_slice(values.get_unchecked(start..end));
58        end - start
59    });
60    let offsets = create_offsets(lengths, indices.len());
61
62    (offsets, buffer.into(), None)
63}
64
65// take implementation when only values contain nulls
66pub(super) unsafe fn take_values_validity<O: Offset, I: Index, A: GenericBinaryArray<O>>(
67    values: &A,
68    indices: &[I],
69) -> (OffsetsBuffer<O>, Buffer<u8>, Option<Bitmap>) {
70    let validity_values = values.validity().unwrap();
71    let validity = indices
72        .iter()
73        .map(|index| validity_values.get_bit_unchecked(index.to_usize()));
74    let validity = Bitmap::from_trusted_len_iter(validity);
75
76    let mut total_length = O::default();
77
78    let offsets = values.offsets();
79    let values_values = values.values();
80
81    let mut starts = Vec::<O>::with_capacity(indices.len());
82    let lengths = indices.iter().map(|index| {
83        let index = index.to_usize();
84        let start = *offsets.get_unchecked(index);
85        let length = *offsets.get_unchecked(index + 1) - start;
86        total_length += length;
87        starts.push_unchecked(start);
88        length.to_usize()
89    });
90    let offsets = create_offsets(lengths, indices.len());
91    let buffer = take_values(total_length, starts.as_slice(), &offsets, values_values);
92
93    (offsets, buffer, validity.into())
94}
95
96// take implementation when only indices contain nulls
97pub(super) unsafe fn take_indices_validity<O: Offset, I: Index>(
98    offsets: &OffsetsBuffer<O>,
99    values: &[u8],
100    indices: &PrimitiveArray<I>,
101) -> (OffsetsBuffer<O>, Buffer<u8>, Option<Bitmap>) {
102    let mut total_length = O::default();
103
104    let offsets = offsets.buffer();
105
106    let mut starts = Vec::<O>::with_capacity(indices.len());
107    let lengths = indices.values().iter().map(|index| {
108        let index = index.to_usize();
109        let length;
110        match offsets.get(index + 1) {
111            Some(&next) => {
112                let start = *offsets.get_unchecked(index);
113                length = next - start;
114                total_length += length;
115                starts.push_unchecked(start);
116            },
117            None => {
118                length = O::zero();
119                starts.push_unchecked(O::default());
120            },
121        };
122        length.to_usize()
123    });
124    let offsets = create_offsets(lengths, indices.len());
125
126    let buffer = take_values(total_length, &starts, &offsets, values);
127
128    (offsets, buffer, indices.validity().cloned())
129}
130
131// take implementation when both indices and values contain nulls
132pub(super) unsafe fn take_values_indices_validity<O: Offset, I: Index, A: GenericBinaryArray<O>>(
133    values: &A,
134    indices: &PrimitiveArray<I>,
135) -> (OffsetsBuffer<O>, Buffer<u8>, Option<Bitmap>) {
136    let mut total_length = O::default();
137    let mut validity = BitmapBuilder::with_capacity(indices.len());
138
139    let values_validity = values.validity().unwrap();
140    let offsets = values.offsets();
141    let values_values = values.values();
142
143    let mut starts = Vec::<O>::with_capacity(indices.len());
144    let lengths = indices.iter().map(|index| {
145        let length;
146        match index {
147            Some(index) => {
148                let index = index.to_usize();
149                if values_validity.get_bit(index) {
150                    validity.push(true);
151                    length = *offsets.get_unchecked(index + 1) - *offsets.get_unchecked(index);
152                    starts.push_unchecked(*offsets.get_unchecked(index));
153                } else {
154                    validity.push(false);
155                    length = O::zero();
156                    starts.push_unchecked(O::default());
157                }
158            },
159            None => {
160                validity.push(false);
161                length = O::zero();
162                starts.push_unchecked(O::default());
163            },
164        };
165        total_length += length;
166        length.to_usize()
167    });
168    let offsets = create_offsets(lengths, indices.len());
169
170    let buffer = take_values(total_length, &starts, &offsets, values_values);
171
172    (offsets, buffer, validity.into_opt_validity())
173}