polars_compute/gather/
generic_binary.rs1use arrow::array::{GenericBinaryArray, PrimitiveArray};
2use arrow::bitmap::{Bitmap, BitmapBuilder};
3use arrow::buffer::Buffer;
4use arrow::offset::{Offset, Offsets, OffsetsBuffer};
5use polars_utils::vec::{CapacityByFactor, PushUnchecked};
6
7use super::Index;
8
9fn create_offsets<I: Iterator<Item = usize>, O: Offset>(
10 lengths: I,
11 idx_len: usize,
12) -> OffsetsBuffer<O> {
13 let mut length_so_far = O::default();
14 let mut offsets = Vec::with_capacity(idx_len + 1);
15 offsets.push(length_so_far);
16
17 for len in lengths {
18 unsafe {
19 length_so_far += O::from_usize(len).unwrap_unchecked();
20 offsets.push_unchecked(length_so_far)
21 };
22 }
23 unsafe { Offsets::new_unchecked(offsets).into() }
24}
25
26pub(super) unsafe fn take_values<O: Offset>(
27 length: O,
28 starts: &[O],
29 offsets: &OffsetsBuffer<O>,
30 values: &[u8],
31) -> Buffer<u8> {
32 let new_len = length.to_usize();
33 let mut buffer = Vec::with_capacity(new_len);
34 starts
35 .iter()
36 .map(|start| start.to_usize())
37 .zip(offsets.lengths())
38 .for_each(|(start, length)| {
39 let end = start + length;
40 buffer.extend_from_slice(values.get_unchecked(start..end));
41 });
42 buffer.into()
43}
44
45pub(super) unsafe fn take_no_validity_unchecked<O: Offset, I: Index>(
47 offsets: &OffsetsBuffer<O>,
48 values: &[u8],
49 indices: &[I],
50) -> (OffsetsBuffer<O>, Buffer<u8>, Option<Bitmap>) {
51 let values_len = offsets.last().to_usize();
52 let fraction_estimate = indices.len() as f64 / offsets.len() as f64 + 0.3;
53 let mut buffer = Vec::<u8>::with_capacity_by_factor(values_len, fraction_estimate);
54
55 let lengths = indices.iter().map(|index| index.to_usize()).map(|index| {
56 let (start, end) = offsets.start_end_unchecked(index);
57 buffer.extend_from_slice(values.get_unchecked(start..end));
58 end - start
59 });
60 let offsets = create_offsets(lengths, indices.len());
61
62 (offsets, buffer.into(), None)
63}
64
65pub(super) unsafe fn take_values_validity<O: Offset, I: Index, A: GenericBinaryArray<O>>(
67 values: &A,
68 indices: &[I],
69) -> (OffsetsBuffer<O>, Buffer<u8>, Option<Bitmap>) {
70 let validity_values = values.validity().unwrap();
71 let validity = indices
72 .iter()
73 .map(|index| validity_values.get_bit_unchecked(index.to_usize()));
74 let validity = Bitmap::from_trusted_len_iter(validity);
75
76 let mut total_length = O::default();
77
78 let offsets = values.offsets();
79 let values_values = values.values();
80
81 let mut starts = Vec::<O>::with_capacity(indices.len());
82 let lengths = indices.iter().map(|index| {
83 let index = index.to_usize();
84 let start = *offsets.get_unchecked(index);
85 let length = *offsets.get_unchecked(index + 1) - start;
86 total_length += length;
87 starts.push_unchecked(start);
88 length.to_usize()
89 });
90 let offsets = create_offsets(lengths, indices.len());
91 let buffer = take_values(total_length, starts.as_slice(), &offsets, values_values);
92
93 (offsets, buffer, validity.into())
94}
95
96pub(super) unsafe fn take_indices_validity<O: Offset, I: Index>(
98 offsets: &OffsetsBuffer<O>,
99 values: &[u8],
100 indices: &PrimitiveArray<I>,
101) -> (OffsetsBuffer<O>, Buffer<u8>, Option<Bitmap>) {
102 let mut total_length = O::default();
103
104 let offsets = offsets.buffer();
105
106 let mut starts = Vec::<O>::with_capacity(indices.len());
107 let lengths = indices.values().iter().map(|index| {
108 let index = index.to_usize();
109 let length;
110 match offsets.get(index + 1) {
111 Some(&next) => {
112 let start = *offsets.get_unchecked(index);
113 length = next - start;
114 total_length += length;
115 starts.push_unchecked(start);
116 },
117 None => {
118 length = O::zero();
119 starts.push_unchecked(O::default());
120 },
121 };
122 length.to_usize()
123 });
124 let offsets = create_offsets(lengths, indices.len());
125
126 let buffer = take_values(total_length, &starts, &offsets, values);
127
128 (offsets, buffer, indices.validity().cloned())
129}
130
131pub(super) unsafe fn take_values_indices_validity<O: Offset, I: Index, A: GenericBinaryArray<O>>(
133 values: &A,
134 indices: &PrimitiveArray<I>,
135) -> (OffsetsBuffer<O>, Buffer<u8>, Option<Bitmap>) {
136 let mut total_length = O::default();
137 let mut validity = BitmapBuilder::with_capacity(indices.len());
138
139 let values_validity = values.validity().unwrap();
140 let offsets = values.offsets();
141 let values_values = values.values();
142
143 let mut starts = Vec::<O>::with_capacity(indices.len());
144 let lengths = indices.iter().map(|index| {
145 let length;
146 match index {
147 Some(index) => {
148 let index = index.to_usize();
149 if values_validity.get_bit(index) {
150 validity.push(true);
151 length = *offsets.get_unchecked(index + 1) - *offsets.get_unchecked(index);
152 starts.push_unchecked(*offsets.get_unchecked(index));
153 } else {
154 validity.push(false);
155 length = O::zero();
156 starts.push_unchecked(O::default());
157 }
158 },
159 None => {
160 validity.push(false);
161 length = O::zero();
162 starts.push_unchecked(O::default());
163 },
164 };
165 total_length += length;
166 length.to_usize()
167 });
168 let offsets = create_offsets(lengths, indices.len());
169
170 let buffer = take_values(total_length, &starts, &offsets, values_values);
171
172 (offsets, buffer, validity.into_opt_validity())
173}