1use std::mem::MaybeUninit;
2
3use arrow::array::{
4 Array, BinaryArray, BinaryViewArray, BooleanArray, FixedSizeListArray, ListArray,
5 PrimitiveArray, StructArray, Utf8Array, Utf8ViewArray,
6};
7use arrow::bitmap::Bitmap;
8use arrow::datatypes::ArrowDataType;
9use arrow::types::Offset;
10
11use crate::fixed::{boolean, decimal, numeric, packed_u32};
12use crate::row::{RowEncodingOptions, RowsEncoded};
13use crate::variable::{binary, no_order, utf8};
14use crate::widths::RowWidths;
15use crate::{with_match_arrow_primitive_type, ArrayRef, RowEncodingContext};
16
17pub fn convert_columns(
18 num_rows: usize,
19 columns: &[ArrayRef],
20 opts: &[RowEncodingOptions],
21 dicts: &[Option<RowEncodingContext>],
22) -> RowsEncoded {
23 let mut rows = RowsEncoded::new(vec![], vec![]);
24 convert_columns_amortized(
25 num_rows,
26 columns,
27 opts.iter().copied().zip(dicts.iter().map(|v| v.as_ref())),
28 &mut rows,
29 );
30 rows
31}
32
33pub fn convert_columns_no_order(
34 num_rows: usize,
35 columns: &[ArrayRef],
36 dicts: &[Option<RowEncodingContext>],
37) -> RowsEncoded {
38 let mut rows = RowsEncoded::new(vec![], vec![]);
39 convert_columns_amortized_no_order(num_rows, columns, dicts, &mut rows);
40 rows
41}
42
43pub fn convert_columns_amortized_no_order(
44 num_rows: usize,
45 columns: &[ArrayRef],
46 dicts: &[Option<RowEncodingContext>],
47 rows: &mut RowsEncoded,
48) {
49 convert_columns_amortized(
50 num_rows,
51 columns,
52 std::iter::repeat_n(RowEncodingOptions::default(), columns.len())
53 .zip(dicts.iter().map(|v| v.as_ref())),
54 rows,
55 );
56}
57
58pub fn convert_columns_amortized<'a>(
59 num_rows: usize,
60 columns: &[ArrayRef],
61 fields: impl IntoIterator<Item = (RowEncodingOptions, Option<&'a RowEncodingContext>)> + Clone,
62 rows: &mut RowsEncoded,
63) {
64 let mut masked_out_max_length = 0;
65 let mut row_widths = RowWidths::new(num_rows);
66 let mut encoders = columns
67 .iter()
68 .zip(fields.clone())
69 .map(|(column, (opt, dicts))| {
70 get_encoder(
71 column.as_ref(),
72 opt,
73 dicts,
74 &mut row_widths,
75 &mut masked_out_max_length,
76 )
77 })
78 .collect::<Vec<_>>();
79
80 let mut offsets = Vec::with_capacity(num_rows + 1);
83 offsets.push(0);
84 row_widths.extend_with_offsets(&mut offsets);
85
86 let total_num_bytes = row_widths.sum();
88 let mut out = Vec::<u8>::with_capacity(total_num_bytes + masked_out_max_length);
89 let buffer = &mut out.spare_capacity_mut()[..total_num_bytes + masked_out_max_length];
90
91 let masked_out_write_offset = total_num_bytes;
92 let mut scratches = EncodeScratches::default();
93 for (encoder, (opt, dict)) in encoders.iter_mut().zip(fields) {
94 unsafe {
95 encode_array(
96 buffer,
97 encoder,
98 opt,
99 dict,
100 &mut offsets[1..],
101 masked_out_write_offset,
102 &mut scratches,
103 )
104 };
105 }
106 unsafe {
108 out.set_len(total_num_bytes);
109 }
110
111 *rows = RowsEncoded {
112 values: out,
113 offsets,
114 };
115}
116
117fn list_num_column_bytes<O: Offset>(
118 array: &dyn Array,
119 opt: RowEncodingOptions,
120 dicts: Option<&RowEncodingContext>,
121 row_widths: &mut RowWidths,
122 masked_out_max_width: &mut usize,
123) -> Encoder {
124 let array = array.as_any().downcast_ref::<ListArray<O>>().unwrap();
125 let array = array.trim_to_normalized_offsets_recursive();
126 let values = array.values();
127
128 let mut list_row_widths = RowWidths::new(values.len());
129 let encoder = get_encoder(
130 values.as_ref(),
131 opt,
132 dicts,
133 &mut list_row_widths,
134 masked_out_max_width,
135 );
136
137 match array.validity() {
138 None => row_widths.push_iter(array.offsets().offset_and_length_iter().map(
139 |(offset, length)| {
140 let mut sum = 0;
141 for i in offset..offset + length {
142 sum += list_row_widths.get(i);
143 }
144 1 + length + sum
145 },
146 )),
147 Some(validity) => row_widths.push_iter(
148 array
149 .offsets()
150 .offset_and_length_iter()
151 .zip(validity.iter())
152 .map(|((offset, length), is_valid)| {
153 if !is_valid {
154 if length > 0 {
155 for i in offset..offset + length {
156 *masked_out_max_width =
157 (*masked_out_max_width).max(list_row_widths.get(i));
158 }
159 }
160 return 1;
161 }
162
163 let mut sum = 0;
164 for i in offset..offset + length {
165 sum += list_row_widths.get(i);
166 }
167 1 + length + sum
168 }),
169 ),
170 };
171
172 Encoder {
173 array: array.boxed(),
174 state: Some(Box::new(EncoderState::List(
175 Box::new(encoder),
176 list_row_widths,
177 ))),
178 }
179}
180
181fn biniter_num_column_bytes(
182 array: &dyn Array,
183 iter: impl ExactSizeIterator<Item = usize>,
184 validity: Option<&Bitmap>,
185 opt: RowEncodingOptions,
186 row_widths: &mut RowWidths,
187) -> Encoder {
188 if opt.contains(RowEncodingOptions::NO_ORDER) {
189 match validity {
190 None => row_widths.push_iter(iter.map(|v| no_order::len_from_item(Some(v), opt))),
191 Some(validity) => row_widths.push_iter(
192 iter.zip(validity.iter())
193 .map(|(v, is_valid)| no_order::len_from_item(is_valid.then_some(v), opt)),
194 ),
195 }
196 } else {
197 match validity {
198 None => row_widths.push_iter(
199 iter.map(|v| crate::variable::binary::encoded_len_from_len(Some(v), opt)),
200 ),
201 Some(validity) => row_widths.push_iter(
202 iter.zip(validity.iter())
203 .map(|(v, is_valid)| binary::encoded_len_from_len(is_valid.then_some(v), opt)),
204 ),
205 }
206 };
207
208 Encoder {
209 array: array.to_boxed(),
210 state: None,
211 }
212}
213
214fn striter_num_column_bytes(
215 array: &dyn Array,
216 iter: impl ExactSizeIterator<Item = usize>,
217 validity: Option<&Bitmap>,
218 opt: RowEncodingOptions,
219 row_widths: &mut RowWidths,
220) -> Encoder {
221 if opt.contains(RowEncodingOptions::NO_ORDER) {
222 match validity {
223 None => row_widths.push_iter(iter.map(|v| no_order::len_from_item(Some(v), opt))),
224 Some(validity) => row_widths.push_iter(
225 iter.zip(validity.iter())
226 .map(|(v, is_valid)| no_order::len_from_item(is_valid.then_some(v), opt)),
227 ),
228 }
229 } else {
230 match validity {
231 None => row_widths
232 .push_iter(iter.map(|v| crate::variable::utf8::len_from_item(Some(v), opt))),
233 Some(validity) => row_widths.push_iter(
234 iter.zip(validity.iter())
235 .map(|(v, is_valid)| utf8::len_from_item(is_valid.then_some(v), opt)),
236 ),
237 }
238 };
239
240 Encoder {
241 array: array.to_boxed(),
242 state: None,
243 }
244}
245
246fn get_encoder(
248 array: &dyn Array,
249 opt: RowEncodingOptions,
250 dict: Option<&RowEncodingContext>,
251 row_widths: &mut RowWidths,
252 masked_out_max_width: &mut usize,
253) -> Encoder {
254 use ArrowDataType as D;
255 let dtype = array.dtype();
256
257 if let Some(size) = fixed_size(dtype, dict) {
259 row_widths.push_constant(size);
260 let state = match dtype {
261 D::FixedSizeList(_, width) => {
262 let array = array.as_any().downcast_ref::<FixedSizeListArray>().unwrap();
263 let array = array.propagate_nulls();
264
265 debug_assert_eq!(array.values().len(), array.len() * width);
266 let mut nested_row_widths = RowWidths::new(array.values().len());
267 let nested_encoder = get_encoder(
268 array.values().as_ref(),
269 opt,
270 dict,
271 &mut nested_row_widths,
272 masked_out_max_width,
273 );
274 Some(EncoderState::FixedSizeList(
275 Box::new(nested_encoder),
276 *width,
277 nested_row_widths,
278 ))
279 },
280 D::Struct(_) => {
281 let struct_array = array.as_any().downcast_ref::<StructArray>().unwrap();
282 let struct_array = struct_array.propagate_nulls();
283
284 Some(EncoderState::Struct(match dict {
285 None => struct_array
286 .values()
287 .iter()
288 .map(|array| {
289 get_encoder(
290 array.as_ref(),
291 opt,
292 None,
293 &mut RowWidths::new(row_widths.num_rows()),
294 masked_out_max_width,
295 )
296 })
297 .collect(),
298 Some(RowEncodingContext::Struct(dicts)) => struct_array
299 .values()
300 .iter()
301 .zip(dicts)
302 .map(|(array, dict)| {
303 get_encoder(
304 array.as_ref(),
305 opt,
306 dict.as_ref(),
307 &mut RowWidths::new(row_widths.num_rows()),
308 masked_out_max_width,
309 )
310 })
311 .collect(),
312 _ => unreachable!(),
313 }))
314 },
315 _ => None,
316 };
317
318 let state = state.map(Box::new);
319 return Encoder {
320 array: array.to_boxed(),
321 state,
322 };
323 }
324
325 match dtype {
326 D::FixedSizeList(_, width) => {
327 let array = array.as_any().downcast_ref::<FixedSizeListArray>().unwrap();
328 let array = array.propagate_nulls();
329
330 debug_assert_eq!(array.values().len(), array.len() * width);
331 let mut nested_row_widths = RowWidths::new(array.values().len());
332 let nested_encoder = get_encoder(
333 array.values().as_ref(),
334 opt,
335 dict,
336 &mut nested_row_widths,
337 masked_out_max_width,
338 );
339
340 let mut fsl_row_widths = nested_row_widths.collapse_chunks(*width, array.len());
341 fsl_row_widths.push_constant(1); row_widths.push(&fsl_row_widths);
344 Encoder {
345 array: array.to_boxed(),
346 state: Some(Box::new(EncoderState::FixedSizeList(
347 Box::new(nested_encoder),
348 *width,
349 nested_row_widths,
350 ))),
351 }
352 },
353 D::Struct(_) => {
354 let array = array.as_any().downcast_ref::<StructArray>().unwrap();
355 let array = array.propagate_nulls();
356
357 let mut nested_encoders = Vec::with_capacity(array.values().len());
358 row_widths.push_constant(1); match dict {
360 None => {
361 for array in array.values() {
362 let encoder = get_encoder(
363 array.as_ref(),
364 opt,
365 None,
366 row_widths,
367 masked_out_max_width,
368 );
369 nested_encoders.push(encoder);
370 }
371 },
372 Some(RowEncodingContext::Struct(dicts)) => {
373 for (array, dict) in array.values().iter().zip(dicts) {
374 let encoder = get_encoder(
375 array.as_ref(),
376 opt,
377 dict.as_ref(),
378 row_widths,
379 masked_out_max_width,
380 );
381 nested_encoders.push(encoder);
382 }
383 },
384 _ => unreachable!(),
385 }
386 Encoder {
387 array: array.to_boxed(),
388 state: Some(Box::new(EncoderState::Struct(nested_encoders))),
389 }
390 },
391
392 D::List(_) => {
393 list_num_column_bytes::<i32>(array, opt, dict, row_widths, masked_out_max_width)
394 },
395 D::LargeList(_) => {
396 list_num_column_bytes::<i64>(array, opt, dict, row_widths, masked_out_max_width)
397 },
398
399 D::BinaryView => {
400 let dc_array = array.as_any().downcast_ref::<BinaryViewArray>().unwrap();
401 biniter_num_column_bytes(
402 array,
403 dc_array.views().iter().map(|v| v.length as usize),
404 dc_array.validity(),
405 opt,
406 row_widths,
407 )
408 },
409 D::Binary => {
410 let dc_array = array.as_any().downcast_ref::<BinaryArray<i32>>().unwrap();
411 biniter_num_column_bytes(
412 array,
413 dc_array.offsets().lengths(),
414 dc_array.validity(),
415 opt,
416 row_widths,
417 )
418 },
419 D::LargeBinary => {
420 let dc_array = array.as_any().downcast_ref::<BinaryArray<i64>>().unwrap();
421 biniter_num_column_bytes(
422 array,
423 dc_array.offsets().lengths(),
424 dc_array.validity(),
425 opt,
426 row_widths,
427 )
428 },
429
430 D::Utf8View => {
431 let dc_array = array.as_any().downcast_ref::<Utf8ViewArray>().unwrap();
432 striter_num_column_bytes(
433 array,
434 dc_array.views().iter().map(|v| v.length as usize),
435 dc_array.validity(),
436 opt,
437 row_widths,
438 )
439 },
440 D::Utf8 => {
441 let dc_array = array.as_any().downcast_ref::<Utf8Array<i32>>().unwrap();
442 striter_num_column_bytes(
443 array,
444 dc_array.offsets().lengths(),
445 dc_array.validity(),
446 opt,
447 row_widths,
448 )
449 },
450 D::LargeUtf8 => {
451 let dc_array = array.as_any().downcast_ref::<Utf8Array<i64>>().unwrap();
452 striter_num_column_bytes(
453 array,
454 dc_array.offsets().lengths(),
455 dc_array.validity(),
456 opt,
457 row_widths,
458 )
459 },
460
461 D::Union(_) => unreachable!(),
462 D::Map(_, _) => unreachable!(),
463 D::Extension(_) => unreachable!(),
464 D::Unknown => unreachable!(),
465
466 D::Timestamp(_, _)
468 | D::Date32
469 | D::Date64
470 | D::Time32(_)
471 | D::Time64(_)
472 | D::Duration(_)
473 | D::Interval(_)
474 | D::Dictionary(_, _, _)
475 | D::Decimal(_, _)
476 | D::Decimal256(_, _) => unreachable!(),
477
478 _ => unreachable!(),
480 }
481}
482
483struct Encoder {
484 array: Box<dyn Array>,
485
486 state: Option<Box<EncoderState>>,
488}
489
490enum EncoderState {
491 List(Box<Encoder>, RowWidths),
492 FixedSizeList(Box<Encoder>, usize, RowWidths),
493 Struct(Vec<Encoder>),
494}
495
496unsafe fn encode_strs<'a>(
497 buffer: &mut [MaybeUninit<u8>],
498 iter: impl Iterator<Item = Option<&'a str>>,
499 opt: RowEncodingOptions,
500 offsets: &mut [usize],
501) {
502 if opt.contains(RowEncodingOptions::NO_ORDER) {
503 no_order::encode_variable_no_order(
504 buffer,
505 iter.map(|v| v.map(str::as_bytes)),
506 opt,
507 offsets,
508 );
509 } else {
510 utf8::encode_str(buffer, iter, opt, offsets);
511 }
512}
513
514unsafe fn encode_bins<'a>(
515 buffer: &mut [MaybeUninit<u8>],
516 iter: impl Iterator<Item = Option<&'a [u8]>>,
517 opt: RowEncodingOptions,
518 offsets: &mut [usize],
519) {
520 if opt.contains(RowEncodingOptions::NO_ORDER) {
521 no_order::encode_variable_no_order(buffer, iter, opt, offsets);
522 } else {
523 binary::encode_iter(buffer, iter, opt, offsets);
524 }
525}
526
527unsafe fn encode_flat_array(
528 buffer: &mut [MaybeUninit<u8>],
529 array: &dyn Array,
530 opt: RowEncodingOptions,
531 dict: Option<&RowEncodingContext>,
532 offsets: &mut [usize],
533) {
534 use ArrowDataType as D;
535
536 match array.dtype() {
537 D::Null => {},
538 D::Boolean => {
539 let array = array.as_any().downcast_ref::<BooleanArray>().unwrap();
540 boolean::encode_bool(buffer, array.iter(), opt, offsets);
541 },
542
543 dt if dt.is_numeric() => {
544 if matches!(dt, D::UInt32) {
545 if let Some(dict) = dict {
546 let keys = array
547 .as_any()
548 .downcast_ref::<PrimitiveArray<u32>>()
549 .unwrap();
550
551 match dict {
552 RowEncodingContext::Categorical(ctx) => {
553 if ctx.is_enum {
554 packed_u32::encode(
555 buffer,
556 keys,
557 opt,
558 offsets,
559 ctx.needed_num_bits(),
560 );
561 } else {
562 if let Some(lexical_sort_idxs) = &ctx.lexical_sort_idxs {
563 numeric::encode_iter(
564 buffer,
565 keys.iter()
566 .map(|k| k.map(|&k| lexical_sort_idxs[k as usize])),
567 opt,
568 offsets,
569 );
570 }
571
572 numeric::encode(buffer, keys, opt, offsets);
573 }
574 },
575
576 _ => unreachable!(),
577 }
578 return;
579 }
580 }
581
582 if matches!(dt, D::Int128) {
583 if let Some(RowEncodingContext::Decimal(precision)) = dict {
584 decimal::encode(
585 buffer,
586 array
587 .as_any()
588 .downcast_ref::<PrimitiveArray<i128>>()
589 .unwrap(),
590 opt,
591 offsets,
592 *precision,
593 );
594 return;
595 }
596 }
597
598 with_match_arrow_primitive_type!(dt, |$T| {
599 let array = array.as_any().downcast_ref::<PrimitiveArray<$T>>().unwrap();
600 numeric::encode(buffer, array, opt, offsets);
601 })
602 },
603
604 D::Binary => {
605 let array = array.as_any().downcast_ref::<BinaryArray<i32>>().unwrap();
606 encode_bins(buffer, array.iter(), opt, offsets);
607 },
608 D::LargeBinary => {
609 let array = array.as_any().downcast_ref::<BinaryArray<i64>>().unwrap();
610 encode_bins(buffer, array.iter(), opt, offsets);
611 },
612 D::BinaryView => {
613 let array = array.as_any().downcast_ref::<BinaryViewArray>().unwrap();
614 encode_bins(buffer, array.iter(), opt, offsets);
615 },
616 D::Utf8 => {
617 let array = array.as_any().downcast_ref::<Utf8Array<i32>>().unwrap();
618 encode_strs(buffer, array.iter(), opt, offsets);
619 },
620 D::LargeUtf8 => {
621 let array = array.as_any().downcast_ref::<Utf8Array<i64>>().unwrap();
622 encode_strs(buffer, array.iter(), opt, offsets);
623 },
624 D::Utf8View => {
625 let array = array.as_any().downcast_ref::<Utf8ViewArray>().unwrap();
626 encode_strs(buffer, array.iter(), opt, offsets);
627 },
628
629 D::Dictionary(_, _, _) => todo!(),
631
632 D::FixedSizeBinary(_) => todo!(),
633 D::Decimal(_, _) => todo!(),
634 D::Decimal256(_, _) => todo!(),
635
636 D::Union(_) => todo!(),
637 D::Map(_, _) => todo!(),
638 D::Extension(_) => todo!(),
639 D::Unknown => todo!(),
640
641 D::Timestamp(_, _)
643 | D::Date32
644 | D::Date64
645 | D::Time32(_)
646 | D::Time64(_)
647 | D::Duration(_)
648 | D::Interval(_) => unreachable!(),
649
650 _ => unreachable!(),
651 }
652}
653
/// Scratch allocations that are reused between `encode_array` calls.
#[derive(Default)]
struct EncodeScratches {
    /// Write positions of nested (child) values.
    nested_offsets: Vec<usize>,
    /// Scratch byte buffer; within this file it is only ever cleared.
    nested_buffer: Vec<u8>,
}

impl EncodeScratches {
    /// Empties both scratch vectors.
    fn clear(&mut self) {
        let Self {
            nested_offsets,
            nested_buffer,
        } = self;
        nested_offsets.clear();
        nested_buffer.clear();
    }
}
666
667unsafe fn encode_array(
668 buffer: &mut [MaybeUninit<u8>],
669 encoder: &Encoder,
670 opt: RowEncodingOptions,
671 dict: Option<&RowEncodingContext>,
672 offsets: &mut [usize],
673 masked_out_write_offset: usize, scratches: &mut EncodeScratches,
677) {
678 let Some(state) = &encoder.state else {
679 return encode_flat_array(buffer, encoder.array.as_ref(), opt, dict, offsets);
683 };
684
685 match state.as_ref() {
686 EncoderState::List(nested_encoder, nested_row_widths) => {
687 let array = encoder
689 .array
690 .as_any()
691 .downcast_ref::<ListArray<i64>>()
692 .unwrap();
693
694 scratches.clear();
695
696 scratches
697 .nested_offsets
698 .reserve(nested_row_widths.num_rows());
699 let nested_offsets = &mut scratches.nested_offsets;
700
701 let list_null_sentinel = opt.list_null_sentinel();
702 let list_continuation_token = opt.list_continuation_token();
703 let list_termination_token = opt.list_termination_token();
704
705 match array.validity() {
706 None => {
707 for (i, (offset, length)) in
708 array.offsets().offset_and_length_iter().enumerate()
709 {
710 for j in offset..offset + length {
711 buffer[offsets[i]] = MaybeUninit::new(list_continuation_token);
712 offsets[i] += 1;
713
714 nested_offsets.push(offsets[i]);
715 offsets[i] += nested_row_widths.get(j);
716 }
717 buffer[offsets[i]] = MaybeUninit::new(list_termination_token);
718 offsets[i] += 1;
719 }
720 },
721 Some(validity) => {
722 for (i, ((offset, length), is_valid)) in array
723 .offsets()
724 .offset_and_length_iter()
725 .zip(validity.iter())
726 .enumerate()
727 {
728 if !is_valid {
729 buffer[offsets[i]] = MaybeUninit::new(list_null_sentinel);
730 offsets[i] += 1;
731
732 if length > 0 {
734 nested_offsets
735 .extend(std::iter::repeat_n(masked_out_write_offset, length));
736 }
737
738 continue;
739 }
740
741 for j in offset..offset + length {
742 buffer[offsets[i]] = MaybeUninit::new(list_continuation_token);
743 offsets[i] += 1;
744
745 nested_offsets.push(offsets[i]);
746 offsets[i] += nested_row_widths.get(j);
747 }
748 buffer[offsets[i]] = MaybeUninit::new(list_termination_token);
749 offsets[i] += 1;
750 }
751 },
752 }
753
754 unsafe {
755 encode_array(
756 buffer,
757 nested_encoder,
758 opt,
759 dict,
760 nested_offsets,
761 masked_out_write_offset,
762 &mut EncodeScratches::default(),
763 )
764 };
765 },
766 EncoderState::FixedSizeList(array, width, nested_row_widths) => {
767 encode_validity(buffer, encoder.array.validity(), opt, offsets);
768
769 if *width == 0 {
770 return;
771 }
772
773 let mut child_offsets = Vec::with_capacity(offsets.len() * width);
774 for (i, offset) in offsets.iter_mut().enumerate() {
775 for j in 0..*width {
776 child_offsets.push(*offset);
777 *offset += nested_row_widths.get((i * width) + j);
778 }
779 }
780 encode_array(
781 buffer,
782 array.as_ref(),
783 opt,
784 dict,
785 &mut child_offsets,
786 masked_out_write_offset,
787 scratches,
788 );
789 for (i, offset) in offsets.iter_mut().enumerate() {
790 *offset = child_offsets[(i + 1) * width - 1];
791 }
792 },
793 EncoderState::Struct(arrays) => {
794 encode_validity(buffer, encoder.array.validity(), opt, offsets);
795
796 match dict {
797 None => {
798 for array in arrays {
799 encode_array(
800 buffer,
801 array,
802 opt,
803 None,
804 offsets,
805 masked_out_write_offset,
806 scratches,
807 );
808 }
809 },
810 Some(RowEncodingContext::Struct(dicts)) => {
811 for (array, dict) in arrays.iter().zip(dicts) {
812 encode_array(
813 buffer,
814 array,
815 opt,
816 dict.as_ref(),
817 offsets,
818 masked_out_write_offset,
819 scratches,
820 );
821 }
822 },
823 _ => unreachable!(),
824 }
825 },
826 }
827}
828
829unsafe fn encode_validity(
830 buffer: &mut [MaybeUninit<u8>],
831 validity: Option<&Bitmap>,
832 opt: RowEncodingOptions,
833 row_starts: &mut [usize],
834) {
835 let null_sentinel = opt.null_sentinel();
836 match validity {
837 None => {
838 for row_start in row_starts.iter_mut() {
839 buffer[*row_start] = MaybeUninit::new(1);
840 *row_start += 1;
841 }
842 },
843 Some(validity) => {
844 for (row_start, is_valid) in row_starts.iter_mut().zip(validity.iter()) {
845 let v = if is_valid {
846 MaybeUninit::new(1)
847 } else {
848 MaybeUninit::new(null_sentinel)
849 };
850 buffer[*row_start] = v;
851 *row_start += 1;
852 }
853 },
854 }
855}
856
857pub fn fixed_size(dtype: &ArrowDataType, dict: Option<&RowEncodingContext>) -> Option<usize> {
858 use numeric::FixedLengthEncoding;
859 use ArrowDataType as D;
860 Some(match dtype {
861 D::Null => 0,
862 D::Boolean => 1,
863
864 D::UInt8 => u8::ENCODED_LEN,
865 D::UInt16 => u16::ENCODED_LEN,
866 D::UInt32 => match dict {
867 None => u32::ENCODED_LEN,
868 Some(RowEncodingContext::Categorical(ctx)) => {
869 if ctx.is_enum {
870 packed_u32::len_from_num_bits(ctx.needed_num_bits())
871 } else {
872 let mut num_bytes = u32::ENCODED_LEN;
873 if ctx.lexical_sort_idxs.is_some() {
874 num_bytes += u32::ENCODED_LEN;
875 }
876 num_bytes
877 }
878 },
879 _ => return None,
880 },
881 D::UInt64 => u64::ENCODED_LEN,
882
883 D::Int8 => i8::ENCODED_LEN,
884 D::Int16 => i16::ENCODED_LEN,
885 D::Int32 => i32::ENCODED_LEN,
886 D::Int64 => i64::ENCODED_LEN,
887 D::Int128 => match dict {
888 None => i128::ENCODED_LEN,
889 Some(RowEncodingContext::Decimal(precision)) => decimal::len_from_precision(*precision),
890 _ => unreachable!(),
891 },
892
893 D::Float32 => f32::ENCODED_LEN,
894 D::Float64 => f64::ENCODED_LEN,
895 D::FixedSizeList(f, width) => 1 + width * fixed_size(f.dtype(), dict)?,
896 D::Struct(fs) => match dict {
897 None => {
898 let mut sum = 0;
899 for f in fs {
900 sum += fixed_size(f.dtype(), None)?;
901 }
902 1 + sum
903 },
904 Some(RowEncodingContext::Struct(dicts)) => {
905 let mut sum = 0;
906 for (f, dict) in fs.iter().zip(dicts) {
907 sum += fixed_size(f.dtype(), dict.as_ref())?;
908 }
909 1 + sum
910 },
911 _ => unreachable!(),
912 },
913 _ => return None,
914 })
915}