polars_row/
encode.rs

1use std::mem::MaybeUninit;
2
3use arrow::array::{
4    Array, BinaryArray, BinaryViewArray, BooleanArray, FixedSizeListArray, ListArray,
5    PrimitiveArray, StructArray, Utf8Array, Utf8ViewArray,
6};
7use arrow::bitmap::Bitmap;
8use arrow::datatypes::ArrowDataType;
9use arrow::types::Offset;
10
11use crate::fixed::{boolean, decimal, numeric, packed_u32};
12use crate::row::{RowEncodingOptions, RowsEncoded};
13use crate::variable::{binary, no_order, utf8};
14use crate::widths::RowWidths;
15use crate::{with_match_arrow_primitive_type, ArrayRef, RowEncodingContext};
16
17pub fn convert_columns(
18    num_rows: usize,
19    columns: &[ArrayRef],
20    opts: &[RowEncodingOptions],
21    dicts: &[Option<RowEncodingContext>],
22) -> RowsEncoded {
23    let mut rows = RowsEncoded::new(vec![], vec![]);
24    convert_columns_amortized(
25        num_rows,
26        columns,
27        opts.iter().copied().zip(dicts.iter().map(|v| v.as_ref())),
28        &mut rows,
29    );
30    rows
31}
32
33pub fn convert_columns_no_order(
34    num_rows: usize,
35    columns: &[ArrayRef],
36    dicts: &[Option<RowEncodingContext>],
37) -> RowsEncoded {
38    let mut rows = RowsEncoded::new(vec![], vec![]);
39    convert_columns_amortized_no_order(num_rows, columns, dicts, &mut rows);
40    rows
41}
42
43pub fn convert_columns_amortized_no_order(
44    num_rows: usize,
45    columns: &[ArrayRef],
46    dicts: &[Option<RowEncodingContext>],
47    rows: &mut RowsEncoded,
48) {
49    convert_columns_amortized(
50        num_rows,
51        columns,
52        std::iter::repeat_n(RowEncodingOptions::default(), columns.len())
53            .zip(dicts.iter().map(|v| v.as_ref())),
54        rows,
55    );
56}
57
58pub fn convert_columns_amortized<'a>(
59    num_rows: usize,
60    columns: &[ArrayRef],
61    fields: impl IntoIterator<Item = (RowEncodingOptions, Option<&'a RowEncodingContext>)> + Clone,
62    rows: &mut RowsEncoded,
63) {
64    let mut masked_out_max_length = 0;
65    let mut row_widths = RowWidths::new(num_rows);
66    let mut encoders = columns
67        .iter()
68        .zip(fields.clone())
69        .map(|(column, (opt, dicts))| {
70            get_encoder(
71                column.as_ref(),
72                opt,
73                dicts,
74                &mut row_widths,
75                &mut masked_out_max_length,
76            )
77        })
78        .collect::<Vec<_>>();
79
80    // Create an offsets array, we append 0 at the beginning here so it can serve as the final
81    // offset array.
82    let mut offsets = Vec::with_capacity(num_rows + 1);
83    offsets.push(0);
84    row_widths.extend_with_offsets(&mut offsets);
85
86    // Create a buffer without initializing everything to zero.
87    let total_num_bytes = row_widths.sum();
88    let mut out = Vec::<u8>::with_capacity(total_num_bytes + masked_out_max_length);
89    let buffer = &mut out.spare_capacity_mut()[..total_num_bytes + masked_out_max_length];
90
91    let masked_out_write_offset = total_num_bytes;
92    let mut scratches = EncodeScratches::default();
93    for (encoder, (opt, dict)) in encoders.iter_mut().zip(fields) {
94        unsafe {
95            encode_array(
96                buffer,
97                encoder,
98                opt,
99                dict,
100                &mut offsets[1..],
101                masked_out_write_offset,
102                &mut scratches,
103            )
104        };
105    }
106    // SAFETY: All the bytes in out up to total_num_bytes should now be initialized.
107    unsafe {
108        out.set_len(total_num_bytes);
109    }
110
111    *rows = RowsEncoded {
112        values: out,
113        offsets,
114    };
115}
116
117fn list_num_column_bytes<O: Offset>(
118    array: &dyn Array,
119    opt: RowEncodingOptions,
120    dicts: Option<&RowEncodingContext>,
121    row_widths: &mut RowWidths,
122    masked_out_max_width: &mut usize,
123) -> Encoder {
124    let array = array.as_any().downcast_ref::<ListArray<O>>().unwrap();
125    let array = array.trim_to_normalized_offsets_recursive();
126    let values = array.values();
127
128    let mut list_row_widths = RowWidths::new(values.len());
129    let encoder = get_encoder(
130        values.as_ref(),
131        opt,
132        dicts,
133        &mut list_row_widths,
134        masked_out_max_width,
135    );
136
137    match array.validity() {
138        None => row_widths.push_iter(array.offsets().offset_and_length_iter().map(
139            |(offset, length)| {
140                let mut sum = 0;
141                for i in offset..offset + length {
142                    sum += list_row_widths.get(i);
143                }
144                1 + length + sum
145            },
146        )),
147        Some(validity) => row_widths.push_iter(
148            array
149                .offsets()
150                .offset_and_length_iter()
151                .zip(validity.iter())
152                .map(|((offset, length), is_valid)| {
153                    if !is_valid {
154                        if length > 0 {
155                            for i in offset..offset + length {
156                                *masked_out_max_width =
157                                    (*masked_out_max_width).max(list_row_widths.get(i));
158                            }
159                        }
160                        return 1;
161                    }
162
163                    let mut sum = 0;
164                    for i in offset..offset + length {
165                        sum += list_row_widths.get(i);
166                    }
167                    1 + length + sum
168                }),
169        ),
170    };
171
172    Encoder {
173        array: array.boxed(),
174        state: Some(Box::new(EncoderState::List(
175            Box::new(encoder),
176            list_row_widths,
177        ))),
178    }
179}
180
181fn biniter_num_column_bytes(
182    array: &dyn Array,
183    iter: impl ExactSizeIterator<Item = usize>,
184    validity: Option<&Bitmap>,
185    opt: RowEncodingOptions,
186    row_widths: &mut RowWidths,
187) -> Encoder {
188    if opt.contains(RowEncodingOptions::NO_ORDER) {
189        match validity {
190            None => row_widths.push_iter(iter.map(|v| no_order::len_from_item(Some(v), opt))),
191            Some(validity) => row_widths.push_iter(
192                iter.zip(validity.iter())
193                    .map(|(v, is_valid)| no_order::len_from_item(is_valid.then_some(v), opt)),
194            ),
195        }
196    } else {
197        match validity {
198            None => row_widths.push_iter(
199                iter.map(|v| crate::variable::binary::encoded_len_from_len(Some(v), opt)),
200            ),
201            Some(validity) => row_widths.push_iter(
202                iter.zip(validity.iter())
203                    .map(|(v, is_valid)| binary::encoded_len_from_len(is_valid.then_some(v), opt)),
204            ),
205        }
206    };
207
208    Encoder {
209        array: array.to_boxed(),
210        state: None,
211    }
212}
213
214fn striter_num_column_bytes(
215    array: &dyn Array,
216    iter: impl ExactSizeIterator<Item = usize>,
217    validity: Option<&Bitmap>,
218    opt: RowEncodingOptions,
219    row_widths: &mut RowWidths,
220) -> Encoder {
221    if opt.contains(RowEncodingOptions::NO_ORDER) {
222        match validity {
223            None => row_widths.push_iter(iter.map(|v| no_order::len_from_item(Some(v), opt))),
224            Some(validity) => row_widths.push_iter(
225                iter.zip(validity.iter())
226                    .map(|(v, is_valid)| no_order::len_from_item(is_valid.then_some(v), opt)),
227            ),
228        }
229    } else {
230        match validity {
231            None => row_widths
232                .push_iter(iter.map(|v| crate::variable::utf8::len_from_item(Some(v), opt))),
233            Some(validity) => row_widths.push_iter(
234                iter.zip(validity.iter())
235                    .map(|(v, is_valid)| utf8::len_from_item(is_valid.then_some(v), opt)),
236            ),
237        }
238    };
239
240    Encoder {
241        array: array.to_boxed(),
242        state: None,
243    }
244}
245
246/// Get the encoder for a specific array.
247fn get_encoder(
248    array: &dyn Array,
249    opt: RowEncodingOptions,
250    dict: Option<&RowEncodingContext>,
251    row_widths: &mut RowWidths,
252    masked_out_max_width: &mut usize,
253) -> Encoder {
254    use ArrowDataType as D;
255    let dtype = array.dtype();
256
257    // Fast path: column has a fixed size encoding
258    if let Some(size) = fixed_size(dtype, dict) {
259        row_widths.push_constant(size);
260        let state = match dtype {
261            D::FixedSizeList(_, width) => {
262                let array = array.as_any().downcast_ref::<FixedSizeListArray>().unwrap();
263                let array = array.propagate_nulls();
264
265                debug_assert_eq!(array.values().len(), array.len() * width);
266                let mut nested_row_widths = RowWidths::new(array.values().len());
267                let nested_encoder = get_encoder(
268                    array.values().as_ref(),
269                    opt,
270                    dict,
271                    &mut nested_row_widths,
272                    masked_out_max_width,
273                );
274                Some(EncoderState::FixedSizeList(
275                    Box::new(nested_encoder),
276                    *width,
277                    nested_row_widths,
278                ))
279            },
280            D::Struct(_) => {
281                let struct_array = array.as_any().downcast_ref::<StructArray>().unwrap();
282                let struct_array = struct_array.propagate_nulls();
283
284                Some(EncoderState::Struct(match dict {
285                    None => struct_array
286                        .values()
287                        .iter()
288                        .map(|array| {
289                            get_encoder(
290                                array.as_ref(),
291                                opt,
292                                None,
293                                &mut RowWidths::new(row_widths.num_rows()),
294                                masked_out_max_width,
295                            )
296                        })
297                        .collect(),
298                    Some(RowEncodingContext::Struct(dicts)) => struct_array
299                        .values()
300                        .iter()
301                        .zip(dicts)
302                        .map(|(array, dict)| {
303                            get_encoder(
304                                array.as_ref(),
305                                opt,
306                                dict.as_ref(),
307                                &mut RowWidths::new(row_widths.num_rows()),
308                                masked_out_max_width,
309                            )
310                        })
311                        .collect(),
312                    _ => unreachable!(),
313                }))
314            },
315            _ => None,
316        };
317
318        let state = state.map(Box::new);
319        return Encoder {
320            array: array.to_boxed(),
321            state,
322        };
323    }
324
325    match dtype {
326        D::FixedSizeList(_, width) => {
327            let array = array.as_any().downcast_ref::<FixedSizeListArray>().unwrap();
328            let array = array.propagate_nulls();
329
330            debug_assert_eq!(array.values().len(), array.len() * width);
331            let mut nested_row_widths = RowWidths::new(array.values().len());
332            let nested_encoder = get_encoder(
333                array.values().as_ref(),
334                opt,
335                dict,
336                &mut nested_row_widths,
337                masked_out_max_width,
338            );
339
340            let mut fsl_row_widths = nested_row_widths.collapse_chunks(*width, array.len());
341            fsl_row_widths.push_constant(1); // validity byte
342
343            row_widths.push(&fsl_row_widths);
344            Encoder {
345                array: array.to_boxed(),
346                state: Some(Box::new(EncoderState::FixedSizeList(
347                    Box::new(nested_encoder),
348                    *width,
349                    nested_row_widths,
350                ))),
351            }
352        },
353        D::Struct(_) => {
354            let array = array.as_any().downcast_ref::<StructArray>().unwrap();
355            let array = array.propagate_nulls();
356
357            let mut nested_encoders = Vec::with_capacity(array.values().len());
358            row_widths.push_constant(1); // validity byte
359            match dict {
360                None => {
361                    for array in array.values() {
362                        let encoder = get_encoder(
363                            array.as_ref(),
364                            opt,
365                            None,
366                            row_widths,
367                            masked_out_max_width,
368                        );
369                        nested_encoders.push(encoder);
370                    }
371                },
372                Some(RowEncodingContext::Struct(dicts)) => {
373                    for (array, dict) in array.values().iter().zip(dicts) {
374                        let encoder = get_encoder(
375                            array.as_ref(),
376                            opt,
377                            dict.as_ref(),
378                            row_widths,
379                            masked_out_max_width,
380                        );
381                        nested_encoders.push(encoder);
382                    }
383                },
384                _ => unreachable!(),
385            }
386            Encoder {
387                array: array.to_boxed(),
388                state: Some(Box::new(EncoderState::Struct(nested_encoders))),
389            }
390        },
391
392        D::List(_) => {
393            list_num_column_bytes::<i32>(array, opt, dict, row_widths, masked_out_max_width)
394        },
395        D::LargeList(_) => {
396            list_num_column_bytes::<i64>(array, opt, dict, row_widths, masked_out_max_width)
397        },
398
399        D::BinaryView => {
400            let dc_array = array.as_any().downcast_ref::<BinaryViewArray>().unwrap();
401            biniter_num_column_bytes(
402                array,
403                dc_array.views().iter().map(|v| v.length as usize),
404                dc_array.validity(),
405                opt,
406                row_widths,
407            )
408        },
409        D::Binary => {
410            let dc_array = array.as_any().downcast_ref::<BinaryArray<i32>>().unwrap();
411            biniter_num_column_bytes(
412                array,
413                dc_array.offsets().lengths(),
414                dc_array.validity(),
415                opt,
416                row_widths,
417            )
418        },
419        D::LargeBinary => {
420            let dc_array = array.as_any().downcast_ref::<BinaryArray<i64>>().unwrap();
421            biniter_num_column_bytes(
422                array,
423                dc_array.offsets().lengths(),
424                dc_array.validity(),
425                opt,
426                row_widths,
427            )
428        },
429
430        D::Utf8View => {
431            let dc_array = array.as_any().downcast_ref::<Utf8ViewArray>().unwrap();
432            striter_num_column_bytes(
433                array,
434                dc_array.views().iter().map(|v| v.length as usize),
435                dc_array.validity(),
436                opt,
437                row_widths,
438            )
439        },
440        D::Utf8 => {
441            let dc_array = array.as_any().downcast_ref::<Utf8Array<i32>>().unwrap();
442            striter_num_column_bytes(
443                array,
444                dc_array.offsets().lengths(),
445                dc_array.validity(),
446                opt,
447                row_widths,
448            )
449        },
450        D::LargeUtf8 => {
451            let dc_array = array.as_any().downcast_ref::<Utf8Array<i64>>().unwrap();
452            striter_num_column_bytes(
453                array,
454                dc_array.offsets().lengths(),
455                dc_array.validity(),
456                opt,
457                row_widths,
458            )
459        },
460
461        D::Union(_) => unreachable!(),
462        D::Map(_, _) => unreachable!(),
463        D::Extension(_) => unreachable!(),
464        D::Unknown => unreachable!(),
465
466        // All non-physical types
467        D::Timestamp(_, _)
468        | D::Date32
469        | D::Date64
470        | D::Time32(_)
471        | D::Time64(_)
472        | D::Duration(_)
473        | D::Interval(_)
474        | D::Dictionary(_, _, _)
475        | D::Decimal(_, _)
476        | D::Decimal256(_, _) => unreachable!(),
477
478        // Should be fixed size type
479        _ => unreachable!(),
480    }
481}
482
483struct Encoder {
484    array: Box<dyn Array>,
485
486    /// State contains nested encoders and extra information needed to encode.
487    state: Option<Box<EncoderState>>,
488}
489
490enum EncoderState {
491    List(Box<Encoder>, RowWidths),
492    FixedSizeList(Box<Encoder>, usize, RowWidths),
493    Struct(Vec<Encoder>),
494}
495
496unsafe fn encode_strs<'a>(
497    buffer: &mut [MaybeUninit<u8>],
498    iter: impl Iterator<Item = Option<&'a str>>,
499    opt: RowEncodingOptions,
500    offsets: &mut [usize],
501) {
502    if opt.contains(RowEncodingOptions::NO_ORDER) {
503        no_order::encode_variable_no_order(
504            buffer,
505            iter.map(|v| v.map(str::as_bytes)),
506            opt,
507            offsets,
508        );
509    } else {
510        utf8::encode_str(buffer, iter, opt, offsets);
511    }
512}
513
514unsafe fn encode_bins<'a>(
515    buffer: &mut [MaybeUninit<u8>],
516    iter: impl Iterator<Item = Option<&'a [u8]>>,
517    opt: RowEncodingOptions,
518    offsets: &mut [usize],
519) {
520    if opt.contains(RowEncodingOptions::NO_ORDER) {
521        no_order::encode_variable_no_order(buffer, iter, opt, offsets);
522    } else {
523        binary::encode_iter(buffer, iter, opt, offsets);
524    }
525}
526
527unsafe fn encode_flat_array(
528    buffer: &mut [MaybeUninit<u8>],
529    array: &dyn Array,
530    opt: RowEncodingOptions,
531    dict: Option<&RowEncodingContext>,
532    offsets: &mut [usize],
533) {
534    use ArrowDataType as D;
535
536    match array.dtype() {
537        D::Null => {},
538        D::Boolean => {
539            let array = array.as_any().downcast_ref::<BooleanArray>().unwrap();
540            boolean::encode_bool(buffer, array.iter(), opt, offsets);
541        },
542
543        dt if dt.is_numeric() => {
544            if matches!(dt, D::UInt32) {
545                if let Some(dict) = dict {
546                    let keys = array
547                        .as_any()
548                        .downcast_ref::<PrimitiveArray<u32>>()
549                        .unwrap();
550
551                    match dict {
552                        RowEncodingContext::Categorical(ctx) => {
553                            if ctx.is_enum {
554                                packed_u32::encode(
555                                    buffer,
556                                    keys,
557                                    opt,
558                                    offsets,
559                                    ctx.needed_num_bits(),
560                                );
561                            } else {
562                                if let Some(lexical_sort_idxs) = &ctx.lexical_sort_idxs {
563                                    numeric::encode_iter(
564                                        buffer,
565                                        keys.iter()
566                                            .map(|k| k.map(|&k| lexical_sort_idxs[k as usize])),
567                                        opt,
568                                        offsets,
569                                    );
570                                }
571
572                                numeric::encode(buffer, keys, opt, offsets);
573                            }
574                        },
575
576                        _ => unreachable!(),
577                    }
578                    return;
579                }
580            }
581
582            if matches!(dt, D::Int128) {
583                if let Some(RowEncodingContext::Decimal(precision)) = dict {
584                    decimal::encode(
585                        buffer,
586                        array
587                            .as_any()
588                            .downcast_ref::<PrimitiveArray<i128>>()
589                            .unwrap(),
590                        opt,
591                        offsets,
592                        *precision,
593                    );
594                    return;
595                }
596            }
597
598            with_match_arrow_primitive_type!(dt, |$T| {
599                let array = array.as_any().downcast_ref::<PrimitiveArray<$T>>().unwrap();
600                numeric::encode(buffer, array, opt, offsets);
601            })
602        },
603
604        D::Binary => {
605            let array = array.as_any().downcast_ref::<BinaryArray<i32>>().unwrap();
606            encode_bins(buffer, array.iter(), opt, offsets);
607        },
608        D::LargeBinary => {
609            let array = array.as_any().downcast_ref::<BinaryArray<i64>>().unwrap();
610            encode_bins(buffer, array.iter(), opt, offsets);
611        },
612        D::BinaryView => {
613            let array = array.as_any().downcast_ref::<BinaryViewArray>().unwrap();
614            encode_bins(buffer, array.iter(), opt, offsets);
615        },
616        D::Utf8 => {
617            let array = array.as_any().downcast_ref::<Utf8Array<i32>>().unwrap();
618            encode_strs(buffer, array.iter(), opt, offsets);
619        },
620        D::LargeUtf8 => {
621            let array = array.as_any().downcast_ref::<Utf8Array<i64>>().unwrap();
622            encode_strs(buffer, array.iter(), opt, offsets);
623        },
624        D::Utf8View => {
625            let array = array.as_any().downcast_ref::<Utf8ViewArray>().unwrap();
626            encode_strs(buffer, array.iter(), opt, offsets);
627        },
628
629        // Lexical ordered Categorical are cast to PrimitiveArray above.
630        D::Dictionary(_, _, _) => todo!(),
631
632        D::FixedSizeBinary(_) => todo!(),
633        D::Decimal(_, _) => todo!(),
634        D::Decimal256(_, _) => todo!(),
635
636        D::Union(_) => todo!(),
637        D::Map(_, _) => todo!(),
638        D::Extension(_) => todo!(),
639        D::Unknown => todo!(),
640
641        // All are non-physical types.
642        D::Timestamp(_, _)
643        | D::Date32
644        | D::Date64
645        | D::Time32(_)
646        | D::Time64(_)
647        | D::Duration(_)
648        | D::Interval(_) => unreachable!(),
649
650        _ => unreachable!(),
651    }
652}
653
/// Reusable scratch allocations for encoding nested arrays.
#[derive(Default)]
struct EncodeScratches {
    /// Write positions for the child values of a list array.
    nested_offsets: Vec<usize>,
    /// Scratch byte buffer.
    nested_buffer: Vec<u8>,
}

impl EncodeScratches {
    /// Empty both scratch vectors.
    fn clear(&mut self) {
        let Self {
            nested_offsets,
            nested_buffer,
        } = self;
        nested_offsets.clear();
        nested_buffer.clear();
    }
}
666
667unsafe fn encode_array(
668    buffer: &mut [MaybeUninit<u8>],
669    encoder: &Encoder,
670    opt: RowEncodingOptions,
671    dict: Option<&RowEncodingContext>,
672    offsets: &mut [usize],
673    masked_out_write_offset: usize, // Masked out values need to be written somewhere. We just
674    // reserved space at the end and tell all values to write
675    // there.
676    scratches: &mut EncodeScratches,
677) {
678    let Some(state) = &encoder.state else {
679        // This is actually the main path.
680        //
681        // If no nested types or special types are needed, this path is taken.
682        return encode_flat_array(buffer, encoder.array.as_ref(), opt, dict, offsets);
683    };
684
685    match state.as_ref() {
686        EncoderState::List(nested_encoder, nested_row_widths) => {
687            // @TODO: make more general.
688            let array = encoder
689                .array
690                .as_any()
691                .downcast_ref::<ListArray<i64>>()
692                .unwrap();
693
694            scratches.clear();
695
696            scratches
697                .nested_offsets
698                .reserve(nested_row_widths.num_rows());
699            let nested_offsets = &mut scratches.nested_offsets;
700
701            let list_null_sentinel = opt.list_null_sentinel();
702            let list_continuation_token = opt.list_continuation_token();
703            let list_termination_token = opt.list_termination_token();
704
705            match array.validity() {
706                None => {
707                    for (i, (offset, length)) in
708                        array.offsets().offset_and_length_iter().enumerate()
709                    {
710                        for j in offset..offset + length {
711                            buffer[offsets[i]] = MaybeUninit::new(list_continuation_token);
712                            offsets[i] += 1;
713
714                            nested_offsets.push(offsets[i]);
715                            offsets[i] += nested_row_widths.get(j);
716                        }
717                        buffer[offsets[i]] = MaybeUninit::new(list_termination_token);
718                        offsets[i] += 1;
719                    }
720                },
721                Some(validity) => {
722                    for (i, ((offset, length), is_valid)) in array
723                        .offsets()
724                        .offset_and_length_iter()
725                        .zip(validity.iter())
726                        .enumerate()
727                    {
728                        if !is_valid {
729                            buffer[offsets[i]] = MaybeUninit::new(list_null_sentinel);
730                            offsets[i] += 1;
731
732                            // Values might have been masked out.
733                            if length > 0 {
734                                nested_offsets
735                                    .extend(std::iter::repeat_n(masked_out_write_offset, length));
736                            }
737
738                            continue;
739                        }
740
741                        for j in offset..offset + length {
742                            buffer[offsets[i]] = MaybeUninit::new(list_continuation_token);
743                            offsets[i] += 1;
744
745                            nested_offsets.push(offsets[i]);
746                            offsets[i] += nested_row_widths.get(j);
747                        }
748                        buffer[offsets[i]] = MaybeUninit::new(list_termination_token);
749                        offsets[i] += 1;
750                    }
751                },
752            }
753
754            unsafe {
755                encode_array(
756                    buffer,
757                    nested_encoder,
758                    opt,
759                    dict,
760                    nested_offsets,
761                    masked_out_write_offset,
762                    &mut EncodeScratches::default(),
763                )
764            };
765        },
766        EncoderState::FixedSizeList(array, width, nested_row_widths) => {
767            encode_validity(buffer, encoder.array.validity(), opt, offsets);
768
769            if *width == 0 {
770                return;
771            }
772
773            let mut child_offsets = Vec::with_capacity(offsets.len() * width);
774            for (i, offset) in offsets.iter_mut().enumerate() {
775                for j in 0..*width {
776                    child_offsets.push(*offset);
777                    *offset += nested_row_widths.get((i * width) + j);
778                }
779            }
780            encode_array(
781                buffer,
782                array.as_ref(),
783                opt,
784                dict,
785                &mut child_offsets,
786                masked_out_write_offset,
787                scratches,
788            );
789            for (i, offset) in offsets.iter_mut().enumerate() {
790                *offset = child_offsets[(i + 1) * width - 1];
791            }
792        },
793        EncoderState::Struct(arrays) => {
794            encode_validity(buffer, encoder.array.validity(), opt, offsets);
795
796            match dict {
797                None => {
798                    for array in arrays {
799                        encode_array(
800                            buffer,
801                            array,
802                            opt,
803                            None,
804                            offsets,
805                            masked_out_write_offset,
806                            scratches,
807                        );
808                    }
809                },
810                Some(RowEncodingContext::Struct(dicts)) => {
811                    for (array, dict) in arrays.iter().zip(dicts) {
812                        encode_array(
813                            buffer,
814                            array,
815                            opt,
816                            dict.as_ref(),
817                            offsets,
818                            masked_out_write_offset,
819                            scratches,
820                        );
821                    }
822                },
823                _ => unreachable!(),
824            }
825        },
826    }
827}
828
829unsafe fn encode_validity(
830    buffer: &mut [MaybeUninit<u8>],
831    validity: Option<&Bitmap>,
832    opt: RowEncodingOptions,
833    row_starts: &mut [usize],
834) {
835    let null_sentinel = opt.null_sentinel();
836    match validity {
837        None => {
838            for row_start in row_starts.iter_mut() {
839                buffer[*row_start] = MaybeUninit::new(1);
840                *row_start += 1;
841            }
842        },
843        Some(validity) => {
844            for (row_start, is_valid) in row_starts.iter_mut().zip(validity.iter()) {
845                let v = if is_valid {
846                    MaybeUninit::new(1)
847                } else {
848                    MaybeUninit::new(null_sentinel)
849                };
850                buffer[*row_start] = v;
851                *row_start += 1;
852            }
853        },
854    }
855}
856
857pub fn fixed_size(dtype: &ArrowDataType, dict: Option<&RowEncodingContext>) -> Option<usize> {
858    use numeric::FixedLengthEncoding;
859    use ArrowDataType as D;
860    Some(match dtype {
861        D::Null => 0,
862        D::Boolean => 1,
863
864        D::UInt8 => u8::ENCODED_LEN,
865        D::UInt16 => u16::ENCODED_LEN,
866        D::UInt32 => match dict {
867            None => u32::ENCODED_LEN,
868            Some(RowEncodingContext::Categorical(ctx)) => {
869                if ctx.is_enum {
870                    packed_u32::len_from_num_bits(ctx.needed_num_bits())
871                } else {
872                    let mut num_bytes = u32::ENCODED_LEN;
873                    if ctx.lexical_sort_idxs.is_some() {
874                        num_bytes += u32::ENCODED_LEN;
875                    }
876                    num_bytes
877                }
878            },
879            _ => return None,
880        },
881        D::UInt64 => u64::ENCODED_LEN,
882
883        D::Int8 => i8::ENCODED_LEN,
884        D::Int16 => i16::ENCODED_LEN,
885        D::Int32 => i32::ENCODED_LEN,
886        D::Int64 => i64::ENCODED_LEN,
887        D::Int128 => match dict {
888            None => i128::ENCODED_LEN,
889            Some(RowEncodingContext::Decimal(precision)) => decimal::len_from_precision(*precision),
890            _ => unreachable!(),
891        },
892
893        D::Float32 => f32::ENCODED_LEN,
894        D::Float64 => f64::ENCODED_LEN,
895        D::FixedSizeList(f, width) => 1 + width * fixed_size(f.dtype(), dict)?,
896        D::Struct(fs) => match dict {
897            None => {
898                let mut sum = 0;
899                for f in fs {
900                    sum += fixed_size(f.dtype(), None)?;
901                }
902                1 + sum
903            },
904            Some(RowEncodingContext::Struct(dicts)) => {
905                let mut sum = 0;
906                for (f, dict) in fs.iter().zip(dicts) {
907                    sum += fixed_size(f.dtype(), dict.as_ref())?;
908                }
909                1 + sum
910            },
911            _ => unreachable!(),
912        },
913        _ => return None,
914    })
915}