polars_arrow/ffi/
schema.rs

1use std::collections::BTreeMap;
2use std::ffi::{CStr, CString};
3use std::ptr;
4
5use polars_error::{polars_bail, polars_err, PolarsResult};
6use polars_utils::pl_str::PlSmallStr;
7
8use super::ArrowSchema;
9use crate::datatypes::{
10    ArrowDataType, Extension, ExtensionType, Field, IntegerType, IntervalUnit, Metadata, TimeUnit,
11    UnionMode, UnionType,
12};
13
/// Owner of every heap allocation that an exported [`ArrowSchema`] points into.
///
/// [`ArrowSchema::new`] boxes one of these, stores raw pointers into it on the schema and parks
/// the box itself in `private_data`; `c_release_schema` reconstructs the box to free everything.
// `name`, `format` and `metadata` are never read again after their pointers have been handed
// out; they are held only to keep those pointers valid, hence the `dead_code` allowance.
#[allow(dead_code)]
struct SchemaPrivateData {
    /// NUL-terminated field name; the schema's `name` pointer refers to this buffer.
    name: CString,
    /// NUL-terminated format string; the schema's `format` pointer refers to this buffer.
    format: CString,
    /// Serialized key/value metadata (see `metadata_to_bytes`), if there is any.
    metadata: Option<Vec<u8>>,
    /// Child schemas, each created with `Box::into_raw` and freed by the release callback.
    children_ptr: Box<[*mut ArrowSchema]>,
    /// Schema of the dictionary values, created with `Box::into_raw`, if there is one.
    dictionary: Option<*mut ArrowSchema>,
}
22
23// callback used to drop [ArrowSchema] when it is exported.
24unsafe extern "C" fn c_release_schema(schema: *mut ArrowSchema) {
25    if schema.is_null() {
26        return;
27    }
28    let schema = &mut *schema;
29
30    let private = Box::from_raw(schema.private_data as *mut SchemaPrivateData);
31    for child in private.children_ptr.iter() {
32        let _ = Box::from_raw(*child);
33    }
34
35    if let Some(ptr) = private.dictionary {
36        let _ = Box::from_raw(ptr);
37    }
38
39    schema.release = None;
40}
41
42/// allocate (and hold) the children
43fn schema_children(dtype: &ArrowDataType, flags: &mut i64) -> Box<[*mut ArrowSchema]> {
44    match dtype {
45        ArrowDataType::List(field)
46        | ArrowDataType::FixedSizeList(field, _)
47        | ArrowDataType::LargeList(field) => {
48            Box::new([Box::into_raw(Box::new(ArrowSchema::new(field.as_ref())))])
49        },
50        ArrowDataType::Map(field, is_sorted) => {
51            *flags += (*is_sorted as i64) * 4;
52            Box::new([Box::into_raw(Box::new(ArrowSchema::new(field.as_ref())))])
53        },
54        ArrowDataType::Struct(fields) => fields
55            .iter()
56            .map(|field| Box::into_raw(Box::new(ArrowSchema::new(field))))
57            .collect::<Box<[_]>>(),
58        ArrowDataType::Union(u) => u
59            .fields
60            .iter()
61            .map(|field| Box::into_raw(Box::new(ArrowSchema::new(field))))
62            .collect::<Box<[_]>>(),
63        ArrowDataType::Extension(ext) => schema_children(&ext.inner, flags),
64        _ => Box::new([]),
65    }
66}
67
68impl ArrowSchema {
69    /// creates a new [ArrowSchema]
70    pub(crate) fn new(field: &Field) -> Self {
71        let format = to_format(field.dtype());
72        let name = field.name.clone();
73
74        let mut flags = field.is_nullable as i64 * 2;
75
76        // note: this cannot be done along with the above because the above is fallible and this op leaks.
77        let children_ptr = schema_children(field.dtype(), &mut flags);
78        let n_children = children_ptr.len() as i64;
79
80        let dictionary = if let ArrowDataType::Dictionary(_, values, is_ordered) = field.dtype() {
81            flags += *is_ordered as i64;
82            // we do not store field info in the dict values, so can't recover it all :(
83            let field = Field::new(PlSmallStr::EMPTY, values.as_ref().clone(), true);
84            Some(Box::new(ArrowSchema::new(&field)))
85        } else {
86            None
87        };
88
89        let metadata = field
90            .metadata
91            .as_ref()
92            .map(|inner| (**inner).clone())
93            .unwrap_or_default();
94
95        let metadata = if let ArrowDataType::Extension(ext) = field.dtype() {
96            // append extension information.
97            let mut metadata = metadata.clone();
98
99            // metadata
100            if let Some(extension_metadata) = &ext.metadata {
101                metadata.insert(
102                    PlSmallStr::from_static("ARROW:extension:metadata"),
103                    extension_metadata.clone(),
104                );
105            }
106
107            metadata.insert(
108                PlSmallStr::from_static("ARROW:extension:name"),
109                ext.name.clone(),
110            );
111
112            Some(metadata_to_bytes(&metadata))
113        } else if !metadata.is_empty() {
114            Some(metadata_to_bytes(&metadata))
115        } else {
116            None
117        };
118
119        let name = CString::new(name.as_bytes()).unwrap();
120        let format = CString::new(format).unwrap();
121
122        let mut private = Box::new(SchemaPrivateData {
123            name,
124            format,
125            metadata,
126            children_ptr,
127            dictionary: dictionary.map(Box::into_raw),
128        });
129
130        // <https://arrow.apache.org/docs/format/CDataInterface.html#c.ArrowSchema>
131        Self {
132            format: private.format.as_ptr(),
133            name: private.name.as_ptr(),
134            metadata: private
135                .metadata
136                .as_ref()
137                .map(|x| x.as_ptr())
138                .unwrap_or(std::ptr::null()) as *const ::std::os::raw::c_char,
139            flags,
140            n_children,
141            children: private.children_ptr.as_mut_ptr(),
142            dictionary: private.dictionary.unwrap_or(std::ptr::null_mut()),
143            release: Some(c_release_schema),
144            private_data: Box::into_raw(private) as *mut ::std::os::raw::c_void,
145        }
146    }
147
148    /// create an empty [ArrowSchema]
149    pub fn empty() -> Self {
150        Self {
151            format: std::ptr::null_mut(),
152            name: std::ptr::null_mut(),
153            metadata: std::ptr::null_mut(),
154            flags: 0,
155            n_children: 0,
156            children: ptr::null_mut(),
157            dictionary: std::ptr::null_mut(),
158            release: None,
159            private_data: std::ptr::null_mut(),
160        }
161    }
162
163    pub fn is_null(&self) -> bool {
164        self.private_data.is_null()
165    }
166
167    /// returns the format of this schema.
168    pub(crate) fn format(&self) -> &str {
169        assert!(!self.format.is_null());
170        // safe because the lifetime of `self.format` equals `self`
171        unsafe { CStr::from_ptr(self.format) }
172            .to_str()
173            .expect("The external API has a non-utf8 as format")
174    }
175
176    /// returns the name of this schema.
177    ///
178    /// Since this field is optional, `""` is returned if it is not set (as per the spec).
179    pub(crate) fn name(&self) -> &str {
180        if self.name.is_null() {
181            return "";
182        }
183        // safe because the lifetime of `self.name` equals `self`
184        unsafe { CStr::from_ptr(self.name) }.to_str().unwrap()
185    }
186
187    pub(crate) fn child(&self, index: usize) -> &'static Self {
188        assert!(index < self.n_children as usize);
189        unsafe { self.children.add(index).as_ref().unwrap().as_ref().unwrap() }
190    }
191
192    pub(crate) fn dictionary(&self) -> Option<&'static Self> {
193        if self.dictionary.is_null() {
194            return None;
195        };
196        Some(unsafe { self.dictionary.as_ref().unwrap() })
197    }
198
199    pub(crate) fn nullable(&self) -> bool {
200        (self.flags / 2) & 1 == 1
201    }
202}
203
204impl Drop for ArrowSchema {
205    fn drop(&mut self) {
206        match self.release {
207            None => (),
208            Some(release) => unsafe { release(self) },
209        };
210    }
211}
212
213pub(crate) unsafe fn to_field(schema: &ArrowSchema) -> PolarsResult<Field> {
214    let dictionary = schema.dictionary();
215    let dtype = if let Some(dictionary) = dictionary {
216        let indices = to_integer_type(schema.format())?;
217        let values = to_field(dictionary)?;
218        let is_ordered = schema.flags & 1 == 1;
219        ArrowDataType::Dictionary(indices, Box::new(values.dtype().clone()), is_ordered)
220    } else {
221        to_dtype(schema)?
222    };
223    let (metadata, extension) = unsafe { metadata_from_bytes(schema.metadata) };
224
225    let dtype = if let Some((name, extension_metadata)) = extension {
226        ArrowDataType::Extension(Box::new(ExtensionType {
227            name,
228            inner: dtype,
229            metadata: extension_metadata,
230        }))
231    } else {
232        dtype
233    };
234
235    Ok(Field::new(
236        PlSmallStr::from_str(schema.name()),
237        dtype,
238        schema.nullable(),
239    )
240    .with_metadata(metadata))
241}
242
243fn to_integer_type(format: &str) -> PolarsResult<IntegerType> {
244    use IntegerType::*;
245    Ok(match format {
246        "c" => Int8,
247        "C" => UInt8,
248        "s" => Int16,
249        "S" => UInt16,
250        "i" => Int32,
251        "I" => UInt32,
252        "l" => Int64,
253        "L" => UInt64,
254        _ => {
255            polars_bail!(
256                ComputeError:
257                "dictionary indices can only be integers"
258            )
259        },
260    })
261}
262
263unsafe fn to_dtype(schema: &ArrowSchema) -> PolarsResult<ArrowDataType> {
264    Ok(match schema.format() {
265        "n" => ArrowDataType::Null,
266        "b" => ArrowDataType::Boolean,
267        "c" => ArrowDataType::Int8,
268        "C" => ArrowDataType::UInt8,
269        "s" => ArrowDataType::Int16,
270        "S" => ArrowDataType::UInt16,
271        "i" => ArrowDataType::Int32,
272        "I" => ArrowDataType::UInt32,
273        "l" => ArrowDataType::Int64,
274        "L" => ArrowDataType::UInt64,
275        "e" => ArrowDataType::Float16,
276        "f" => ArrowDataType::Float32,
277        "g" => ArrowDataType::Float64,
278        "z" => ArrowDataType::Binary,
279        "Z" => ArrowDataType::LargeBinary,
280        "u" => ArrowDataType::Utf8,
281        "U" => ArrowDataType::LargeUtf8,
282        "tdD" => ArrowDataType::Date32,
283        "tdm" => ArrowDataType::Date64,
284        "tts" => ArrowDataType::Time32(TimeUnit::Second),
285        "ttm" => ArrowDataType::Time32(TimeUnit::Millisecond),
286        "ttu" => ArrowDataType::Time64(TimeUnit::Microsecond),
287        "ttn" => ArrowDataType::Time64(TimeUnit::Nanosecond),
288        "tDs" => ArrowDataType::Duration(TimeUnit::Second),
289        "tDm" => ArrowDataType::Duration(TimeUnit::Millisecond),
290        "tDu" => ArrowDataType::Duration(TimeUnit::Microsecond),
291        "tDn" => ArrowDataType::Duration(TimeUnit::Nanosecond),
292        "tiM" => ArrowDataType::Interval(IntervalUnit::YearMonth),
293        "tiD" => ArrowDataType::Interval(IntervalUnit::DayTime),
294        "vu" => ArrowDataType::Utf8View,
295        "vz" => ArrowDataType::BinaryView,
296        "+l" => {
297            let child = schema.child(0);
298            ArrowDataType::List(Box::new(to_field(child)?))
299        },
300        "+L" => {
301            let child = schema.child(0);
302            ArrowDataType::LargeList(Box::new(to_field(child)?))
303        },
304        "+m" => {
305            let child = schema.child(0);
306
307            let is_sorted = (schema.flags & 4) != 0;
308            ArrowDataType::Map(Box::new(to_field(child)?), is_sorted)
309        },
310        "+s" => {
311            let children = (0..schema.n_children as usize)
312                .map(|x| to_field(schema.child(x)))
313                .collect::<PolarsResult<Vec<_>>>()?;
314            ArrowDataType::Struct(children)
315        },
316        other => {
317            match other.splitn(2, ':').collect::<Vec<_>>()[..] {
318                // Timestamps with no timezone
319                ["tss", ""] => ArrowDataType::Timestamp(TimeUnit::Second, None),
320                ["tsm", ""] => ArrowDataType::Timestamp(TimeUnit::Millisecond, None),
321                ["tsu", ""] => ArrowDataType::Timestamp(TimeUnit::Microsecond, None),
322                ["tsn", ""] => ArrowDataType::Timestamp(TimeUnit::Nanosecond, None),
323
324                // Timestamps with timezone
325                ["tss", tz] => {
326                    ArrowDataType::Timestamp(TimeUnit::Second, Some(PlSmallStr::from_str(tz)))
327                },
328                ["tsm", tz] => {
329                    ArrowDataType::Timestamp(TimeUnit::Millisecond, Some(PlSmallStr::from_str(tz)))
330                },
331                ["tsu", tz] => {
332                    ArrowDataType::Timestamp(TimeUnit::Microsecond, Some(PlSmallStr::from_str(tz)))
333                },
334                ["tsn", tz] => {
335                    ArrowDataType::Timestamp(TimeUnit::Nanosecond, Some(PlSmallStr::from_str(tz)))
336                },
337
338                ["w", size_raw] => {
339                    // Example: "w:42" fixed-width binary [42 bytes]
340                    let size = size_raw
341                        .parse::<usize>()
342                        .map_err(|_| polars_err!(ComputeError: "size is not a valid integer"))?;
343                    ArrowDataType::FixedSizeBinary(size)
344                },
345                ["+w", size_raw] => {
346                    // Example: "+w:123" fixed-sized list [123 items]
347                    let size = size_raw
348                        .parse::<usize>()
349                        .map_err(|_| polars_err!(ComputeError: "size is not a valid integer"))?;
350                    let child = to_field(schema.child(0))?;
351                    ArrowDataType::FixedSizeList(Box::new(child), size)
352                },
353                ["d", raw] => {
354                    // Decimal
355                    let (precision, scale) = match raw.split(',').collect::<Vec<_>>()[..] {
356                        [precision_raw, scale_raw] => {
357                            // Example: "d:19,10" decimal128 [precision 19, scale 10]
358                            (precision_raw, scale_raw)
359                        },
360                        [precision_raw, scale_raw, width_raw] => {
361                            // Example: "d:19,10,NNN" decimal bitwidth = NNN [precision 19, scale 10]
362                            // Only bitwdth of 128 currently supported
363                            let bit_width = width_raw.parse::<usize>().map_err(|_| {
364                                polars_err!(ComputeError: "Decimal bit width is not a valid integer")
365                            })?;
366                            if bit_width == 256 {
367                                return Ok(ArrowDataType::Decimal256(
368                                    precision_raw.parse::<usize>().map_err(|_| {
369                                        polars_err!(ComputeError: "Decimal precision is not a valid integer")
370                                    })?,
371                                    scale_raw.parse::<usize>().map_err(|_| {
372                                        polars_err!(ComputeError: "Decimal scale is not a valid integer")
373                                    })?,
374                                ));
375                            }
376                            (precision_raw, scale_raw)
377                        },
378                        _ => {
379                            polars_bail!(ComputeError:
380                                "Decimal must contain 2 or 3 comma-separated values"
381                            )
382                        },
383                    };
384
385                    ArrowDataType::Decimal(
386                        precision.parse::<usize>().map_err(|_| {
387                            polars_err!(ComputeError:
388                            "Decimal precision is not a valid integer"
389                            )
390                        })?,
391                        scale.parse::<usize>().map_err(|_| {
392                            polars_err!(ComputeError:
393                            "Decimal scale is not a valid integer"
394                            )
395                        })?,
396                    )
397                },
398                [union_type @ "+us", union_parts] | [union_type @ "+ud", union_parts] => {
399                    // union, sparse
400                    // Example "+us:I,J,..." sparse union with type ids I,J...
401                    // Example: "+ud:I,J,..." dense union with type ids I,J...
402                    let mode = UnionMode::sparse(union_type == "+us");
403                    let type_ids = union_parts
404                        .split(',')
405                        .map(|x| {
406                            x.parse::<i32>().map_err(|_| {
407                                polars_err!(ComputeError:
408                                "Union type id is not a valid integer"
409                                )
410                            })
411                        })
412                        .collect::<PolarsResult<Vec<_>>>()?;
413                    let fields = (0..schema.n_children as usize)
414                        .map(|x| to_field(schema.child(x)))
415                        .collect::<PolarsResult<Vec<_>>>()?;
416                    ArrowDataType::Union(Box::new(UnionType {
417                        fields,
418                        ids: Some(type_ids),
419                        mode,
420                    }))
421                },
422                _ => {
423                    polars_bail!(ComputeError:
424                    "The datatype \"{other}\" is still not supported in Rust implementation",
425                        )
426                },
427            }
428        },
429    })
430}
431
432/// the inverse of [to_field]
433fn to_format(dtype: &ArrowDataType) -> String {
434    match dtype {
435        ArrowDataType::Null => "n".to_string(),
436        ArrowDataType::Boolean => "b".to_string(),
437        ArrowDataType::Int8 => "c".to_string(),
438        ArrowDataType::UInt8 => "C".to_string(),
439        ArrowDataType::Int16 => "s".to_string(),
440        ArrowDataType::UInt16 => "S".to_string(),
441        ArrowDataType::Int32 => "i".to_string(),
442        ArrowDataType::UInt32 => "I".to_string(),
443        ArrowDataType::Int64 => "l".to_string(),
444        ArrowDataType::UInt64 => "L".to_string(),
445        // Doesn't exist in arrow, '_pl' prefixed is Polars specific
446        ArrowDataType::Int128 => "_pli128".to_string(),
447        ArrowDataType::Float16 => "e".to_string(),
448        ArrowDataType::Float32 => "f".to_string(),
449        ArrowDataType::Float64 => "g".to_string(),
450        ArrowDataType::Binary => "z".to_string(),
451        ArrowDataType::LargeBinary => "Z".to_string(),
452        ArrowDataType::Utf8 => "u".to_string(),
453        ArrowDataType::LargeUtf8 => "U".to_string(),
454        ArrowDataType::Date32 => "tdD".to_string(),
455        ArrowDataType::Date64 => "tdm".to_string(),
456        ArrowDataType::Time32(TimeUnit::Second) => "tts".to_string(),
457        ArrowDataType::Time32(TimeUnit::Millisecond) => "ttm".to_string(),
458        ArrowDataType::Time32(_) => {
459            unreachable!("Time32 is only supported for seconds and milliseconds")
460        },
461        ArrowDataType::Time64(TimeUnit::Microsecond) => "ttu".to_string(),
462        ArrowDataType::Time64(TimeUnit::Nanosecond) => "ttn".to_string(),
463        ArrowDataType::Time64(_) => {
464            unreachable!("Time64 is only supported for micro and nanoseconds")
465        },
466        ArrowDataType::Duration(TimeUnit::Second) => "tDs".to_string(),
467        ArrowDataType::Duration(TimeUnit::Millisecond) => "tDm".to_string(),
468        ArrowDataType::Duration(TimeUnit::Microsecond) => "tDu".to_string(),
469        ArrowDataType::Duration(TimeUnit::Nanosecond) => "tDn".to_string(),
470        ArrowDataType::Interval(IntervalUnit::YearMonth) => "tiM".to_string(),
471        ArrowDataType::Interval(IntervalUnit::DayTime) => "tiD".to_string(),
472        ArrowDataType::Interval(IntervalUnit::MonthDayNano) => {
473            todo!("Spec for FFI for MonthDayNano still not defined.")
474        },
475        ArrowDataType::Timestamp(unit, tz) => {
476            let unit = match unit {
477                TimeUnit::Second => "s",
478                TimeUnit::Millisecond => "m",
479                TimeUnit::Microsecond => "u",
480                TimeUnit::Nanosecond => "n",
481            };
482            format!(
483                "ts{}:{}",
484                unit,
485                tz.as_ref().map(|x| x.as_str()).unwrap_or("")
486            )
487        },
488        ArrowDataType::Utf8View => "vu".to_string(),
489        ArrowDataType::BinaryView => "vz".to_string(),
490        ArrowDataType::Decimal(precision, scale) => format!("d:{precision},{scale}"),
491        ArrowDataType::Decimal256(precision, scale) => format!("d:{precision},{scale},256"),
492        ArrowDataType::List(_) => "+l".to_string(),
493        ArrowDataType::LargeList(_) => "+L".to_string(),
494        ArrowDataType::Struct(_) => "+s".to_string(),
495        ArrowDataType::FixedSizeBinary(size) => format!("w:{size}"),
496        ArrowDataType::FixedSizeList(_, size) => format!("+w:{size}"),
497        ArrowDataType::Union(u) => {
498            let sparsness = if u.mode.is_sparse() { 's' } else { 'd' };
499            let mut r = format!("+u{sparsness}:");
500            let ids = if let Some(ids) = &u.ids {
501                ids.iter()
502                    .fold(String::new(), |a, b| a + b.to_string().as_str() + ",")
503            } else {
504                (0..u.fields.len()).fold(String::new(), |a, b| a + b.to_string().as_str() + ",")
505            };
506            let ids = &ids[..ids.len() - 1]; // take away last ","
507            r.push_str(ids);
508            r
509        },
510        ArrowDataType::Map(_, _) => "+m".to_string(),
511        ArrowDataType::Dictionary(index, _, _) => to_format(&(*index).into()),
512        ArrowDataType::Extension(ext) => to_format(&ext.inner),
513        ArrowDataType::Unknown => unimplemented!(),
514    }
515}
516
517pub(super) fn get_child(dtype: &ArrowDataType, index: usize) -> PolarsResult<ArrowDataType> {
518    match (index, dtype) {
519        (0, ArrowDataType::List(field)) => Ok(field.dtype().clone()),
520        (0, ArrowDataType::FixedSizeList(field, _)) => Ok(field.dtype().clone()),
521        (0, ArrowDataType::LargeList(field)) => Ok(field.dtype().clone()),
522        (0, ArrowDataType::Map(field, _)) => Ok(field.dtype().clone()),
523        (index, ArrowDataType::Struct(fields)) => Ok(fields[index].dtype().clone()),
524        (index, ArrowDataType::Union(u)) => Ok(u.fields[index].dtype().clone()),
525        (index, ArrowDataType::Extension(ext)) => get_child(&ext.inner, index),
526        (child, dtype) => polars_bail!(ComputeError:
527            "Requested child {child} to type {dtype:?} that has no such child",
528        ),
529    }
530}
531
532fn metadata_to_bytes(metadata: &BTreeMap<PlSmallStr, PlSmallStr>) -> Vec<u8> {
533    let a = (metadata.len() as i32).to_ne_bytes().to_vec();
534    metadata.iter().fold(a, |mut acc, (key, value)| {
535        acc.extend((key.len() as i32).to_ne_bytes());
536        acc.extend(key.as_bytes());
537        acc.extend((value.len() as i32).to_ne_bytes());
538        acc.extend(value.as_bytes());
539        acc
540    })
541}
542
/// Reads a native-endian `i32` from the four bytes starting at `ptr`.
///
/// # Safety
/// `ptr` must be valid for reading four bytes; no particular alignment is needed.
unsafe fn read_ne_i32(ptr: *const u8) -> i32 {
    // `[u8; 4]` has an alignment of 1, so a plain read is fine for any address.
    let raw = ptr.cast::<[u8; 4]>().read();
    i32::from_ne_bytes(raw)
}
547
548unsafe fn read_bytes(ptr: *const u8, len: usize) -> &'static str {
549    let slice = std::slice::from_raw_parts(ptr, len);
550    simdutf8::basic::from_utf8(slice).unwrap()
551}
552
553unsafe fn metadata_from_bytes(data: *const ::std::os::raw::c_char) -> (Metadata, Extension) {
554    let mut data = data as *const u8; // u8 = i8
555    if data.is_null() {
556        return (Metadata::default(), None);
557    };
558    let len = read_ne_i32(data);
559    data = data.add(4);
560
561    let mut result = BTreeMap::new();
562    let mut extension_name = None;
563    let mut extension_metadata = None;
564    for _ in 0..len {
565        let key_len = read_ne_i32(data) as usize;
566        data = data.add(4);
567        let key = read_bytes(data, key_len);
568        data = data.add(key_len);
569        let value_len = read_ne_i32(data) as usize;
570        data = data.add(4);
571        let value = read_bytes(data, value_len);
572        data = data.add(value_len);
573        match key {
574            "ARROW:extension:name" => {
575                extension_name = Some(PlSmallStr::from_str(value));
576            },
577            "ARROW:extension:metadata" => {
578                extension_metadata = Some(PlSmallStr::from_str(value));
579            },
580            _ => {
581                result.insert(PlSmallStr::from_str(key), PlSmallStr::from_str(value));
582            },
583        };
584    }
585    let extension = extension_name.map(|name| (name, extension_metadata));
586    (result, extension)
587}
588
#[cfg(test)]
mod tests {
    use super::*;

    /// Exports a field of each listed data type with [`ArrowSchema::new`] and checks that
    /// `to_dtype` reads the same data type back.
    ///
    /// Only the data type is compared (`to_dtype`, not `to_field`), so the field name,
    /// nullability and metadata are not covered by this test.
    #[test]
    fn test_all() {
        let mut dts = vec![
            ArrowDataType::Null,
            ArrowDataType::Boolean,
            ArrowDataType::UInt8,
            ArrowDataType::UInt16,
            ArrowDataType::UInt32,
            ArrowDataType::UInt64,
            ArrowDataType::Int8,
            ArrowDataType::Int16,
            ArrowDataType::Int32,
            ArrowDataType::Int64,
            ArrowDataType::Float32,
            ArrowDataType::Float64,
            ArrowDataType::Date32,
            ArrowDataType::Date64,
            ArrowDataType::Time32(TimeUnit::Second),
            ArrowDataType::Time32(TimeUnit::Millisecond),
            ArrowDataType::Time64(TimeUnit::Microsecond),
            ArrowDataType::Time64(TimeUnit::Nanosecond),
            ArrowDataType::Decimal(5, 5),
            ArrowDataType::Utf8,
            ArrowDataType::LargeUtf8,
            ArrowDataType::Binary,
            ArrowDataType::LargeBinary,
            ArrowDataType::FixedSizeBinary(2),
            ArrowDataType::List(Box::new(Field::new(
                PlSmallStr::from_static("example"),
                ArrowDataType::Boolean,
                false,
            ))),
            ArrowDataType::FixedSizeList(
                Box::new(Field::new(
                    PlSmallStr::from_static("example"),
                    ArrowDataType::Boolean,
                    false,
                )),
                2,
            ),
            ArrowDataType::LargeList(Box::new(Field::new(
                PlSmallStr::from_static("example"),
                ArrowDataType::Boolean,
                false,
            ))),
            ArrowDataType::Struct(vec![
                Field::new(PlSmallStr::from_static("a"), ArrowDataType::Int64, true),
                Field::new(
                    PlSmallStr::from_static("b"),
                    ArrowDataType::List(Box::new(Field::new(
                        PlSmallStr::from_static("item"),
                        ArrowDataType::Int32,
                        true,
                    ))),
                    true,
                ),
            ]),
            // A map with the "sorted" flag set.
            ArrowDataType::Map(
                Box::new(Field::new(
                    PlSmallStr::from_static("a"),
                    ArrowDataType::Int64,
                    true,
                )),
                true,
            ),
            // Dense union with type ids that differ from the field positions.
            ArrowDataType::Union(Box::new(UnionType {
                fields: vec![
                    Field::new(PlSmallStr::from_static("a"), ArrowDataType::Int64, true),
                    Field::new(
                        PlSmallStr::from_static("b"),
                        ArrowDataType::List(Box::new(Field::new(
                            PlSmallStr::from_static("item"),
                            ArrowDataType::Int32,
                            true,
                        ))),
                        true,
                    ),
                ],
                ids: Some(vec![1, 2]),
                mode: UnionMode::Dense,
            })),
            // Sparse union.
            ArrowDataType::Union(Box::new(UnionType {
                fields: vec![
                    Field::new(PlSmallStr::from_static("a"), ArrowDataType::Int64, true),
                    Field::new(
                        PlSmallStr::from_static("b"),
                        ArrowDataType::List(Box::new(Field::new(
                            PlSmallStr::from_static("item"),
                            ArrowDataType::Int32,
                            true,
                        ))),
                        true,
                    ),
                ],
                ids: Some(vec![0, 1]),
                mode: UnionMode::Sparse,
            })),
        ];
        // Timestamps (with and without a timezone) and durations, for every time unit.
        for time_unit in [
            TimeUnit::Second,
            TimeUnit::Millisecond,
            TimeUnit::Microsecond,
            TimeUnit::Nanosecond,
        ] {
            dts.push(ArrowDataType::Timestamp(time_unit, None));
            dts.push(ArrowDataType::Timestamp(
                time_unit,
                Some(PlSmallStr::from_static("00:00")),
            ));
            dts.push(ArrowDataType::Duration(time_unit));
        }
        // MonthDayNano is left out: `to_format` currently hits `todo!()` for it.
        for interval_type in [
            IntervalUnit::DayTime,
            IntervalUnit::YearMonth,
            //IntervalUnit::MonthDayNano, // not yet defined on the C data interface
        ] {
            dts.push(ArrowDataType::Interval(interval_type));
        }

        // Round trip: data type -> exported schema -> data type.
        for expected in dts {
            let field = Field::new(PlSmallStr::from_static("a"), expected.clone(), true);
            let schema = ArrowSchema::new(&field);
            let result = unsafe { super::to_dtype(&schema).unwrap() };
            assert_eq!(result, expected);
        }
    }
}