1use std::collections::BTreeMap;
2use std::ffi::{CStr, CString};
3use std::ptr;
4
5use polars_error::{polars_bail, polars_err, PolarsResult};
6use polars_utils::pl_str::PlSmallStr;
7
8use super::ArrowSchema;
9use crate::datatypes::{
10 ArrowDataType, Extension, ExtensionType, Field, IntegerType, IntervalUnit, Metadata, TimeUnit,
11 UnionMode, UnionType,
12};
13
14#[allow(dead_code)]
15struct SchemaPrivateData {
16 name: CString,
17 format: CString,
18 metadata: Option<Vec<u8>>,
19 children_ptr: Box<[*mut ArrowSchema]>,
20 dictionary: Option<*mut ArrowSchema>,
21}
22
23unsafe extern "C" fn c_release_schema(schema: *mut ArrowSchema) {
25 if schema.is_null() {
26 return;
27 }
28 let schema = &mut *schema;
29
30 let private = Box::from_raw(schema.private_data as *mut SchemaPrivateData);
31 for child in private.children_ptr.iter() {
32 let _ = Box::from_raw(*child);
33 }
34
35 if let Some(ptr) = private.dictionary {
36 let _ = Box::from_raw(ptr);
37 }
38
39 schema.release = None;
40}
41
42fn schema_children(dtype: &ArrowDataType, flags: &mut i64) -> Box<[*mut ArrowSchema]> {
44 match dtype {
45 ArrowDataType::List(field)
46 | ArrowDataType::FixedSizeList(field, _)
47 | ArrowDataType::LargeList(field) => {
48 Box::new([Box::into_raw(Box::new(ArrowSchema::new(field.as_ref())))])
49 },
50 ArrowDataType::Map(field, is_sorted) => {
51 *flags += (*is_sorted as i64) * 4;
52 Box::new([Box::into_raw(Box::new(ArrowSchema::new(field.as_ref())))])
53 },
54 ArrowDataType::Struct(fields) => fields
55 .iter()
56 .map(|field| Box::into_raw(Box::new(ArrowSchema::new(field))))
57 .collect::<Box<[_]>>(),
58 ArrowDataType::Union(u) => u
59 .fields
60 .iter()
61 .map(|field| Box::into_raw(Box::new(ArrowSchema::new(field))))
62 .collect::<Box<[_]>>(),
63 ArrowDataType::Extension(ext) => schema_children(&ext.inner, flags),
64 _ => Box::new([]),
65 }
66}
67
68impl ArrowSchema {
69 pub(crate) fn new(field: &Field) -> Self {
71 let format = to_format(field.dtype());
72 let name = field.name.clone();
73
74 let mut flags = field.is_nullable as i64 * 2;
75
76 let children_ptr = schema_children(field.dtype(), &mut flags);
78 let n_children = children_ptr.len() as i64;
79
80 let dictionary = if let ArrowDataType::Dictionary(_, values, is_ordered) = field.dtype() {
81 flags += *is_ordered as i64;
82 let field = Field::new(PlSmallStr::EMPTY, values.as_ref().clone(), true);
84 Some(Box::new(ArrowSchema::new(&field)))
85 } else {
86 None
87 };
88
89 let metadata = field
90 .metadata
91 .as_ref()
92 .map(|inner| (**inner).clone())
93 .unwrap_or_default();
94
95 let metadata = if let ArrowDataType::Extension(ext) = field.dtype() {
96 let mut metadata = metadata.clone();
98
99 if let Some(extension_metadata) = &ext.metadata {
101 metadata.insert(
102 PlSmallStr::from_static("ARROW:extension:metadata"),
103 extension_metadata.clone(),
104 );
105 }
106
107 metadata.insert(
108 PlSmallStr::from_static("ARROW:extension:name"),
109 ext.name.clone(),
110 );
111
112 Some(metadata_to_bytes(&metadata))
113 } else if !metadata.is_empty() {
114 Some(metadata_to_bytes(&metadata))
115 } else {
116 None
117 };
118
119 let name = CString::new(name.as_bytes()).unwrap();
120 let format = CString::new(format).unwrap();
121
122 let mut private = Box::new(SchemaPrivateData {
123 name,
124 format,
125 metadata,
126 children_ptr,
127 dictionary: dictionary.map(Box::into_raw),
128 });
129
130 Self {
132 format: private.format.as_ptr(),
133 name: private.name.as_ptr(),
134 metadata: private
135 .metadata
136 .as_ref()
137 .map(|x| x.as_ptr())
138 .unwrap_or(std::ptr::null()) as *const ::std::os::raw::c_char,
139 flags,
140 n_children,
141 children: private.children_ptr.as_mut_ptr(),
142 dictionary: private.dictionary.unwrap_or(std::ptr::null_mut()),
143 release: Some(c_release_schema),
144 private_data: Box::into_raw(private) as *mut ::std::os::raw::c_void,
145 }
146 }
147
148 pub fn empty() -> Self {
150 Self {
151 format: std::ptr::null_mut(),
152 name: std::ptr::null_mut(),
153 metadata: std::ptr::null_mut(),
154 flags: 0,
155 n_children: 0,
156 children: ptr::null_mut(),
157 dictionary: std::ptr::null_mut(),
158 release: None,
159 private_data: std::ptr::null_mut(),
160 }
161 }
162
163 pub fn is_null(&self) -> bool {
164 self.private_data.is_null()
165 }
166
167 pub(crate) fn format(&self) -> &str {
169 assert!(!self.format.is_null());
170 unsafe { CStr::from_ptr(self.format) }
172 .to_str()
173 .expect("The external API has a non-utf8 as format")
174 }
175
176 pub(crate) fn name(&self) -> &str {
180 if self.name.is_null() {
181 return "";
182 }
183 unsafe { CStr::from_ptr(self.name) }.to_str().unwrap()
185 }
186
187 pub(crate) fn child(&self, index: usize) -> &'static Self {
188 assert!(index < self.n_children as usize);
189 unsafe { self.children.add(index).as_ref().unwrap().as_ref().unwrap() }
190 }
191
192 pub(crate) fn dictionary(&self) -> Option<&'static Self> {
193 if self.dictionary.is_null() {
194 return None;
195 };
196 Some(unsafe { self.dictionary.as_ref().unwrap() })
197 }
198
199 pub(crate) fn nullable(&self) -> bool {
200 (self.flags / 2) & 1 == 1
201 }
202}
203
204impl Drop for ArrowSchema {
205 fn drop(&mut self) {
206 match self.release {
207 None => (),
208 Some(release) => unsafe { release(self) },
209 };
210 }
211}
212
213pub(crate) unsafe fn to_field(schema: &ArrowSchema) -> PolarsResult<Field> {
214 let dictionary = schema.dictionary();
215 let dtype = if let Some(dictionary) = dictionary {
216 let indices = to_integer_type(schema.format())?;
217 let values = to_field(dictionary)?;
218 let is_ordered = schema.flags & 1 == 1;
219 ArrowDataType::Dictionary(indices, Box::new(values.dtype().clone()), is_ordered)
220 } else {
221 to_dtype(schema)?
222 };
223 let (metadata, extension) = unsafe { metadata_from_bytes(schema.metadata) };
224
225 let dtype = if let Some((name, extension_metadata)) = extension {
226 ArrowDataType::Extension(Box::new(ExtensionType {
227 name,
228 inner: dtype,
229 metadata: extension_metadata,
230 }))
231 } else {
232 dtype
233 };
234
235 Ok(Field::new(
236 PlSmallStr::from_str(schema.name()),
237 dtype,
238 schema.nullable(),
239 )
240 .with_metadata(metadata))
241}
242
243fn to_integer_type(format: &str) -> PolarsResult<IntegerType> {
244 use IntegerType::*;
245 Ok(match format {
246 "c" => Int8,
247 "C" => UInt8,
248 "s" => Int16,
249 "S" => UInt16,
250 "i" => Int32,
251 "I" => UInt32,
252 "l" => Int64,
253 "L" => UInt64,
254 _ => {
255 polars_bail!(
256 ComputeError:
257 "dictionary indices can only be integers"
258 )
259 },
260 })
261}
262
263unsafe fn to_dtype(schema: &ArrowSchema) -> PolarsResult<ArrowDataType> {
264 Ok(match schema.format() {
265 "n" => ArrowDataType::Null,
266 "b" => ArrowDataType::Boolean,
267 "c" => ArrowDataType::Int8,
268 "C" => ArrowDataType::UInt8,
269 "s" => ArrowDataType::Int16,
270 "S" => ArrowDataType::UInt16,
271 "i" => ArrowDataType::Int32,
272 "I" => ArrowDataType::UInt32,
273 "l" => ArrowDataType::Int64,
274 "L" => ArrowDataType::UInt64,
275 "e" => ArrowDataType::Float16,
276 "f" => ArrowDataType::Float32,
277 "g" => ArrowDataType::Float64,
278 "z" => ArrowDataType::Binary,
279 "Z" => ArrowDataType::LargeBinary,
280 "u" => ArrowDataType::Utf8,
281 "U" => ArrowDataType::LargeUtf8,
282 "tdD" => ArrowDataType::Date32,
283 "tdm" => ArrowDataType::Date64,
284 "tts" => ArrowDataType::Time32(TimeUnit::Second),
285 "ttm" => ArrowDataType::Time32(TimeUnit::Millisecond),
286 "ttu" => ArrowDataType::Time64(TimeUnit::Microsecond),
287 "ttn" => ArrowDataType::Time64(TimeUnit::Nanosecond),
288 "tDs" => ArrowDataType::Duration(TimeUnit::Second),
289 "tDm" => ArrowDataType::Duration(TimeUnit::Millisecond),
290 "tDu" => ArrowDataType::Duration(TimeUnit::Microsecond),
291 "tDn" => ArrowDataType::Duration(TimeUnit::Nanosecond),
292 "tiM" => ArrowDataType::Interval(IntervalUnit::YearMonth),
293 "tiD" => ArrowDataType::Interval(IntervalUnit::DayTime),
294 "vu" => ArrowDataType::Utf8View,
295 "vz" => ArrowDataType::BinaryView,
296 "+l" => {
297 let child = schema.child(0);
298 ArrowDataType::List(Box::new(to_field(child)?))
299 },
300 "+L" => {
301 let child = schema.child(0);
302 ArrowDataType::LargeList(Box::new(to_field(child)?))
303 },
304 "+m" => {
305 let child = schema.child(0);
306
307 let is_sorted = (schema.flags & 4) != 0;
308 ArrowDataType::Map(Box::new(to_field(child)?), is_sorted)
309 },
310 "+s" => {
311 let children = (0..schema.n_children as usize)
312 .map(|x| to_field(schema.child(x)))
313 .collect::<PolarsResult<Vec<_>>>()?;
314 ArrowDataType::Struct(children)
315 },
316 other => {
317 match other.splitn(2, ':').collect::<Vec<_>>()[..] {
318 ["tss", ""] => ArrowDataType::Timestamp(TimeUnit::Second, None),
320 ["tsm", ""] => ArrowDataType::Timestamp(TimeUnit::Millisecond, None),
321 ["tsu", ""] => ArrowDataType::Timestamp(TimeUnit::Microsecond, None),
322 ["tsn", ""] => ArrowDataType::Timestamp(TimeUnit::Nanosecond, None),
323
324 ["tss", tz] => {
326 ArrowDataType::Timestamp(TimeUnit::Second, Some(PlSmallStr::from_str(tz)))
327 },
328 ["tsm", tz] => {
329 ArrowDataType::Timestamp(TimeUnit::Millisecond, Some(PlSmallStr::from_str(tz)))
330 },
331 ["tsu", tz] => {
332 ArrowDataType::Timestamp(TimeUnit::Microsecond, Some(PlSmallStr::from_str(tz)))
333 },
334 ["tsn", tz] => {
335 ArrowDataType::Timestamp(TimeUnit::Nanosecond, Some(PlSmallStr::from_str(tz)))
336 },
337
338 ["w", size_raw] => {
339 let size = size_raw
341 .parse::<usize>()
342 .map_err(|_| polars_err!(ComputeError: "size is not a valid integer"))?;
343 ArrowDataType::FixedSizeBinary(size)
344 },
345 ["+w", size_raw] => {
346 let size = size_raw
348 .parse::<usize>()
349 .map_err(|_| polars_err!(ComputeError: "size is not a valid integer"))?;
350 let child = to_field(schema.child(0))?;
351 ArrowDataType::FixedSizeList(Box::new(child), size)
352 },
353 ["d", raw] => {
354 let (precision, scale) = match raw.split(',').collect::<Vec<_>>()[..] {
356 [precision_raw, scale_raw] => {
357 (precision_raw, scale_raw)
359 },
360 [precision_raw, scale_raw, width_raw] => {
361 let bit_width = width_raw.parse::<usize>().map_err(|_| {
364 polars_err!(ComputeError: "Decimal bit width is not a valid integer")
365 })?;
366 if bit_width == 256 {
367 return Ok(ArrowDataType::Decimal256(
368 precision_raw.parse::<usize>().map_err(|_| {
369 polars_err!(ComputeError: "Decimal precision is not a valid integer")
370 })?,
371 scale_raw.parse::<usize>().map_err(|_| {
372 polars_err!(ComputeError: "Decimal scale is not a valid integer")
373 })?,
374 ));
375 }
376 (precision_raw, scale_raw)
377 },
378 _ => {
379 polars_bail!(ComputeError:
380 "Decimal must contain 2 or 3 comma-separated values"
381 )
382 },
383 };
384
385 ArrowDataType::Decimal(
386 precision.parse::<usize>().map_err(|_| {
387 polars_err!(ComputeError:
388 "Decimal precision is not a valid integer"
389 )
390 })?,
391 scale.parse::<usize>().map_err(|_| {
392 polars_err!(ComputeError:
393 "Decimal scale is not a valid integer"
394 )
395 })?,
396 )
397 },
398 [union_type @ "+us", union_parts] | [union_type @ "+ud", union_parts] => {
399 let mode = UnionMode::sparse(union_type == "+us");
403 let type_ids = union_parts
404 .split(',')
405 .map(|x| {
406 x.parse::<i32>().map_err(|_| {
407 polars_err!(ComputeError:
408 "Union type id is not a valid integer"
409 )
410 })
411 })
412 .collect::<PolarsResult<Vec<_>>>()?;
413 let fields = (0..schema.n_children as usize)
414 .map(|x| to_field(schema.child(x)))
415 .collect::<PolarsResult<Vec<_>>>()?;
416 ArrowDataType::Union(Box::new(UnionType {
417 fields,
418 ids: Some(type_ids),
419 mode,
420 }))
421 },
422 _ => {
423 polars_bail!(ComputeError:
424 "The datatype \"{other}\" is still not supported in Rust implementation",
425 )
426 },
427 }
428 },
429 })
430}
431
432fn to_format(dtype: &ArrowDataType) -> String {
434 match dtype {
435 ArrowDataType::Null => "n".to_string(),
436 ArrowDataType::Boolean => "b".to_string(),
437 ArrowDataType::Int8 => "c".to_string(),
438 ArrowDataType::UInt8 => "C".to_string(),
439 ArrowDataType::Int16 => "s".to_string(),
440 ArrowDataType::UInt16 => "S".to_string(),
441 ArrowDataType::Int32 => "i".to_string(),
442 ArrowDataType::UInt32 => "I".to_string(),
443 ArrowDataType::Int64 => "l".to_string(),
444 ArrowDataType::UInt64 => "L".to_string(),
445 ArrowDataType::Int128 => "_pli128".to_string(),
447 ArrowDataType::Float16 => "e".to_string(),
448 ArrowDataType::Float32 => "f".to_string(),
449 ArrowDataType::Float64 => "g".to_string(),
450 ArrowDataType::Binary => "z".to_string(),
451 ArrowDataType::LargeBinary => "Z".to_string(),
452 ArrowDataType::Utf8 => "u".to_string(),
453 ArrowDataType::LargeUtf8 => "U".to_string(),
454 ArrowDataType::Date32 => "tdD".to_string(),
455 ArrowDataType::Date64 => "tdm".to_string(),
456 ArrowDataType::Time32(TimeUnit::Second) => "tts".to_string(),
457 ArrowDataType::Time32(TimeUnit::Millisecond) => "ttm".to_string(),
458 ArrowDataType::Time32(_) => {
459 unreachable!("Time32 is only supported for seconds and milliseconds")
460 },
461 ArrowDataType::Time64(TimeUnit::Microsecond) => "ttu".to_string(),
462 ArrowDataType::Time64(TimeUnit::Nanosecond) => "ttn".to_string(),
463 ArrowDataType::Time64(_) => {
464 unreachable!("Time64 is only supported for micro and nanoseconds")
465 },
466 ArrowDataType::Duration(TimeUnit::Second) => "tDs".to_string(),
467 ArrowDataType::Duration(TimeUnit::Millisecond) => "tDm".to_string(),
468 ArrowDataType::Duration(TimeUnit::Microsecond) => "tDu".to_string(),
469 ArrowDataType::Duration(TimeUnit::Nanosecond) => "tDn".to_string(),
470 ArrowDataType::Interval(IntervalUnit::YearMonth) => "tiM".to_string(),
471 ArrowDataType::Interval(IntervalUnit::DayTime) => "tiD".to_string(),
472 ArrowDataType::Interval(IntervalUnit::MonthDayNano) => {
473 todo!("Spec for FFI for MonthDayNano still not defined.")
474 },
475 ArrowDataType::Timestamp(unit, tz) => {
476 let unit = match unit {
477 TimeUnit::Second => "s",
478 TimeUnit::Millisecond => "m",
479 TimeUnit::Microsecond => "u",
480 TimeUnit::Nanosecond => "n",
481 };
482 format!(
483 "ts{}:{}",
484 unit,
485 tz.as_ref().map(|x| x.as_str()).unwrap_or("")
486 )
487 },
488 ArrowDataType::Utf8View => "vu".to_string(),
489 ArrowDataType::BinaryView => "vz".to_string(),
490 ArrowDataType::Decimal(precision, scale) => format!("d:{precision},{scale}"),
491 ArrowDataType::Decimal256(precision, scale) => format!("d:{precision},{scale},256"),
492 ArrowDataType::List(_) => "+l".to_string(),
493 ArrowDataType::LargeList(_) => "+L".to_string(),
494 ArrowDataType::Struct(_) => "+s".to_string(),
495 ArrowDataType::FixedSizeBinary(size) => format!("w:{size}"),
496 ArrowDataType::FixedSizeList(_, size) => format!("+w:{size}"),
497 ArrowDataType::Union(u) => {
498 let sparsness = if u.mode.is_sparse() { 's' } else { 'd' };
499 let mut r = format!("+u{sparsness}:");
500 let ids = if let Some(ids) = &u.ids {
501 ids.iter()
502 .fold(String::new(), |a, b| a + b.to_string().as_str() + ",")
503 } else {
504 (0..u.fields.len()).fold(String::new(), |a, b| a + b.to_string().as_str() + ",")
505 };
506 let ids = &ids[..ids.len() - 1]; r.push_str(ids);
508 r
509 },
510 ArrowDataType::Map(_, _) => "+m".to_string(),
511 ArrowDataType::Dictionary(index, _, _) => to_format(&(*index).into()),
512 ArrowDataType::Extension(ext) => to_format(&ext.inner),
513 ArrowDataType::Unknown => unimplemented!(),
514 }
515}
516
517pub(super) fn get_child(dtype: &ArrowDataType, index: usize) -> PolarsResult<ArrowDataType> {
518 match (index, dtype) {
519 (0, ArrowDataType::List(field)) => Ok(field.dtype().clone()),
520 (0, ArrowDataType::FixedSizeList(field, _)) => Ok(field.dtype().clone()),
521 (0, ArrowDataType::LargeList(field)) => Ok(field.dtype().clone()),
522 (0, ArrowDataType::Map(field, _)) => Ok(field.dtype().clone()),
523 (index, ArrowDataType::Struct(fields)) => Ok(fields[index].dtype().clone()),
524 (index, ArrowDataType::Union(u)) => Ok(u.fields[index].dtype().clone()),
525 (index, ArrowDataType::Extension(ext)) => get_child(&ext.inner, index),
526 (child, dtype) => polars_bail!(ComputeError:
527 "Requested child {child} to type {dtype:?} that has no such child",
528 ),
529 }
530}
531
532fn metadata_to_bytes(metadata: &BTreeMap<PlSmallStr, PlSmallStr>) -> Vec<u8> {
533 let a = (metadata.len() as i32).to_ne_bytes().to_vec();
534 metadata.iter().fold(a, |mut acc, (key, value)| {
535 acc.extend((key.len() as i32).to_ne_bytes());
536 acc.extend(key.as_bytes());
537 acc.extend((value.len() as i32).to_ne_bytes());
538 acc.extend(value.as_bytes());
539 acc
540 })
541}
542
/// Reads a native-endian `i32` from the four bytes starting at `ptr`.
///
/// # Safety
/// `ptr` must be valid for reading four bytes; no particular alignment is required.
unsafe fn read_ne_i32(ptr: *const u8) -> i32 {
    let mut raw = [0u8; 4];
    std::ptr::copy_nonoverlapping(ptr, raw.as_mut_ptr(), raw.len());
    i32::from_ne_bytes(raw)
}
547
548unsafe fn read_bytes(ptr: *const u8, len: usize) -> &'static str {
549 let slice = std::slice::from_raw_parts(ptr, len);
550 simdutf8::basic::from_utf8(slice).unwrap()
551}
552
553unsafe fn metadata_from_bytes(data: *const ::std::os::raw::c_char) -> (Metadata, Extension) {
554 let mut data = data as *const u8; if data.is_null() {
556 return (Metadata::default(), None);
557 };
558 let len = read_ne_i32(data);
559 data = data.add(4);
560
561 let mut result = BTreeMap::new();
562 let mut extension_name = None;
563 let mut extension_metadata = None;
564 for _ in 0..len {
565 let key_len = read_ne_i32(data) as usize;
566 data = data.add(4);
567 let key = read_bytes(data, key_len);
568 data = data.add(key_len);
569 let value_len = read_ne_i32(data) as usize;
570 data = data.add(4);
571 let value = read_bytes(data, value_len);
572 data = data.add(value_len);
573 match key {
574 "ARROW:extension:name" => {
575 extension_name = Some(PlSmallStr::from_str(value));
576 },
577 "ARROW:extension:metadata" => {
578 extension_metadata = Some(PlSmallStr::from_str(value));
579 },
580 _ => {
581 result.insert(PlSmallStr::from_str(key), PlSmallStr::from_str(value));
582 },
583 };
584 }
585 let extension = extension_name.map(|name| (name, extension_metadata));
586 (result, extension)
587}
588
#[cfg(test)]
mod tests {
    use super::*;

    /// Shorthand for building a field with a static name.
    fn field(name: &'static str, dtype: ArrowDataType, is_nullable: bool) -> Field {
        Field::new(PlSmallStr::from_static(name), dtype, is_nullable)
    }

    /// Non-nullable boolean child used by the list-like types.
    fn example_child() -> Box<Field> {
        Box::new(field("example", ArrowDataType::Boolean, false))
    }

    /// Two fields, the second one nested, shared by the struct and union cases.
    fn nested_fields() -> Vec<Field> {
        vec![
            field("a", ArrowDataType::Int64, true),
            field(
                "b",
                ArrowDataType::List(Box::new(field("item", ArrowDataType::Int32, true))),
                true,
            ),
        ]
    }

    /// Exports each data type to an `ArrowSchema` and checks that importing the schema
    /// yields the same data type.
    #[test]
    fn test_all() {
        let mut dts = vec![
            ArrowDataType::Null,
            ArrowDataType::Boolean,
            ArrowDataType::UInt8,
            ArrowDataType::UInt16,
            ArrowDataType::UInt32,
            ArrowDataType::UInt64,
            ArrowDataType::Int8,
            ArrowDataType::Int16,
            ArrowDataType::Int32,
            ArrowDataType::Int64,
            ArrowDataType::Float16,
            ArrowDataType::Float32,
            ArrowDataType::Float64,
            ArrowDataType::Date32,
            ArrowDataType::Date64,
            ArrowDataType::Time32(TimeUnit::Second),
            ArrowDataType::Time32(TimeUnit::Millisecond),
            ArrowDataType::Time64(TimeUnit::Microsecond),
            ArrowDataType::Time64(TimeUnit::Nanosecond),
            ArrowDataType::Decimal(5, 5),
            ArrowDataType::Decimal256(5, 5),
            ArrowDataType::Utf8,
            ArrowDataType::LargeUtf8,
            ArrowDataType::Utf8View,
            ArrowDataType::Binary,
            ArrowDataType::LargeBinary,
            ArrowDataType::BinaryView,
            ArrowDataType::FixedSizeBinary(2),
            ArrowDataType::List(example_child()),
            ArrowDataType::FixedSizeList(example_child(), 2),
            ArrowDataType::LargeList(example_child()),
            ArrowDataType::Struct(nested_fields()),
            ArrowDataType::Map(Box::new(field("a", ArrowDataType::Int64, true)), true),
            ArrowDataType::Union(Box::new(UnionType {
                fields: nested_fields(),
                ids: Some(vec![1, 2]),
                mode: UnionMode::Dense,
            })),
            ArrowDataType::Union(Box::new(UnionType {
                fields: nested_fields(),
                ids: Some(vec![0, 1]),
                mode: UnionMode::Sparse,
            })),
        ];
        for time_unit in [
            TimeUnit::Second,
            TimeUnit::Millisecond,
            TimeUnit::Microsecond,
            TimeUnit::Nanosecond,
        ] {
            dts.push(ArrowDataType::Timestamp(time_unit, None));
            dts.push(ArrowDataType::Timestamp(
                time_unit,
                Some(PlSmallStr::from_static("00:00")),
            ));
            dts.push(ArrowDataType::Duration(time_unit));
        }
        // `MonthDayNano` is left out because exporting it is not implemented.
        for interval_type in [IntervalUnit::DayTime, IntervalUnit::YearMonth] {
            dts.push(ArrowDataType::Interval(interval_type));
        }

        for expected in dts {
            let field = field("a", expected.clone(), true);
            let schema = ArrowSchema::new(&field);
            let result = unsafe { super::to_dtype(&schema).unwrap() };
            assert_eq!(result, expected);
        }
    }
}