polars_arrow/compute/aggregate/
memory.rs1use crate::array::*;
2use crate::bitmap::Bitmap;
3use crate::datatypes::PhysicalType;
4pub use crate::types::PrimitiveType;
5use crate::{match_integer_type, with_match_primitive_type_full};
6fn validity_size(validity: Option<&Bitmap>) -> usize {
7 validity.as_ref().map(|b| b.as_slice().0.len()).unwrap_or(0)
8}
9
/// Estimates the size in bytes of an offset-based binary/string array.
///
/// * `$array` - expression yielding something with `as_any()` (a `&dyn Array`
///   at the call sites in this file).
/// * `$ty` - concrete array type to downcast to.
/// * `$o` - offset type; its size is used to weigh the offsets buffer.
///
/// The result is the span of values addressed by the first and last offset,
/// plus the offsets buffer, plus the validity bitmap.
///
/// Panics if the downcast to `$ty` fails.
macro_rules! dyn_binary {
    ($array:expr, $ty:ty, $o:ty) => {{
        let typed = $array.as_any().downcast_ref::<$ty>().unwrap();
        let offset_buf = typed.offsets().buffer();

        // Only the range of values actually addressed by the offsets counts.
        let first = offset_buf[0] as usize;
        let last = offset_buf[offset_buf.len() - 1] as usize;

        let values_bytes = last - first;
        let offsets_bytes = offset_buf.len() * size_of::<$o>();
        values_bytes + offsets_bytes + validity_size(typed.validity())
    }};
}
25
26fn binview_size<T: ViewType + ?Sized>(array: &BinaryViewArrayGeneric<T>) -> usize {
27 array.total_bytes_len()
30}
31
32pub fn estimated_bytes_size(array: &dyn Array) -> usize {
44 use PhysicalType::*;
45 match array.dtype().to_physical_type() {
46 Null => 0,
47 Boolean => {
48 let array = array.as_any().downcast_ref::<BooleanArray>().unwrap();
49 array.values().as_slice().0.len() + validity_size(array.validity())
50 },
51 Primitive(PrimitiveType::DaysMs) => {
52 let array = array.as_any().downcast_ref::<DaysMsArray>().unwrap();
53 array.values().len() * size_of::<i32>() * 2 + validity_size(array.validity())
54 },
55 Primitive(primitive) => with_match_primitive_type_full!(primitive, |$T| {
56 let array = array
57 .as_any()
58 .downcast_ref::<PrimitiveArray<$T>>()
59 .unwrap();
60
61 array.values().len() * size_of::<$T>() + validity_size(array.validity())
62 }),
63 Binary => dyn_binary!(array, BinaryArray<i32>, i32),
64 FixedSizeBinary => {
65 let array = array
66 .as_any()
67 .downcast_ref::<FixedSizeBinaryArray>()
68 .unwrap();
69 array.values().len() + validity_size(array.validity())
70 },
71 LargeBinary => dyn_binary!(array, BinaryArray<i64>, i64),
72 Utf8 => dyn_binary!(array, Utf8Array<i32>, i32),
73 LargeUtf8 => dyn_binary!(array, Utf8Array<i64>, i64),
74 List => {
75 let array = array.as_any().downcast_ref::<ListArray<i32>>().unwrap();
76 estimated_bytes_size(array.values().as_ref())
77 + array.offsets().len_proxy() * size_of::<i32>()
78 + validity_size(array.validity())
79 },
80 FixedSizeList => {
81 let array = array.as_any().downcast_ref::<FixedSizeListArray>().unwrap();
82 estimated_bytes_size(array.values().as_ref()) + validity_size(array.validity())
83 },
84 LargeList => {
85 let array = array.as_any().downcast_ref::<ListArray<i64>>().unwrap();
86 estimated_bytes_size(array.values().as_ref())
87 + array.offsets().len_proxy() * size_of::<i64>()
88 + validity_size(array.validity())
89 },
90 Struct => {
91 let array = array.as_any().downcast_ref::<StructArray>().unwrap();
92 array
93 .values()
94 .iter()
95 .map(|x| x.as_ref())
96 .map(estimated_bytes_size)
97 .sum::<usize>()
98 + validity_size(array.validity())
99 },
100 Union => {
101 let array = array.as_any().downcast_ref::<UnionArray>().unwrap();
102 let types = array.types().len() * size_of::<i8>();
103 let offsets = array
104 .offsets()
105 .as_ref()
106 .map(|x| x.len() * size_of::<i32>())
107 .unwrap_or_default();
108 let fields = array
109 .fields()
110 .iter()
111 .map(|x| x.as_ref())
112 .map(estimated_bytes_size)
113 .sum::<usize>();
114 types + offsets + fields
115 },
116 Dictionary(key_type) => match_integer_type!(key_type, |$T| {
117 let array = array
118 .as_any()
119 .downcast_ref::<DictionaryArray<$T>>()
120 .unwrap();
121 estimated_bytes_size(array.keys()) + estimated_bytes_size(array.values().as_ref())
122 }),
123 Utf8View => binview_size::<str>(array.as_any().downcast_ref().unwrap()),
124 BinaryView => binview_size::<[u8]>(array.as_any().downcast_ref().unwrap()),
125 Map => {
126 let array = array.as_any().downcast_ref::<MapArray>().unwrap();
127 let offsets = array.offsets().len_proxy() * size_of::<i32>();
128 offsets + estimated_bytes_size(array.field().as_ref()) + validity_size(array.validity())
129 },
130 }
131}