polars_arrow/legacy/kernels/
sort_partition.rs1use std::fmt::Debug;
2
3use polars_utils::IdxSize;
4
5use crate::types::NativeType;
6
7fn find_partition_points<T>(values: &[T], n: usize, descending: bool) -> Vec<usize>
9where
10 T: Debug + NativeType,
11{
12 let len = values.len();
13 if n > len {
14 return find_partition_points(values, len / 2, descending);
15 }
16 if n < 2 {
17 return vec![];
18 }
19 let chunk_size = len / n;
20
21 let mut partition_points = Vec::with_capacity(n + 1);
22
23 let mut start_idx = 0;
24 loop {
25 let end_idx = start_idx + chunk_size;
26 if end_idx >= len {
27 break;
28 }
29 let part = &values[start_idx..end_idx];
32
33 let latest_val = values[end_idx];
34 let idx = if descending {
35 part.partition_point(|v| v.tot_gt(&latest_val))
36 } else {
37 part.partition_point(|v| v.tot_lt(&latest_val))
38 };
39
40 if idx != 0 {
41 partition_points.push(idx + start_idx)
42 }
43
44 start_idx += chunk_size;
45 }
46 partition_points
47}
48
49pub fn create_clean_partitions<T>(values: &[T], n: usize, descending: bool) -> Vec<&[T]>
50where
51 T: Debug + NativeType,
52{
53 let part_idx = find_partition_points(values, n, descending);
54 let mut out = Vec::with_capacity(n + 1);
55
56 let mut start_idx = 0_usize;
57 for end_idx in part_idx {
58 if end_idx != start_idx {
59 out.push(&values[start_idx..end_idx]);
60 start_idx = end_idx;
61 }
62 }
63 let latest = &values[start_idx..];
64 if !latest.is_empty() {
65 out.push(latest)
66 }
67
68 out
69}
70
71pub fn partition_to_groups_amortized<T>(
72 values: &[T],
73 first_group_offset: IdxSize,
74 nulls_first: bool,
75 offset: IdxSize,
76 out: &mut Vec<[IdxSize; 2]>,
77) where
78 T: Debug + NativeType,
79{
80 if let Some(mut first) = values.first() {
81 out.clear();
82 if nulls_first && first_group_offset > 0 {
83 out.push([0, first_group_offset])
84 }
85
86 let mut first_idx = if nulls_first { first_group_offset } else { 0 } + offset;
87
88 for val in values {
89 if val.tot_ne(first) {
91 let val_ptr = val as *const T;
92 let first_ptr = first as *const T;
93
94 let len = unsafe { val_ptr.offset_from(first_ptr) } as IdxSize;
97 out.push([first_idx, len]);
98 first_idx += len;
99 first = val;
100 }
101 }
102 if nulls_first {
104 out.push([
105 first_idx,
106 values.len() as IdxSize + first_group_offset - first_idx,
107 ]);
108 } else {
109 out.push([first_idx, values.len() as IdxSize - (first_idx - offset)]);
110 }
111
112 if !nulls_first && first_group_offset > 0 {
113 out.push([values.len() as IdxSize + offset, first_group_offset])
114 }
115 }
116}
117
118pub fn partition_to_groups<T>(
123 values: &[T],
124 first_group_offset: IdxSize,
125 nulls_first: bool,
126 offset: IdxSize,
127) -> Vec<[IdxSize; 2]>
128where
129 T: Debug + NativeType,
130{
131 if values.is_empty() {
132 return vec![];
133 }
134 let mut out = Vec::with_capacity(values.len() / 10);
135 partition_to_groups_amortized(values, first_group_offset, nulls_first, offset, &mut out);
136 out
137}
138
139#[cfg(test)]
140mod test {
141 use super::*;
142
143 #[test]
144 fn test_partition_points() {
145 let values = &[1, 3, 3, 3, 3, 5, 5, 5, 9, 9, 10];
146
147 assert_eq!(find_partition_points(values, 4, false), &[1, 5, 8, 10]);
148 assert_eq!(
149 partition_to_groups(values, 0, true, 0),
150 &[[0, 1], [1, 4], [5, 3], [8, 2], [10, 1]]
151 );
152 assert_eq!(
153 partition_to_groups(values, 5, true, 0),
154 &[[0, 5], [5, 1], [6, 4], [10, 3], [13, 2], [15, 1]]
155 );
156 assert_eq!(
157 partition_to_groups(values, 5, false, 0),
158 &[[0, 1], [1, 4], [5, 3], [8, 2], [10, 1], [11, 5]]
159 );
160 }
161}