polars_core/frame/
horizontal.rs1use polars_error::{polars_ensure, polars_err, PolarsResult};
2use polars_utils::aliases::PlHashSet;
3
4use super::Column;
5use crate::datatypes::AnyValue;
6use crate::frame::DataFrame;
7use crate::prelude::PlSmallStr;
8
9fn check_hstack(
10 col: &Column,
11 names: &mut PlHashSet<PlSmallStr>,
12 height: usize,
13 is_empty: bool,
14) -> PolarsResult<()> {
15 polars_ensure!(
16 col.len() == height || is_empty,
17 ShapeMismatch: "unable to hstack Series of length {} and DataFrame of height {}",
18 col.len(), height,
19 );
20 polars_ensure!(
21 names.insert(col.name().clone()),
22 Duplicate: "unable to hstack, column with name {:?} already exists", col.name().as_str(),
23 );
24 Ok(())
25}
26
27impl DataFrame {
28 pub unsafe fn hstack_mut_unchecked(&mut self, columns: &[Column]) -> &mut Self {
35 if let Some(fst) = columns.first() {
37 if self.width() == 0 {
38 unsafe { self.set_height(fst.len()) };
41 }
42 }
43
44 self.clear_schema();
45 self.columns.extend_from_slice(columns);
46 self
47 }
48
49 pub fn hstack_mut(&mut self, columns: &[Column]) -> PolarsResult<&mut Self> {
61 let mut names = self
62 .columns
63 .iter()
64 .map(|c| c.name().clone())
65 .collect::<PlHashSet<_>>();
66
67 let height = self.height();
68 let is_empty = self.is_empty();
69 for col in columns {
72 check_hstack(col, &mut names, height, is_empty)?;
73 }
74 drop(names);
75 Ok(unsafe { self.hstack_mut_unchecked(columns) })
76 }
77}
78pub fn concat_df_horizontal(dfs: &[DataFrame], check_duplicates: bool) -> PolarsResult<DataFrame> {
81 let output_height = dfs
82 .iter()
83 .map(|df| df.height())
84 .max()
85 .ok_or_else(|| polars_err!(ComputeError: "cannot concat empty dataframes"))?;
86
87 let owned_df;
88
89 let dfs = if !dfs.iter().all(|df| df.height() == output_height) {
91 owned_df = dfs
92 .iter()
93 .cloned()
94 .map(|mut df| {
95 if df.height() != output_height {
96 let diff = output_height - df.height();
97
98 unsafe { df.get_columns_mut() }.iter_mut().for_each(|c| {
101 *c = c.extend_constant(AnyValue::Null, diff).unwrap();
102 });
103 df.clear_schema();
104 unsafe {
105 df.set_height(output_height);
106 }
107 }
108 df
109 })
110 .collect::<Vec<_>>();
111 owned_df.as_slice()
112 } else {
113 dfs
114 };
115
116 let mut first_df = dfs[0].clone();
117 let height = first_df.height();
118 let is_empty = first_df.is_empty();
119
120 let mut names = if check_duplicates {
121 first_df
122 .columns
123 .iter()
124 .map(|s| s.name().clone())
125 .collect::<PlHashSet<_>>()
126 } else {
127 Default::default()
128 };
129
130 for df in &dfs[1..] {
131 let cols = df.get_columns();
132
133 if check_duplicates {
134 for col in cols {
135 check_hstack(col, &mut names, height, is_empty)?;
136 }
137 }
138
139 unsafe { first_df.hstack_mut_unchecked(cols) };
140 }
141 Ok(first_df)
142}