polars_core/frame/
horizontal.rs

1use polars_error::{polars_ensure, polars_err, PolarsResult};
2use polars_utils::aliases::PlHashSet;
3
4use super::Column;
5use crate::datatypes::AnyValue;
6use crate::frame::DataFrame;
7use crate::prelude::PlSmallStr;
8
9fn check_hstack(
10    col: &Column,
11    names: &mut PlHashSet<PlSmallStr>,
12    height: usize,
13    is_empty: bool,
14) -> PolarsResult<()> {
15    polars_ensure!(
16        col.len() == height || is_empty,
17        ShapeMismatch: "unable to hstack Series of length {} and DataFrame of height {}",
18        col.len(), height,
19    );
20    polars_ensure!(
21        names.insert(col.name().clone()),
22        Duplicate: "unable to hstack, column with name {:?} already exists", col.name().as_str(),
23    );
24    Ok(())
25}
26
27impl DataFrame {
28    /// Add columns horizontally.
29    ///
30    /// # Safety
31    /// The caller must ensure:
32    /// - the length of all [`Column`] is equal to the height of this [`DataFrame`]
33    /// - the columns names are unique
34    pub unsafe fn hstack_mut_unchecked(&mut self, columns: &[Column]) -> &mut Self {
35        // If we don't have any columns yet, copy the height from the given columns.
36        if let Some(fst) = columns.first() {
37            if self.width() == 0 {
38                // SAFETY: The functions invariants asks for all columns to be the same length so
39                // that makes that a valid height.
40                unsafe { self.set_height(fst.len()) };
41            }
42        }
43
44        self.clear_schema();
45        self.columns.extend_from_slice(columns);
46        self
47    }
48
49    /// Add multiple [`Column`] to a [`DataFrame`].
50    /// The added `Series` are required to have the same length.
51    ///
52    /// # Example
53    ///
54    /// ```rust
55    /// # use polars_core::prelude::*;
56    /// fn stack(df: &mut DataFrame, columns: &[Column]) {
57    ///     df.hstack_mut(columns);
58    /// }
59    /// ```
60    pub fn hstack_mut(&mut self, columns: &[Column]) -> PolarsResult<&mut Self> {
61        let mut names = self
62            .columns
63            .iter()
64            .map(|c| c.name().clone())
65            .collect::<PlHashSet<_>>();
66
67        let height = self.height();
68        let is_empty = self.is_empty();
69        // first loop check validity. We don't do this in a single pass otherwise
70        // this DataFrame is already modified when an error occurs.
71        for col in columns {
72            check_hstack(col, &mut names, height, is_empty)?;
73        }
74        drop(names);
75        Ok(unsafe { self.hstack_mut_unchecked(columns) })
76    }
77}
78/// Concat [`DataFrame`]s horizontally.
79/// Concat horizontally and extend with null values if lengths don't match
80pub fn concat_df_horizontal(dfs: &[DataFrame], check_duplicates: bool) -> PolarsResult<DataFrame> {
81    let output_height = dfs
82        .iter()
83        .map(|df| df.height())
84        .max()
85        .ok_or_else(|| polars_err!(ComputeError: "cannot concat empty dataframes"))?;
86
87    let owned_df;
88
89    // if not all equal length, extend the DataFrame with nulls
90    let dfs = if !dfs.iter().all(|df| df.height() == output_height) {
91        owned_df = dfs
92            .iter()
93            .cloned()
94            .map(|mut df| {
95                if df.height() != output_height {
96                    let diff = output_height - df.height();
97
98                    // SAFETY: We extend each column with nulls to the point of being of length
99                    // `output_height`. Then, we set the height of the resulting dataframe.
100                    unsafe { df.get_columns_mut() }.iter_mut().for_each(|c| {
101                        *c = c.extend_constant(AnyValue::Null, diff).unwrap();
102                    });
103                    df.clear_schema();
104                    unsafe {
105                        df.set_height(output_height);
106                    }
107                }
108                df
109            })
110            .collect::<Vec<_>>();
111        owned_df.as_slice()
112    } else {
113        dfs
114    };
115
116    let mut first_df = dfs[0].clone();
117    let height = first_df.height();
118    let is_empty = first_df.is_empty();
119
120    let mut names = if check_duplicates {
121        first_df
122            .columns
123            .iter()
124            .map(|s| s.name().clone())
125            .collect::<PlHashSet<_>>()
126    } else {
127        Default::default()
128    };
129
130    for df in &dfs[1..] {
131        let cols = df.get_columns();
132
133        if check_duplicates {
134            for col in cols {
135                check_hstack(col, &mut names, height, is_empty)?;
136            }
137        }
138
139        unsafe { first_df.hstack_mut_unchecked(cols) };
140    }
141    Ok(first_df)
142}