diff --git a/crates/polars-core/src/frame/explode.rs b/crates/polars-core/src/frame/explode.rs index e5e50fcaaa00..c772031f01da 100644 --- a/crates/polars-core/src/frame/explode.rs +++ b/crates/polars-core/src/frame/explode.rs @@ -1,4 +1,3 @@ -use arrow::legacy::kernels::concatenate::concatenate_owned_unchecked; use arrow::offset::OffsetsBuffer; use rayon::prelude::*; use smartstring::alias::String as SmartString; @@ -6,7 +5,6 @@ use smartstring::alias::String as SmartString; use crate::chunked_array::ops::explode::offsets_to_indexes; use crate::prelude::*; use crate::series::IsSorted; -use crate::utils::try_get_supertype; use crate::POOL; fn get_exploded(series: &Series) -> PolarsResult<(Series, OffsetsBuffer)> { @@ -185,175 +183,6 @@ impl DataFrame { let columns = self.select_series(columns)?; self.explode_impl(columns) } - - /// - /// Unpivot a `DataFrame` from wide to long format. - /// - /// # Example - /// - /// # Arguments - /// - /// * `on` - String slice that represent the columns to use as value variables. - /// * `index` - String slice that represent the columns to use as id variables. - /// - /// If `on` is empty all columns that are not in `index` will be used. 
- /// - /// ```ignore - /// # use polars_core::prelude::*; - /// let df = df!("A" => &["a", "b", "a"], - /// "B" => &[1, 3, 5], - /// "C" => &[10, 11, 12], - /// "D" => &[2, 4, 6] - /// )?; - /// - /// let unpivoted = df.unpivot(&["A", "B"], &["C", "D"])?; - /// println!("{:?}", df); - /// println!("{:?}", unpivoted); - /// # Ok::<(), PolarsError>(()) - /// ``` - /// Outputs: - /// ```text - /// +-----+-----+-----+-----+ - /// | A | B | C | D | - /// | --- | --- | --- | --- | - /// | str | i32 | i32 | i32 | - /// +=====+=====+=====+=====+ - /// | "a" | 1 | 10 | 2 | - /// +-----+-----+-----+-----+ - /// | "b" | 3 | 11 | 4 | - /// +-----+-----+-----+-----+ - /// | "a" | 5 | 12 | 6 | - /// +-----+-----+-----+-----+ - /// - /// +-----+-----+----------+-------+ - /// | A | B | variable | value | - /// | --- | --- | --- | --- | - /// | str | i32 | str | i32 | - /// +=====+=====+==========+=======+ - /// | "a" | 1 | "C" | 10 | - /// +-----+-----+----------+-------+ - /// | "b" | 3 | "C" | 11 | - /// +-----+-----+----------+-------+ - /// | "a" | 5 | "C" | 12 | - /// +-----+-----+----------+-------+ - /// | "a" | 1 | "D" | 2 | - /// +-----+-----+----------+-------+ - /// | "b" | 3 | "D" | 4 | - /// +-----+-----+----------+-------+ - /// | "a" | 5 | "D" | 6 | - /// +-----+-----+----------+-------+ - /// ``` - pub fn unpivot(&self, on: I, index: J) -> PolarsResult - where - I: IntoVec, - J: IntoVec, - { - let index = index.into_vec(); - let on = on.into_vec(); - self.unpivot2(UnpivotArgsIR { - on, - index, - ..Default::default() - }) - } - - /// Similar to unpivot, but without generics. This may be easier if you want to pass - /// an empty `index` or empty `on`. 
- pub fn unpivot2(&self, args: UnpivotArgsIR) -> PolarsResult { - let index = args.index; - let mut on = args.on; - - let variable_name = args.variable_name.as_deref().unwrap_or("variable"); - let value_name = args.value_name.as_deref().unwrap_or("value"); - - let len = self.height(); - - // if value vars is empty we take all columns that are not in id_vars. - if on.is_empty() { - // return empty frame if there are no columns available to use as value vars - if index.len() == self.width() { - let variable_col = Series::new_empty(variable_name, &DataType::String); - let value_col = Series::new_empty(variable_name, &DataType::Null); - - let mut out = self.select(index).unwrap().clear().columns; - out.push(variable_col); - out.push(value_col); - - return Ok(unsafe { DataFrame::new_no_checks(out) }); - } - - let index_set = PlHashSet::from_iter(index.iter().map(|s| s.as_str())); - on = self - .get_columns() - .iter() - .filter_map(|s| { - if index_set.contains(s.name()) { - None - } else { - Some(s.name().into()) - } - }) - .collect(); - } - - // values will all be placed in single column, so we must find their supertype - let schema = self.schema(); - let mut iter = on - .iter() - .map(|v| schema.get(v).ok_or_else(|| polars_err!(col_not_found = v))); - let mut st = iter.next().unwrap()?.clone(); - for dt in iter { - st = try_get_supertype(&st, dt?)?; - } - - // The column name of the variable that is unpivoted - let mut variable_col = MutablePlString::with_capacity(len * on.len() + 1); - // prepare ids - let ids_ = self.select_with_schema_unchecked(index, &schema)?; - let mut ids = ids_.clone(); - if ids.width() > 0 { - for _ in 0..on.len() - 1 { - ids.vstack_mut_unchecked(&ids_) - } - } - ids.as_single_chunk_par(); - drop(ids_); - - let mut values = Vec::with_capacity(on.len()); - - for value_column_name in &on { - variable_col.extend_constant(len, Some(value_column_name.as_str())); - // ensure we go via the schema so we are O(1) - // self.column() is linear - // 
together with this loop that would make it O^2 over `on` - let (pos, _name, _dtype) = schema.try_get_full(value_column_name)?; - let col = &self.columns[pos]; - let value_col = col.cast(&st).map_err( - |_| polars_err!(InvalidOperation: "'unpivot' not supported for dtype: {}", col.dtype()), - )?; - values.extend_from_slice(value_col.chunks()) - } - let values_arr = concatenate_owned_unchecked(&values)?; - // SAFETY: - // The give dtype is correct - let values = - unsafe { Series::from_chunks_and_dtype_unchecked(value_name, vec![values_arr], &st) }; - - let variable_col = variable_col.as_box(); - // SAFETY: - // The given dtype is correct - let variables = unsafe { - Series::from_chunks_and_dtype_unchecked( - variable_name, - vec![variable_col], - &DataType::String, - ) - }; - - ids.hstack_mut(&[variables, values])?; - - Ok(ids) - } } #[cfg(test)] @@ -432,55 +261,4 @@ mod test { Ok(()) } - - #[test] - #[cfg_attr(miri, ignore)] - fn test_unpivot() -> PolarsResult<()> { - let df = df!("A" => &["a", "b", "a"], - "B" => &[1, 3, 5], - "C" => &[10, 11, 12], - "D" => &[2, 4, 6] - ) - .unwrap(); - - let unpivoted = df.unpivot(["C", "D"], ["A", "B"])?; - assert_eq!( - Vec::from(unpivoted.column("value")?.i32()?), - &[Some(10), Some(11), Some(12), Some(2), Some(4), Some(6)] - ); - - let args = UnpivotArgsIR { - on: vec![], - index: vec![], - ..Default::default() - }; - - let unpivoted = df.unpivot2(args).unwrap(); - let value = unpivoted.column("value")?; - // String because of supertype - let value = value.str()?; - let value = value.into_no_null_iter().collect::>(); - assert_eq!( - value, - &["a", "b", "a", "1", "3", "5", "10", "11", "12", "2", "4", "6"] - ); - - let args = UnpivotArgsIR { - on: vec![], - index: vec!["A".into()], - ..Default::default() - }; - - let unpivoted = df.unpivot2(args).unwrap(); - let value = unpivoted.column("value")?; - let value = value.i32()?; - let value = value.into_no_null_iter().collect::>(); - assert_eq!(value, &[1, 3, 5, 10, 11, 12, 2, 4, 
6]); - let variable = unpivoted.column("variable")?; - let variable = variable.str()?; - let variable = variable.into_no_null_iter().collect::>(); - assert_eq!(variable, &["B", "B", "B", "C", "C", "C", "D", "D", "D"]); - assert!(unpivoted.column("A").is_ok()); - Ok(()) - } } diff --git a/crates/polars-core/src/frame/mod.rs b/crates/polars-core/src/frame/mod.rs index 45e110ba7fa1..eb893c71f320 100644 --- a/crates/polars-core/src/frame/mod.rs +++ b/crates/polars-core/src/frame/mod.rs @@ -575,6 +575,11 @@ impl DataFrame { &mut self.columns } + /// Take ownership of the underlying columns vec. + pub fn take_columns(self) -> Vec { + self.columns + } + /// Iterator over the columns as [`Series`]. /// /// # Example @@ -926,8 +931,13 @@ impl DataFrame { Ok(self) } - /// Does not check if schema is correct - pub(crate) fn vstack_mut_unchecked(&mut self, other: &DataFrame) { + /// Concatenate a [`DataFrame`] to this [`DataFrame`] without validating the schema first. + /// + /// If many `vstack` operations are done, it is recommended to call [`DataFrame::align_chunks`]. + /// + /// # Panics + /// Panics if the schemas don't match. 
+ pub fn vstack_mut_unchecked(&mut self, other: &DataFrame) { self.columns .iter_mut() .zip(other.columns.iter()) diff --git a/crates/polars-lazy/Cargo.toml b/crates/polars-lazy/Cargo.toml index 91477915c381..e784049cd78a 100644 --- a/crates/polars-lazy/Cargo.toml +++ b/crates/polars-lazy/Cargo.toml @@ -217,7 +217,7 @@ arg_where = ["polars-plan/arg_where"] search_sorted = ["polars-plan/search_sorted"] merge_sorted = ["polars-plan/merge_sorted"] meta = ["polars-plan/meta"] -pivot = ["polars-core/rows", "polars-ops/pivot"] +pivot = ["polars-core/rows", "polars-ops/pivot", "polars-plan/pivot"] top_k = ["polars-plan/top_k"] semi_anti_join = ["polars-plan/semi_anti_join"] cse = ["polars-plan/cse", "polars-mem-engine/cse"] diff --git a/crates/polars-lazy/src/frame/mod.rs b/crates/polars-lazy/src/frame/mod.rs index ecd9aa842263..9f81d07a97d3 100644 --- a/crates/polars-lazy/src/frame/mod.rs +++ b/crates/polars-lazy/src/frame/mod.rs @@ -1652,6 +1652,7 @@ impl LazyFrame { /// Unpivot the DataFrame from wide to long format. /// /// See [`UnpivotArgsIR`] for information on how to unpivot a DataFrame. 
+ #[cfg(feature = "pivot")] pub fn unpivot(self, args: UnpivotArgsDSL) -> LazyFrame { let opt_state = self.get_opt_state(); let lp = self.get_plan_builder().unpivot(args).build(); diff --git a/crates/polars-lazy/src/tests/queries.rs b/crates/polars-lazy/src/tests/queries.rs index 872c4c66e465..fe777499812d 100644 --- a/crates/polars-lazy/src/tests/queries.rs +++ b/crates/polars-lazy/src/tests/queries.rs @@ -46,6 +46,7 @@ fn test_lazy_alias() { } #[test] +#[cfg(feature = "pivot")] fn test_lazy_unpivot() { let df = get_df(); diff --git a/crates/polars-ops/src/frame/pivot/mod.rs b/crates/polars-ops/src/frame/pivot/mod.rs index 7fea3564532e..8fedd2f1860b 100644 --- a/crates/polars-ops/src/frame/pivot/mod.rs +++ b/crates/polars-ops/src/frame/pivot/mod.rs @@ -1,4 +1,5 @@ mod positioning; +mod unpivot; use std::borrow::Cow; @@ -7,6 +8,7 @@ use polars_core::frame::group_by::expr::PhysicalAggExpr; use polars_core::prelude::*; use polars_core::utils::_split_offsets; use polars_core::{downcast_as_macro_arg_physical, POOL}; +pub use unpivot::UnpivotDF; const HASHMAP_INIT_SIZE: usize = 512; diff --git a/crates/polars-ops/src/frame/pivot/unpivot.rs b/crates/polars-ops/src/frame/pivot/unpivot.rs new file mode 100644 index 000000000000..3b45b1986fa5 --- /dev/null +++ b/crates/polars-ops/src/frame/pivot/unpivot.rs @@ -0,0 +1,248 @@ +use arrow::array::{MutableArray, MutablePlString}; +use arrow::legacy::kernels::concatenate::concatenate_owned_unchecked; +use polars_core::datatypes::{DataType, SmartString}; +use polars_core::frame::DataFrame; +use polars_core::prelude::{IntoVec, Series, UnpivotArgsIR}; +use polars_core::utils::try_get_supertype; +use polars_error::{polars_err, PolarsResult}; +use polars_utils::aliases::PlHashSet; + +use crate::frame::IntoDf; + +pub trait UnpivotDF: IntoDf { + /// Unpivot a `DataFrame` from wide to long format. + /// + /// # Example + /// + /// # Arguments + /// + /// * `on` - String slice that represent the columns to use as value variables. 
+ /// * `index` - String slice that represents the columns to use as id variables. + /// + /// If `on` is empty all columns that are not in `index` will be used. + /// + /// ```ignore + /// # use polars_core::prelude::*; + /// let df = df!("A" => &["a", "b", "a"], + /// "B" => &[1, 3, 5], + /// "C" => &[10, 11, 12], + /// "D" => &[2, 4, 6] + /// )?; + /// + /// let unpivoted = df.unpivot(&["C", "D"], &["A", "B"])?; + /// println!("{:?}", df); + /// println!("{:?}", unpivoted); + /// # Ok::<(), PolarsError>(()) + /// ``` + /// Outputs: + /// ```text + /// +-----+-----+-----+-----+ + /// | A | B | C | D | + /// | --- | --- | --- | --- | + /// | str | i32 | i32 | i32 | + /// +=====+=====+=====+=====+ + /// | "a" | 1 | 10 | 2 | + /// +-----+-----+-----+-----+ + /// | "b" | 3 | 11 | 4 | + /// +-----+-----+-----+-----+ + /// | "a" | 5 | 12 | 6 | + /// +-----+-----+-----+-----+ + /// + /// +-----+-----+----------+-------+ + /// | A | B | variable | value | + /// | --- | --- | --- | --- | + /// | str | i32 | str | i32 | + /// +=====+=====+==========+=======+ + /// | "a" | 1 | "C" | 10 | + /// +-----+-----+----------+-------+ + /// | "b" | 3 | "C" | 11 | + /// +-----+-----+----------+-------+ + /// | "a" | 5 | "C" | 12 | + /// +-----+-----+----------+-------+ + /// | "a" | 1 | "D" | 2 | + /// +-----+-----+----------+-------+ + /// | "b" | 3 | "D" | 4 | + /// +-----+-----+----------+-------+ + /// | "a" | 5 | "D" | 6 | + /// +-----+-----+----------+-------+ + /// ``` + fn unpivot(&self, on: I, index: J) -> PolarsResult + where + I: IntoVec, + J: IntoVec, + { + let index = index.into_vec(); + let on = on.into_vec(); + self.unpivot2(UnpivotArgsIR { + on, + index, + ..Default::default() + }) + } + + /// Similar to unpivot, but without generics. This may be easier if you want to pass + /// an empty `index` or empty `on`. 
+ fn unpivot2(&self, args: UnpivotArgsIR) -> PolarsResult { + let self_ = self.to_df(); + let index = args.index; + let mut on = args.on; + + let variable_name = args.variable_name.as_deref().unwrap_or("variable"); + let value_name = args.value_name.as_deref().unwrap_or("value"); + + if self_.get_columns().is_empty() { + return DataFrame::new(vec![ + Series::new_empty(variable_name, &DataType::String), + Series::new_empty(value_name, &DataType::Null), + ]); + } + + let len = self_.height(); + + // if `on` is empty we take all columns that are not in `index`. + if on.is_empty() { + // return empty frame if there are no columns available to use as value vars + if index.len() == self_.width() { + let variable_col = Series::new_empty(variable_name, &DataType::String); + let value_col = Series::new_empty(value_name, &DataType::Null); + + let mut out = self_.select(index)?.clear().take_columns(); + out.push(variable_col); + out.push(value_col); + + return Ok(unsafe { DataFrame::new_no_checks(out) }); + } + + let index_set = PlHashSet::from_iter(index.iter().map(|s| s.as_str())); + on = self_ + .get_columns() + .iter() + .filter_map(|s| { + if index_set.contains(s.name()) { + None + } else { + Some(s.name().into()) + } + }) + .collect(); + } + + // values will all be placed in single column, so we must find their supertype + let schema = self_.schema(); + let mut iter = on + .iter() + .map(|v| schema.get(v).ok_or_else(|| polars_err!(col_not_found = v))); + let mut st = iter.next().unwrap()?.clone(); + for dt in iter { + st = try_get_supertype(&st, dt?)?; + } + + // The column name of the variable that is unpivoted + let mut variable_col = MutablePlString::with_capacity(len * on.len() + 1); + // prepare ids: stack the index columns once per unpivoted column + let ids_ = self_.select_with_schema_unchecked(index, &schema)?; + let mut ids = ids_.clone(); + if ids.width() > 0 { + for _ in 0..on.len() - 1 { + ids.vstack_mut_unchecked(&ids_) + } + } + ids.as_single_chunk_par(); + drop(ids_); + + let mut values = 
Vec::with_capacity(on.len()); + let columns = self_.get_columns(); + + for value_column_name in &on { + variable_col.extend_constant(len, Some(value_column_name.as_str())); + // ensure we go via the schema so we are O(1) + // self.column() is linear + // together with this loop that would make it quadratic in the number of `on` columns + let (pos, _name, _dtype) = schema.try_get_full(value_column_name)?; + let col = &columns[pos]; + let value_col = col.cast(&st).map_err( + |_| polars_err!(InvalidOperation: "'unpivot' not supported for dtype: {}", col.dtype()), + )?; + values.extend_from_slice(value_col.chunks()) + } + let values_arr = concatenate_owned_unchecked(&values)?; + // SAFETY: + // Every chunk was cast to `st` above, so the given dtype is correct + let values = + unsafe { Series::from_chunks_and_dtype_unchecked(value_name, vec![values_arr], &st) }; + + let variable_col = variable_col.as_box(); + // SAFETY: + // `variable_col` was built as a `MutablePlString`, so the given dtype is correct + let variables = unsafe { + Series::from_chunks_and_dtype_unchecked( + variable_name, + vec![variable_col], + &DataType::String, + ) + }; + + ids.hstack_mut(&[variables, values])?; + + Ok(ids) + } +} + +impl UnpivotDF for DataFrame {} + +#[cfg(test)] +mod test { + use polars_core::df; + + use super::*; + + #[test] + fn test_unpivot() -> PolarsResult<()> { + let df = df!("A" => &["a", "b", "a"], + "B" => &[1, 3, 5], + "C" => &[10, 11, 12], + "D" => &[2, 4, 6] + ) + .unwrap(); + + let unpivoted = df.unpivot(["C", "D"], ["A", "B"])?; + assert_eq!( + Vec::from(unpivoted.column("value")?.i32()?), + &[Some(10), Some(11), Some(12), Some(2), Some(4), Some(6)] + ); + + let args = UnpivotArgsIR { + on: vec![], + index: vec![], + ..Default::default() + }; + + let unpivoted = df.unpivot2(args).unwrap(); + let value = unpivoted.column("value")?; + // String because of supertype + let value = value.str()?; + let value = value.into_no_null_iter().collect::>(); + assert_eq!( + value, + &["a", "b", "a", "1", "3", "5", "10", "11", "12", "2", "4", "6"] + ); + + let args = UnpivotArgsIR { + on: vec![], + 
index: vec!["A".into()], + ..Default::default() + }; + + let unpivoted = df.unpivot2(args).unwrap(); + let value = unpivoted.column("value")?; + let value = value.i32()?; + let value = value.into_no_null_iter().collect::>(); + assert_eq!(value, &[1, 3, 5, 10, 11, 12, 2, 4, 6]); + let variable = unpivoted.column("variable")?; + let variable = variable.str()?; + let variable = variable.into_no_null_iter().collect::>(); + assert_eq!(variable, &["B", "B", "B", "C", "C", "C", "D", "D", "D"]); + assert!(unpivoted.column("A").is_ok()); + Ok(()) + } +} diff --git a/crates/polars-ops/src/prelude.rs b/crates/polars-ops/src/prelude.rs index 1f0717945b49..2353afaefbc8 100644 --- a/crates/polars-ops/src/prelude.rs +++ b/crates/polars-ops/src/prelude.rs @@ -5,5 +5,7 @@ pub use crate::chunked_array::*; #[cfg(feature = "merge_sorted")] pub use crate::frame::_merge_sorted_dfs; pub use crate::frame::join::*; +#[cfg(feature = "pivot")] +pub use crate::frame::pivot::UnpivotDF; pub use crate::frame::{DataFrameJoinOps, DataFrameOps}; pub use crate::series::*; diff --git a/crates/polars-plan/src/plans/builder_dsl.rs b/crates/polars-plan/src/plans/builder_dsl.rs index b016e0a90160..82fa483643d7 100644 --- a/crates/polars-plan/src/plans/builder_dsl.rs +++ b/crates/polars-plan/src/plans/builder_dsl.rs @@ -354,6 +354,7 @@ impl DslBuilder { .into() } + #[cfg(feature = "pivot")] pub fn unpivot(self, args: UnpivotArgsDSL) -> Self { DslPlan::MapFunction { input: Arc::new(self.0), diff --git a/crates/polars-plan/src/plans/builder_ir.rs b/crates/polars-plan/src/plans/builder_ir.rs index 3907584140ea..f1f3b8089e2c 100644 --- a/crates/polars-plan/src/plans/builder_ir.rs +++ b/crates/polars-plan/src/plans/builder_ir.rs @@ -297,6 +297,7 @@ impl<'a> IRBuilder<'a> { self.add_alp(lp) } + #[cfg(feature = "pivot")] pub fn unpivot(self, args: Arc) -> Self { let lp = IR::MapFunction { input: self.root, diff --git a/crates/polars-plan/src/plans/functions/dsl.rs b/crates/polars-plan/src/plans/functions/dsl.rs 
index 2dd94d7f0f45..76c7dc9d3211 100644 --- a/crates/polars-plan/src/plans/functions/dsl.rs +++ b/crates/polars-plan/src/plans/functions/dsl.rs @@ -30,6 +30,7 @@ pub enum DslFunction { Explode { columns: Vec, }, + #[cfg(feature = "pivot")] Unpivot { args: UnpivotArgsDSL, }, @@ -100,6 +101,7 @@ impl DslFunction { schema: Default::default(), } }, + #[cfg(feature = "pivot")] DslFunction::Unpivot { args } => { let on = expand_selectors(args.on, input_schema, &[])?; let index = expand_selectors(args.index, input_schema, &[])?; diff --git a/crates/polars-plan/src/plans/functions/mod.rs b/crates/polars-plan/src/plans/functions/mod.rs index acf47b88dccc..fb3edbe12bd3 100644 --- a/crates/polars-plan/src/plans/functions/mod.rs +++ b/crates/polars-plan/src/plans/functions/mod.rs @@ -77,6 +77,7 @@ pub enum FunctionIR { columns: Arc<[ColumnName]>, schema: CachedSchema, }, + #[cfg(feature = "pivot")] Unpivot { args: Arc, schema: CachedSchema, @@ -112,6 +113,7 @@ impl PartialEq for FunctionIR { }, ) => existing_l == existing_r && new_l == new_r, (Explode { columns: l, .. }, Explode { columns: r, .. }) => l == r, + #[cfg(feature = "pivot")] (Unpivot { args: l, .. }, Unpivot { args: r, .. }) => l == r, (RowIndex { name: l, .. }, RowIndex { name: r, .. }) => l == r, #[cfg(feature = "merge_sorted")] @@ -152,6 +154,7 @@ impl Hash for FunctionIR { new.hash(state); }, FunctionIR::Explode { columns, schema: _ } => columns.hash(state), + #[cfg(feature = "pivot")] FunctionIR::Unpivot { args, schema: _ } => args.hash(state), FunctionIR::RowIndex { name, @@ -174,6 +177,7 @@ impl FunctionIR { #[cfg(feature = "merge_sorted")] MergeSorted { .. } => false, FastCount { .. } | Unnest { .. } | Rename { .. } | Explode { .. } => true, + #[cfg(feature = "pivot")] Unpivot { .. } => true, Opaque { streamable, .. } => *streamable, #[cfg(feature = "python")] @@ -188,7 +192,9 @@ impl FunctionIR { match self { #[cfg(feature = "merge_sorted")] MergeSorted { .. } => true, - Explode { .. } | Unpivot { .. 
} => true, + #[cfg(feature = "pivot")] + Unpivot { .. } => true, + Explode { .. } => true, _ => false, } } @@ -199,7 +205,9 @@ impl FunctionIR { Opaque { predicate_pd, .. } => *predicate_pd, #[cfg(feature = "python")] OpaquePython(OpaquePythonUdf { predicate_pd, .. }) => *predicate_pd, - Rechunk | Unnest { .. } | Rename { .. } | Explode { .. } | Unpivot { .. } => true, + #[cfg(feature = "pivot")] + Unpivot { .. } => true, + Rechunk | Unnest { .. } | Rename { .. } | Explode { .. } => true, #[cfg(feature = "merge_sorted")] MergeSorted { .. } => true, RowIndex { .. } | FastCount { .. } => false, @@ -213,12 +221,9 @@ impl FunctionIR { Opaque { projection_pd, .. } => *projection_pd, #[cfg(feature = "python")] OpaquePython(OpaquePythonUdf { projection_pd, .. }) => *projection_pd, - Rechunk - | FastCount { .. } - | Unnest { .. } - | Rename { .. } - | Explode { .. } - | Unpivot { .. } => true, + Rechunk | FastCount { .. } | Unnest { .. } | Rename { .. } | Explode { .. } => true, + #[cfg(feature = "pivot")] + Unpivot { .. } => true, #[cfg(feature = "merge_sorted")] MergeSorted { .. } => true, RowIndex { .. } => true, @@ -282,7 +287,9 @@ impl FunctionIR { }, Rename { existing, new, .. } => rename::rename_impl(df, existing, new), Explode { columns, .. } => df.explode(columns.as_ref()), + #[cfg(feature = "pivot")] Unpivot { args, .. } => { + use polars_ops::pivot::UnpivotDF; let args = (**args).clone(); df.unpivot2(args) }, diff --git a/crates/polars-plan/src/plans/functions/schema.rs b/crates/polars-plan/src/plans/functions/schema.rs index 14de166cfcbf..58ae0a43609a 100644 --- a/crates/polars-plan/src/plans/functions/schema.rs +++ b/crates/polars-plan/src/plans/functions/schema.rs @@ -1,3 +1,4 @@ +#[cfg(feature = "pivot")] use polars_core::utils::try_get_supertype; use super::*; @@ -8,10 +9,12 @@ impl FunctionIR { // We will likely add more branches later #[allow(clippy::single_match)] match self { - RowIndex { schema, .. } - | Explode { schema, .. } - | Rename { schema, .. 
} - | Unpivot { schema, .. } => { + #[cfg(feature = "pivot")] + Unpivot { schema, .. } => { + let mut guard = schema.lock().unwrap(); + *guard = None; + }, + RowIndex { schema, .. } | Explode { schema, .. } | Rename { schema, .. } => { let mut guard = schema.lock().unwrap(); *guard = None; }, @@ -98,6 +101,7 @@ impl FunctionIR { Ok(Cow::Owned(row_index_schema(schema, input_schema, name))) }, Explode { schema, columns } => explode_schema(schema, input_schema, columns), + #[cfg(feature = "pivot")] Unpivot { schema, args } => unpivot_schema(args, schema, input_schema), } } @@ -143,6 +147,7 @@ fn explode_schema<'a>( Ok(Cow::Owned(schema)) } +#[cfg(feature = "pivot")] fn unpivot_schema<'a>( args: &UnpivotArgsIR, cached_schema: &CachedSchema, diff --git a/crates/polars-plan/src/plans/optimizer/predicate_pushdown/mod.rs b/crates/polars-plan/src/plans/optimizer/predicate_pushdown/mod.rs index f68ec98d459e..a8b922b6d726 100644 --- a/crates/polars-plan/src/plans/optimizer/predicate_pushdown/mod.rs +++ b/crates/polars-plan/src/plans/optimizer/predicate_pushdown/mod.rs @@ -535,6 +535,7 @@ impl<'a> PredicatePushDown<'a> { expr_arena, )) }, + #[cfg(feature = "pivot")] FunctionIR::Unpivot { args, .. } => { let variable_name = args.variable_name.as_deref().unwrap_or("variable"); let value_name = args.value_name.as_deref().unwrap_or("value"); diff --git a/crates/polars-plan/src/plans/optimizer/projection_pushdown/functions/mod.rs b/crates/polars-plan/src/plans/optimizer/projection_pushdown/functions/mod.rs index f92fc2f06fb9..08c0ddf15bd6 100644 --- a/crates/polars-plan/src/plans/optimizer/projection_pushdown/functions/mod.rs +++ b/crates/polars-plan/src/plans/optimizer/projection_pushdown/functions/mod.rs @@ -1,5 +1,7 @@ +#[cfg(feature = "pivot")] mod unpivot; +#[cfg(feature = "pivot")] use unpivot::process_unpivot; use super::*; @@ -64,6 +66,7 @@ pub(super) fn process_functions( .explode(columns.clone()) .build()) }, + #[cfg(feature = "pivot")] Unpivot { ref args, .. 
} => { let lp = IR::MapFunction { input, diff --git a/crates/polars-plan/src/plans/optimizer/slice_pushdown_lp.rs b/crates/polars-plan/src/plans/optimizer/slice_pushdown_lp.rs index 47f4a24a8399..33edd4b6ed8f 100644 --- a/crates/polars-plan/src/plans/optimizer/slice_pushdown_lp.rs +++ b/crates/polars-plan/src/plans/optimizer/slice_pushdown_lp.rs @@ -386,7 +386,6 @@ impl SlicePushDown { | m @ (DataFrameScan {..}, _) | m @ (Sort {..}, _) | m @ (MapFunction {function: FunctionIR::Explode {..}, ..}, _) - | m @ (MapFunction {function: FunctionIR::Unpivot {..}, ..}, _) | m @ (Cache {..}, _) | m @ (Distinct {..}, _) | m @ (GroupBy{..},_) @@ -395,7 +394,12 @@ impl SlicePushDown { => { let (lp, state) = m; self.no_pushdown_restart_opt(lp, state, lp_arena, expr_arena) - } + }, + #[cfg(feature = "pivot")] + m @ (MapFunction {function: FunctionIR::Unpivot {..}, ..}, _) => { + let (lp, state) = m; + self.no_pushdown_restart_opt(lp, state, lp_arena, expr_arena) + }, // [Pushdown] (MapFunction {input, function}, _) if function.allow_predicate_pd() => { let lp = MapFunction {input, function}; diff --git a/crates/polars/Cargo.toml b/crates/polars/Cargo.toml index 97bb99b279cf..dc1e5c952371 100644 --- a/crates/polars/Cargo.toml +++ b/crates/polars/Cargo.toml @@ -190,7 +190,7 @@ moment = ["polars-ops/moment", "polars-lazy?/moment"] partition_by = ["polars-core/partition_by"] pct_change = ["polars-ops/pct_change", "polars-lazy?/pct_change"] peaks = ["polars-lazy/peaks"] -pivot = ["polars-lazy?/pivot"] +pivot = ["polars-lazy?/pivot", "polars-ops/pivot", "dtype-struct", "rows"] product = ["polars-core/product"] propagate_nans = ["polars-lazy?/propagate_nans"] range = ["polars-lazy?/range"] diff --git a/docs/src/rust/Cargo.toml b/docs/src/rust/Cargo.toml index c1897560eb59..c99561340b12 100644 --- a/docs/src/rust/Cargo.toml +++ b/docs/src/rust/Cargo.toml @@ -134,6 +134,7 @@ required-features = ["polars/lazy", "polars/asof_join"] [[bin]] name = "user-guide-transformations-unpivot" path = 
"user-guide/transformations/unpivot.rs" +required-features = ["polars/pivot"] [[bin]] name = "user-guide-transformations-pivot" path = "user-guide/transformations/pivot.rs" diff --git a/py-polars/src/dataframe/general.rs b/py-polars/src/dataframe/general.rs index 9b40ca2f86dc..9ee1d7d2b591 100644 --- a/py-polars/src/dataframe/general.rs +++ b/py-polars/src/dataframe/general.rs @@ -372,6 +372,7 @@ impl PyDataFrame { PyDataFrame::new(self.df.clone()) } + #[cfg(feature = "pivot")] pub fn unpivot( &self, on: Vec, @@ -379,6 +380,7 @@ impl PyDataFrame { value_name: Option<&str>, variable_name: Option<&str>, ) -> PyResult { + use polars_ops::pivot::UnpivotDF; let args = UnpivotArgsIR { on: strings_to_smartstrings(on), index: strings_to_smartstrings(index), diff --git a/py-polars/tests/unit/operations/test_unpivot.py b/py-polars/tests/unit/operations/test_unpivot.py index a4155da56874..7b51d91122dc 100644 --- a/py-polars/tests/unit/operations/test_unpivot.py +++ b/py-polars/tests/unit/operations/test_unpivot.py @@ -92,3 +92,9 @@ def test_unpivot_raise_list() -> None: pl.LazyFrame( {"a": ["x", "y"], "b": [["test", "test2"], ["test3", "test4"]]} ).unpivot().collect() + + +def test_unpivot_empty_18170() -> None: + assert pl.DataFrame().unpivot().schema == pl.Schema( + {"variable": pl.String(), "value": pl.Null()} + )