Skip to content

Commit

Permalink
Added command polars profile for profiling lazy dataframes (#13904)
Browse files Browse the repository at this point in the history
# Description
Introduce a new command `polars profile` for profiling lazy dataframes:

<img width="965" alt="Screenshot 2024-09-22 at 23 46 18"
src="https://github.com/user-attachments/assets/11402dd3-8256-43df-a986-64241c15354f">

# User-Facing Changes
- Introduces new command `polars profile`
  • Loading branch information
ayax79 authored Sep 23, 2024
1 parent 6f47990 commit 28a7461
Show file tree
Hide file tree
Showing 2 changed files with 107 additions and 0 deletions.
2 changes: 2 additions & 0 deletions crates/nu_plugin_polars/src/dataframe/command/core/mod.rs
Original file line number Diff line number Diff line change
Expand Up @@ -2,6 +2,7 @@ mod cache;
mod columns;
mod fetch;
mod open;
mod profile;
mod save;
mod schema;
mod shape;
Expand All @@ -28,6 +29,7 @@ pub(crate) fn core_commands() -> Vec<Box<dyn PluginCommand<Plugin = PolarsPlugin
Box::new(cache::LazyCache),
Box::new(LazyFetch),
Box::new(OpenDataFrame),
Box::new(profile::ProfileDF),
Box::new(Summary),
Box::new(ShapeDF),
Box::new(SchemaCmd),
Expand Down
105 changes: 105 additions & 0 deletions crates/nu_plugin_polars/src/dataframe/command/core/profile.rs
Original file line number Diff line number Diff line change
@@ -0,0 +1,105 @@
use nu_plugin::{EngineInterface, EvaluatedCall, PluginCommand};
use nu_protocol::{
record, Category, Example, LabeledError, PipelineData, ShellError, Signature, Type, Value,
};

use crate::{
values::{
cant_convert_err, CustomValueSupport, NuDataFrame, NuLazyFrame, PolarsPluginObject,
PolarsPluginType,
},
PolarsPlugin,
};

/// Plugin command backing `polars profile`.
///
/// The type carries no state; its behavior lives in its `PluginCommand` implementation.
pub struct ProfileDF;

impl PluginCommand for ProfileDF {
    type Plugin = PolarsPlugin;

    /// Name under which the command is invoked.
    fn name(&self) -> &str {
        "polars profile"
    }

    /// Declares the command's pipeline types and category.
    ///
    /// The input is a polars dataframe custom value. The output is declared as a record
    /// with `dataframe` and `profiling` fields, because that is what `run` actually
    /// produces; advertising a bare dataframe here would misreport the command's result.
    fn signature(&self) -> Signature {
        Signature::build(self.name())
            .input_output_type(
                Type::Custom("dataframe".into()),
                Type::Record(
                    [
                        (
                            "dataframe".to_string(),
                            Type::Custom("dataframe".into()),
                        ),
                        (
                            "profiling".to_string(),
                            Type::Custom("dataframe".into()),
                        ),
                    ]
                    .into(),
                ),
            )
            .category(Category::Custom("dataframe".into()))
    }

    /// Help text shown for the command.
    fn description(&self) -> &str {
        "Profile a lazy dataframe. This will run the query and return a record containing the materialized DataFrame and a DataFrame that contains profiling information of each node that is executed.
The units of the timings are microseconds."
    }

    /// Usage example; no expected result is attached (`result: None`).
    fn examples(&self) -> Vec<Example> {
        vec![Example {
            description: "Profile a lazy dataframe",
            example: r#"[[a b]; [1 2] [1 4] [2 6] [2 4]]
| polars into-lazy
| polars group-by a
| polars agg [
(polars col b | polars min | polars as "b_min")
(polars col b | polars max | polars as "b_max")
(polars col b | polars sum | polars as "b_sum")
]
| polars profile
"#,
            result: None,
        }]
    }

    /// Entry point: collects the pipeline input into a value and profiles it.
    ///
    /// An eager dataframe is first turned into a lazy frame via `lazy()`; a lazy frame is
    /// used as-is. Any other plugin object is rejected with a conversion error naming the
    /// two accepted types. Every `ShellError` is converted into a `LabeledError`.
    fn run(
        &self,
        plugin: &Self::Plugin,
        engine: &EngineInterface,
        call: &EvaluatedCall,
        input: PipelineData,
    ) -> Result<PipelineData, LabeledError> {
        let value = input.into_value(call.head)?;
        match PolarsPluginObject::try_from_value(plugin, &value)? {
            PolarsPluginObject::NuDataFrame(df) => command_lazy(plugin, engine, call, df.lazy()),
            PolarsPluginObject::NuLazyFrame(lazy) => command_lazy(plugin, engine, call, lazy),
            _ => Err(cant_convert_err(
                &value,
                &[PolarsPluginType::NuDataFrame, PolarsPluginType::NuLazyFrame],
            )),
        }
        .map_err(LabeledError::from)
    }
}

/// Profiles `lazy` and returns a record holding both resulting dataframes.
///
/// `profile()` yields a pair: the materialized dataframe and a dataframe of profiling
/// data. Each is cached through the plugin and converted to a value, then stored under
/// the `dataframe` and `profiling` keys respectively.
///
/// # Errors
///
/// Returns a `ShellError::GenericError` spanning the call head when profiling fails, and
/// propagates any error raised while caching either dataframe.
fn command_lazy(
    plugin: &PolarsPlugin,
    engine: &EngineInterface,
    call: &EvaluatedCall,
    lazy: NuLazyFrame,
) -> Result<PipelineData, ShellError> {
    let span = call.head;

    let (materialized, timings) = match lazy.to_polars().profile() {
        Ok(pair) => pair,
        Err(e) => {
            return Err(ShellError::GenericError {
                error: format!("Could not profile dataframe: {e}"),
                msg: "".into(),
                span: Some(span),
                help: None,
                inner: vec![],
            });
        }
    };

    // Cache the materialized frame first, then the profiling frame (same order as before).
    let dataframe = NuDataFrame::from(materialized).cache_and_to_value(plugin, engine, span)?;
    let profiling = NuDataFrame::from(timings).cache_and_to_value(plugin, engine, span)?;

    let fields = record! {
        "dataframe" => dataframe,
        "profiling" => profiling,
    };

    Ok(PipelineData::Value(Value::record(fields, span), None))
}

0 comments on commit 28a7461

Please sign in to comment.