Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

feat: Output detailed lineage graph #4533

Merged
merged 20 commits into from
Jun 5, 2024
Merged
Show file tree
Hide file tree
Changes from 18 commits
Commits
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
1 change: 1 addition & 0 deletions prqlc/bindings/prqlc-python/pyproject.toml
Original file line number Diff line number Diff line change
Expand Up @@ -23,6 +23,7 @@ features = ["pyo3/extension-module"]

# The module is named `prqlc` rather than `prqlc-python`.
module-name = "prqlc"
python-source = "python"

[project.optional-dependencies]
dev = [
Expand Down
8 changes: 8 additions & 0 deletions prqlc/bindings/prqlc-python/python/prqlc/__init__.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,8 @@
# ruff: noqa: F403, F405
#
# This is the default module init provided automatically by Maturin.
#
# TODO: replace the star import with explicit re-exports if more
# Python-native code is added to this package.
from .prqlc import *

# Mirror the extension module's docstring and, when it declares one, its
# `__all__` export list.
__doc__ = prqlc.__doc__
if hasattr(prqlc, "__all__"):
    __all__ = prqlc.__all__
Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

(FYI this file seems a bit inelegant, but am guessing as a result of the python bindings. We could add a TODO to clean up if we're not confident we need this indirection)

Copy link
Collaborator Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

When I added the debug submodule to the python bindings, I needed to update the type declarations as well... which led me towards needing to use Maturin's mixed Rust/Python project layout. I then just copied Maturin's default __init__.py (that they provide when you have a pure Rust project) into that directory.

I am sure we could be more precise about the module init here, but given that it was working fine before (plus the type stubs are there, which are more descriptive anyway) I decided this was good for now. The TODO makes sense especially if more Python-native code will be added later.

2 changes: 2 additions & 0 deletions prqlc/bindings/prqlc-python/python/prqlc/debug.pyi
Original file line number Diff line number Diff line change
@@ -0,0 +1,2 @@
# Parse a PRQL query and return its column-level lineage graph as a JSON
# string. Raises ValueError (carrying a JSON error payload) on failure.
def prql_lineage(prql_query: str) -> str: ...
# Take a PL AST serialized as JSON and return its column-level lineage graph
# as a JSON string. Raises ValueError (carrying a JSON error payload) on failure.
def pl_to_lineage(pl_json: str) -> str: ...
Empty file.
9 changes: 9 additions & 0 deletions prqlc/bindings/prqlc-python/python/tests/test_all.py
Original file line number Diff line number Diff line change
@@ -1,3 +1,5 @@
import json

import prqlc


Expand Down Expand Up @@ -80,3 +82,10 @@ def test_compile_options() -> None:
assert res.startswith(
"SELECT\n *\nFROM\n a\nORDER BY\n (\n SELECT\n NULL\n ) OFFSET 0 ROWS\nFETCH FIRST\n 3 ROWS ONLY"
)


def test_debug_functions() -> None:
    """Both `prqlc.debug` lineage entry points return the expected top-level keys."""
    prql_query = "from invoices | select { id, customer_id }"
    expected_keys = {"frames", "nodes", "ast"}

    # Lineage computed straight from PRQL source.
    lineage = json.loads(prqlc.debug.prql_lineage(prql_query))
    assert lineage.keys() == expected_keys

    # Lineage computed from the PL JSON produced by `prql_to_pl`; previously
    # `pl_to_lineage` was not exercised from Python at all.
    pl_json = prqlc.prql_to_pl(prql_query)
    lineage_from_pl = json.loads(prqlc.debug.pl_to_lineage(pl_json))
    assert lineage_from_pl.keys() == expected_keys
44 changes: 44 additions & 0 deletions prqlc/bindings/prqlc-python/src/lib.rs
Original file line number Diff line number Diff line change
Expand Up @@ -50,17 +50,45 @@
.map_err(|err| (PyErr::new::<exceptions::PyValueError, _>(err.to_json())))
}

mod debug {
use super::*;

#[pyfunction]

Check warning on line 56 in prqlc/bindings/prqlc-python/src/lib.rs

View check run for this annotation

Codecov / codecov/patch

prqlc/bindings/prqlc-python/src/lib.rs#L56

Added line #L56 was not covered by tests
pub fn prql_lineage(prql_query: &str) -> PyResult<String> {
kgutwin marked this conversation as resolved.
Show resolved Hide resolved
prqlc_lib::prql_to_pl(prql_query)
.and_then(prqlc_lib::debug::pl_to_lineage)
.and_then(|x| prqlc_lib::debug::json::from_lineage(&x))
.map_err(|err| (PyErr::new::<exceptions::PyValueError, _>(err.to_json())))
}

#[pyfunction]

Check warning on line 64 in prqlc/bindings/prqlc-python/src/lib.rs

View check run for this annotation

Codecov / codecov/patch

prqlc/bindings/prqlc-python/src/lib.rs#L64

Added line #L64 was not covered by tests
// Plain comments are used here on purpose so the Python-visible docstring of
// this function stays exactly as it was.
//
// Parses `pl_json` into a PL module, builds its lineage graph and serializes
// that graph to JSON. Any failure is surfaced to Python as a `ValueError`
// whose message is the JSON form of the error.
pub fn pl_to_lineage(pl_json: &str) -> PyResult<String> {
    let lineage_json = prqlc_lib::json::to_pl(pl_json)
        .and_then(prqlc_lib::debug::pl_to_lineage)
        .and_then(|lineage| prqlc_lib::debug::json::from_lineage(&lineage));

    match lineage_json {
        Ok(json) => Ok(json),
        Err(err) => Err(PyErr::new::<exceptions::PyValueError, _>(err.to_json())),
    }
}
}

#[pymodule]
fn prqlc(_py: Python, m: &PyModule) -> PyResult<()> {
m.add_function(wrap_pyfunction!(compile, m)?)?;
m.add_function(wrap_pyfunction!(prql_to_pl, m)?)?;
m.add_function(wrap_pyfunction!(pl_to_rq, m)?)?;
m.add_function(wrap_pyfunction!(rq_to_sql, m)?)?;
m.add_function(wrap_pyfunction!(get_targets, m)?)?;

m.add_class::<CompileOptions>()?;
// From https://github.com/PyO3/maturin/issues/100
m.add("__version__", env!("CARGO_PKG_VERSION"))?;

// add debug submodule
let debug_module = PyModule::new(_py, "debug")?;
debug_module.add_function(wrap_pyfunction!(debug::prql_lineage, debug_module)?)?;
debug_module.add_function(wrap_pyfunction!(debug::pl_to_lineage, debug_module)?)?;

Check warning on line 88 in prqlc/bindings/prqlc-python/src/lib.rs

View check run for this annotation

Codecov / codecov/patch

prqlc/bindings/prqlc-python/src/lib.rs#L86-L88

Added lines #L86 - L88 were not covered by tests

m.add_submodule(debug_module)?;

Check warning on line 90 in prqlc/bindings/prqlc-python/src/lib.rs

View check run for this annotation

Codecov / codecov/patch

prqlc/bindings/prqlc-python/src/lib.rs#L90

Added line #L90 was not covered by tests

Ok(())
}

Expand Down Expand Up @@ -183,4 +211,20 @@
id IN (1, 2, 3)
"###);
}

#[test]
fn debug_prql_lineage() {
    // Lineage of a bare `from` pipeline, computed directly from PRQL source.
    let lineage = debug::prql_lineage(r#"from a"#).unwrap();

    assert_snapshot!(
        lineage,
        @r#"{"frames":[],"nodes":[{"id":115,"kind":"Ident","span":"1:0-6","ident":{"Ident":["default_db","a"]}}],"ast":{"name":"Project","stmts":[{"VarDef":{"kind":"Main","name":"main","value":{"FuncCall":{"name":{"Ident":"from"},"args":[{"Ident":"a"}]}}},"span":"1:0-6"}]}}"#
    );
}

#[test]
fn debug_pl_to_lineage() {
    // Same pipeline, but routed through the PL JSON representation first.
    // The resulting node carries no "span" field in this snapshot.
    let pl_json = prql_to_pl(r#"from a"#).unwrap();
    let lineage = debug::pl_to_lineage(&pl_json).unwrap();

    assert_snapshot!(
        lineage,
        @r#"{"frames":[],"nodes":[{"id":115,"kind":"Ident","ident":{"Ident":["default_db","a"]}}],"ast":{"name":"Project","stmts":[{"VarDef":{"kind":"Main","name":"main","value":{"FuncCall":{"name":{"Ident":"from"},"args":[{"Ident":"a"}]}}},"span":"1:0-6"}]}}"#
    );
}
}
58 changes: 56 additions & 2 deletions prqlc/prqlc/src/cli/mod.rs
Original file line number Diff line number Diff line change
Expand Up @@ -19,6 +19,7 @@
use clio::Output;
use is_terminal::IsTerminal;
use itertools::Itertools;
use prqlc::debug::pl_to_lineage;
use prqlc::semantic;
use prqlc::semantic::reporting::{collect_frames, label_references};
use prqlc::semantic::NS_DEFAULT_DB;
Expand Down Expand Up @@ -173,7 +174,7 @@

/// Commands meant for debugging, prone to change
#[derive(Subcommand, Debug, Clone)]
pub enum DebugCommand {
enum DebugCommand {
/// Parse & expand into PL, but don't resolve
ExpandPL(IoArgs),

Expand All @@ -189,6 +190,48 @@
/// Parse, resolve & combine source with comments annotating relation type
Annotate(IoArgs),

/// Output column-level lineage graph
///
/// The returned data includes:
///
/// * "frames": a list of Span and Lineage records corresponding to each
/// transformation frame in the main pipeline.
///
/// * "nodes": a list of expression graph nodes.
///
/// * "ast": the parsed PL abstract syntax tree.
///
/// Each expression node has attributes:
///
/// * "id": A unique ID for each expression.
///
/// * "kind": Descriptive text about the expression type.
///
/// * "span": Position of the expression in the original source (optional).
///
/// * "alias": When this expression is part of a Tuple, this is its alias
/// (optional).
///
/// * "ident": When this expression is an Ident, this is its reference
/// (optional).
///
/// * "targets": Any upstream sources of data for this expression, as a list
/// of node IDs (optional).
///
/// * "children": A list of expression IDs contained within this expression
/// (optional).
///
/// * "parent": The expression ID that contains this expression (optional).
///
/// A Python script for rendering this output as a GraphViz visualization is
/// available at https://gist.github.com/kgutwin/efe5f03df5ff930d899249018a0a551b.
kgutwin marked this conversation as resolved.
Show resolved Hide resolved
Lineage {
#[command(flatten)]
io_args: IoArgs,
#[arg(value_enum, long, default_value = "yaml")]
format: Format,

Check warning on line 232 in prqlc/prqlc/src/cli/mod.rs

View check run for this annotation

Codecov / codecov/patch

prqlc/prqlc/src/cli/mod.rs#L232

Added line #L232 was not covered by tests
},

/// Print info about the AST data structure
Ast,
}
Expand Down Expand Up @@ -406,14 +449,23 @@
let ctx = semantic::resolve(root_mod, Default::default())?;

let frames = if let Ok((main, _)) = ctx.find_main_rel(&[]) {
collect_frames(*main.clone().into_relation_var().unwrap())
collect_frames(*main.clone().into_relation_var().unwrap()).frames
} else {
vec![]
};

// combine with source
combine_prql_and_frames(&source, frames).as_bytes().to_vec()
}
Command::Debug(DebugCommand::Lineage { format, .. }) => {
let stmts = prql_to_pl_tree(sources)?;
let fc = pl_to_lineage(stmts)?;

match format {
Format::Json => serde_json::to_string_pretty(&fc)?.into_bytes(),

Check warning on line 465 in prqlc/prqlc/src/cli/mod.rs

View check run for this annotation

Codecov / codecov/patch

prqlc/prqlc/src/cli/mod.rs#L465

Added line #L465 was not covered by tests
Format::Yaml => serde_yaml::to_string(&fc)?.into_bytes(),
}
}
Command::Debug(DebugCommand::Eval(_)) => {
let root_mod = prql_to_pl_tree(sources)?;

Expand Down Expand Up @@ -508,6 +560,7 @@
DebugCommand::Resolve(io_args)
| DebugCommand::ExpandPL(io_args)
| DebugCommand::Annotate(io_args)
| DebugCommand::Lineage { io_args, .. }
| DebugCommand::Eval(io_args),
) => io_args,
Experimental(ExperimentalCommand::GenerateDocs(io_args)) => io_args,
Expand Down Expand Up @@ -551,6 +604,7 @@
DebugCommand::Resolve(io_args)
| DebugCommand::ExpandPL(io_args)
| DebugCommand::Annotate(io_args)
| DebugCommand::Lineage { io_args, .. }
| DebugCommand::Eval(io_args),
) => io_args.output.clone(),
Experimental(ExperimentalCommand::GenerateDocs(io_args)) => io_args.output.clone(),
Expand Down
36 changes: 36 additions & 0 deletions prqlc/prqlc/src/lib.rs
Original file line number Diff line number Diff line change
Expand Up @@ -448,6 +448,42 @@
}
}

/// Debugging and unstable API functions
pub mod debug {
use super::*;

/// Create column-level lineage graph
pub fn pl_to_lineage(
pl: ast::ModuleDef,
) -> Result<semantic::reporting::FrameCollector, ErrorMessages> {
let ast = Some(pl.clone());

let root_module = semantic::resolve(pl, Default::default()).map_err(ErrorMessages::from)?;

let (main, _) = root_module.find_main_rel(&[]).unwrap();
let mut fc =
semantic::reporting::collect_frames(*main.clone().into_relation_var().unwrap());
fc.ast = ast;

Ok(fc)
}

pub mod json {
use super::*;

/// JSON serialization of FrameCollector lineage
pub fn from_lineage(
kgutwin marked this conversation as resolved.
Show resolved Hide resolved
fc: &semantic::reporting::FrameCollector,
) -> Result<String, ErrorMessages> {
serde_json::to_string(fc).map_err(convert_json_err)
}

fn convert_json_err(err: serde_json::Error) -> ErrorMessages {
ErrorMessages::from(Error::new_simple(err.to_string()))
}

Check warning on line 483 in prqlc/prqlc/src/lib.rs

View check run for this annotation

Codecov / codecov/patch

prqlc/prqlc/src/lib.rs#L481-L483

Added lines #L481 - L483 were not covered by tests
}
}

#[cfg(test)]
mod tests {
use std::str::FromStr;
Expand Down
Loading
Loading