Skip to content

Commit

Permalink
feat: Add prqlc lex command to the CLI (#4467)
Browse files Browse the repository at this point in the history
  • Loading branch information
max-sixty authored May 12, 2024
1 parent 625fe36 commit db24427
Show file tree
Hide file tree
Showing 10 changed files with 239 additions and 36 deletions.
1 change: 1 addition & 0 deletions CHANGELOG.md
Original file line number Diff line number Diff line change
Expand Up @@ -9,6 +9,7 @@
- Initial implementation of an experimental documentation generator that
generates Markdown documentation from `.prql` files. (@vanillajonathan,
#4152).
- Add `prqlc lex` command to the CLI (@max-sixty, #4467)

**Fixes**:

Expand Down
7 changes: 3 additions & 4 deletions prqlc/prqlc-parser/src/lib.rs
Original file line number Diff line number Diff line change
Expand Up @@ -10,13 +10,12 @@ mod types;
use chumsky::error::SimpleReason;
use chumsky::{prelude::*, Stream};

use prqlc_ast::error::Reason;
use prqlc_ast::error::{Error, WithErrorInfo};
use prqlc_ast::error::{Error, Reason, WithErrorInfo};
use prqlc_ast::stmt::*;
use prqlc_ast::Span;

use lexer::TokenKind;
use lexer::{Token, TokenVec};
use lexer::Token;
pub use lexer::{TokenKind, TokenVec};
use span::ParserSpan;

/// Build PRQL AST from a PRQL query string.
Expand Down
69 changes: 63 additions & 6 deletions prqlc/prqlc/src/cli/mod.rs
Original file line number Diff line number Diff line change
Expand Up @@ -22,10 +22,10 @@ use std::path::Path;
use std::process::exit;
use std::str::FromStr;

use prqlc::ast;
use prqlc::semantic;
use prqlc::semantic::reporting::{collect_frames, label_references};
use prqlc::semantic::NS_DEFAULT_DB;
use prqlc::{ast, prql_to_tokens};
use prqlc::{ir::pl::Lineage, ir::Span};
use prqlc::{pl_to_prql, pl_to_rq_tree, prql_to_pl, prql_to_pl_tree, rq_to_sql, SourceTree};
use prqlc::{Options, Target};
Expand Down Expand Up @@ -79,6 +79,14 @@ enum Command {
format: Format,
},

/// Lex into Tokens
Lex {
#[command(flatten)]
io_args: IoArgs,
#[arg(value_enum, long, default_value = "yaml")]
format: Format,
},

/// Parse & generate PRQL code back
#[command(name = "fmt")]
Format {
Expand Down Expand Up @@ -288,6 +296,17 @@ impl Command {
Format::Yaml => serde_yaml::to_string(&ast)?.into_bytes(),
}
}
Command::Lex { format, .. } => {
let s = sources.sources.values().exactly_one().or_else(|_| {
// TODO: allow multiple sources
bail!("Currently `lex` only works with a single source, but found multiple sources")
})?;
let tokens = prql_to_tokens(s)?;
match format {
Format::Json => serde_json::to_string_pretty(&tokens)?.into_bytes(),
Format::Yaml => serde_yaml::to_string(&tokens)?.into_bytes(),
}
}
Command::Collect(_) => {
let mut root_module_def = prql_to_pl_tree(sources)?;

Expand Down Expand Up @@ -429,7 +448,7 @@ impl Command {
}
}

_ => unreachable!(),
_ => unreachable!("Other commands shouldn't reach `execute`"),
})
}

Expand All @@ -438,11 +457,10 @@ impl Command {
// `input`, rather than matching on them and grabbing `input` from
// `self`? But possibly if everything moves to `io_args`, then this is
// quite reasonable?
use Command::{
Collect, Debug, Experimental, Parse, Resolve, SQLAnchor, SQLCompile, SQLPreprocess,
};
use Command::*;
let io_args = match self {
Parse { io_args, .. }
| Lex { io_args, .. }
| Collect(io_args)
| Resolve { io_args, .. }
| SQLCompile { io_args, .. }
Expand Down Expand Up @@ -481,10 +499,11 @@ impl Command {

fn write_output(&mut self, data: &[u8]) -> std::io::Result<()> {
use Command::{
Collect, Debug, Experimental, Parse, Resolve, SQLAnchor, SQLCompile, SQLPreprocess,
Collect, Debug, Experimental, Lex, Parse, Resolve, SQLAnchor, SQLCompile, SQLPreprocess,
};
let mut output = match self {
Parse { io_args, .. }
| Lex { io_args, .. }
| Collect(io_args)
| Resolve { io_args, .. }
| SQLCompile { io_args, .. }
Expand Down Expand Up @@ -815,4 +834,42 @@ sort full
column: 2
"###);
}

// Unit test for the `lex` subcommand: runs `Command::execute` on a
// `Command::Lex` configured for YAML output, feeding it the single source
// `from x | select y`, and snapshots the serialized tokens (a `kind` and a
// `span` with `start`/`end` for each token).
#[test]
fn lex() {
let output = Command::execute(
&Command::Lex {
io_args: IoArgs::default(),
format: Format::Yaml,
},
&mut "from x | select y".into(),
"",
)
.unwrap();

// TODO: terser output; maybe serialize span as `0..4`? Remove the
// `!Ident` complication?
// The output bytes are decoded as UTF-8 and trimmed before being compared
// with the inline snapshot below.
assert_snapshot!(String::from_utf8(output).unwrap().trim(), @r###"
- kind: !Ident from
span:
start: 0
end: 4
- kind: !Ident x
span:
start: 5
end: 6
- kind: !Control '|'
span:
start: 7
end: 8
- kind: !Ident select
span:
start: 9
end: 15
- kind: !Ident y
span:
start: 16
end: 17
"###);
}
}
6 changes: 6 additions & 0 deletions prqlc/prqlc/src/error_message.rs
Original file line number Diff line number Diff line change
Expand Up @@ -87,6 +87,12 @@ impl From<Error> for ErrorMessage {
}
}

impl From<Vec<ErrorMessage>> for ErrorMessages {
fn from(errors: Vec<ErrorMessage>) -> Self {
ErrorMessages { inner: errors }
}
}

#[derive(Debug, Clone, Serialize)]
pub struct ErrorMessages {
pub inner: Vec<ErrorMessage>,
Expand Down
11 changes: 11 additions & 0 deletions prqlc/prqlc/src/lib.rs
Original file line number Diff line number Diff line change
Expand Up @@ -113,6 +113,7 @@ pub static COMPILER_VERSION: Lazy<Version> =
Lazy::new(|| Version::parse(env!("CARGO_PKG_VERSION")).expect("Invalid prqlc version number"));

use once_cell::sync::Lazy;
use prqlc_parser::TokenVec;
use semver::Version;
use serde::{Deserialize, Serialize};
use std::{collections::HashMap, path::PathBuf, str::FromStr};
Expand Down Expand Up @@ -306,6 +307,16 @@ pub enum DisplayOptions {
#[cfg(doctest)]
pub struct ReadmeDoctests;

/// Lex PRQL source into tokens.
///
/// Delegates to `prqlc_parser::lex_source`.
///
/// # Errors
///
/// If lexing fails, every error reported by the lexer is converted into an
/// [`ErrorMessage`] and the whole list is returned as [`ErrorMessages`].
pub fn prql_to_tokens(prql: &str) -> Result<TokenVec, ErrorMessages> {
    match prqlc_parser::lex_source(prql) {
        Ok(tokens) => Ok(tokens),
        Err(lex_errors) => {
            // Convert each lexer error individually, then wrap the list.
            let messages: Vec<ErrorMessage> = lex_errors.into_iter().map(Into::into).collect();
            Err(messages.into())
        }
    }
}

/// Parse PRQL into a PL AST
// TODO: rename this to `prql_to_pl_simple`
pub fn prql_to_pl(prql: &str) -> Result<ast::ModuleDef, ErrorMessages> {
Expand Down
47 changes: 47 additions & 0 deletions prqlc/prqlc/tests/integration/cli.rs
Original file line number Diff line number Diff line change
Expand Up @@ -17,6 +17,7 @@ fn help() {
Commands:
parse Parse into PL AST
lex Lex into Tokens
fmt Parse & generate PRQL code back
collect Parse the whole project and collect it into a single PRQL source file
debug Commands for meant for debugging, prone to change
Expand Down Expand Up @@ -471,3 +472,49 @@ fn compile_no_prql_files() {
"###);
}

// Integration test for `prqlc lex`: pipes `from tracks` to the command on
// stdin and snapshots exit status, stdout and stderr, once with no format
// flag and once with `--format=json`.
#[test]
fn lex() {
// No `--format` flag: the expected stdout is the YAML-style token list.
assert_cmd_snapshot!(prqlc_command().args(["lex"]).pass_stdin("from tracks"), @r###"
success: true
exit_code: 0
----- stdout -----
- kind: !Ident from
span:
start: 0
end: 4
- kind: !Ident tracks
span:
start: 5
end: 11
----- stderr -----
"###);

// Same input with `--format=json`: the expected stdout is a JSON array of
// the same two tokens.
assert_cmd_snapshot!(prqlc_command().args(["lex", "--format=json"]).pass_stdin("from tracks"), @r###"
success: true
exit_code: 0
----- stdout -----
[
{
"kind": {
"Ident": "from"
},
"span": {
"start": 0,
"end": 4
}
},
{
"kind": {
"Ident": "tracks"
},
"span": {
"start": 5,
"end": 11
}
}
]
----- stderr -----
"###);
}
Loading

0 comments on commit db24427

Please sign in to comment.