Skip to content

Commit

Permalink
Merge pull request #12 from noritada/feat/support-for-format-variants
Browse files Browse the repository at this point in the history
Support for some format variants

This PR adds support for some format variants.

Some systems that produce data in the WNI RU format emit variant formats
that differ slightly from the format described in the specifications this library was developed against.
This PR adds support for the following three cases:

- Allow a trailing comma in the schema description
- Allow an empty field name in the schema description when it is the only field in the schema
- Allow use of `<N>STR` instead of `<N>NSTR` in the schema description

Each of these cases can be optionally enabled using flags.

All of these options are enabled in the CLI and the web demo app.
  • Loading branch information
noritada authored Oct 22, 2024
2 parents 1b6325d + 3f75ea8 commit 0f70b87
Show file tree
Hide file tree
Showing 11 changed files with 198 additions and 51 deletions.
5 changes: 4 additions & 1 deletion cli/src/command/dump.rs
Original file line number Diff line number Diff line change
Expand Up @@ -17,7 +17,10 @@ pub(crate) fn cli() -> Command {

pub(crate) async fn exec(args: &ArgMatches) -> Result<()> {
let fname = args.get_one::<String>("PATH_OR_URI").unwrap();
let options = DataReaderOptions::ENABLE_READING_BODY;
let options = DataReaderOptions::ALLOW_TRAILING_COMMA
| DataReaderOptions::ALLOW_EMPTY_FIELD_NAME
| DataReaderOptions::ALLOW_STR_INSTEAD_OF_NSTR
| DataReaderOptions::ENABLE_READING_BODY;
let options = if args.get_flag("ignore-size") {
options.union(DataReaderOptions::IGNORE_DATA_SIZE_FIELD)
} else {
Expand Down
4 changes: 3 additions & 1 deletion cli/src/command/header.rs
Original file line number Diff line number Diff line change
Expand Up @@ -20,7 +20,9 @@ pub(crate) fn cli() -> Command {
pub(crate) async fn exec(args: &ArgMatches) -> Result<()> {
let fname = args.get_one::<String>("PATH_OR_URI").unwrap();
let n_bytes = args.get_one::<usize>("N").unwrap();
let options = DataReaderOptions::default();
let options = DataReaderOptions::ALLOW_TRAILING_COMMA
| DataReaderOptions::ALLOW_EMPTY_FIELD_NAME
| DataReaderOptions::ALLOW_STR_INSTEAD_OF_NSTR;
let (_, header, _) = read_from_source(fname, Some(n_bytes), options).await?;

println!("{}", HeaderDisplay(&header));
Expand Down
4 changes: 3 additions & 1 deletion cli/src/command/schema.rs
Original file line number Diff line number Diff line change
Expand Up @@ -23,7 +23,9 @@ pub(crate) fn cli() -> Command {
pub(crate) async fn exec(args: &ArgMatches) -> Result<()> {
let fname = args.get_one::<String>("PATH_OR_URI").unwrap();
let n_bytes = args.get_one::<usize>("N").unwrap();
let options = DataReaderOptions::default();
let options = DataReaderOptions::ALLOW_TRAILING_COMMA
| DataReaderOptions::ALLOW_EMPTY_FIELD_NAME
| DataReaderOptions::ALLOW_STR_INSTEAD_OF_NSTR;
let (schema, _, _) = read_from_source(fname, Some(n_bytes), options).await?;

if args.get_flag("tree") {
Expand Down
5 changes: 3 additions & 2 deletions cli/src/visitor.rs
Original file line number Diff line number Diff line change
Expand Up @@ -196,7 +196,7 @@ fn prettify_special_field_name(name: &str) -> &str {

#[cfg(test)]
mod tests {
use rrr::Schema;
use rrr::{parse, DataReaderOptions};

use super::*;

Expand All @@ -205,7 +205,8 @@ mod tests {
#[test]
fn $name() {
let input = $input;
let schema = input.parse::<Schema>().unwrap();
let options = DataReaderOptions::default();
let schema = parse(input.as_bytes(), options).unwrap();
let actual = format!("{}", SchemaTreeDisplay(&schema.ast));
let actual = console::strip_ansi_codes(&actual);
let expected = $expected;
Expand Down
172 changes: 140 additions & 32 deletions src/ast.rs
Original file line number Diff line number Diff line change
@@ -1,32 +1,18 @@
use std::str::FromStr;
use crate::{param::ParamStack, DataReaderOptions};

use crate::param::ParamStack;
pub fn parse(bytes: &[u8], options: DataReaderOptions) -> Result<Schema, crate::Error> {
let parser = SchemaParser::new(bytes, options);
parser
.parse()
.map_err(|e| crate::Error::Schema(e, bytes.to_vec()))
}

/// A parsed schema, as returned by [`parse`].
#[derive(Debug, PartialEq, Eq)]
pub struct Schema {
/// Syntax tree of the schema description.
pub ast: Ast,
/// NOTE(review): presumably the parameter stack collected by the parser while reading
/// the schema — confirm against `SchemaParser::parse`.
pub params: ParamStack,
}

impl TryFrom<&[u8]> for Schema {
type Error = crate::Error;

fn try_from(bytes: &[u8]) -> Result<Self, Self::Error> {
let parser = SchemaParser::new(bytes);
parser
.parse()
.map_err(|e| crate::Error::Schema(e, bytes.to_vec()))
}
}

impl FromStr for Schema {
type Err = crate::Error;

fn from_str(s: &str) -> Result<Self, Self::Err> {
<Self>::try_from(s.as_bytes())
}
}

#[derive(Debug, PartialEq, Eq)]
pub struct Ast {
pub kind: AstKind,
Expand Down Expand Up @@ -86,19 +72,35 @@ struct SchemaParser<'b> {
lexer: std::iter::Peekable<SchemaLexer<'b>>,
location: Location,
params: ParamStack,
options: DataReaderOptions,
}

impl<'b> SchemaParser<'b> {
fn new(input: &'b [u8]) -> Self {
fn new(input: &'b [u8], options: DataReaderOptions) -> Self {
Self {
lexer: SchemaLexer::new(input).peekable(),
location: Location(0, 0),
params: ParamStack::new(),
options,
}
}

fn parse(mut self) -> Result<Schema, SchemaParseError> {
let kind = self.parse_field_list()?;
let kind = if self
.options
.contains(DataReaderOptions::ALLOW_EMPTY_FIELD_NAME)
&& matches!(
self.lexer.peek(),
Some(Ok(Token {
kind: TokenKind::Colon,
..
}))
) {
self.parse_field_with_empty_name()?
} else {
self.parse_field_list()?
};

if let Some(result) = self.lexer.next() {
// should be TokenKind::RBracket
let token = result.unwrap();
Expand All @@ -116,6 +118,17 @@ impl<'b> SchemaParser<'b> {
Ok(schema)
}

/// Parses a schema made of a single field whose name is empty (`:TYPE`).
///
/// Consumes the leading colon, parses the type that follows, and wraps the resulting
/// nameless member in a one-element struct.
fn parse_field_with_empty_name(&mut self) -> Result<AstKind, SchemaParseError> {
    self.consume_symbol(TokenKind::Colon)?;

    let nameless_field = Ast {
        kind: self.parse_type()?,
        name: String::new(),
    };
    Ok(AstKind::Struct(vec![nameless_field]))
}

fn parse_field_list(&mut self) -> Result<AstKind, SchemaParseError> {
let mut members = Vec::new();

Expand Down Expand Up @@ -148,6 +161,20 @@ impl<'b> SchemaParser<'b> {
if self.next_token()?.kind != TokenKind::Comma {
return Err(self.err_unexpected_token());
}

if self
.options
.contains(DataReaderOptions::ALLOW_TRAILING_COMMA)
&& matches!(
self.lexer.peek(),
None | Some(Ok(Token {
kind: TokenKind::RBracket,
..
}))
)
{
break;
}
}

if members.is_empty() {
Expand Down Expand Up @@ -201,7 +228,12 @@ impl<'b> SchemaParser<'b> {
self.consume_symbol(TokenKind::RAngleBracket)?;

if let TokenKind::Ident(s) = self.next_token()?.kind {
if s.as_str() != "NSTR" {
if !(s.as_str() == "NSTR"
|| (self
.options
.contains(DataReaderOptions::ALLOW_STR_INSTEAD_OF_NSTR)
&& s.as_str() == "STR"))
{
return Err(self.err_unexpected_token());
}
} else {
Expand Down Expand Up @@ -460,7 +492,7 @@ mod tests {
#[test]
fn parse_single_field() {
let input = "fld1:INT16";
let parser = SchemaParser::new(input.as_bytes());
let parser = SchemaParser::new(input.as_bytes(), DataReaderOptions::default());
let actual = parser.parse();
let expected_ast = Ast {
name: "".to_owned(),
Expand All @@ -480,7 +512,7 @@ mod tests {
#[test]
fn parse_single_struct() {
let input = "fld1:[sfld1:<4>NSTR,sfld2:STR,sfld3:INT32]";
let parser = SchemaParser::new(input.as_bytes());
let parser = SchemaParser::new(input.as_bytes(), DataReaderOptions::default());
let actual = parser.parse();
let expected_ast = Ast {
name: "".to_owned(),
Expand Down Expand Up @@ -513,7 +545,7 @@ mod tests {
#[test]
fn parse_nested_struct() {
let input = "fld1:[sfld1:[ssfld1:<4>NSTR,ssfld2:STR,ssfld3:INT32]]";
let parser = SchemaParser::new(input.as_bytes());
let parser = SchemaParser::new(input.as_bytes(), DataReaderOptions::default());
let actual = parser.parse();
let expected_ast = Ast {
name: "".to_owned(),
Expand Down Expand Up @@ -549,7 +581,7 @@ mod tests {
#[test]
fn parse_single_fixed_length_builtin_type_array() {
let input = "fld1:{3}INT8";
let parser = SchemaParser::new(input.as_bytes());
let parser = SchemaParser::new(input.as_bytes(), DataReaderOptions::default());
let actual = parser.parse();
let expected_ast = Ast {
name: "".to_owned(),
Expand All @@ -575,7 +607,7 @@ mod tests {
#[test]
fn parse_single_fixed_length_struct_array() {
let input = "fld1:{3}[sfld1:<4>NSTR,sfld2:STR,sfld3:INT32]";
let parser = SchemaParser::new(input.as_bytes());
let parser = SchemaParser::new(input.as_bytes(), DataReaderOptions::default());
let actual = parser.parse();
let expected_ast = Ast {
name: "".to_owned(),
Expand Down Expand Up @@ -614,7 +646,7 @@ mod tests {
#[test]
fn parse_single_variable_length_struct_array() {
let input = "fld1:INT8,fld2:{fld1}[sfld1:<4>NSTR,sfld2:STR,sfld3:INT32]";
let parser = SchemaParser::new(input.as_bytes());
let parser = SchemaParser::new(input.as_bytes(), DataReaderOptions::default());
let actual = parser.parse();
let expected_ast = Ast {
name: "".to_owned(),
Expand Down Expand Up @@ -662,7 +694,7 @@ mod tests {
#[test]
fn parse_single_unlimited_length_struct_array() {
let input = "fld1:+[sfld1:<4>NSTR,sfld2:STR,sfld3:INT32]";
let parser = SchemaParser::new(input.as_bytes());
let parser = SchemaParser::new(input.as_bytes(), DataReaderOptions::default());
let actual = parser.parse();
let expected_ast = Ast {
name: "".to_owned(),
Expand Down Expand Up @@ -698,12 +730,88 @@ mod tests {
assert_eq!(actual, expected);
}

// Generates one `#[test]` per `(name, input, options, success_expected)` tuple.
// Each test parses `input` with the given options and checks only whether parsing
// succeeded, not the resulting AST or the error details.
macro_rules! test_format_options_support {
    ($(($name:ident, $input:expr, $options:expr, $success_expected:expr),)*) => {
        $(
            #[test]
            fn $name() {
                let schema_text = $input;
                let outcome = SchemaParser::new(schema_text.as_bytes(), $options).parse();

                assert_eq!(outcome.is_ok(), $success_expected);
            }
        )*
    };
}

// Each tuple is (test name, schema text, reader options, whether parsing must succeed).
test_format_options_support! {
// --- ALLOW_TRAILING_COMMA ---
(
trailing_comma_not_allowed,
"fld1:[sfld1:<4>NSTR,sfld2:STR,sfld3:INT32,],",
DataReaderOptions::default(),
false
),
(
trailing_comma_allowed,
"fld1:[sfld1:<4>NSTR,sfld2:STR,sfld3:INT32,],",
DataReaderOptions::ALLOW_TRAILING_COMMA,
true
),
// Only a single trailing comma is tolerated; repeated commas stay an error.
(
multiple_trailing_commas_not_allowed_even_when_trailing_comma_is_allowed,
"fld1:[sfld1:<4>NSTR,sfld2:STR,sfld3:INT32,,],,",
DataReaderOptions::ALLOW_TRAILING_COMMA,
false
),
(
double_commas_not_allowed_even_when_trailing_comma_is_allowed,
"fld1:[sfld1:<4>NSTR,sfld2:STR,,sfld3:INT32]",
DataReaderOptions::ALLOW_TRAILING_COMMA,
false
),
// --- ALLOW_EMPTY_FIELD_NAME ---
(
empty_field_name_not_allowed,
":+UINT8",
DataReaderOptions::default(),
false
),
(
empty_field_name_allowed,
":+UINT8",
DataReaderOptions::ALLOW_EMPTY_FIELD_NAME,
true
),
// The empty name is accepted only when that field is the whole schema.
(
empty_field_name_not_allowed_when_there_are_other_fields,
":UINT8,fld1:INT8",
DataReaderOptions::ALLOW_EMPTY_FIELD_NAME,
false
),
(
empty_field_name_not_allowed_when_trailing_comma_exists,
":UINT8,",
DataReaderOptions::ALLOW_TRAILING_COMMA | DataReaderOptions::ALLOW_EMPTY_FIELD_NAME,
false
),
// --- ALLOW_STR_INSTEAD_OF_NSTR ---
(
str_instead_of_nstr_not_allowed,
"fld1:<4>NSTR,fld2:<4>STR",
DataReaderOptions::default(),
false
),
(
str_instead_of_nstr_allowed,
"fld1:<4>NSTR,fld2:<4>STR",
DataReaderOptions::ALLOW_STR_INSTEAD_OF_NSTR,
true
),
}

macro_rules! test_parse_errors {
($(($name:ident, $input:expr, $kind:ident, $start:expr, $end:expr),)*) => ($(
#[test]
fn $name() {
let input = $input;
let parser = SchemaParser::new(input.as_bytes());
let parser = SchemaParser::new(input.as_bytes(), DataReaderOptions::default());
let actual = parser.parse();
let expected = Err(SchemaParseError {
kind: SchemaParseErrorKind::$kind,
Expand Down
10 changes: 6 additions & 4 deletions src/lib.rs
Original file line number Diff line number Diff line change
Expand Up @@ -9,7 +9,7 @@ mod walker;
use std::borrow::Cow;

pub use crate::{
ast::{Ast, AstKind, Len, Location, Schema, SchemaParseError, SchemaParseErrorKind},
ast::{parse, Ast, AstKind, Len, Location, Schema, SchemaParseError, SchemaParseErrorKind},
reader::{DataReader, DataReaderOptions},
utils::json_escape_str,
visitor::{AstVisitor, JsonDisplay, JsonFormattingStyle, SchemaOnelineDisplay},
Expand Down Expand Up @@ -100,21 +100,23 @@ mod tests {

use super::*;
use crate::{
ast::{Schema, Size},
ast::{parse, Schema, Size},
value::{Number, Value, ValueTree},
walker::BufWalker,
};

fn schema_without_str() -> Result<Schema, Error> {
let options = DataReaderOptions::default();
let ast = "date:[year:UINT16,month:UINT8,day:UINT8],\
data:{4}[loc:<4>NSTR,temp:INT16,rhum:UINT16],comment:<16>NSTR";
ast.parse()
parse(ast.as_bytes(), options)
}

fn schema_with_str() -> Result<Schema, Error> {
let options = DataReaderOptions::default();
let ast = "date:[year:UINT16,month:UINT8,day:UINT8],\
data:{4}[loc:STR,temp:INT16,rhum:UINT16],comment:<16>NSTR";
ast.parse()
parse(ast.as_bytes(), options)
}

#[test]
Expand Down
7 changes: 5 additions & 2 deletions src/reader.rs
Original file line number Diff line number Diff line change
Expand Up @@ -6,7 +6,10 @@ use std::{
use flate2::read::GzDecoder;
pub use options::DataReaderOptions;

use crate::{ast::Schema, Error};
use crate::{
ast::{parse, Schema},
Error,
};

mod options;

Expand Down Expand Up @@ -36,7 +39,7 @@ where
let map = self.read_header_fields()?;

let schema = map.get_required_field("format")?;
let schema: Schema = schema.as_slice().try_into()?;
let schema = parse(schema.as_slice(), self.options)?;

let body = if self
.options
Expand Down
Loading

0 comments on commit 0f70b87

Please sign in to comment.