diff --git a/lib/src/compiler/emit.rs b/lib/src/compiler/emit.rs index 44f65bbd6..3bab70f35 100644 --- a/lib/src/compiler/emit.rs +++ b/lib/src/compiler/emit.rs @@ -22,7 +22,7 @@ use walrus::{FunctionId, InstrSeqBuilder, ValType}; use crate::compiler::ir::{ Expr, ForIn, ForOf, Iterable, MatchAnchor, Of, OfItems, PatternIdx, - Quantifier, + Quantifier, With, }; use crate::compiler::{ LiteralId, PatternId, RegexpId, RuleId, RuleInfo, Var, VarStackFrame, @@ -635,6 +635,10 @@ fn emit_expr( } }, + Expr::With(with) => { + emit_with(ctx, instr, with); + } + Expr::FuncCall(fn_call) => { // Emit the arguments first. for expr in fn_call.args.iter_mut() { @@ -2134,6 +2138,31 @@ fn emit_for( }); } +/// Emits the code for a `with` statement. +/// +/// Each `with` statement has a list of `identifier = expression` pairs. +/// The pairs are stored in the `declarations` field of the `with` +/// statement, as `(variable, expression)` tuples. +/// For each pair, the code emitted by this function sets the variable +/// corresponding to the identifier to the value of the emitted expression. +/// Those variables are later used in the condition of the `with` statement. +fn emit_with( + ctx: &mut EmitContext, + instr: &mut InstrSeqBuilder, + with: &mut With, +) { + // Emit the code that sets the variables in the `with` statement. + for (id, expr) in with.declarations.iter_mut() { + set_var(ctx, instr, *id, |ctx, instr| { + emit_expr(ctx, instr, expr); + }); + } + + // Emit the code that evaluates the condition of the `with` statement. + // The condition is a boolean expression that uses the variables above. + emit_bool_expr(ctx, instr, &mut with.condition) +} + /// Produces a switch statement by calling a `branch_generator` function /// multiple times. 
/// diff --git a/lib/src/compiler/ir/ast2ir.rs b/lib/src/compiler/ir/ast2ir.rs index a3f4fd028..f7ed0fd61 100644 --- a/lib/src/compiler/ir/ast2ir.rs +++ b/lib/src/compiler/ir/ast2ir.rs @@ -24,7 +24,7 @@ use crate::compiler::ir::hex2hir::hex_pattern_hir_from_ast; use crate::compiler::ir::{ Expr, ForIn, ForOf, FuncCall, Iterable, LiteralPattern, Lookup, MatchAnchor, Of, OfItems, Pattern, PatternFlagSet, PatternFlags, - PatternIdx, PatternInRule, Quantifier, Range, RegexpPattern, + PatternIdx, PatternInRule, Quantifier, Range, RegexpPattern, With, }; use crate::compiler::report::ReportBuilder; use crate::compiler::{warnings, CompileContext, CompileError}; @@ -520,6 +520,7 @@ pub(in crate::compiler) fn expr_from_ast( ast::Expr::Of(of) => of_expr_from_ast(ctx, of), ast::Expr::ForOf(for_of) => for_of_expr_from_ast(ctx, for_of), ast::Expr::ForIn(for_in) => for_in_expr_from_ast(ctx, for_in), + ast::Expr::With(with) => with_expr_from_ast(ctx, with), ast::Expr::FuncCall(fn_call) => func_call_from_ast(ctx, fn_call), ast::Expr::FieldAccess(expr) => { @@ -1118,6 +1119,52 @@ fn for_in_expr_from_ast( }))) } +/// Builds the IR for a `with` expression. +/// +/// Every declared expression is compiled exactly once and paired with a +/// newly allocated variable. The declared identifiers are put into scope +/// only after all declarations are compiled, and are removed again once the +/// condition has been compiled. +fn with_expr_from_ast( + ctx: &mut CompileContext, + with: &ast::With, +) -> Result { + // Create stack frame with capacity for the with statement variables + let mut stack_frame = ctx.vars.new_frame(with.declarations.len() as i32); + let mut symbols = SymbolTable::new(); + let mut declarations = Vec::with_capacity(with.declarations.len()); + + // Iterate over all items in the with statement and create a new variable + // for each one. Each expression is compiled only once and stored together + // with its variable as a `(Var, Expr)` pair. + for item in with.declarations.iter() { + let expr = expr_from_ast(ctx, &item.expression)?; + let type_value = expr.type_value().clone_without_value(); + let var = stack_frame.new_var(type_value.ty()); + + // Move the already compiled expression instead of compiling it again. + declarations.push((var, expr)); + + // Insert the variable into the symbol table. 
+ symbols.insert( + item.identifier.name, + Symbol::new(type_value, SymbolKind::Var(var)), + ); + } + + // Put the with variables into scope. + ctx.symbol_table.push(Rc::new(symbols)); + + let condition = bool_expr_from_ast(ctx, &with.condition)?; + + // Leaving with statement condition's scope. Remove with statement variables. + ctx.symbol_table.pop(); + + ctx.vars.unwind(&stack_frame); + + Ok(Expr::With(Box::new(With { declarations, condition }))) +} + fn iterable_from_ast( ctx: &mut CompileContext, iter: &ast::Iterable, diff --git a/lib/src/compiler/ir/dfs.rs b/lib/src/compiler/ir/dfs.rs index 87a7ef38a..edbe8cb9d 100644 --- a/lib/src/compiler/ir/dfs.rs +++ b/lib/src/compiler/ir/dfs.rs @@ -208,10 +208,18 @@ impl<'a> Iterator for DepthFirstSearch<'a> { } push_quantifier(&for_in.quantifier, &mut self.stack); } + Expr::Lookup(lookup) => { self.stack.push(Event::Enter(&lookup.index)); self.stack.push(Event::Enter(&lookup.primary)); } + + Expr::With(with) => { + self.stack.push(Event::Enter(&with.condition)); + for (_id, expr) in with.declarations.iter().rev() { + self.stack.push(Event::Enter(expr)) + } + } } } diff --git a/lib/src/compiler/ir/mod.rs b/lib/src/compiler/ir/mod.rs index ba91672b5..f360298be 100644 --- a/lib/src/compiler/ir/mod.rs +++ b/lib/src/compiler/ir/mod.rs @@ -552,6 +552,9 @@ pub(in crate::compiler) enum Expr { /// A `for in ...` expression. (e.g. `for all i in (1..100) : ( ... )`) ForIn(Box), + /// A `with` expression. (e.g. `with foo = 1 + 1 : ( ... )`) + With(Box), + /// Array or dictionary lookup expression (e.g. `array[1]`, `dict["key"]`) Lookup(Box), } @@ -610,6 +613,13 @@ pub(in crate::compiler) struct ForIn { pub stack_frame: VarStackFrame, } +/// A `with` expression (e.g. `with foo = 1 + 1 : (..)`) +#[derive(Debug)] +pub(in crate::compiler) struct With { + pub declarations: Vec<(Var, Expr)>, + pub condition: Expr, +} + /// A quantifier used in `for` and `of` expressions. 
#[derive(Debug)] pub(in crate::compiler) enum Quantifier { @@ -878,7 +888,8 @@ impl Expr { | Expr::PatternMatchVar { .. } | Expr::Of(_) | Expr::ForOf(_) - | Expr::ForIn(_) => Type::Bool, + | Expr::ForIn(_) + | Expr::With(_) => Type::Bool, Expr::Minus { operand, .. } => match operand.ty() { Type::Integer => Type::Integer, @@ -947,7 +958,8 @@ impl Expr { | Expr::PatternMatchVar { .. } | Expr::Of(_) | Expr::ForOf(_) - | Expr::ForIn(_) => TypeValue::Bool(Value::Unknown), + | Expr::ForIn(_) + | Expr::With(_) => TypeValue::Bool(Value::Unknown), Expr::Minus { operand, .. } => match operand.ty() { Type::Integer => TypeValue::Integer(Value::Unknown), @@ -1193,6 +1205,7 @@ impl Debug for Expr { Expr::Of(_) => writeln!(f, "OF")?, Expr::ForOf(_) => writeln!(f, "FOR_OF")?, Expr::ForIn(_) => writeln!(f, "FOR_IN")?, + Expr::With(_) => writeln!(f, "WITH")?, Expr::Lookup(_) => writeln!(f, "LOOKUP")?, Expr::PatternMatch { pattern, anchor } => writeln!( f, diff --git a/lib/src/compiler/tests/testdata/errors/135.in b/lib/src/compiler/tests/testdata/errors/135.in new file mode 100644 index 000000000..f2edb2bc2 --- /dev/null +++ b/lib/src/compiler/tests/testdata/errors/135.in @@ -0,0 +1,4 @@ +rule test { + condition: + with foo = "foo" : ( bar ) +} \ No newline at end of file diff --git a/lib/src/compiler/tests/testdata/errors/135.out b/lib/src/compiler/tests/testdata/errors/135.out new file mode 100644 index 000000000..94faa02ba --- /dev/null +++ b/lib/src/compiler/tests/testdata/errors/135.out @@ -0,0 +1,6 @@ +error[E009]: unknown identifier `bar` + --> line:3:26 + | +3 | with foo = "foo" : ( bar ) + | ^^^ this identifier has not been declared + | \ No newline at end of file diff --git a/lib/src/compiler/tests/testdata/errors/136.in b/lib/src/compiler/tests/testdata/errors/136.in new file mode 100644 index 000000000..b2fda0fc6 --- /dev/null +++ b/lib/src/compiler/tests/testdata/errors/136.in @@ -0,0 +1,5 @@ +rule test { + condition: + with foo = "foo" : ( foo == "foo" ) + and foo == 
"foo" +} \ No newline at end of file diff --git a/lib/src/compiler/tests/testdata/errors/136.out b/lib/src/compiler/tests/testdata/errors/136.out new file mode 100644 index 000000000..cee8b78e0 --- /dev/null +++ b/lib/src/compiler/tests/testdata/errors/136.out @@ -0,0 +1,6 @@ +error[E009]: unknown identifier `foo` + --> line:4:12 + | +4 | and foo == "foo" + | ^^^ this identifier has not been declared + | \ No newline at end of file diff --git a/lib/src/compiler/tests/testdata/errors/72.out b/lib/src/compiler/tests/testdata/errors/72.out index 2fc25c5e8..6f84f58b5 100644 --- a/lib/src/compiler/tests/testdata/errors/72.out +++ b/lib/src/compiler/tests/testdata/errors/72.out @@ -2,5 +2,5 @@ error[E001]: syntax error --> line:1:24 | 1 | rule test { condition: } - | ^ expecting expression, `for`, `all`, `none` or `any`, found `}` + | ^ expecting expression, `for`, `all`, `none`, `any` or `with`, found `}` | \ No newline at end of file diff --git a/lib/src/tests/mod.rs b/lib/src/tests/mod.rs index b423ea439..76d3cc2a7 100644 --- a/lib/src/tests/mod.rs +++ b/lib/src/tests/mod.rs @@ -514,6 +514,89 @@ fn for_in() { ); } +#[test] +fn with() { + condition_true!(r#"with foo = 1 + 1 : (foo == 2)"#); + condition_false!(r#"with foo = 1 + 1 : (foo == 3)"#); + condition_true!(r#"with foo = 1 + 1, bar = 2 + 2 : (foo + bar == 6)"#); + condition_false!(r#"with foo = 1 + 1, bar = 2 + 2 : (foo + bar == 7)"#); + + #[cfg(feature = "test_proto2-module")] + condition_true!(r#"with foo = test_proto2.array_int64[0]: (foo == 1)"#); + + #[cfg(feature = "test_proto2-module")] + condition_false!( + r#"with foo = test_proto2.array_int64[test_proto2.int64_zero]: (foo == 10)"# + ); + + #[cfg(feature = "test_proto2-module")] + condition_true!( + r#"with foo = test_proto2.map_string_struct["foo"].nested_int64_one: (foo == 1)"# + ); + + #[cfg(feature = "test_proto2-module")] + condition_true!( + r#"with foo = test_proto2.nested: (foo.nested_int64_one == 1 )"# + ); + + #[cfg(feature = 
"test_proto2-module")] + condition_false!( + r#"with foo = test_proto2.nested: (foo.nested_int64_one == 0 )"# + ); + + #[cfg(feature = "test_proto2-module")] + condition_true!( + r#"with foo = test_proto2.uppercase("foo"): (foo == "FOO" )"# + ); + + #[cfg(feature = "test_proto2-module")] + condition_false!( + r#"with foo = test_proto2.uppercase("foo"): (foo == "FoO" )"# + ); + + #[cfg(feature = "test_proto2-module")] + condition_true!( + r#"with + bar = test_proto2.array_string[1], + baz = test_proto2.array_string[2]: + ( + bar == "bar" and baz == "baz" + ) + "# + ); + + #[cfg(feature = "test_proto2-module")] + condition_true!( + r#"for any i in (0..1): ( + with foo = test_proto2.array_int64[i]: (foo == 1) + )"# + ); + + #[cfg(feature = "test_proto2-module")] + condition_true!( + r#"for all i in (0..0): ( + with + foo = test_proto2.array_int64[i], + bar = test_proto2.array_int64[i + 1] : + ( + foo == 1 and bar == 10 + ) + )"# + ); + + #[cfg(feature = "test_proto2-module")] + condition_false!( + r#"for all i in (0..2): ( + with + foo = test_proto2.array_int64[i], + bar = test_proto2.array_int64[i + 1] : + ( + foo == 1 and bar == foo * 10 + ) + )"# + ); +} + #[test] fn text_patterns() { pattern_true!(r#""issi""#, b"mississippi"); diff --git a/parser/src/ast/ascii_tree.rs b/parser/src/ast/ascii_tree.rs index 4afe7efb6..cd6e7743e 100644 --- a/parser/src/ast/ascii_tree.rs +++ b/parser/src/ast/ascii_tree.rs @@ -424,9 +424,34 @@ pub(crate) fn expr_ascii_tree(expr: &Expr) -> Tree { Node(node_title, children) } + Expr::With(w) => Node( + "with : ( )".to_string(), + vec![ + Node( + "".to_string(), + w.declarations + .iter() + .flat_map(with_items_ascii_tree) + .collect(), + ), + Node( + "".to_string(), + vec![expr_ascii_tree(&w.condition)], + ), + ], + ), } } +/// Returns the tree nodes for one `with` declaration: a leaf holding the +/// identifier name, followed by the tree of the declared expression. +fn with_items_ascii_tree(declaration: &WithDeclaration) -> Vec { + vec![ + Leaf(vec![declaration.identifier.name.to_string()]), + expr_ascii_tree(&declaration.expression), + ] +} + pub(crate) fn 
quantifier_ascii_tree(quantifier: &Quantifier) -> Tree { match quantifier { Quantifier::None { .. } => Leaf(vec!["none".to_string()]), diff --git a/parser/src/ast/cst2ast.rs b/parser/src/ast/cst2ast.rs index 20c9e79c8..d00aef31c 100644 --- a/parser/src/ast/cst2ast.rs +++ b/parser/src/ast/cst2ast.rs @@ -905,6 +905,7 @@ impl<'src> Builder<'src> { } Event::Begin(FOR_EXPR) => self.for_expr()?, Event::Begin(OF_EXPR) => self.of_expr()?, + Event::Begin(WITH_EXPR) => self.with_expr()?, Event::Begin(EXPR) => self.pratt_parser(Self::expr, 0)?, event => panic!("unexpected {:?}", event), }; @@ -1018,6 +1019,49 @@ impl<'src> Builder<'src> { Ok(Expr::Of(Box::new(Of { span, quantifier, items, anchor }))) } + fn with_expr(&mut self) -> Result, Abort> { + self.begin(WITH_EXPR)?; + + let mut span = self.expect(WITH_KW)?; + + self.begin(WITH_DECLS)?; + + let declaration = + |i: &mut Self| -> Result, Abort> { + i.begin(WITH_DECL)?; + + let identifier = i.identifier()?; + let mut span = identifier.span(); + span = span.combine(&i.expect(EQUAL)?); + let expression = i.expr()?; + span = span.combine(&expression.span()); + + i.end(WITH_DECL)?; + + Ok(WithDeclaration { span, identifier, expression }) + }; + + let mut declarations = vec![declaration(self)?]; + + while let Event::Token { kind: COMMA, .. 
} = self.peek() { + self.expect(COMMA)?; + declarations.push(declaration(self)?); + } + + self.end(WITH_DECLS)?; + + self.expect(COLON)?; + self.expect(L_PAREN)?; + + let condition = self.boolean_expr()?; + + span = span.combine(&self.expect(R_PAREN)?); + + self.end(WITH_EXPR)?; + + Ok(Expr::With(Box::new(With { span, declarations, condition }))) + } + fn quantifier(&mut self) -> Result, Abort> { self.begin(QUANTIFIER)?; diff --git a/parser/src/ast/mod.rs b/parser/src/ast/mod.rs index eb58e21ee..3e4a6f1e4 100644 --- a/parser/src/ast/mod.rs +++ b/parser/src/ast/mod.rs @@ -364,6 +364,22 @@ pub enum OfItems<'src> { BoolExprTuple(Vec>), } +/// A `with` expression (e.g. `with foo = 1 + 1 : (..)`) +#[derive(Debug)] +pub struct With<'src> { + span: Span, + pub declarations: Vec>, + pub condition: Expr<'src>, +} + +/// A single `identifier = expression` declaration in a `with` expression. +#[derive(Debug)] +pub struct WithDeclaration<'src> { + span: Span, + pub identifier: Ident<'src>, + pub expression: Expr<'src>, +} + /// A quantifier used in `for` and `of` expressions. #[derive(Debug)] pub enum Quantifier<'src> { @@ -576,6 +592,9 @@ pub enum Expr<'src> { /// A `for in ...` expression. (e.g. `for all i in (1..100) : ( ... )`) ForIn(Box>), + + /// A `with` expression (e.g. `with foo = 1 + 1 : ( ... )`) + With(Box>), } /// A set of modifiers associated to a pattern. 
@@ -1047,6 +1066,18 @@ impl WithSpan for OfItems<'_> { } } +impl WithSpan for With<'_> { + fn span(&self) -> Span { + self.span.clone() + } +} + +impl WithSpan for WithDeclaration<'_> { + fn span(&self) -> Span { + self.span.clone() + } +} + impl WithSpan for Iterable<'_> { fn span(&self) -> Span { match self { @@ -1290,6 +1321,7 @@ impl WithSpan for Expr<'_> { Expr::ForOf(f) => f.span(), Expr::ForIn(f) => f.span(), Expr::Of(o) => o.span(), + Expr::With(w) => w.span(), } } } diff --git a/parser/src/cst/syntax_kind.rs b/parser/src/cst/syntax_kind.rs index 34d4ec6e2..dfc5ca3bd 100644 --- a/parser/src/cst/syntax_kind.rs +++ b/parser/src/cst/syntax_kind.rs @@ -43,6 +43,7 @@ pub enum SyntaxKind { TRUE_KW, WIDE_KW, XOR_KW, + WITH_KW, // Arithmetic operators ADD, @@ -127,6 +128,9 @@ pub enum SyntaxKind { BOOLEAN_TERM, FOR_EXPR, OF_EXPR, + WITH_EXPR, + WITH_DECLS, + WITH_DECL, ITERABLE, QUANTIFIER, EXPR_TUPLE, @@ -210,6 +214,7 @@ impl SyntaxKind { SyntaxKind::TRUE_KW => TokenId::TRUE_KW, SyntaxKind::WIDE_KW => TokenId::WIDE_KW, SyntaxKind::XOR_KW => TokenId::XOR_KW, + SyntaxKind::WITH_KW => TokenId::WITH_KW, SyntaxKind::ADD => TokenId::PLUS, SyntaxKind::SUB => TokenId::HYPHEN, @@ -311,6 +316,7 @@ impl From<&Token> for SyntaxKind { Token::TRUE_KW(_) => SyntaxKind::TRUE_KW, Token::WIDE_KW(_) => SyntaxKind::WIDE_KW, Token::XOR_KW(_) => SyntaxKind::XOR_KW, + Token::WITH_KW(_) => SyntaxKind::WITH_KW, // Bitwise operators Token::SHL(_) => SyntaxKind::SHL, diff --git a/parser/src/parser/mod.rs b/parser/src/parser/mod.rs index 0f696bd66..4c94df83f 100644 --- a/parser/src/parser/mod.rs +++ b/parser/src/parser/mod.rs @@ -1227,20 +1227,7 @@ impl<'src> ParserImpl<'src> { .end() } - /// Parses the condition block. 
- /// - /// ```text - /// CONDITION_BLK := `condition` `:` BOOLEAN_EXPR - /// `` - fn condition_blk(&mut self) -> &mut Self { - self.begin(CONDITION_BLK) - .expect(t!(CONDITION_KW)) - .expect(t!(COLON)) - .then(|p| p.boolean_expr()) - .end_with_recovery(t!(R_BRACE)) - } - - /// Parses the condition block. + /// Parses the hex pattern block. /// /// ```text /// HEX_PATTERN := `{` HEX_SUB_PATTERN `}` @@ -1254,7 +1241,7 @@ impl<'src> ParserImpl<'src> { .end() } - /// Parses the condition block. + /// Parses the hex sub pattern block. /// /// ```text /// HEX_SUB_PATTERN := @@ -1311,6 +1298,19 @@ impl<'src> ParserImpl<'src> { .end() } + /// Parses the condition block. + /// + /// ```text + /// CONDITION_BLK := `condition` `:` BOOLEAN_EXPR + /// ``` + fn condition_blk(&mut self) -> &mut Self { + self.begin(CONDITION_BLK) + .expect(t!(CONDITION_KW)) + .expect(t!(COLON)) + .then(|p| p.boolean_expr()) + .end_with_recovery(t!(R_BRACE)) + } + /// Parses a boolean expression. /// /// ```text @@ -1354,6 +1354,7 @@ impl<'src> ParserImpl<'src> { }) .alt(|p| p.for_expr()) .alt(|p| p.of_expr()) + .alt(|p| p.with_expr()) .alt(|p| { p.expr().zero_or_more(|p| { p.expect_d( @@ -1582,6 +1583,55 @@ impl<'src> ParserImpl<'src> { .end() } + /// Parses a `with` expression. + /// + /// ```text + /// WITH_EXPR := + /// `with` WITH_DECLS `:` + /// `(` + /// BOOLEAN_EXPR + /// `)` + /// ``` + fn with_expr(&mut self) -> &mut Self { + self.begin(WITH_EXPR) + .expect(t!(WITH_KW)) + .then(|p| p.with_declarations()) + .expect(t!(COLON)) + .expect(t!(L_PAREN)) + .then(|p| p.boolean_expr()) + .expect(t!(R_PAREN)) + .end() + } + + /// Parses the declarations of a `with` expression. + /// + /// ```text + /// WITH_DECLS := + /// WITH_DECL (`,` WITH_DECL)* + /// ``` + fn with_declarations(&mut self) -> &mut Self { + self.begin(WITH_DECLS) + .then(|p| p.with_declaration()) + .zero_or_more(|p| { + p.expect(t!(COMMA)).then(|p| p.with_declaration()) + }) + .end() + } + + /// Parses a `with` declaration. 
+ /// + /// ```text + /// WITH_DECL := + /// IDENT `=` EXPR + /// ``` + fn with_declaration(&mut self) -> &mut Self { + self.begin(WITH_DECL) + .expect(t!(IDENT)) + .expect(t!(EQUAL)) + .then(|p| p.expr()) + .end() + } + /// Parses quantifier. /// /// ```text diff --git a/parser/src/parser/tests/testdata/with-1.ast b/parser/src/parser/tests/testdata/with-1.ast new file mode 100644 index 000000000..e11cbf2e2 --- /dev/null +++ b/parser/src/parser/tests/testdata/with-1.ast @@ -0,0 +1,13 @@ + rule test + └─ condition + └─ with : ( ) + ├─ + │ ├─ two + │ └─ add + │ ├─ 1 + │ └─ 1 + └─ + └─ eq + ├─ two + └─ 2 + diff --git a/parser/src/parser/tests/testdata/with-1.cst b/parser/src/parser/tests/testdata/with-1.cst new file mode 100644 index 000000000..881db0037 --- /dev/null +++ b/parser/src/parser/tests/testdata/with-1.cst @@ -0,0 +1,58 @@ +SOURCE_FILE@0..78 + RULE_DECL@0..78 + RULE_KW@0..4 "rule" + WHITESPACE@4..5 " " + IDENT@5..9 "test" + WHITESPACE@9..10 " " + L_BRACE@10..11 "{" + NEWLINE@11..12 "\n" + WHITESPACE@12..13 "\t" + CONDITION_BLK@13..76 + CONDITION_KW@13..22 "condition" + COLON@22..23 ":" + NEWLINE@23..24 "\n" + WHITESPACE@24..26 "\t\t" + BOOLEAN_EXPR@26..76 + BOOLEAN_TERM@26..76 + WITH_EXPR@26..76 + WITH_KW@26..30 "with" + WHITESPACE@30..31 " " + WITH_DECLS@31..40 + WITH_DECL@31..40 + IDENT@31..34 "two" + WHITESPACE@34..35 " " + EQUAL@35..36 "=" + WHITESPACE@36..37 " " + EXPR@37..40 + TERM@37..38 + PRIMARY_EXPR@37..38 + INTEGER_LIT@37..38 "1" + ADD@38..39 "+" + TERM@39..40 + PRIMARY_EXPR@39..40 + INTEGER_LIT@39..40 "1" + WHITESPACE@40..41 " " + COLON@41..42 ":" + WHITESPACE@42..43 " " + L_PAREN@43..44 "(" + WHITESPACE@44..45 " " + NEWLINE@45..46 "\n" + WHITESPACE@46..58 " " + BOOLEAN_EXPR@58..66 + BOOLEAN_TERM@58..66 + EXPR@58..61 + TERM@58..61 + PRIMARY_EXPR@58..61 + IDENT@58..61 "two" + WHITESPACE@61..62 " " + EQ@62..64 "==" + WHITESPACE@64..65 " " + EXPR@65..66 + TERM@65..66 + PRIMARY_EXPR@65..66 + INTEGER_LIT@65..66 "2" + NEWLINE@66..67 "\n" + 
WHITESPACE@67..75 " " + R_PAREN@75..76 ")" + NEWLINE@76..77 "\n" + R_BRACE@77..78 "}" diff --git a/parser/src/parser/tests/testdata/with-1.in b/parser/src/parser/tests/testdata/with-1.in new file mode 100644 index 000000000..70ce64c5b --- /dev/null +++ b/parser/src/parser/tests/testdata/with-1.in @@ -0,0 +1,6 @@ +rule test { + condition: + with two = 1+1 : ( + two == 2 + ) +} \ No newline at end of file diff --git a/parser/src/parser/tests/testdata/with-2.ast b/parser/src/parser/tests/testdata/with-2.ast new file mode 100644 index 000000000..d5547b68e --- /dev/null +++ b/parser/src/parser/tests/testdata/with-2.ast @@ -0,0 +1,35 @@ + rule test + └─ condition + └─ with : ( ) + ├─ + │ ├─ first + │ ├─ [] + │ │ ├─ + │ │ │ └─ field access + │ │ │ ├─ foo + │ │ │ └─ bar + │ │ └─ + │ │ └─ 0 + │ ├─ last + │ └─ [] + │ ├─ + │ │ └─ field access + │ │ ├─ foo + │ │ └─ bar + │ └─ + │ └─ sub + │ ├─ bar_num_of_sections + │ └─ 1 + └─ + └─ and + ├─ eq + │ ├─ field access + │ │ ├─ first + │ │ └─ name + │ └─ "foo" + └─ eq + ├─ field access + │ ├─ bar + │ └─ value + └─ 0x200 + diff --git a/parser/src/parser/tests/testdata/with-2.cst b/parser/src/parser/tests/testdata/with-2.cst new file mode 100644 index 000000000..7e7dffbf2 --- /dev/null +++ b/parser/src/parser/tests/testdata/with-2.cst @@ -0,0 +1,106 @@ +SOURCE_FILE@0..162 + RULE_DECL@0..162 + RULE_KW@0..4 "rule" + WHITESPACE@4..5 " " + IDENT@5..9 "test" + WHITESPACE@9..10 " " + L_BRACE@10..11 "{" + NEWLINE@11..12 "\n" + WHITESPACE@12..13 "\t" + CONDITION_BLK@13..160 + CONDITION_KW@13..22 "condition" + COLON@22..23 ":" + NEWLINE@23..24 "\n" + WHITESPACE@24..26 "\t\t" + BOOLEAN_EXPR@26..160 + BOOLEAN_TERM@26..160 + WITH_EXPR@26..160 + WITH_KW@26..30 "with" + WHITESPACE@30..31 " " + WITH_DECLS@31..90 + WITH_DECL@31..49 + IDENT@31..36 "first" + WHITESPACE@36..37 " " + EQUAL@37..38 "=" + WHITESPACE@38..39 " " + EXPR@39..49 + TERM@39..49 + PRIMARY_EXPR@39..46 + IDENT@39..42 "foo" + DOT@42..43 "." 
+ IDENT@43..46 "bar" + L_BRACKET@46..47 "[" + EXPR@47..48 + TERM@47..48 + PRIMARY_EXPR@47..48 + INTEGER_LIT@47..48 "0" + R_BRACKET@48..49 "]" + COMMA@49..50 "," + WHITESPACE@50..51 " " + WITH_DECL@51..90 + IDENT@51..55 "last" + WHITESPACE@55..56 " " + EQUAL@56..57 "=" + WHITESPACE@57..58 " " + EXPR@58..90 + TERM@58..90 + PRIMARY_EXPR@58..65 + IDENT@58..61 "foo" + DOT@61..62 "." + IDENT@62..65 "bar" + L_BRACKET@65..66 "[" + EXPR@66..89 + TERM@66..85 + PRIMARY_EXPR@66..85 + IDENT@66..85 "bar_num_of_sections" + WHITESPACE@85..86 " " + SUB@86..87 "-" + WHITESPACE@87..88 " " + TERM@88..89 + PRIMARY_EXPR@88..89 + INTEGER_LIT@88..89 "1" + R_BRACKET@89..90 "]" + WHITESPACE@90..91 " " + COLON@91..92 ":" + WHITESPACE@92..93 " " + L_PAREN@93..94 "(" + WHITESPACE@94..95 " " + NEWLINE@95..96 "\n" + WHITESPACE@96..108 " " + BOOLEAN_EXPR@108..150 + BOOLEAN_TERM@108..127 + EXPR@108..118 + TERM@108..118 + PRIMARY_EXPR@108..118 + IDENT@108..113 "first" + DOT@113..114 "." + IDENT@114..118 "name" + WHITESPACE@118..119 " " + EQ@119..121 "==" + WHITESPACE@121..122 " " + EXPR@122..127 + TERM@122..127 + PRIMARY_EXPR@122..127 + STRING_LIT@122..127 "\"foo\"" + WHITESPACE@127..128 " " + AND_KW@128..131 "and" + WHITESPACE@131..132 " " + BOOLEAN_TERM@132..150 + EXPR@132..141 + TERM@132..141 + PRIMARY_EXPR@132..141 + IDENT@132..135 "bar" + DOT@135..136 "." 
+ IDENT@136..141 "value" + WHITESPACE@141..142 " " + EQ@142..144 "==" + WHITESPACE@144..145 " " + EXPR@145..150 + TERM@145..150 + PRIMARY_EXPR@145..150 + INTEGER_LIT@145..150 "0x200" + NEWLINE@150..151 "\n" + WHITESPACE@151..159 " " + R_PAREN@159..160 ")" + NEWLINE@160..161 "\n" + R_BRACE@161..162 "}" diff --git a/parser/src/parser/tests/testdata/with-2.in b/parser/src/parser/tests/testdata/with-2.in new file mode 100644 index 000000000..955b3af63 --- /dev/null +++ b/parser/src/parser/tests/testdata/with-2.in @@ -0,0 +1,6 @@ +rule test { + condition: + with first = foo.bar[0], last = foo.bar[bar_num_of_sections - 1] : ( + first.name == "foo" and bar.value == 0x200 + ) +} \ No newline at end of file diff --git a/parser/src/parser/tests/testdata/with-error-1.ast b/parser/src/parser/tests/testdata/with-error-1.ast new file mode 100644 index 000000000..6d9817af4 --- /dev/null +++ b/parser/src/parser/tests/testdata/with-error-1.ast @@ -0,0 +1,2 @@ +ERRORS: +- SyntaxError { message: "expecting operator, `,` or `:`, found `}`", span: Span(91..92) } diff --git a/parser/src/parser/tests/testdata/with-error-1.cst b/parser/src/parser/tests/testdata/with-error-1.cst new file mode 100644 index 000000000..f8247b29b --- /dev/null +++ b/parser/src/parser/tests/testdata/with-error-1.cst @@ -0,0 +1,46 @@ +SOURCE_FILE@0..92 + RULE_DECL@0..92 + RULE_KW@0..4 "rule" + WHITESPACE@4..5 " " + IDENT@5..9 "test" + WHITESPACE@9..10 " " + L_BRACE@10..11 "{" + NEWLINE@11..12 "\n" + WHITESPACE@12..13 "\t" + ERROR@13..90 + CONDITION_KW@13..22 "condition" + COLON@22..23 ":" + NEWLINE@23..24 "\n" + WHITESPACE@24..26 "\t\t" + ERROR@26..26 + ERROR@26..26 + WITH_KW@26..30 "with" + WHITESPACE@30..31 " " + IDENT@31..36 "first" + WHITESPACE@36..37 " " + EQUAL@37..38 "=" + WHITESPACE@38..39 " " + IDENT@39..42 "foo" + DOT@42..43 "." 
+ IDENT@43..46 "bar" + L_BRACKET@46..47 "[" + INTEGER_LIT@47..48 "0" + R_BRACKET@48..49 "]" + WHITESPACE@49..50 " " + L_PAREN@50..51 "(" + WHITESPACE@51..52 " " + NEWLINE@52..53 "\n" + WHITESPACE@53..65 " " + IDENT@65..70 "first" + WHITESPACE@70..71 " " + EQ@71..73 "==" + WHITESPACE@73..74 " " + STRING_LIT@74..80 "\"test\"" + NEWLINE@80..81 "\n" + WHITESPACE@81..89 " " + R_PAREN@89..90 ")" + NEWLINE@90..91 "\n" + R_BRACE@91..92 "}" + +ERRORS: +- [91..92]: expecting operator, `,` or `:`, found `}` diff --git a/parser/src/parser/tests/testdata/with-error-1.in b/parser/src/parser/tests/testdata/with-error-1.in new file mode 100644 index 000000000..f5957c4c1 --- /dev/null +++ b/parser/src/parser/tests/testdata/with-error-1.in @@ -0,0 +1,6 @@ +rule test { + condition: + with first = foo.bar[0] ( + first == "test" + ) +} \ No newline at end of file diff --git a/parser/src/parser/tests/testdata/with-error-2.ast b/parser/src/parser/tests/testdata/with-error-2.ast new file mode 100644 index 000000000..84f5b6f44 --- /dev/null +++ b/parser/src/parser/tests/testdata/with-error-2.ast @@ -0,0 +1,2 @@ +ERRORS: +- SyntaxError { message: "expecting expression, `for`, `all`, `none`, `any` or `with`, found `)`", span: Span(62..63) } diff --git a/parser/src/parser/tests/testdata/with-error-2.cst b/parser/src/parser/tests/testdata/with-error-2.cst new file mode 100644 index 000000000..0f5bbe972 --- /dev/null +++ b/parser/src/parser/tests/testdata/with-error-2.cst @@ -0,0 +1,40 @@ +SOURCE_FILE@0..65 + RULE_DECL@0..65 + RULE_KW@0..4 "rule" + WHITESPACE@4..5 " " + IDENT@5..9 "test" + WHITESPACE@9..10 " " + L_BRACE@10..11 "{" + NEWLINE@11..12 "\n" + WHITESPACE@12..13 "\t" + ERROR@13..63 + CONDITION_KW@13..22 "condition" + COLON@22..23 ":" + NEWLINE@23..24 "\n" + WHITESPACE@24..26 "\t\t" + ERROR@26..26 + ERROR@26..26 + WITH_KW@26..30 "with" + WHITESPACE@30..31 " " + IDENT@31..36 "first" + WHITESPACE@36..37 " " + EQUAL@37..38 "=" + WHITESPACE@38..39 " " + IDENT@39..42 "foo" + DOT@42..43 "." 
+ IDENT@43..46 "bar" + L_BRACKET@46..47 "[" + INTEGER_LIT@47..48 "0" + R_BRACKET@48..49 "]" + WHITESPACE@49..50 " " + COLON@50..51 ":" + WHITESPACE@51..52 " " + L_PAREN@52..53 "(" + NEWLINE@53..54 "\n" + WHITESPACE@54..62 " " + R_PAREN@62..63 ")" + NEWLINE@63..64 "\n" + R_BRACE@64..65 "}" + +ERRORS: +- [62..63]: expecting expression, `for`, `all`, `none`, `any` or `with`, found `)` diff --git a/parser/src/parser/tests/testdata/with-error-2.in b/parser/src/parser/tests/testdata/with-error-2.in new file mode 100644 index 000000000..6b6815ad6 --- /dev/null +++ b/parser/src/parser/tests/testdata/with-error-2.in @@ -0,0 +1,5 @@ +rule test { + condition: + with first = foo.bar[0] : ( + ) +} \ No newline at end of file diff --git a/parser/src/parser/tests/testdata/with-error-3.ast b/parser/src/parser/tests/testdata/with-error-3.ast new file mode 100644 index 000000000..257bafc0b --- /dev/null +++ b/parser/src/parser/tests/testdata/with-error-3.ast @@ -0,0 +1,2 @@ +ERRORS: +- SyntaxError { message: "expecting identifier, found `:`", span: Span(31..32) } diff --git a/parser/src/parser/tests/testdata/with-error-3.cst b/parser/src/parser/tests/testdata/with-error-3.cst new file mode 100644 index 000000000..f0215d978 --- /dev/null +++ b/parser/src/parser/tests/testdata/with-error-3.cst @@ -0,0 +1,32 @@ +SOURCE_FILE@0..63 + RULE_DECL@0..63 + RULE_KW@0..4 "rule" + WHITESPACE@4..5 " " + IDENT@5..9 "test" + WHITESPACE@9..10 " " + L_BRACE@10..11 "{" + NEWLINE@11..12 "\n" + WHITESPACE@12..13 "\t" + ERROR@13..61 + CONDITION_KW@13..22 "condition" + COLON@22..23 ":" + NEWLINE@23..24 "\n" + WHITESPACE@24..26 "\t\t" + ERROR@26..26 + ERROR@26..26 + WITH_KW@26..30 "with" + WHITESPACE@30..31 " " + COLON@31..32 ":" + WHITESPACE@32..33 " " + L_PAREN@33..34 "(" + NEWLINE@34..35 "\n" + WHITESPACE@35..47 " " + TRUE_KW@47..51 "true" + NEWLINE@51..52 "\n" + WHITESPACE@52..60 " " + R_PAREN@60..61 ")" + NEWLINE@61..62 "\n" + R_BRACE@62..63 "}" + +ERRORS: +- [31..32]: expecting identifier, found `:` 
diff --git a/parser/src/parser/tests/testdata/with-error-3.in b/parser/src/parser/tests/testdata/with-error-3.in new file mode 100644 index 000000000..47abfae31 --- /dev/null +++ b/parser/src/parser/tests/testdata/with-error-3.in @@ -0,0 +1,6 @@ +rule test { + condition: + with : ( + true + ) +} \ No newline at end of file diff --git a/parser/src/tokenizer/mod.rs b/parser/src/tokenizer/mod.rs index 1d965b6de..8ab20488e 100644 --- a/parser/src/tokenizer/mod.rs +++ b/parser/src/tokenizer/mod.rs @@ -338,6 +338,8 @@ enum NormalToken<'src> { Wide, #[token("xor")] Xor, + #[token("with")] + With, // Bitwise #[token("<<")] @@ -695,6 +697,7 @@ fn convert_normal_token(token: NormalToken, span: Span) -> Token { NormalToken::True => Token::TRUE_KW(span), NormalToken::Wide => Token::WIDE_KW(span), NormalToken::Xor => Token::XOR_KW(span), + NormalToken::With => Token::WITH_KW(span), // Bitwise. NormalToken::Shl => Token::SHL(span), diff --git a/parser/src/tokenizer/tokens.rs b/parser/src/tokenizer/tokens.rs index 6a5f29fa7..de582efb2 100644 --- a/parser/src/tokenizer/tokens.rs +++ b/parser/src/tokenizer/tokens.rs @@ -44,6 +44,7 @@ pub(crate) enum TokenId { TRUE_KW, WIDE_KW, XOR_KW, + WITH_KW, // Bitwise operators. SHL, @@ -152,6 +153,7 @@ impl TokenId { TokenId::TRUE_KW => "`true`", TokenId::WIDE_KW => "`wide`", TokenId::XOR_KW => "`xor`", + TokenId::WITH_KW => "`with`", // Bitwise operators. TokenId::SHL => "`<<`", @@ -251,6 +253,7 @@ pub(crate) enum Token { TRUE_KW(Span) = TokenId::TRUE_KW as u8, WIDE_KW(Span) = TokenId::WIDE_KW as u8, XOR_KW(Span) = TokenId::XOR_KW as u8, + WITH_KW(Span) = TokenId::WITH_KW as u8, // Bitwise operators. 
SHL(Span) = TokenId::SHL as u8, @@ -369,6 +372,7 @@ impl Token { | Token::TRUE_KW(span) | Token::WIDE_KW(span) | Token::XOR_KW(span) + | Token::WITH_KW(span) // Bitwise operators | Token::SHL(span) diff --git a/site/content/docs/writing_rules/conditions.md b/site/content/docs/writing_rules/conditions.md index 53388408d..52a20c627 100644 --- a/site/content/docs/writing_rules/conditions.md +++ b/site/content/docs/writing_rules/conditions.md @@ -472,6 +472,93 @@ The `for..in` operator is similar to `for..of`, but the latter iterates over a set of patterns, while the former iterates over ranges, enumerations, arrays and dictionaries. +## The "with" statement + +YARA-X now supports the `with` statement defined by this [RFC](https://github.com/VirusTotal/yara/discussions/1783), which allows you to define identifiers +that hold the result of an expression. Each identifier is local and is +valid only within the `with` statement. The syntax is: + +```yara +with + <identifier> = <expression> [, <identifier> = <expression>]* : + ( + <boolean expression> + ) +``` + +For example: + +```yara +rule WithExample { + condition: + with + first = foo.bar[0], + last = foo.bar[num_of_items - 1] : ( + first.text == last.text + ) +} +``` + +Using an identifier declared by `with` outside of its `with` statement is not allowed. +Something like: + +```yara +rule WithExample { + condition: + with + first = foo.bar[0], + last = foo.bar[num_of_items - 1] : ( + first.text == last.text + ) + or last.text != first.text +} +``` + +is syntactically valid but it will raise a compilation error. + +Another usage of the `with` statement could be to avoid repeating the same +expression multiple times in the condition. 
Something like: + +```yara +pe.sections[0].name == ".text" and +pe.sections[0].characteristics == 0xC0000000 and +pe.sections[0].raw_data_size == 0x2000 and +pe.sections[0].raw_data_offset == 0x1000 and +pe.sections[pe.number_of_sections - 1].name == ".tls" and +pe.sections[pe.number_of_sections - 1].characteristics == 0xC0000000 and +pe.sections[pe.number_of_sections - 1].raw_data_size == 0x1000 and +pe.sections[pe.number_of_sections - 1].raw_data_offset == 0x4000 +``` + +can be rewritten as: + +```yara +with + fs = pe.sections[0], + ls = pe.sections[pe.number_of_sections - 1] : ( + fs.name == ".text" and + fs.characteristics == 0xC0000000 and + fs.raw_data_size == 0x2000 and + fs.raw_data_offset == 0x1000 and + ls.name == ".tls" and + ls.characteristics == 0xC0000000 and + ls.raw_data_size == 0x1000 and + ls.raw_data_offset == 0x4000 + ) +``` + +Another use case is to declare a variable that is used only inside a `for` loop: + +```yara +for all offset in (10,20,30) : ( + with val = uint64(offset) | uint64(offset + 4) | uint64(offset + 8) : ( + val == 0x10000 or + val == 0x20000 or + val == 0x40000 + ) +) +``` + ## Referencing other rules When writing the condition for a rule, you can also make reference to a diff --git a/site/content/docs/writing_rules/differences.md b/site/content/docs/writing_rules/differences.md index 99609e063..d8b9ab5d2 100644 --- a/site/content/docs/writing_rules/differences.md +++ b/site/content/docs/writing_rules/differences.md @@ -196,6 +196,38 @@ But this is not valid... 1 of (some_rule*) ``` +## The "with" statement + +YARA-X now supports the `with` statement, which allows you to define identifiers +that hold the result of an expression. Each identifier is local and is valid +only within the `with` statement. For example: + +``` +with + a = 1 + 1, + b = 2 : ( + a == b + ) +``` + +This is also useful to avoid repeating the same expression multiple times in the +condition. 
For example: + +``` +with + a = foo.bar[0], + b = foo.bar[1] : ( + a.name == b.name or + a.value == 0x10 or + b.value == 0x20 or + a.value == b.value + ) +``` + +This feature is not available in YARA 4.x, where you have to repeat the +expression multiple times. + + ## Using xor and fullword together In YARA 4.x the combination `xor` and `fullword` looks for the bytes before