From f6be6fc3ec34183fb5bcef0ae796581c3f1d9d06 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Tom=C3=A1=C5=A1=20=C4=8Euri=C5=A1?= Date: Mon, 16 Sep 2024 15:00:43 +0200 Subject: [PATCH 1/3] feat: implement with statement --- lib/src/compiler/emit.rs | 32 +++++- lib/src/compiler/ir/ast2ir.rs | 45 +++++++- lib/src/compiler/ir/dfs.rs | 8 ++ lib/src/compiler/ir/mod.rs | 18 ++- lib/src/compiler/tests/testdata/errors/134.in | 4 + .../compiler/tests/testdata/errors/134.out | 6 + lib/src/compiler/tests/testdata/errors/135.in | 5 + .../compiler/tests/testdata/errors/135.out | 6 + lib/src/compiler/tests/testdata/errors/72.out | 2 +- lib/src/tests/mod.rs | 63 +++++++++++ parser/src/ast/ascii_tree.rs | 20 ++++ parser/src/ast/cst2ast.rs | 39 +++++++ parser/src/ast/mod.rs | 32 ++++++ parser/src/cst/syntax_kind.rs | 5 + parser/src/parser/mod.rs | 70 +++++++++--- parser/src/parser/tests/testdata/with-1.ast | 13 +++ parser/src/parser/tests/testdata/with-1.cst | 57 ++++++++++ parser/src/parser/tests/testdata/with-1.in | 6 + parser/src/parser/tests/testdata/with-2.ast | 35 ++++++ parser/src/parser/tests/testdata/with-2.cst | 104 ++++++++++++++++++ parser/src/parser/tests/testdata/with-2.in | 6 + .../parser/tests/testdata/with-error-1.ast | 2 + .../parser/tests/testdata/with-error-1.cst | 46 ++++++++ .../src/parser/tests/testdata/with-error-1.in | 6 + .../parser/tests/testdata/with-error-2.ast | 2 + .../parser/tests/testdata/with-error-2.cst | 40 +++++++ .../src/parser/tests/testdata/with-error-2.in | 5 + .../parser/tests/testdata/with-error-3.ast | 2 + .../parser/tests/testdata/with-error-3.cst | 32 ++++++ .../src/parser/tests/testdata/with-error-3.in | 6 + parser/src/tokenizer/mod.rs | 3 + parser/src/tokenizer/tokens.rs | 4 + site/content/docs/writing_rules/conditions.md | 87 +++++++++++++++ .../content/docs/writing_rules/differences.md | 31 ++++++ 34 files changed, 822 insertions(+), 20 deletions(-) create mode 100644 lib/src/compiler/tests/testdata/errors/134.in create mode 100644 
lib/src/compiler/tests/testdata/errors/134.out create mode 100644 lib/src/compiler/tests/testdata/errors/135.in create mode 100644 lib/src/compiler/tests/testdata/errors/135.out create mode 100644 parser/src/parser/tests/testdata/with-1.ast create mode 100644 parser/src/parser/tests/testdata/with-1.cst create mode 100644 parser/src/parser/tests/testdata/with-1.in create mode 100644 parser/src/parser/tests/testdata/with-2.ast create mode 100644 parser/src/parser/tests/testdata/with-2.cst create mode 100644 parser/src/parser/tests/testdata/with-2.in create mode 100644 parser/src/parser/tests/testdata/with-error-1.ast create mode 100644 parser/src/parser/tests/testdata/with-error-1.cst create mode 100644 parser/src/parser/tests/testdata/with-error-1.in create mode 100644 parser/src/parser/tests/testdata/with-error-2.ast create mode 100644 parser/src/parser/tests/testdata/with-error-2.cst create mode 100644 parser/src/parser/tests/testdata/with-error-2.in create mode 100644 parser/src/parser/tests/testdata/with-error-3.ast create mode 100644 parser/src/parser/tests/testdata/with-error-3.cst create mode 100644 parser/src/parser/tests/testdata/with-error-3.in diff --git a/lib/src/compiler/emit.rs b/lib/src/compiler/emit.rs index 44f65bbd6..5da10af31 100644 --- a/lib/src/compiler/emit.rs +++ b/lib/src/compiler/emit.rs @@ -22,7 +22,7 @@ use walrus::{FunctionId, InstrSeqBuilder, ValType}; use crate::compiler::ir::{ Expr, ForIn, ForOf, Iterable, MatchAnchor, Of, OfItems, PatternIdx, - Quantifier, + Quantifier, With, }; use crate::compiler::{ LiteralId, PatternId, RegexpId, RuleId, RuleInfo, Var, VarStackFrame, @@ -635,6 +635,10 @@ fn emit_expr( } }, + Expr::With(with) => { + emit_with(ctx, instr, with); + } + Expr::FuncCall(fn_call) => { // Emit the arguments first. for expr in fn_call.args.iter_mut() { @@ -2134,6 +2138,32 @@ fn emit_for( }); } +/// Emits the code for a `with` statement. +/// +/// Each `with` statement has a corresponding `<identifier> = <expression>` pair.
+/// Each pair is stored in the `identifiers` and `expressions` fields of the +/// `with` statement. +/// For each pair, the code emitted by this function sets the variable +/// corresponding to the identifier to the value of the emitted expression. +/// Those variables are later used in the condition of the `with` statement. +fn emit_with( + ctx: &mut EmitContext, + instr: &mut InstrSeqBuilder, + with: &mut With, +) { + // Emit the code that sets the variables in the `with` statement. + for (idx, &item) in with.identifiers.iter().enumerate() { + let expr = &mut with.expressions[idx]; + set_var(ctx, instr, item, |ctx, instr| { + emit_expr(ctx, instr, expr); + }); + } + + // Emit the code that evaluates the condition of the `with` statement. + // This condition is a boolean expression that uses the variables set above. + emit_bool_expr(ctx, instr, &mut with.condition) +} + /// Produces a switch statement by calling a `branch_generator` function /// multiple times. /// diff --git a/lib/src/compiler/ir/ast2ir.rs b/lib/src/compiler/ir/ast2ir.rs index a3f4fd028..3140bbc27 100644 --- a/lib/src/compiler/ir/ast2ir.rs +++ b/lib/src/compiler/ir/ast2ir.rs @@ -24,7 +24,7 @@ use crate::compiler::ir::hex2hir::hex_pattern_hir_from_ast; use crate::compiler::ir::{ Expr, ForIn, ForOf, FuncCall, Iterable, LiteralPattern, Lookup, MatchAnchor, Of, OfItems, Pattern, PatternFlagSet, PatternFlags, - PatternIdx, PatternInRule, Quantifier, Range, RegexpPattern, + PatternIdx, PatternInRule, Quantifier, Range, RegexpPattern, With, }; use crate::compiler::report::ReportBuilder; use crate::compiler::{warnings, CompileContext, CompileError}; @@ -520,6 +520,7 @@ pub(in crate::compiler) fn expr_from_ast( ast::Expr::Of(of) => of_expr_from_ast(ctx, of), ast::Expr::ForOf(for_of) => for_of_expr_from_ast(ctx, for_of), ast::Expr::ForIn(for_in) => for_in_expr_from_ast(ctx, for_in), + ast::Expr::With(with) => with_expr_from_ast(ctx, with), ast::Expr::FuncCall(fn_call) => func_call_from_ast(ctx, fn_call),
ast::Expr::FieldAccess(expr) => { @@ -1118,6 +1119,48 @@ fn for_in_expr_from_ast( }))) } +fn with_expr_from_ast( + ctx: &mut CompileContext, + with: &ast::With, +) -> Result { + // Create stack frame with capacity for the with statement variables + let mut stack_frame = ctx.vars.new_frame(with.items.len() as i32); + let mut symbols = SymbolTable::new(); + let mut identifiers = Vec::new(); + let mut expressions = Vec::new(); + + // Iterate over all items in the with statement and create a new variable + // for each one. Both identifiers and corresponding expressions are stored + // in separate vectors. + for item in with.items.iter() { + let type_value = expr_from_ast(ctx, &item.expression)? + .type_value() + .clone_without_value(); + let var = stack_frame.new_var(type_value.ty()); + + identifiers.push(var); + expressions.push(expr_from_ast(ctx, &item.expression)?); + + // Insert the variable into the symbol table. + symbols.insert( + item.identifier.name, + Symbol::new(type_value, SymbolKind::Var(var)), + ); + } + + // Put the with variables into scope. + ctx.symbol_table.push(Rc::new(symbols)); + + let condition = bool_expr_from_ast(ctx, &with.condition)?; + + // Leaving with statement condition's scope. Remove with statement variables. 
+ ctx.symbol_table.pop(); + + ctx.vars.unwind(&stack_frame); + + Ok(Expr::With(Box::new(With { identifiers, expressions, condition }))) +} + fn iterable_from_ast( ctx: &mut CompileContext, iter: &ast::Iterable, diff --git a/lib/src/compiler/ir/dfs.rs b/lib/src/compiler/ir/dfs.rs index 87a7ef38a..baa83c1a7 100644 --- a/lib/src/compiler/ir/dfs.rs +++ b/lib/src/compiler/ir/dfs.rs @@ -208,10 +208,18 @@ impl<'a> Iterator for DepthFirstSearch<'a> { } push_quantifier(&for_in.quantifier, &mut self.stack); } + Expr::Lookup(lookup) => { self.stack.push(Event::Enter(&lookup.index)); self.stack.push(Event::Enter(&lookup.primary)); } + + Expr::With(with) => { + self.stack.push(Event::Enter(&with.condition)); + for expr in with.expressions.iter().rev() { + self.stack.push(Event::Enter(expr)) + } + } } } diff --git a/lib/src/compiler/ir/mod.rs b/lib/src/compiler/ir/mod.rs index ba91672b5..dcfb466d4 100644 --- a/lib/src/compiler/ir/mod.rs +++ b/lib/src/compiler/ir/mod.rs @@ -552,6 +552,9 @@ pub(in crate::compiler) enum Expr { /// A `for in ...` expression. (e.g. `for all i in (1..100) : ( ... )`) ForIn(Box), + /// A `with : ...` expression. (e.g. `with $a, $b : ( ... )`) + With(Box), + /// Array or dictionary lookup expression (e.g. `array[1]`, `dict["key"]`) Lookup(Box), } @@ -610,6 +613,14 @@ pub(in crate::compiler) struct ForIn { pub stack_frame: VarStackFrame, } +/// A `with` expression (e.g `with $a, $b : (..)`) +#[derive(Debug)] +pub(in crate::compiler) struct With { + pub identifiers: Vec, + pub expressions: Vec, + pub condition: Expr, +} + /// A quantifier used in `for` and `of` expressions. #[derive(Debug)] pub(in crate::compiler) enum Quantifier { @@ -878,7 +889,8 @@ impl Expr { | Expr::PatternMatchVar { .. } | Expr::Of(_) | Expr::ForOf(_) - | Expr::ForIn(_) => Type::Bool, + | Expr::ForIn(_) + | Expr::With(_) => Type::Bool, Expr::Minus { operand, .. } => match operand.ty() { Type::Integer => Type::Integer, @@ -947,7 +959,8 @@ impl Expr { | Expr::PatternMatchVar { .. 
} | Expr::Of(_) | Expr::ForOf(_) - | Expr::ForIn(_) => TypeValue::Bool(Value::Unknown), + | Expr::ForIn(_) + | Expr::With(_) => TypeValue::Bool(Value::Unknown), Expr::Minus { operand, .. } => match operand.ty() { Type::Integer => TypeValue::Integer(Value::Unknown), @@ -1193,6 +1206,7 @@ impl Debug for Expr { Expr::Of(_) => writeln!(f, "OF")?, Expr::ForOf(_) => writeln!(f, "FOR_OF")?, Expr::ForIn(_) => writeln!(f, "FOR_IN")?, + Expr::With(_) => writeln!(f, "WITH")?, Expr::Lookup(_) => writeln!(f, "LOOKUP")?, Expr::PatternMatch { pattern, anchor } => writeln!( f, diff --git a/lib/src/compiler/tests/testdata/errors/134.in b/lib/src/compiler/tests/testdata/errors/134.in new file mode 100644 index 000000000..f2edb2bc2 --- /dev/null +++ b/lib/src/compiler/tests/testdata/errors/134.in @@ -0,0 +1,4 @@ +rule test { + condition: + with foo = "foo" : ( bar ) +} \ No newline at end of file diff --git a/lib/src/compiler/tests/testdata/errors/134.out b/lib/src/compiler/tests/testdata/errors/134.out new file mode 100644 index 000000000..94faa02ba --- /dev/null +++ b/lib/src/compiler/tests/testdata/errors/134.out @@ -0,0 +1,6 @@ +error[E009]: unknown identifier `bar` + --> line:3:26 + | +3 | with foo = "foo" : ( bar ) + | ^^^ this identifier has not been declared + | \ No newline at end of file diff --git a/lib/src/compiler/tests/testdata/errors/135.in b/lib/src/compiler/tests/testdata/errors/135.in new file mode 100644 index 000000000..b2fda0fc6 --- /dev/null +++ b/lib/src/compiler/tests/testdata/errors/135.in @@ -0,0 +1,5 @@ +rule test { + condition: + with foo = "foo" : ( foo == "foo" ) + and foo == "foo" +} \ No newline at end of file diff --git a/lib/src/compiler/tests/testdata/errors/135.out b/lib/src/compiler/tests/testdata/errors/135.out new file mode 100644 index 000000000..cee8b78e0 --- /dev/null +++ b/lib/src/compiler/tests/testdata/errors/135.out @@ -0,0 +1,6 @@ +error[E009]: unknown identifier `foo` + --> line:4:12 + | +4 | and foo == "foo" + | ^^^ this identifier has 
not been declared + | \ No newline at end of file diff --git a/lib/src/compiler/tests/testdata/errors/72.out b/lib/src/compiler/tests/testdata/errors/72.out index 2fc25c5e8..6f84f58b5 100644 --- a/lib/src/compiler/tests/testdata/errors/72.out +++ b/lib/src/compiler/tests/testdata/errors/72.out @@ -2,5 +2,5 @@ error[E001]: syntax error --> line:1:24 | 1 | rule test { condition: } - | ^ expecting expression, `for`, `all`, `none` or `any`, found `}` + | ^ expecting expression, `for`, `all`, `none`, `any` or `with`, found `}` | \ No newline at end of file diff --git a/lib/src/tests/mod.rs b/lib/src/tests/mod.rs index b423ea439..abe01f619 100644 --- a/lib/src/tests/mod.rs +++ b/lib/src/tests/mod.rs @@ -514,6 +514,69 @@ fn for_in() { ); } +#[test] +fn with() { + condition_true!(r#"with foo = 1 + 1 : (foo == 2)"#); + condition_false!(r#"with foo = 1 + 1 : (foo == 3)"#); + condition_true!(r#"with foo = 1 + 1, bar = 2 + 2 : (foo + bar == 6)"#); + condition_false!(r#"with foo = 1 + 1, bar = 2 + 2 : (foo + bar == 7)"#); + + #[cfg(feature = "test_proto2-module")] + condition_true!(r#"with foo = test_proto2.array_int64[0]: (foo == 1)"#); + + #[cfg(feature = "test_proto2-module")] + condition_false!( + r#"with foo = test_proto2.array_int64[test_proto2.int64_zero]: (foo == 10)"# + ); + + #[cfg(feature = "test_proto2-module")] + condition_true!( + r#"with foo = test_proto2.map_string_struct["foo"].nested_int64_one: (foo == 1)"# + ); + + #[cfg(feature = "test_proto2-module")] + condition_true!( + r#"with + bar = test_proto2.array_string[1], + baz = test_proto2.array_string[2] : + ( + bar == "bar" and baz == "baz" + ) + "# + ); + + #[cfg(feature = "test_proto2-module")] + condition_true!( + r#"for any i in (0..1): ( + with foo = test_proto2.array_int64[i]: (foo == 1) + )"# + ); + + #[cfg(feature = "test_proto2-module")] + condition_true!( + r#"for all i in (0..0): ( + with + foo = test_proto2.array_int64[i], + bar = test_proto2.array_int64[i + 1] : + ( + foo == 1 and bar == 10 + ) + 
)"# + ); + + #[cfg(feature = "test_proto2-module")] + condition_false!( + r#"for all i in (0..2): ( + with + foo = test_proto2.array_int64[i], + bar = test_proto2.array_int64[i + 1] : + ( + foo == 1 and bar == foo * 10 + ) + )"# + ); +} + #[test] fn text_patterns() { pattern_true!(r#""issi""#, b"mississippi"); diff --git a/parser/src/ast/ascii_tree.rs b/parser/src/ast/ascii_tree.rs index 4afe7efb6..c743b933e 100644 --- a/parser/src/ast/ascii_tree.rs +++ b/parser/src/ast/ascii_tree.rs @@ -424,9 +424,29 @@ pub(crate) fn expr_ascii_tree(expr: &Expr) -> Tree { Node(node_title, children) } + Expr::With(w) => Node( + "with : ( )".to_string(), + vec![ + Node( + "".to_string(), + w.items.iter().flat_map(with_items_ascii_tree).collect(), + ), + Node( + "".to_string(), + vec![expr_ascii_tree(&w.condition)], + ), + ], + ), } } +fn with_items_ascii_tree(item: &WithItems) -> Vec { + vec![ + Leaf(vec![format!("{}", item.identifier.name)]), + expr_ascii_tree(&item.expression), + ] +} + pub(crate) fn quantifier_ascii_tree(quantifier: &Quantifier) -> Tree { match quantifier { Quantifier::None { .. 
} => Leaf(vec!["none".to_string()]), diff --git a/parser/src/ast/cst2ast.rs b/parser/src/ast/cst2ast.rs index 20c9e79c8..77fd4f234 100644 --- a/parser/src/ast/cst2ast.rs +++ b/parser/src/ast/cst2ast.rs @@ -905,6 +905,7 @@ impl<'src> Builder<'src> { } Event::Begin(FOR_EXPR) => self.for_expr()?, Event::Begin(OF_EXPR) => self.of_expr()?, + Event::Begin(WITH_EXPR) => self.with_expr()?, Event::Begin(EXPR) => self.pratt_parser(Self::expr, 0)?, event => panic!("unexpected {:?}", event), }; @@ -1018,6 +1019,44 @@ impl<'src> Builder<'src> { Ok(Expr::Of(Box::new(Of { span, quantifier, items, anchor }))) } + fn with_expr(&mut self) -> Result, Abort> { + self.begin(WITH_EXPR)?; + + let mut span = self.expect(WITH_KW)?; + + self.begin(WITH_IDENTIFIERS)?; + + let item = |i: &mut Self| -> Result, Abort> { + let identifier = i.identifier()?; + let mut span = identifier.span(); + span = span.combine(&i.expect(EQUAL)?); + let expression = i.expr()?; + span = span.combine(&expression.span()); + + Ok(WithItems { span, identifier, expression }) + }; + + let mut items = vec![item(self)?]; + + while let Event::Token { kind: COMMA, .. } = self.peek() { + self.expect(COMMA)?; + items.push(item(self)?); + } + + self.end(WITH_IDENTIFIERS)?; + + self.expect(COLON)?; + self.expect(L_PAREN)?; + + let condition = self.boolean_expr()?; + + span = span.combine(&self.expect(R_PAREN)?); + + self.end(WITH_EXPR)?; + + Ok(Expr::With(Box::new(With { span, items, condition }))) + } + fn quantifier(&mut self) -> Result, Abort> { self.begin(QUANTIFIER)?; diff --git a/parser/src/ast/mod.rs b/parser/src/ast/mod.rs index eb58e21ee..3831bbb54 100644 --- a/parser/src/ast/mod.rs +++ b/parser/src/ast/mod.rs @@ -364,6 +364,22 @@ pub enum OfItems<'src> { BoolExprTuple(Vec>), } +/// A `with` expression (e.g `with $a : (..)`) +#[derive(Debug)] +pub struct With<'src> { + span: Span, + pub items: Vec>, + pub condition: Expr<'src>, +} + +/// Items in a `with` expression. 
+#[derive(Debug)] +pub struct WithItems<'src> { + span: Span, + pub identifier: Ident<'src>, + pub expression: Expr<'src>, +} + /// A quantifier used in `for` and `of` expressions. #[derive(Debug)] pub enum Quantifier<'src> { @@ -576,6 +592,9 @@ pub enum Expr<'src> { /// A `for in ...` expression. (e.g. `for all i in (1..100) : ( ... )`) ForIn(Box>), + + /// A `with` expression (e.g. `with $a : ( ... )`) + With(Box>), } /// A set of modifiers associated to a pattern. @@ -1047,6 +1066,18 @@ impl WithSpan for OfItems<'_> { } } +impl WithSpan for With<'_> { + fn span(&self) -> Span { + self.span.clone() + } +} + +impl WithSpan for WithItems<'_> { + fn span(&self) -> Span { + self.span.clone() + } +} + impl WithSpan for Iterable<'_> { fn span(&self) -> Span { match self { @@ -1290,6 +1321,7 @@ impl WithSpan for Expr<'_> { Expr::ForOf(f) => f.span(), Expr::ForIn(f) => f.span(), Expr::Of(o) => o.span(), + Expr::With(w) => w.span(), } } } diff --git a/parser/src/cst/syntax_kind.rs b/parser/src/cst/syntax_kind.rs index 34d4ec6e2..941cae9e8 100644 --- a/parser/src/cst/syntax_kind.rs +++ b/parser/src/cst/syntax_kind.rs @@ -43,6 +43,7 @@ pub enum SyntaxKind { TRUE_KW, WIDE_KW, XOR_KW, + WITH_KW, // Arithmetic operators ADD, @@ -127,6 +128,8 @@ pub enum SyntaxKind { BOOLEAN_TERM, FOR_EXPR, OF_EXPR, + WITH_EXPR, + WITH_IDENTIFIERS, ITERABLE, QUANTIFIER, EXPR_TUPLE, @@ -210,6 +213,7 @@ impl SyntaxKind { SyntaxKind::TRUE_KW => TokenId::TRUE_KW, SyntaxKind::WIDE_KW => TokenId::WIDE_KW, SyntaxKind::XOR_KW => TokenId::XOR_KW, + SyntaxKind::WITH_KW => TokenId::WITH_KW, SyntaxKind::ADD => TokenId::PLUS, SyntaxKind::SUB => TokenId::HYPHEN, @@ -311,6 +315,7 @@ impl From<&Token> for SyntaxKind { Token::TRUE_KW(_) => SyntaxKind::TRUE_KW, Token::WIDE_KW(_) => SyntaxKind::WIDE_KW, Token::XOR_KW(_) => SyntaxKind::XOR_KW, + Token::WITH_KW(_) => SyntaxKind::WITH_KW, // Bitwise operators Token::SHL(_) => SyntaxKind::SHL, diff --git a/parser/src/parser/mod.rs b/parser/src/parser/mod.rs index 
0f696bd66..b551387b6 100644 --- a/parser/src/parser/mod.rs +++ b/parser/src/parser/mod.rs @@ -1227,20 +1227,7 @@ impl<'src> ParserImpl<'src> { .end() } - /// Parses the condition block. - /// - /// ```text - /// CONDITION_BLK := `condition` `:` BOOLEAN_EXPR - /// `` - fn condition_blk(&mut self) -> &mut Self { - self.begin(CONDITION_BLK) - .expect(t!(CONDITION_KW)) - .expect(t!(COLON)) - .then(|p| p.boolean_expr()) - .end_with_recovery(t!(R_BRACE)) - } - - /// Parses the condition block. + /// Parses the hex pattern block. /// /// ```text /// HEX_PATTERN := `{` HEX_SUB_PATTERN `}` @@ -1254,7 +1241,7 @@ impl<'src> ParserImpl<'src> { .end() } - /// Parses the condition block. + /// Parses the hex sub pattern block. /// /// ```text /// HEX_SUB_PATTERN := @@ -1311,6 +1298,19 @@ impl<'src> ParserImpl<'src> { .end() } + /// Parses the condition block. + /// + /// ```text + /// CONDITION_BLK := `condition` `:` BOOLEAN_EXPR + /// `` + fn condition_blk(&mut self) -> &mut Self { + self.begin(CONDITION_BLK) + .expect(t!(CONDITION_KW)) + .expect(t!(COLON)) + .then(|p| p.boolean_expr()) + .end_with_recovery(t!(R_BRACE)) + } + /// Parses a boolean expression. /// /// ```text @@ -1354,6 +1354,7 @@ impl<'src> ParserImpl<'src> { }) .alt(|p| p.for_expr()) .alt(|p| p.of_expr()) + .alt(|p| p.with_expr()) .alt(|p| { p.expr().zero_or_more(|p| { p.expect_d( @@ -1582,6 +1583,45 @@ impl<'src> ParserImpl<'src> { .end() } + /// Parses `with` expression. + /// + /// ```text + /// WITH_EXPR := + /// `with` WITH_IDENTIFIERS `:` + /// `(` + /// BOOLEAN_EXPR + /// `)` + /// ``` + fn with_expr(&mut self) -> &mut Self { + self.begin(WITH_EXPR) + .expect(t!(WITH_KW)) + .then(|p| p.with_identifier()) + .expect(t!(COLON)) + .expect(t!(L_PAREN)) + .then(|p| p.boolean_expr()) + .expect(t!(R_PAREN)) + .end() + } + + /// Parses `with` identifiers. 
+ /// + /// ```text + /// WITH_IDENTIFIERS := + /// IDENT `=` EXPR (`,` IDENT `=` EXPR)* + /// + fn with_identifier(&mut self) -> &mut Self { + self.begin(WITH_IDENTIFIERS) + .expect(t!(IDENT)) + .expect(t!(EQUAL)) + .then(|p| p.expr()) + .zero_or_more(|p| { + p.expect(t!(COMMA)).then(|p| { + p.expect(t!(IDENT)).expect(t!(EQUAL)).then(|p| p.expr()) + }) + }) + .end() + } + /// Parses quantifier. /// /// ```text diff --git a/parser/src/parser/tests/testdata/with-1.ast b/parser/src/parser/tests/testdata/with-1.ast new file mode 100644 index 000000000..e11cbf2e2 --- /dev/null +++ b/parser/src/parser/tests/testdata/with-1.ast @@ -0,0 +1,13 @@ + rule test + └─ condition + └─ with : ( ) + ├─ + │ ├─ two + │ └─ add + │ ├─ 1 + │ └─ 1 + └─ + └─ eq + ├─ two + └─ 2 + diff --git a/parser/src/parser/tests/testdata/with-1.cst b/parser/src/parser/tests/testdata/with-1.cst new file mode 100644 index 000000000..9cc0fb3b8 --- /dev/null +++ b/parser/src/parser/tests/testdata/with-1.cst @@ -0,0 +1,57 @@ +SOURCE_FILE@0..78 + RULE_DECL@0..78 + RULE_KW@0..4 "rule" + WHITESPACE@4..5 " " + IDENT@5..9 "test" + WHITESPACE@9..10 " " + L_BRACE@10..11 "{" + NEWLINE@11..12 "\n" + WHITESPACE@12..13 "\t" + CONDITION_BLK@13..76 + CONDITION_KW@13..22 "condition" + COLON@22..23 ":" + NEWLINE@23..24 "\n" + WHITESPACE@24..26 "\t\t" + BOOLEAN_EXPR@26..76 + BOOLEAN_TERM@26..76 + WITH_EXPR@26..76 + WITH_KW@26..30 "with" + WHITESPACE@30..31 " " + WITH_IDENTIFIERS@31..40 + IDENT@31..34 "two" + WHITESPACE@34..35 " " + EQUAL@35..36 "=" + WHITESPACE@36..37 " " + EXPR@37..40 + TERM@37..38 + PRIMARY_EXPR@37..38 + INTEGER_LIT@37..38 "1" + ADD@38..39 "+" + TERM@39..40 + PRIMARY_EXPR@39..40 + INTEGER_LIT@39..40 "1" + WHITESPACE@40..41 " " + COLON@41..42 ":" + WHITESPACE@42..43 " " + L_PAREN@43..44 "(" + WHITESPACE@44..45 " " + NEWLINE@45..46 "\n" + WHITESPACE@46..58 " " + BOOLEAN_EXPR@58..66 + BOOLEAN_TERM@58..66 + EXPR@58..61 + TERM@58..61 + PRIMARY_EXPR@58..61 + IDENT@58..61 "two" + WHITESPACE@61..62 " " + 
EQ@62..64 "==" + WHITESPACE@64..65 " " + EXPR@65..66 + TERM@65..66 + PRIMARY_EXPR@65..66 + INTEGER_LIT@65..66 "2" + NEWLINE@66..67 "\n" + WHITESPACE@67..75 " " + R_PAREN@75..76 ")" + NEWLINE@76..77 "\n" + R_BRACE@77..78 "}" diff --git a/parser/src/parser/tests/testdata/with-1.in b/parser/src/parser/tests/testdata/with-1.in new file mode 100644 index 000000000..70ce64c5b --- /dev/null +++ b/parser/src/parser/tests/testdata/with-1.in @@ -0,0 +1,6 @@ +rule test { + condition: + with two = 1+1 : ( + two == 2 + ) +} \ No newline at end of file diff --git a/parser/src/parser/tests/testdata/with-2.ast b/parser/src/parser/tests/testdata/with-2.ast new file mode 100644 index 000000000..d5547b68e --- /dev/null +++ b/parser/src/parser/tests/testdata/with-2.ast @@ -0,0 +1,35 @@ + rule test + └─ condition + └─ with : ( ) + ├─ + │ ├─ first + │ ├─ [] + │ │ ├─ + │ │ │ └─ field access + │ │ │ ├─ foo + │ │ │ └─ bar + │ │ └─ + │ │ └─ 0 + │ ├─ last + │ └─ [] + │ ├─ + │ │ └─ field access + │ │ ├─ foo + │ │ └─ bar + │ └─ + │ └─ sub + │ ├─ bar_num_of_sections + │ └─ 1 + └─ + └─ and + ├─ eq + │ ├─ field access + │ │ ├─ first + │ │ └─ name + │ └─ "foo" + └─ eq + ├─ field access + │ ├─ bar + │ └─ value + └─ 0x200 + diff --git a/parser/src/parser/tests/testdata/with-2.cst b/parser/src/parser/tests/testdata/with-2.cst new file mode 100644 index 000000000..745ed424b --- /dev/null +++ b/parser/src/parser/tests/testdata/with-2.cst @@ -0,0 +1,104 @@ +SOURCE_FILE@0..162 + RULE_DECL@0..162 + RULE_KW@0..4 "rule" + WHITESPACE@4..5 " " + IDENT@5..9 "test" + WHITESPACE@9..10 " " + L_BRACE@10..11 "{" + NEWLINE@11..12 "\n" + WHITESPACE@12..13 "\t" + CONDITION_BLK@13..160 + CONDITION_KW@13..22 "condition" + COLON@22..23 ":" + NEWLINE@23..24 "\n" + WHITESPACE@24..26 "\t\t" + BOOLEAN_EXPR@26..160 + BOOLEAN_TERM@26..160 + WITH_EXPR@26..160 + WITH_KW@26..30 "with" + WHITESPACE@30..31 " " + WITH_IDENTIFIERS@31..90 + IDENT@31..36 "first" + WHITESPACE@36..37 " " + EQUAL@37..38 "=" + WHITESPACE@38..39 " " + 
EXPR@39..49 + TERM@39..49 + PRIMARY_EXPR@39..46 + IDENT@39..42 "foo" + DOT@42..43 "." + IDENT@43..46 "bar" + L_BRACKET@46..47 "[" + EXPR@47..48 + TERM@47..48 + PRIMARY_EXPR@47..48 + INTEGER_LIT@47..48 "0" + R_BRACKET@48..49 "]" + COMMA@49..50 "," + WHITESPACE@50..51 " " + IDENT@51..55 "last" + WHITESPACE@55..56 " " + EQUAL@56..57 "=" + WHITESPACE@57..58 " " + EXPR@58..90 + TERM@58..90 + PRIMARY_EXPR@58..65 + IDENT@58..61 "foo" + DOT@61..62 "." + IDENT@62..65 "bar" + L_BRACKET@65..66 "[" + EXPR@66..89 + TERM@66..85 + PRIMARY_EXPR@66..85 + IDENT@66..85 "bar_num_of_sections" + WHITESPACE@85..86 " " + SUB@86..87 "-" + WHITESPACE@87..88 " " + TERM@88..89 + PRIMARY_EXPR@88..89 + INTEGER_LIT@88..89 "1" + R_BRACKET@89..90 "]" + WHITESPACE@90..91 " " + COLON@91..92 ":" + WHITESPACE@92..93 " " + L_PAREN@93..94 "(" + WHITESPACE@94..95 " " + NEWLINE@95..96 "\n" + WHITESPACE@96..108 " " + BOOLEAN_EXPR@108..150 + BOOLEAN_TERM@108..127 + EXPR@108..118 + TERM@108..118 + PRIMARY_EXPR@108..118 + IDENT@108..113 "first" + DOT@113..114 "." + IDENT@114..118 "name" + WHITESPACE@118..119 " " + EQ@119..121 "==" + WHITESPACE@121..122 " " + EXPR@122..127 + TERM@122..127 + PRIMARY_EXPR@122..127 + STRING_LIT@122..127 "\"foo\"" + WHITESPACE@127..128 " " + AND_KW@128..131 "and" + WHITESPACE@131..132 " " + BOOLEAN_TERM@132..150 + EXPR@132..141 + TERM@132..141 + PRIMARY_EXPR@132..141 + IDENT@132..135 "bar" + DOT@135..136 "." 
+ IDENT@136..141 "value" + WHITESPACE@141..142 " " + EQ@142..144 "==" + WHITESPACE@144..145 " " + EXPR@145..150 + TERM@145..150 + PRIMARY_EXPR@145..150 + INTEGER_LIT@145..150 "0x200" + NEWLINE@150..151 "\n" + WHITESPACE@151..159 " " + R_PAREN@159..160 ")" + NEWLINE@160..161 "\n" + R_BRACE@161..162 "}" diff --git a/parser/src/parser/tests/testdata/with-2.in b/parser/src/parser/tests/testdata/with-2.in new file mode 100644 index 000000000..955b3af63 --- /dev/null +++ b/parser/src/parser/tests/testdata/with-2.in @@ -0,0 +1,6 @@ +rule test { + condition: + with first = foo.bar[0], last = foo.bar[bar_num_of_sections - 1] : ( + first.name == "foo" and bar.value == 0x200 + ) +} \ No newline at end of file diff --git a/parser/src/parser/tests/testdata/with-error-1.ast b/parser/src/parser/tests/testdata/with-error-1.ast new file mode 100644 index 000000000..6d9817af4 --- /dev/null +++ b/parser/src/parser/tests/testdata/with-error-1.ast @@ -0,0 +1,2 @@ +ERRORS: +- SyntaxError { message: "expecting operator, `,` or `:`, found `}`", span: Span(91..92) } diff --git a/parser/src/parser/tests/testdata/with-error-1.cst b/parser/src/parser/tests/testdata/with-error-1.cst new file mode 100644 index 000000000..f8247b29b --- /dev/null +++ b/parser/src/parser/tests/testdata/with-error-1.cst @@ -0,0 +1,46 @@ +SOURCE_FILE@0..92 + RULE_DECL@0..92 + RULE_KW@0..4 "rule" + WHITESPACE@4..5 " " + IDENT@5..9 "test" + WHITESPACE@9..10 " " + L_BRACE@10..11 "{" + NEWLINE@11..12 "\n" + WHITESPACE@12..13 "\t" + ERROR@13..90 + CONDITION_KW@13..22 "condition" + COLON@22..23 ":" + NEWLINE@23..24 "\n" + WHITESPACE@24..26 "\t\t" + ERROR@26..26 + ERROR@26..26 + WITH_KW@26..30 "with" + WHITESPACE@30..31 " " + IDENT@31..36 "first" + WHITESPACE@36..37 " " + EQUAL@37..38 "=" + WHITESPACE@38..39 " " + IDENT@39..42 "foo" + DOT@42..43 "." 
+ IDENT@43..46 "bar" + L_BRACKET@46..47 "[" + INTEGER_LIT@47..48 "0" + R_BRACKET@48..49 "]" + WHITESPACE@49..50 " " + L_PAREN@50..51 "(" + WHITESPACE@51..52 " " + NEWLINE@52..53 "\n" + WHITESPACE@53..65 " " + IDENT@65..70 "first" + WHITESPACE@70..71 " " + EQ@71..73 "==" + WHITESPACE@73..74 " " + STRING_LIT@74..80 "\"test\"" + NEWLINE@80..81 "\n" + WHITESPACE@81..89 " " + R_PAREN@89..90 ")" + NEWLINE@90..91 "\n" + R_BRACE@91..92 "}" + +ERRORS: +- [91..92]: expecting operator, `,` or `:`, found `}` diff --git a/parser/src/parser/tests/testdata/with-error-1.in b/parser/src/parser/tests/testdata/with-error-1.in new file mode 100644 index 000000000..f5957c4c1 --- /dev/null +++ b/parser/src/parser/tests/testdata/with-error-1.in @@ -0,0 +1,6 @@ +rule test { + condition: + with first = foo.bar[0] ( + first == "test" + ) +} \ No newline at end of file diff --git a/parser/src/parser/tests/testdata/with-error-2.ast b/parser/src/parser/tests/testdata/with-error-2.ast new file mode 100644 index 000000000..84f5b6f44 --- /dev/null +++ b/parser/src/parser/tests/testdata/with-error-2.ast @@ -0,0 +1,2 @@ +ERRORS: +- SyntaxError { message: "expecting expression, `for`, `all`, `none`, `any` or `with`, found `)`", span: Span(62..63) } diff --git a/parser/src/parser/tests/testdata/with-error-2.cst b/parser/src/parser/tests/testdata/with-error-2.cst new file mode 100644 index 000000000..0f5bbe972 --- /dev/null +++ b/parser/src/parser/tests/testdata/with-error-2.cst @@ -0,0 +1,40 @@ +SOURCE_FILE@0..65 + RULE_DECL@0..65 + RULE_KW@0..4 "rule" + WHITESPACE@4..5 " " + IDENT@5..9 "test" + WHITESPACE@9..10 " " + L_BRACE@10..11 "{" + NEWLINE@11..12 "\n" + WHITESPACE@12..13 "\t" + ERROR@13..63 + CONDITION_KW@13..22 "condition" + COLON@22..23 ":" + NEWLINE@23..24 "\n" + WHITESPACE@24..26 "\t\t" + ERROR@26..26 + ERROR@26..26 + WITH_KW@26..30 "with" + WHITESPACE@30..31 " " + IDENT@31..36 "first" + WHITESPACE@36..37 " " + EQUAL@37..38 "=" + WHITESPACE@38..39 " " + IDENT@39..42 "foo" + DOT@42..43 "." 
+ IDENT@43..46 "bar" + L_BRACKET@46..47 "[" + INTEGER_LIT@47..48 "0" + R_BRACKET@48..49 "]" + WHITESPACE@49..50 " " + COLON@50..51 ":" + WHITESPACE@51..52 " " + L_PAREN@52..53 "(" + NEWLINE@53..54 "\n" + WHITESPACE@54..62 " " + R_PAREN@62..63 ")" + NEWLINE@63..64 "\n" + R_BRACE@64..65 "}" + +ERRORS: +- [62..63]: expecting expression, `for`, `all`, `none`, `any` or `with`, found `)` diff --git a/parser/src/parser/tests/testdata/with-error-2.in b/parser/src/parser/tests/testdata/with-error-2.in new file mode 100644 index 000000000..6b6815ad6 --- /dev/null +++ b/parser/src/parser/tests/testdata/with-error-2.in @@ -0,0 +1,5 @@ +rule test { + condition: + with first = foo.bar[0] : ( + ) +} \ No newline at end of file diff --git a/parser/src/parser/tests/testdata/with-error-3.ast b/parser/src/parser/tests/testdata/with-error-3.ast new file mode 100644 index 000000000..257bafc0b --- /dev/null +++ b/parser/src/parser/tests/testdata/with-error-3.ast @@ -0,0 +1,2 @@ +ERRORS: +- SyntaxError { message: "expecting identifier, found `:`", span: Span(31..32) } diff --git a/parser/src/parser/tests/testdata/with-error-3.cst b/parser/src/parser/tests/testdata/with-error-3.cst new file mode 100644 index 000000000..f0215d978 --- /dev/null +++ b/parser/src/parser/tests/testdata/with-error-3.cst @@ -0,0 +1,32 @@ +SOURCE_FILE@0..63 + RULE_DECL@0..63 + RULE_KW@0..4 "rule" + WHITESPACE@4..5 " " + IDENT@5..9 "test" + WHITESPACE@9..10 " " + L_BRACE@10..11 "{" + NEWLINE@11..12 "\n" + WHITESPACE@12..13 "\t" + ERROR@13..61 + CONDITION_KW@13..22 "condition" + COLON@22..23 ":" + NEWLINE@23..24 "\n" + WHITESPACE@24..26 "\t\t" + ERROR@26..26 + ERROR@26..26 + WITH_KW@26..30 "with" + WHITESPACE@30..31 " " + COLON@31..32 ":" + WHITESPACE@32..33 " " + L_PAREN@33..34 "(" + NEWLINE@34..35 "\n" + WHITESPACE@35..47 " " + TRUE_KW@47..51 "true" + NEWLINE@51..52 "\n" + WHITESPACE@52..60 " " + R_PAREN@60..61 ")" + NEWLINE@61..62 "\n" + R_BRACE@62..63 "}" + +ERRORS: +- [31..32]: expecting identifier, found `:` 
diff --git a/parser/src/parser/tests/testdata/with-error-3.in b/parser/src/parser/tests/testdata/with-error-3.in new file mode 100644 index 000000000..47abfae31 --- /dev/null +++ b/parser/src/parser/tests/testdata/with-error-3.in @@ -0,0 +1,6 @@ +rule test { + condition: + with : ( + true + ) +} \ No newline at end of file diff --git a/parser/src/tokenizer/mod.rs b/parser/src/tokenizer/mod.rs index 1d965b6de..8ab20488e 100644 --- a/parser/src/tokenizer/mod.rs +++ b/parser/src/tokenizer/mod.rs @@ -338,6 +338,8 @@ enum NormalToken<'src> { Wide, #[token("xor")] Xor, + #[token("with")] + With, // Bitwise #[token("<<")] @@ -695,6 +697,7 @@ fn convert_normal_token(token: NormalToken, span: Span) -> Token { NormalToken::True => Token::TRUE_KW(span), NormalToken::Wide => Token::WIDE_KW(span), NormalToken::Xor => Token::XOR_KW(span), + NormalToken::With => Token::WITH_KW(span), // Bitwise. NormalToken::Shl => Token::SHL(span), diff --git a/parser/src/tokenizer/tokens.rs b/parser/src/tokenizer/tokens.rs index 6a5f29fa7..de582efb2 100644 --- a/parser/src/tokenizer/tokens.rs +++ b/parser/src/tokenizer/tokens.rs @@ -44,6 +44,7 @@ pub(crate) enum TokenId { TRUE_KW, WIDE_KW, XOR_KW, + WITH_KW, // Bitwise operators. SHL, @@ -152,6 +153,7 @@ impl TokenId { TokenId::TRUE_KW => "`true`", TokenId::WIDE_KW => "`wide`", TokenId::XOR_KW => "`xor`", + TokenId::WITH_KW => "`with`", // Bitwise operators. TokenId::SHL => "`<<`", @@ -251,6 +253,7 @@ pub(crate) enum Token { TRUE_KW(Span) = TokenId::TRUE_KW as u8, WIDE_KW(Span) = TokenId::WIDE_KW as u8, XOR_KW(Span) = TokenId::XOR_KW as u8, + WITH_KW(Span) = TokenId::WITH_KW as u8, // Bitwise operators. 
SHL(Span) = TokenId::SHL as u8, @@ -369,6 +372,7 @@ impl Token { | Token::TRUE_KW(span) | Token::WIDE_KW(span) | Token::XOR_KW(span) + | Token::WITH_KW(span) // Bitwise operators | Token::SHL(span) diff --git a/site/content/docs/writing_rules/conditions.md b/site/content/docs/writing_rules/conditions.md index 53388408d..52a20c627 100644 --- a/site/content/docs/writing_rules/conditions.md +++ b/site/content/docs/writing_rules/conditions.md @@ -472,6 +472,93 @@ The `for..in` operator is similar to `for..of`, but the latter iterates over a set of patterns, while the former iterates over ranges, enumerations, arrays and dictionaries. +## The "with" statement + +YARA-X now supports the `with` statement proposed in [this RFC](https://github.com/VirusTotal/yara/discussions/1783), which allows you to define identifiers +that hold the result of an expression. Each identifier is local and is +valid only within the `with` statement. The syntax is: + +```yara +with + <identifier> = <expression> [, <identifier> = <expression>]* : + ( + <boolean expression> + ) +``` + +For example: + +```yara +rule WithExample { + condition: + with + first = foo.bar[0], + last = foo.bar[num_of_items - 1] : ( + first.text == last.text + ) +} +``` + +Using an identifier declared by `with` outside of its `with` statement is not allowed. +Something like: + +```yara +rule WithExample { + condition: + with + first = foo.bar[0], + last = foo.bar[num_of_items - 1] : ( + first.text == last.text + ) + or last.text != first.text +} +``` + +is syntactically valid but it will raise a compilation error. + +Another use of the `with` statement is to avoid repeating the same +expression multiple times in the condition.
Something like: + +```yara +pe.sections[0].name == ".text" and +pe.sections[0].characteristics == 0xC0000000 and +pe.sections[0].raw_data_size == 0x2000 and +pe.sections[0].raw_data_offset == 0x1000 and +pe.sections[pe.number_of_sections - 1].name == ".tls" and +pe.sections[pe.number_of_sections - 1].characteristics == 0xC0000000 and +pe.sections[pe.number_of_sections - 1].raw_data_size == 0x1000 and +pe.sections[pe.number_of_sections - 1].raw_data_offset == 0x4000 +``` + +can be rewritten as: + +```yara +with + fs = pe.sections[0], + ls = pe.sections[pe.number_of_sections - 1] : ( + fs.name == ".text" and + fs.characteristics == 0xC0000000 and + fs.raw_data_size == 0x2000 and + fs.raw_data_offset == 0x1000 and + ls.name == ".tls" and + ls.characteristics == 0xC0000000 and + ls.raw_data_size == 0x1000 and + ls.raw_data_offset == 0x4000 + ) +``` + +Another use case is declaring a variable that is only needed inside a `for` loop: + +```yara +for all offset in (10,20,30) : ( + with val = uint64(offset) | uint64(offset + 4) | uint64(offset + 8) : ( + val == 0x10000 or + val == 0x20000 or + val == 0x40000 + ) +) +``` + ## Referencing other rules When writing the condition for a rule, you can also make reference to a diff --git a/site/content/docs/writing_rules/differences.md b/site/content/docs/writing_rules/differences.md index 99609e063..db9fc05bc 100644 --- a/site/content/docs/writing_rules/differences.md +++ b/site/content/docs/writing_rules/differences.md @@ -196,6 +196,37 @@ But this is not valid... 1 of (some_rule*) ``` +## The "with" statement +YARA-X now supports the `with` statement, which allows you to define identifiers +that holds the result of a boolean expression. Each identifier is local and is valid +only within the `with` statement. For example: + +``` +with + a = 1 + 1, + b = 2 : ( + a == b + ) +``` + +This is also useful to avoid repeating the same expression multiple times in the +condition.
For example: + +``` +with + a = foo.bar[0], + b = foo.bar[1] : ( + a.name == b.name or + a.value == 0x10 or + b.value == 0x20 or + a.value == b.value + ) +``` + +This is something that was not present in YARA 4.x and you had to repeat the +expression multiple times. + + ## Using xor and fullword together In YARA 4.x the combination `xor` and `fullword` looks for the bytes before From 3858aafe1452bba33aa2334bc6da55ced10a15f9 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Tom=C3=A1=C5=A1=20=C4=8Euri=C5=A1?= Date: Wed, 18 Sep 2024 11:31:07 +0200 Subject: [PATCH 2/3] fix: change identifiers to declarations, added tests, minor refactoring --- lib/src/compiler/emit.rs | 5 +- lib/src/compiler/ir/ast2ir.rs | 12 ++- lib/src/compiler/ir/dfs.rs | 2 +- lib/src/compiler/ir/mod.rs | 3 +- lib/src/tests/mod.rs | 22 ++++- parser/src/ast/ascii_tree.rs | 11 ++- parser/src/ast/cst2ast.rs | 31 ++++--- parser/src/ast/mod.rs | 10 +-- parser/src/cst/syntax_kind.rs | 3 +- parser/src/parser/mod.rs | 32 +++++--- parser/src/parser/tests/testdata/with-1.cst | 27 ++++--- parser/src/parser/tests/testdata/with-2.cst | 80 ++++++++++--------- .../content/docs/writing_rules/differences.md | 1 + 13 files changed, 139 insertions(+), 100 deletions(-) diff --git a/lib/src/compiler/emit.rs b/lib/src/compiler/emit.rs index 5da10af31..3bab70f35 100644 --- a/lib/src/compiler/emit.rs +++ b/lib/src/compiler/emit.rs @@ -2152,9 +2152,8 @@ fn emit_with( with: &mut With, ) { // Emit the code that sets the variables in the `with` statement. 
- for (idx, &item) in with.identifiers.iter().enumerate() { - let expr = &mut with.expressions[idx]; - set_var(ctx, instr, item, |ctx, instr| { + for (id, expr) in with.declarations.iter_mut() { + set_var(ctx, instr, *id, |ctx, instr| { emit_expr(ctx, instr, expr); }); } diff --git a/lib/src/compiler/ir/ast2ir.rs b/lib/src/compiler/ir/ast2ir.rs index 3140bbc27..f7ed0fd61 100644 --- a/lib/src/compiler/ir/ast2ir.rs +++ b/lib/src/compiler/ir/ast2ir.rs @@ -1124,22 +1124,20 @@ fn with_expr_from_ast( with: &ast::With, ) -> Result { // Create stack frame with capacity for the with statement variables - let mut stack_frame = ctx.vars.new_frame(with.items.len() as i32); + let mut stack_frame = ctx.vars.new_frame(with.declarations.len() as i32); let mut symbols = SymbolTable::new(); - let mut identifiers = Vec::new(); - let mut expressions = Vec::new(); + let mut declarations = Vec::new(); // Iterate over all items in the with statement and create a new variable // for each one. Both identifiers and corresponding expressions are stored // in separate vectors. - for item in with.items.iter() { + for item in with.declarations.iter() { let type_value = expr_from_ast(ctx, &item.expression)? .type_value() .clone_without_value(); let var = stack_frame.new_var(type_value.ty()); - identifiers.push(var); - expressions.push(expr_from_ast(ctx, &item.expression)?); + declarations.push((var, expr_from_ast(ctx, &item.expression)?)); // Insert the variable into the symbol table. 
symbols.insert( @@ -1158,7 +1156,7 @@ fn with_expr_from_ast( ctx.vars.unwind(&stack_frame); - Ok(Expr::With(Box::new(With { identifiers, expressions, condition }))) + Ok(Expr::With(Box::new(With { declarations, condition }))) } fn iterable_from_ast( diff --git a/lib/src/compiler/ir/dfs.rs b/lib/src/compiler/ir/dfs.rs index baa83c1a7..edbe8cb9d 100644 --- a/lib/src/compiler/ir/dfs.rs +++ b/lib/src/compiler/ir/dfs.rs @@ -216,7 +216,7 @@ impl<'a> Iterator for DepthFirstSearch<'a> { Expr::With(with) => { self.stack.push(Event::Enter(&with.condition)); - for expr in with.expressions.iter().rev() { + for (_id, expr) in with.declarations.iter().rev() { self.stack.push(Event::Enter(expr)) } } diff --git a/lib/src/compiler/ir/mod.rs b/lib/src/compiler/ir/mod.rs index dcfb466d4..f360298be 100644 --- a/lib/src/compiler/ir/mod.rs +++ b/lib/src/compiler/ir/mod.rs @@ -616,8 +616,7 @@ pub(in crate::compiler) struct ForIn { /// A `with` expression (e.g `with $a, $b : (..)`) #[derive(Debug)] pub(in crate::compiler) struct With { - pub identifiers: Vec, - pub expressions: Vec, + pub declarations: Vec<(Var, Expr)>, pub condition: Expr, } diff --git a/lib/src/tests/mod.rs b/lib/src/tests/mod.rs index abe01f619..76d3cc2a7 100644 --- a/lib/src/tests/mod.rs +++ b/lib/src/tests/mod.rs @@ -534,11 +534,31 @@ fn with() { r#"with foo = test_proto2.map_string_struct["foo"].nested_int64_one: (foo == 1)"# ); + #[cfg(feature = "test_proto2-module")] + condition_true!( + r#"with foo = test_proto2.nested: (foo.nested_int64_one == 1 )"# + ); + + #[cfg(feature = "test_proto2-module")] + condition_false!( + r#"with foo = test_proto2.nested: (foo.nested_int64_one == 0 )"# + ); + + #[cfg(feature = "test_proto2-module")] + condition_true!( + r#"with foo = test_proto2.uppercase("foo"): (foo == "FOO" )"# + ); + + #[cfg(feature = "test_proto2-module")] + condition_false!( + r#"with foo = test_proto2.uppercase("foo"): (foo == "FoO" )"# + ); + #[cfg(feature = "test_proto2-module")] condition_true!( r#"with 
bar = test_proto2.array_string[1], - baz = test_proto2.array_string[2] : + baz = test_proto2.array_string[2]: ( bar == "bar" and baz == "baz" ) diff --git a/parser/src/ast/ascii_tree.rs b/parser/src/ast/ascii_tree.rs index c743b933e..cd6e7743e 100644 --- a/parser/src/ast/ascii_tree.rs +++ b/parser/src/ast/ascii_tree.rs @@ -429,7 +429,10 @@ pub(crate) fn expr_ascii_tree(expr: &Expr) -> Tree { vec![ Node( "".to_string(), - w.items.iter().flat_map(with_items_ascii_tree).collect(), + w.declarations + .iter() + .flat_map(with_items_ascii_tree) + .collect(), ), Node( "".to_string(), @@ -440,10 +443,10 @@ pub(crate) fn expr_ascii_tree(expr: &Expr) -> Tree { } } -fn with_items_ascii_tree(item: &WithItems) -> Vec { +fn with_items_ascii_tree(declaration: &WithDeclaration) -> Vec { vec![ - Leaf(vec![format!("{}", item.identifier.name)]), - expr_ascii_tree(&item.expression), + Leaf(vec![format!("{}", declaration.identifier.name)]), + expr_ascii_tree(&declaration.expression), ] } diff --git a/parser/src/ast/cst2ast.rs b/parser/src/ast/cst2ast.rs index 77fd4f234..d00aef31c 100644 --- a/parser/src/ast/cst2ast.rs +++ b/parser/src/ast/cst2ast.rs @@ -1024,26 +1024,31 @@ impl<'src> Builder<'src> { let mut span = self.expect(WITH_KW)?; - self.begin(WITH_IDENTIFIERS)?; + self.begin(WITH_DECLS)?; - let item = |i: &mut Self| -> Result, Abort> { - let identifier = i.identifier()?; - let mut span = identifier.span(); - span = span.combine(&i.expect(EQUAL)?); - let expression = i.expr()?; - span = span.combine(&expression.span()); + let declaration = + |i: &mut Self| -> Result, Abort> { + i.begin(WITH_DECL)?; - Ok(WithItems { span, identifier, expression }) - }; + let identifier = i.identifier()?; + let mut span = identifier.span(); + span = span.combine(&i.expect(EQUAL)?); + let expression = i.expr()?; + span = span.combine(&expression.span()); - let mut items = vec![item(self)?]; + i.end(WITH_DECL)?; + + Ok(WithDeclaration { span, identifier, expression }) + }; + + let mut declarations = 
vec![declaration(self)?]; while let Event::Token { kind: COMMA, .. } = self.peek() { self.expect(COMMA)?; - items.push(item(self)?); + declarations.push(declaration(self)?); } - self.end(WITH_IDENTIFIERS)?; + self.end(WITH_DECLS)?; self.expect(COLON)?; self.expect(L_PAREN)?; @@ -1054,7 +1059,7 @@ impl<'src> Builder<'src> { self.end(WITH_EXPR)?; - Ok(Expr::With(Box::new(With { span, items, condition }))) + Ok(Expr::With(Box::new(With { span, declarations, condition }))) } fn quantifier(&mut self) -> Result, Abort> { diff --git a/parser/src/ast/mod.rs b/parser/src/ast/mod.rs index 3831bbb54..3e4a6f1e4 100644 --- a/parser/src/ast/mod.rs +++ b/parser/src/ast/mod.rs @@ -364,17 +364,17 @@ pub enum OfItems<'src> { BoolExprTuple(Vec>), } -/// A `with` expression (e.g `with $a : (..)`) +/// A `with` expression (e.g `with foo = 1 + 1 : (..)`) #[derive(Debug)] pub struct With<'src> { span: Span, - pub items: Vec>, + pub declarations: Vec>, pub condition: Expr<'src>, } /// Items in a `with` expression. #[derive(Debug)] -pub struct WithItems<'src> { +pub struct WithDeclaration<'src> { span: Span, pub identifier: Ident<'src>, pub expression: Expr<'src>, @@ -593,7 +593,7 @@ pub enum Expr<'src> { /// A `for in ...` expression. (e.g. `for all i in (1..100) : ( ... )`) ForIn(Box>), - /// A `with` expression (e.g. `with $a : ( ... )`) + /// A `with` expression (e.g. `with foo = 1 + 1 : ( ... 
)`) With(Box>), } @@ -1072,7 +1072,7 @@ impl WithSpan for With<'_> { } } -impl WithSpan for WithItems<'_> { +impl WithSpan for WithDeclaration<'_> { fn span(&self) -> Span { self.span.clone() } diff --git a/parser/src/cst/syntax_kind.rs b/parser/src/cst/syntax_kind.rs index 941cae9e8..dfc5ca3bd 100644 --- a/parser/src/cst/syntax_kind.rs +++ b/parser/src/cst/syntax_kind.rs @@ -129,7 +129,8 @@ pub enum SyntaxKind { FOR_EXPR, OF_EXPR, WITH_EXPR, - WITH_IDENTIFIERS, + WITH_DECLS, + WITH_DECL, ITERABLE, QUANTIFIER, EXPR_TUPLE, diff --git a/parser/src/parser/mod.rs b/parser/src/parser/mod.rs index b551387b6..4c94df83f 100644 --- a/parser/src/parser/mod.rs +++ b/parser/src/parser/mod.rs @@ -1587,7 +1587,7 @@ impl<'src> ParserImpl<'src> { /// /// ```text /// WITH_EXPR := - /// `with` WITH_IDENTIFIERS `:` + /// `with` WITH_DECLS `:` /// `(` /// BOOLEAN_EXPR /// `)` @@ -1595,7 +1595,7 @@ impl<'src> ParserImpl<'src> { fn with_expr(&mut self) -> &mut Self { self.begin(WITH_EXPR) .expect(t!(WITH_KW)) - .then(|p| p.with_identifier()) + .then(|p| p.with_declarations()) .expect(t!(COLON)) .expect(t!(L_PAREN)) .then(|p| p.boolean_expr()) @@ -1606,19 +1606,29 @@ impl<'src> ParserImpl<'src> { /// Parses `with` identifiers. /// /// ```text - /// WITH_IDENTIFIERS := - /// IDENT `=` EXPR (`,` IDENT `=` EXPR)* + /// WITH_DECLS := + /// WITH_DECL (`,` WITH_DECL)* /// - fn with_identifier(&mut self) -> &mut Self { - self.begin(WITH_IDENTIFIERS) + fn with_declarations(&mut self) -> &mut Self { + self.begin(WITH_DECLS) + .then(|p| p.with_declaration()) + .zero_or_more(|p| { + p.expect(t!(COMMA)).then(|p| p.with_declaration()) + }) + .end() + } + + /// Parses a `with` declaration. 
+ /// + /// ```text + /// WITH_DECL := + /// IDENT `=` EXPR + /// ``` + fn with_declaration(&mut self) -> &mut Self { + self.begin(WITH_DECL) .expect(t!(IDENT)) .expect(t!(EQUAL)) .then(|p| p.expr()) - .zero_or_more(|p| { - p.expect(t!(COMMA)).then(|p| { - p.expect(t!(IDENT)).expect(t!(EQUAL)).then(|p| p.expr()) - }) - }) .end() } diff --git a/parser/src/parser/tests/testdata/with-1.cst b/parser/src/parser/tests/testdata/with-1.cst index 9cc0fb3b8..881db0037 100644 --- a/parser/src/parser/tests/testdata/with-1.cst +++ b/parser/src/parser/tests/testdata/with-1.cst @@ -17,19 +17,20 @@ SOURCE_FILE@0..78 WITH_EXPR@26..76 WITH_KW@26..30 "with" WHITESPACE@30..31 " " - WITH_IDENTIFIERS@31..40 - IDENT@31..34 "two" - WHITESPACE@34..35 " " - EQUAL@35..36 "=" - WHITESPACE@36..37 " " - EXPR@37..40 - TERM@37..38 - PRIMARY_EXPR@37..38 - INTEGER_LIT@37..38 "1" - ADD@38..39 "+" - TERM@39..40 - PRIMARY_EXPR@39..40 - INTEGER_LIT@39..40 "1" + WITH_DECLS@31..40 + WITH_DECL@31..40 + IDENT@31..34 "two" + WHITESPACE@34..35 " " + EQUAL@35..36 "=" + WHITESPACE@36..37 " " + EXPR@37..40 + TERM@37..38 + PRIMARY_EXPR@37..38 + INTEGER_LIT@37..38 "1" + ADD@38..39 "+" + TERM@39..40 + PRIMARY_EXPR@39..40 + INTEGER_LIT@39..40 "1" WHITESPACE@40..41 " " COLON@41..42 ":" WHITESPACE@42..43 " " diff --git a/parser/src/parser/tests/testdata/with-2.cst b/parser/src/parser/tests/testdata/with-2.cst index 745ed424b..7e7dffbf2 100644 --- a/parser/src/parser/tests/testdata/with-2.cst +++ b/parser/src/parser/tests/testdata/with-2.cst @@ -17,47 +17,49 @@ SOURCE_FILE@0..162 WITH_EXPR@26..160 WITH_KW@26..30 "with" WHITESPACE@30..31 " " - WITH_IDENTIFIERS@31..90 - IDENT@31..36 "first" - WHITESPACE@36..37 " " - EQUAL@37..38 "=" - WHITESPACE@38..39 " " - EXPR@39..49 - TERM@39..49 - PRIMARY_EXPR@39..46 - IDENT@39..42 "foo" - DOT@42..43 "." 
- IDENT@43..46 "bar" - L_BRACKET@46..47 "[" - EXPR@47..48 - TERM@47..48 - PRIMARY_EXPR@47..48 - INTEGER_LIT@47..48 "0" - R_BRACKET@48..49 "]" + WITH_DECLS@31..90 + WITH_DECL@31..49 + IDENT@31..36 "first" + WHITESPACE@36..37 " " + EQUAL@37..38 "=" + WHITESPACE@38..39 " " + EXPR@39..49 + TERM@39..49 + PRIMARY_EXPR@39..46 + IDENT@39..42 "foo" + DOT@42..43 "." + IDENT@43..46 "bar" + L_BRACKET@46..47 "[" + EXPR@47..48 + TERM@47..48 + PRIMARY_EXPR@47..48 + INTEGER_LIT@47..48 "0" + R_BRACKET@48..49 "]" COMMA@49..50 "," WHITESPACE@50..51 " " - IDENT@51..55 "last" - WHITESPACE@55..56 " " - EQUAL@56..57 "=" - WHITESPACE@57..58 " " - EXPR@58..90 - TERM@58..90 - PRIMARY_EXPR@58..65 - IDENT@58..61 "foo" - DOT@61..62 "." - IDENT@62..65 "bar" - L_BRACKET@65..66 "[" - EXPR@66..89 - TERM@66..85 - PRIMARY_EXPR@66..85 - IDENT@66..85 "bar_num_of_sections" - WHITESPACE@85..86 " " - SUB@86..87 "-" - WHITESPACE@87..88 " " - TERM@88..89 - PRIMARY_EXPR@88..89 - INTEGER_LIT@88..89 "1" - R_BRACKET@89..90 "]" + WITH_DECL@51..90 + IDENT@51..55 "last" + WHITESPACE@55..56 " " + EQUAL@56..57 "=" + WHITESPACE@57..58 " " + EXPR@58..90 + TERM@58..90 + PRIMARY_EXPR@58..65 + IDENT@58..61 "foo" + DOT@61..62 "." + IDENT@62..65 "bar" + L_BRACKET@65..66 "[" + EXPR@66..89 + TERM@66..85 + PRIMARY_EXPR@66..85 + IDENT@66..85 "bar_num_of_sections" + WHITESPACE@85..86 " " + SUB@86..87 "-" + WHITESPACE@87..88 " " + TERM@88..89 + PRIMARY_EXPR@88..89 + INTEGER_LIT@88..89 "1" + R_BRACKET@89..90 "]" WHITESPACE@90..91 " " COLON@91..92 ":" WHITESPACE@92..93 " " diff --git a/site/content/docs/writing_rules/differences.md b/site/content/docs/writing_rules/differences.md index db9fc05bc..d8b9ab5d2 100644 --- a/site/content/docs/writing_rules/differences.md +++ b/site/content/docs/writing_rules/differences.md @@ -197,6 +197,7 @@ But this is not valid... ``` ## The "with" statement + YARA-X now supports the `with` statement, which allows you to define identifiers that holds the result of a boolean expression. 
Each identifier is local and is valid only within the `with` statement. For example: From f4d09d8640c5607ce7827348f1f8b373d62a893d Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Tom=C3=A1=C5=A1=20=C4=8Euri=C5=A1?= Date: Thu, 19 Sep 2024 12:00:30 +0200 Subject: [PATCH 3/3] fix: resolve merge conflicts --- lib/src/compiler/tests/testdata/errors/134.in | 4 ---- lib/src/compiler/tests/testdata/errors/134.out | 6 ------ lib/src/compiler/tests/testdata/errors/135.in | 3 +-- lib/src/compiler/tests/testdata/errors/135.out | 8 ++++---- lib/src/compiler/tests/testdata/errors/136.in | 5 +++++ lib/src/compiler/tests/testdata/errors/136.out | 6 ++++++ 6 files changed, 16 insertions(+), 16 deletions(-) delete mode 100644 lib/src/compiler/tests/testdata/errors/134.in delete mode 100644 lib/src/compiler/tests/testdata/errors/134.out create mode 100644 lib/src/compiler/tests/testdata/errors/136.in create mode 100644 lib/src/compiler/tests/testdata/errors/136.out diff --git a/lib/src/compiler/tests/testdata/errors/134.in b/lib/src/compiler/tests/testdata/errors/134.in deleted file mode 100644 index f2edb2bc2..000000000 --- a/lib/src/compiler/tests/testdata/errors/134.in +++ /dev/null @@ -1,4 +0,0 @@ -rule test { - condition: - with foo = "foo" : ( bar ) -} \ No newline at end of file diff --git a/lib/src/compiler/tests/testdata/errors/134.out b/lib/src/compiler/tests/testdata/errors/134.out deleted file mode 100644 index 94faa02ba..000000000 --- a/lib/src/compiler/tests/testdata/errors/134.out +++ /dev/null @@ -1,6 +0,0 @@ -error[E009]: unknown identifier `bar` - --> line:3:26 - | -3 | with foo = "foo" : ( bar ) - | ^^^ this identifier has not been declared - | \ No newline at end of file diff --git a/lib/src/compiler/tests/testdata/errors/135.in b/lib/src/compiler/tests/testdata/errors/135.in index b2fda0fc6..f2edb2bc2 100644 --- a/lib/src/compiler/tests/testdata/errors/135.in +++ b/lib/src/compiler/tests/testdata/errors/135.in @@ -1,5 +1,4 @@ rule test { condition: - with foo = "foo" : 
( foo == "foo" ) - and foo == "foo" + with foo = "foo" : ( bar ) } \ No newline at end of file diff --git a/lib/src/compiler/tests/testdata/errors/135.out b/lib/src/compiler/tests/testdata/errors/135.out index cee8b78e0..94faa02ba 100644 --- a/lib/src/compiler/tests/testdata/errors/135.out +++ b/lib/src/compiler/tests/testdata/errors/135.out @@ -1,6 +1,6 @@ -error[E009]: unknown identifier `foo` - --> line:4:12 +error[E009]: unknown identifier `bar` + --> line:3:26 | -4 | and foo == "foo" - | ^^^ this identifier has not been declared +3 | with foo = "foo" : ( bar ) + | ^^^ this identifier has not been declared | \ No newline at end of file diff --git a/lib/src/compiler/tests/testdata/errors/136.in b/lib/src/compiler/tests/testdata/errors/136.in new file mode 100644 index 000000000..b2fda0fc6 --- /dev/null +++ b/lib/src/compiler/tests/testdata/errors/136.in @@ -0,0 +1,5 @@ +rule test { + condition: + with foo = "foo" : ( foo == "foo" ) + and foo == "foo" +} \ No newline at end of file diff --git a/lib/src/compiler/tests/testdata/errors/136.out b/lib/src/compiler/tests/testdata/errors/136.out new file mode 100644 index 000000000..cee8b78e0 --- /dev/null +++ b/lib/src/compiler/tests/testdata/errors/136.out @@ -0,0 +1,6 @@ +error[E009]: unknown identifier `foo` + --> line:4:12 + | +4 | and foo == "foo" + | ^^^ this identifier has not been declared + | \ No newline at end of file