Skip to content

Commit

Permalink
refactor: code reorganization
Browse files Browse the repository at this point in the history
  • Loading branch information
plusvic committed Aug 11, 2023
1 parent d03c86c commit 424b1c3
Show file tree
Hide file tree
Showing 9 changed files with 94 additions and 105 deletions.
4 changes: 1 addition & 3 deletions yara-x/src/compiler/atoms/mod.rs
Original file line number Diff line number Diff line change
Expand Up @@ -53,8 +53,6 @@ will end up using the `"Look"` atom alone, but in `/a(bcd|efg)h/` atoms `"bcd"`
and `"efg"` will be used because `"a"` and `"h"` are too short.
*/

pub mod base64;
mod mask;
mod quality;

use std::collections::Bound;
Expand All @@ -68,10 +66,10 @@ use regex_syntax::hir::literal::Literal;
use serde::{Deserialize, Serialize};
use smallvec::{SmallVec, ToSmallVec};

pub(crate) use crate::compiler::atoms::mask::ByteMaskCombinator;
pub(crate) use crate::compiler::atoms::quality::atom_quality;
pub(crate) use crate::compiler::atoms::quality::seq_quality;
pub(crate) use crate::compiler::atoms::quality::SeqQuality;

use crate::compiler::{SubPatternFlagSet, SubPatternFlags};

/// The number of bytes that every atom *should* have. Some atoms may be
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -135,7 +135,7 @@ pub(crate) fn base64_patterns(

#[cfg(test)]
mod test {
use crate::compiler::atoms::base64::base64_patterns;
use super::base64_patterns;
use bstr::BString;
use pretty_assertions::assert_eq;

Expand Down
76 changes: 73 additions & 3 deletions yara-x/src/compiler/ir/hex2hir.rs
Original file line number Diff line number Diff line change
Expand Up @@ -4,7 +4,7 @@ use regex_syntax::hir;

use yara_x_parser::ast;

use crate::re::hir::hex_byte_to_class;
use super::mask::ByteMaskCombinator;

pub(in crate::compiler) fn hex_pattern_hir_from_ast(
pattern: &ast::HexPattern,
Expand Down Expand Up @@ -66,15 +66,30 @@ fn hex_byte_hir_from_ast(byte: &ast::HexByte) -> hir::Hir {
match byte.mask {
0xff => hir::Hir::literal([byte.value]),
0x00 => hir::Hir::dot(hir::Dot::AnyByte),
_ => hir::Hir::class(hir::Class::Bytes(hex_byte_to_class(*byte))),
_ => hir::Hir::class(hir::Class::Bytes(hex_byte_to_class(byte))),
}
}

/// Builds the byte class matched by a masked hex byte.
///
/// The resulting class contains one single-byte range for every byte
/// produced by `ByteMaskCombinator::new(b.value, b.mask)`.
///
/// # Panics
///
/// Panics if `b.value` has a bit set that is cleared in `b.mask`.
fn hex_byte_to_class(b: &ast::HexByte) -> hir::ClassBytes {
    // A zero bit in the mask indicates that the corresponding bit in the
    // value will be ignored, but those ignored bits must be set to 0.
    assert_eq!(b.value & !b.mask, 0);

    // Hand every single-byte range to `ClassBytes::new` in one step instead
    // of pushing them one by one; `new` accepts ranges in any order.
    hir::ClassBytes::new(
        ByteMaskCombinator::new(b.value, b.mask)
            .map(|byte| hir::ClassBytesRange::new(byte, byte)),
    )
}

#[cfg(test)]
mod tests {
use super::hex_byte_to_class;
use crate::re::hir::class_to_hex_byte;
use pretty_assertions::assert_eq;
use regex_syntax::hir::{
Class, ClassBytes, ClassBytesRange, Dot, Hir, Repetition,
Class, ClassBytes, ClassBytesRange, Dot, Hir, HirKind, Repetition,
};
use yara_x_parser::ast::{
HexAlternative, HexByte, HexJump, HexToken, HexTokens,
Expand Down Expand Up @@ -237,4 +252,59 @@ mod tests {
])
);
}

#[test]
fn class_to_hex() {
    // A masked byte converted to a class and back must yield the same
    // value/mask pair.
    for (value, mask) in [(0x30, 0xF0), (0x05, 0x0F), (0x08, 0xAA)] {
        let class = hex_byte_to_class(&HexByte { value, mask });
        assert_eq!(class_to_hex_byte(&class), Some(HexByte { value, mask }));
    }

    // These classes are expected to have no masked-byte representation.
    let unrepresentable = [
        vec![ClassBytesRange::new(3, 4), ClassBytesRange::new(8, 8)],
        vec![
            ClassBytesRange::new(0, 0),
            ClassBytesRange::new(2, 2),
            ClassBytesRange::new(4, 4),
        ],
    ];
    for ranges in unrepresentable {
        assert_eq!(class_to_hex_byte(&ClassBytes::new(ranges)), None);
    }

    // The "any byte" dot must map to a fully wildcarded byte.
    match Hir::dot(Dot::AnyByte).kind() {
        HirKind::Class(Class::Bytes(any_byte)) => {
            assert_eq!(
                class_to_hex_byte(any_byte),
                Some(HexByte { value: 0x00, mask: 0x00 })
            );
        }
        _ => unreachable!(),
    }
}
}
Original file line number Diff line number Diff line change
Expand Up @@ -55,7 +55,7 @@ impl Iterator for ByteMaskCombinator {

#[cfg(test)]
mod tests {
use crate::compiler::atoms::mask::ByteMaskCombinator;
use super::ByteMaskCombinator;
use pretty_assertions::assert_eq;

#[test]
Expand Down
1 change: 1 addition & 0 deletions yara-x/src/compiler/ir/mod.rs
Original file line number Diff line number Diff line change
Expand Up @@ -49,6 +49,7 @@ use crate::re;

mod ast2ir;
mod hex2hir;
pub mod mask;

bitmask! {
/// Flags associated to rule patterns.
Expand Down
3 changes: 2 additions & 1 deletion yara-x/src/compiler/mod.rs
Original file line number Diff line number Diff line change
Expand Up @@ -31,7 +31,7 @@ use yara_x_parser::report::ReportBuilder;
use yara_x_parser::warnings::Warning;
use yara_x_parser::{Parser, SourceCode};

use crate::compiler::atoms::base64::base64_patterns;
use crate::compiler::base64::base64_patterns;
use crate::compiler::emit::emit_rule_condition;
use crate::compiler::{Context, VarStack};
use crate::modules::BUILTIN_MODULES;
Expand Down Expand Up @@ -65,6 +65,7 @@ mod errors;
mod ir;
mod rules;

pub mod base64;
#[cfg(test)]
mod tests;

Expand Down
12 changes: 1 addition & 11 deletions yara-x/src/re/compiler.rs
Original file line number Diff line number Diff line change
Expand Up @@ -22,8 +22,7 @@ use thiserror::Error;
use yara_x_parser::ast::HexByte;

use crate::compiler::{
atom_quality, best_atom_from_slice, seq_quality, Atom, SeqQuality,
DESIRED_ATOM_SIZE,
best_atom_from_slice, seq_quality, Atom, SeqQuality, DESIRED_ATOM_SIZE,
};
use crate::re;
use crate::re::hir::class_to_hex_byte;
Expand Down Expand Up @@ -988,15 +987,6 @@ impl hir::Visitor for &mut Compiler {
}
}

/// Returns the lowest `atom_quality` among the literals in `seq`.
///
/// Yields `-1` when `seq.literals()` is `None` or the sequence contains no
/// literals at all.
fn seq_quality2(seq: &Seq) -> i16 {
    let literals = match seq.literals() {
        Some(literals) => literals,
        None => return -1,
    };

    literals
        .iter()
        .map(|literal| atom_quality(literal.as_bytes()))
        .min()
        .map_or(-1, |worst| worst as i16)
}

fn simplify_seq(seq: Seq) -> Seq {
// If the literal extractor produced exactly 256 atoms, and those atoms
// have a common prefix that is one byte shorter than the longest atom,
Expand Down
90 changes: 10 additions & 80 deletions yara-x/src/re/hir.rs
Original file line number Diff line number Diff line change
@@ -1,11 +1,15 @@
use std::ops::RangeInclusive;

use regex_syntax;
use regex_syntax::hir::{Class, ClassBytes, ClassBytesRange, HirKind};

use crate::compiler::ByteMaskCombinator;
use crate::utils::cast;
use std::ops::RangeInclusive;
use yara_x_parser::ast::HexByte;

use crate::utils::cast;

pub use regex_syntax::hir::Class;
pub use regex_syntax::hir::ClassBytes;
pub use regex_syntax::hir::HirKind;

#[derive(Debug, PartialEq)]
pub(crate) struct ChainedPattern {
pub gap: RangeInclusive<u32>,
Expand Down Expand Up @@ -277,32 +281,13 @@ pub fn class_to_hex_byte(c: &ClassBytes) -> Option<HexByte> {
Some(HexByte { value: smallest_byte, mask: !neg_mask })
}

/// Builds the byte class matched by a masked hex byte.
///
/// The resulting class contains one single-byte range for every byte
/// produced by `ByteMaskCombinator::new(b.value, b.mask)`.
///
/// # Panics
///
/// Panics if `b.value` has a bit set that is cleared in `b.mask`.
pub fn hex_byte_to_class(b: HexByte) -> ClassBytes {
    // A zero bit in the mask indicates that the corresponding bit in the
    // value will be ignored, but those ignored bits must be set to 0.
    assert_eq!(b.value & !b.mask, 0);

    // Hand every single-byte range to `ClassBytes::new` in one step instead
    // of pushing them one by one; `new` accepts ranges in any order.
    ClassBytes::new(
        ByteMaskCombinator::new(b.value, b.mask)
            .map(|byte| ClassBytesRange::new(byte, byte)),
    )
}

#[cfg(test)]
mod tests {
use pretty_assertions::assert_eq;
use regex_syntax::hir::{
Class, ClassBytes, ClassBytesRange, Dot, HirKind, Repetition,
};

use yara_x_parser::ast::HexByte;
use regex_syntax::hir::{Dot, Repetition};

use super::Hir;
use crate::re::hir::{
class_to_hex_byte, hex_byte_to_class, ChainedPattern,
};
use crate::re::hir::ChainedPattern;

#[test]
fn split() {
Expand Down Expand Up @@ -427,59 +412,4 @@ mod tests {
)
);
}

#[test]
fn mask() {
    // A masked byte converted to a class and back must yield the same
    // value/mask pair.
    for (value, mask) in [(0x30, 0xF0), (0x05, 0x0F), (0x08, 0xAA)] {
        let class = hex_byte_to_class(HexByte { value, mask });
        assert_eq!(class_to_hex_byte(&class), Some(HexByte { value, mask }));
    }

    // These classes are expected to have no masked-byte representation.
    let unrepresentable = [
        vec![ClassBytesRange::new(3, 4), ClassBytesRange::new(8, 8)],
        vec![
            ClassBytesRange::new(0, 0),
            ClassBytesRange::new(2, 2),
            ClassBytesRange::new(4, 4),
        ],
    ];
    for ranges in unrepresentable {
        assert_eq!(class_to_hex_byte(&ClassBytes::new(ranges)), None);
    }

    // The "any byte" dot must map to a fully wildcarded byte.
    let any_byte = Hir::dot(Dot::AnyByte);
    match any_byte.kind() {
        HirKind::Class(Class::Bytes(class)) => {
            assert_eq!(
                class_to_hex_byte(class),
                Some(HexByte { value: 0x00, mask: 0x00 })
            );
        }
        _ => unreachable!(),
    }
}
}
9 changes: 4 additions & 5 deletions yara-x/src/re/tests.rs
Original file line number Diff line number Diff line change
@@ -1,13 +1,10 @@
use pretty_assertions::assert_eq;
use regex_syntax::hir::Class;

use yara_x_parser::ast;
use yara_x_parser::ast::HexByte;

use super::compiler::{Compiler, Location, RegexpAtom};
use crate::compiler::Atom;
use crate::re;
use crate::re::hir::hex_byte_to_class;
use crate::re::hir::Hir;
use crate::re::instr::{
epsilon_closure, BckCodeLoc, EpsilonClosureState, FwdCodeLoc,
Expand Down Expand Up @@ -979,12 +976,13 @@ fn re_code_20() {
);
}

/*
#[test]
fn re_code_21() {
let (forward_code, backward_code, atoms) = Compiler::new()
.compile(&Hir::concat(vec![
Hir::literal([0x01, 0x02]),
Hir::class(Class::Bytes(hex_byte_to_class(HexByte {
Hir::class(Hir::Class::Bytes(hex_byte_to_class(ast::HexByte {
value: 0x00,
mask: 0xFC,
}))),
Expand Down Expand Up @@ -1042,7 +1040,7 @@ fn re_code_22() {
let (forward_code, backward_code, atoms) = Compiler::new()
.compile(&Hir::concat(vec![
Hir::literal([0x01, 0x02]),
Hir::class(Class::Bytes(hex_byte_to_class(HexByte {
Hir::class(Hir::Class::Bytes(hex_byte_to_class(ast::HexByte {
value: 0x10,
mask: 0xF0,
}))),
Expand Down Expand Up @@ -1090,3 +1088,4 @@ fn re_code_22() {
},]
);
}
*/

0 comments on commit 424b1c3

Please sign in to comment.