Skip to content

Commit

Permalink
Merge pull request #287 from SigmaHQ:str_regex
Browse files Browse the repository at this point in the history
String pattern to regular expression conversion
  • Loading branch information
thomaspatzke authored Oct 13, 2024
2 parents 4aec9da + 903b8a2 commit 8738362
Show file tree
Hide file tree
Showing 4 changed files with 101 additions and 4 deletions.
12 changes: 8 additions & 4 deletions sigma/conversion/base.py
Original file line number Diff line number Diff line change
Expand Up @@ -807,7 +807,7 @@ class variables. If this is not sufficient, the respective methods can be implem
endswith_expression: ClassVar[Optional[str]] = None
contains_expression: ClassVar[Optional[str]] = None
wildcard_match_expression: ClassVar[Optional[str]] = (
None # Special expression if wildcards can't be matched with the eq_token operator
None # Special expression if wildcards can't be matched with the eq_token operator.
)

# Regular expressions
Expand All @@ -831,6 +831,7 @@ class variables. If this is not sufficient, the respective methods can be implem

# Case sensitive string matching expression. String is quoted/escaped like a normal string.
# Placeholders {field} and {value} are replaced with field name and quoted/escaped string.
# {regex} contains the value expressed as regular expression.
case_sensitive_match_expression: ClassVar[Optional[str]] = None
# Case sensitive string matching operators similar to standard string matching. If not provided,
# case_sensitive_match_expression is used.
Expand Down Expand Up @@ -888,10 +889,10 @@ class variables. If this is not sufficient, the respective methods can be implem

# Value not bound to a field
unbound_value_str_expression: ClassVar[Optional[str]] = (
None # Expression for string value not bound to a field as format string with placeholder {value}
None # Expression for string value not bound to a field as format string with placeholders {value} and {regex} (value as regular expression)
)
unbound_value_num_expression: ClassVar[Optional[str]] = (
None # Expression for number value not bound to a field as format string with placeholder {value}
None # Expression for number value not bound to a field as format string with placeholder {value} and {regex} (value as regular expression)
)
unbound_value_re_expression: ClassVar[Optional[str]] = (
None # Expression for regular expression not bound to a field as format string with placeholder {value} and {flag_x} as described for re_expression
Expand Down Expand Up @@ -1339,6 +1340,7 @@ def convert_condition_field_eq_val_str(
return expr.format(
field=self.escape_and_quote_field(cond.field),
value=self.convert_value_str(value, state),
regex=self.convert_value_re(value.to_regex(), state),
backend=self,
)
except TypeError: # pragma: no cover
Expand Down Expand Up @@ -1388,6 +1390,7 @@ def convert_condition_field_eq_val_str_case_sensitive(
return expr.format(
field=self.escape_and_quote_field(cond.field),
value=self.convert_value_str(value, state),
regex=self.convert_value_re(value.to_regex(), state),
)
except TypeError: # pragma: no cover
raise NotImplementedError(
Expand Down Expand Up @@ -1563,7 +1566,8 @@ def convert_condition_val_str(
) -> Union[str, DeferredQueryExpression]:
"""Conversion of value-only strings."""
return self.unbound_value_str_expression.format(
value=self.convert_value_str(cond.value, state)
value=self.convert_value_str(cond.value, state),
regex=self.convert_value_re(cond.value.to_regex(), state),
)

def convert_condition_val_num(
Expand Down
11 changes: 11 additions & 0 deletions sigma/types.py
Original file line number Diff line number Diff line change
Expand Up @@ -565,6 +565,17 @@ def convert(
)
return s

def to_regex(self) -> "SigmaRegularExpression":
    """Return this string expressed as a SigmaRegularExpression.

    Multi-character wildcards are rendered as ``.*`` and single-character
    wildcards as ``.``. Plain characters listed in the metacharacter set
    below are prefixed with a backslash so they match literally.
    """
    # Characters handed to convert() as additionally escaped, so that they
    # are not interpreted as regex syntax in the resulting pattern.
    regex_metachars = ".*+?^$[](){}\\|"
    pattern = self.convert(
        escape_char="\\",
        wildcard_multi=".*",
        wildcard_single=".",
        add_escaped=regex_metachars,
    )
    return SigmaRegularExpression(pattern)


class SigmaCasedString(SigmaString):
"""Case-sensitive string matching."""
Expand Down
69 changes: 69 additions & 0 deletions tests/test_conversion_base.py
Original file line number Diff line number Diff line change
Expand Up @@ -503,6 +503,52 @@ def test_convert_value_str_contains_further_wildcard(test_backend):
)


def test_convert_value_str_wildcard_to_regex(test_backend, monkeypatch):
    """Check that ``{regex}`` in ``wildcard_match_expression`` gets the value as a regex.

    The rule uses ``contains`` on a value with an inner wildcard; the expected
    query shows the regex form of the value inserted for ``{regex}``.
    """
    monkeypatch.setattr(test_backend, "wildcard_match_expression", '{field} match "{regex}"')
    # The YAML nesting matters: `sel` must sit under `detection`, and the
    # field condition under `sel`, for the rule to be parsed as intended.
    rules = SigmaCollection.from_yaml(
        """
        title: Test
        status: test
        logsource:
            category: test_category
            product: test_product
        detection:
            sel:
                fieldA|contains: "va*lue"
            condition: sel
        """
    )
    assert test_backend.convert(rules) == ['mappedA match ".*va.*lue.*"']


def test_convert_value_str_wildcard_to_regex_cased(test_backend, monkeypatch):
    """Check that ``{regex}`` in ``case_sensitive_match_expression`` gets the value as a regex.

    Same scenario as the case-insensitive variant, but the ``cased`` modifier
    routes the condition through the case-sensitive match expression.
    """
    monkeypatch.setattr(
        test_backend, "case_sensitive_match_expression", '{field} casematch "{regex}"'
    )
    # The YAML nesting matters: `sel` must sit under `detection`, and the
    # field condition under `sel`, for the rule to be parsed as intended.
    rules = SigmaCollection.from_yaml(
        """
        title: Test
        status: test
        logsource:
            category: test_category
            product: test_product
        detection:
            sel:
                fieldA|contains|cased: "va*lue"
            condition: sel
        """
    )
    assert test_backend.convert(rules) == ['mappedA casematch ".*va.*lue.*"']


def test_convert_value_str_contains_expression_not_defined(test_backend, monkeypatch):
monkeypatch.setattr(test_backend, "contains_expression", None)
assert (
Expand Down Expand Up @@ -1830,6 +1876,29 @@ def test_convert_unbound_values(test_backend):
)


def test_convert_unbound_values_regex(test_backend, monkeypatch):
    """Check that ``{regex}`` in ``unbound_value_str_expression`` gets the value as a regex.

    Two keyword (field-less) values are used, one with a multi-character
    wildcard and one with a single-character wildcard.
    """
    monkeypatch.setattr(test_backend, "unbound_value_str_expression", '_=~"{regex}"')
    # The values must form a list nested under `sel`, which itself sits under
    # `detection`, so they are parsed as values not bound to a field.
    rules = SigmaCollection.from_yaml(
        """
        title: Test
        status: test
        logsource:
            category: test_category
            product: test_product
        detection:
            sel:
                - value*1
                - value?2
            condition: sel
        """
    )
    assert test_backend.convert(rules) == ['_=~"value.*1" or _=~"value.2"']


def test_convert_invalid_unbound_bool(test_backend):
with pytest.raises(SigmaValueError, match="Boolean values can't appear as standalone"):
test_backend.convert(
Expand Down
13 changes: 13 additions & 0 deletions tests/test_types.py
Original file line number Diff line number Diff line change
Expand Up @@ -388,6 +388,19 @@ def test_strings_convert_invalid_part():
s.convert()


def test_strings_to_regex():
    """Wildcards become regex wildcards; literal metacharacters are backslash-escaped."""
    sigma_str = SigmaString("Test*Special?(Plain)[\\*\\?]")
    # Unescaped * and ? are parsed as wildcard tokens, while the escaped ones
    # stay as plain characters inside the last string part.
    expected_parts = (
        "Test",
        SpecialChars.WILDCARD_MULTI,
        "Special",
        SpecialChars.WILDCARD_SINGLE,
        "(Plain)[*?]",
    )
    assert sigma_str.s == expected_parts

    regex = sigma_str.to_regex()
    assert regex.regexp == "Test.*Special.\\(Plain\\)\\[\\*\\?\\]"


def test_string_index(sigma_string):
assert sigma_string[3] == SigmaString("s")

Expand Down

0 comments on commit 8738362

Please sign in to comment.