-
-
Notifications
You must be signed in to change notification settings - Fork 216
New issue
Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.
By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.
Already on GitHub? Sign in to your account
Cache transformations within a phase #537
Changes from 9 commits
d85c39a
732ddfd
6563674
789c477
7571ad6
a6566b2
5585154
fc1554f
8c4f48f
4b698a0
File filter
Filter by extension
Conversations
Jump to
Diff view
Diff view
There are no files selected for viewing
Original file line number | Diff line number | Diff line change |
---|---|---|
|
@@ -8,6 +8,7 @@ import ( | |
"regexp" | ||
"strconv" | ||
"strings" | ||
"sync" | ||
|
||
"github.com/corazawaf/coraza/v3/internal/corazarules" | ||
"github.com/corazawaf/coraza/v3/macro" | ||
|
@@ -102,6 +103,8 @@ type Rule struct { | |
// action itself, not sure yet | ||
transformations []ruleTransformationParams | ||
|
||
transformationsID int | ||
|
||
// Slice of initialized actions to be evaluated during | ||
// the rule evaluation process | ||
actions []ruleActionParams | ||
|
@@ -155,11 +158,11 @@ func (r *Rule) Status() int { | |
// Evaluate will evaluate the current rule for the indicated transaction | ||
// If the operator matches, actions will be evaluated, and it will return | ||
// the matched variables, keys and values (MatchData) | ||
func (r *Rule) Evaluate(tx rules.TransactionState) []types.MatchData { | ||
return r.doEvaluate(tx.(*Transaction)) | ||
func (r *Rule) Evaluate(tx rules.TransactionState, cache map[transformationKey]transformationValue) []types.MatchData { | ||
return r.doEvaluate(tx.(*Transaction), cache) | ||
} | ||
|
||
func (r *Rule) doEvaluate(tx *Transaction) []types.MatchData { | ||
func (r *Rule) doEvaluate(tx *Transaction, cache map[transformationKey]transformationValue) []types.MatchData { | ||
if r.Capture { | ||
tx.Capture = true | ||
} | ||
|
@@ -201,7 +204,7 @@ func (r *Rule) doEvaluate(tx *Transaction) []types.MatchData { | |
|
||
values = tx.GetField(v) | ||
tx.WAF.Logger.Debug("[%s] [%d] Expanding %d arguments for rule %d", tx.id, rid, len(values), r.ID_) | ||
for _, arg := range values { | ||
for i, arg := range values { | ||
var args []string | ||
tx.WAF.Logger.Debug("[%s] [%d] Transforming argument %q for rule %d", tx.id, rid, arg.Value(), r.ID_) | ||
var errs []error | ||
|
@@ -210,9 +213,34 @@ func (r *Rule) doEvaluate(tx *Transaction) []types.MatchData { | |
// We could try for each until found | ||
args, errs = r.executeTransformationsMultimatch(arg.Value()) | ||
} else { | ||
ars, es := r.executeTransformations(arg.Value()) | ||
args = []string{ars} | ||
errs = es | ||
switch { | ||
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. I think 'doEvaluate' method is complex enough already so maybe we could extract this in another method? There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. Yup extracted |
||
case len(r.transformations) == 0: | ||
args = []string{arg.Value()} | ||
case arg.VariableName() == "TX": | ||
// no cache for TX | ||
ars, es := r.executeTransformations(arg.Value()) | ||
args = []string{ars} | ||
errs = es | ||
default: | ||
key := transformationKey{ | ||
argKey: arg.Key(), | ||
argIndex: i, | ||
argVariable: arg.Variable(), | ||
transformationsID: r.transformationsID, | ||
} | ||
if cached, ok := cache[key]; ok { | ||
args = cached.args | ||
errs = cached.errs | ||
} else { | ||
ars, es := r.executeTransformations(arg.Value()) | ||
args = []string{ars} | ||
errs = es | ||
cache[key] = transformationValue{ | ||
args: args, | ||
errs: es, | ||
} | ||
} | ||
} | ||
} | ||
if len(errs) > 0 { | ||
tx.WAF.Logger.Debug("[%s] [%d] Error transforming argument %q for rule %d: %v", tx.id, rid, arg.Value(), r.ID_, errs) | ||
|
@@ -270,7 +298,7 @@ func (r *Rule) doEvaluate(tx *Transaction) []types.MatchData { | |
// we only run the chains for the parent rule | ||
for nr := r.Chain; nr != nil; { | ||
tx.WAF.Logger.Debug("[%s] [%d] Evaluating rule chain for %d", tx.id, rid, r.ID_) | ||
matchedChainValues := nr.Evaluate(tx) | ||
matchedChainValues := nr.Evaluate(tx, cache) | ||
if len(matchedChainValues) == 0 { | ||
return matchedChainValues | ||
} | ||
|
@@ -375,13 +403,35 @@ func (r *Rule) AddVariableNegation(v variables.RuleVariable, key string) error { | |
return nil | ||
} | ||
|
||
var transformationIDToName = []string{""} | ||
var transformationNameToID = map[string]int{"": 0} | ||
var transformationIDsLock = sync.Mutex{} | ||
|
||
func transformationID(currentID int, transformationName string) int { | ||
transformationIDsLock.Lock() | ||
defer transformationIDsLock.Unlock() | ||
|
||
currName := transformationIDToName[currentID] | ||
nextName := fmt.Sprintf("%s+%s", currName, transformationName) | ||
if id, ok := transformationNameToID[nextName]; ok { | ||
return id | ||
} | ||
|
||
id := len(transformationIDToName) | ||
transformationIDToName = append(transformationIDToName, nextName) | ||
transformationIDToName[id] = nextName | ||
M4tteoP marked this conversation as resolved.
Show resolved
Hide resolved
|
||
transformationNameToID[nextName] = id | ||
return id | ||
} | ||
|
||
// AddTransformation adds a transformation to the rule | ||
// it fails if the transformation cannot be found | ||
func (r *Rule) AddTransformation(name string, t rules.Transformation) error { | ||
if t == nil || name == "" { | ||
return fmt.Errorf("invalid transformation %q not found", name) | ||
} | ||
r.transformations = append(r.transformations, ruleTransformationParams{name, t}) | ||
r.transformationsID = transformationID(r.transformationsID, name) | ||
return nil | ||
} | ||
|
||
|
Original file line number | Diff line number | Diff line change |
---|---|---|
|
@@ -9,6 +9,7 @@ import ( | |
|
||
"github.com/corazawaf/coraza/v3/internal/strings" | ||
"github.com/corazawaf/coraza/v3/types" | ||
"github.com/corazawaf/coraza/v3/types/variables" | ||
) | ||
|
||
// RuleGroup is a collection of rules | ||
|
@@ -99,6 +100,10 @@ func (rg *RuleGroup) Eval(phase types.RulePhase, tx *Transaction) bool { | |
tx.LastPhase = phase | ||
usedRules := 0 | ||
ts := time.Now().UnixNano() | ||
transformationCache := tx.transformationCache | ||
for k := range transformationCache { | ||
delete(transformationCache, k) | ||
} | ||
RulesLoop: | ||
for _, r := range tx.WAF.Rules.GetRules() { | ||
if tx.interruption != nil && phase != types.PhaseLogging { | ||
|
@@ -136,7 +141,7 @@ RulesLoop: | |
tx.variables.matchedVars.Reset() | ||
tx.variables.matchedVarsNames.Reset() | ||
|
||
r.Evaluate(tx) | ||
r.Evaluate(tx, transformationCache) | ||
tx.Capture = false // we reset captures | ||
usedRules++ | ||
} | ||
|
@@ -154,3 +159,15 @@ func NewRuleGroup() RuleGroup { | |
rules: []*Rule{}, | ||
} | ||
} | ||
|
||
type transformationKey struct { | ||
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. @M4tteoP The main difference with your initial approach is using a struct instead of string for the key, which I failed to communicate well offline :) There is significant cost including allocation to concatenate strings, while filling a struct is much faster and all on the stack so no allocation There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. Rag, Many thanks for taking over it while explaining how you did it! Very appreciated. |
||
argKey string | ||
argIndex int | ||
argVariable variables.RuleVariable | ||
transformationsID int | ||
} | ||
|
||
type transformationValue struct { | ||
args []string | ||
errs []error | ||
} |
Original file line number | Diff line number | Diff line change |
---|---|---|
|
@@ -104,6 +104,8 @@ type Transaction struct { | |
audit bool | ||
|
||
variables TransactionVariables | ||
|
||
transformationCache map[transformationKey]transformationValue | ||
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. @M4tteoP Though besides that reusing maps is also important, adding to the transaction which is already pooled made it pretty simple |
||
} | ||
|
||
func (tx *Transaction) ID() string { | ||
|
Original file line number | Diff line number | Diff line change |
---|---|---|
|
@@ -99,6 +99,7 @@ func BenchmarkCRSSimpleGET(b *testing.B) { | |
func BenchmarkCRSSimplePOST(b *testing.B) { | ||
waf := crsWAF(b) | ||
|
||
b.ReportAllocs() | ||
b.ResetTimer() // only benchmark execution, not compilation | ||
for i := 0; i < b.N; i++ { | ||
tx := waf.NewTransaction() | ||
|
@@ -109,7 +110,41 @@ func BenchmarkCRSSimplePOST(b *testing.B) { | |
tx.AddRequestHeader("Accept", "application/json") | ||
tx.AddRequestHeader("Content-Type", "application/x-www-form-urlencoded") | ||
tx.ProcessRequestHeaders() | ||
if _, err := tx.ResponseBodyWriter().Write([]byte("parameters2=and&other2=Stuff")); err != nil { | ||
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. If anyone was trying to use this benchmark and confused like I was, it's because of the typo here preventing post body processing from happening at all (response body processing is disabled by default I guess) There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. 🙀 |
||
if _, err := tx.RequestBodyWriter().Write([]byte("parameters2=and&other2=Stuff")); err != nil { | ||
b.Error(err) | ||
} | ||
if _, err := tx.ProcessRequestBody(); err != nil { | ||
b.Error(err) | ||
} | ||
tx.AddResponseHeader("Content-Type", "application/json") | ||
tx.ProcessResponseHeaders(200, "OK") | ||
if _, err := tx.ProcessResponseBody(); err != nil { | ||
b.Error(err) | ||
} | ||
tx.ProcessLogging() | ||
if err := tx.Close(); err != nil { | ||
b.Error(err) | ||
} | ||
} | ||
} | ||
|
||
func BenchmarkCRSLargePOST(b *testing.B) { | ||
waf := crsWAF(b) | ||
|
||
postPayload := []byte(fmt.Sprintf("parameters2=and&other2=%s", strings.Repeat("a", 10000))) | ||
|
||
b.ReportAllocs() | ||
b.ResetTimer() // only benchmark execution, not compilation | ||
for i := 0; i < b.N; i++ { | ||
tx := waf.NewTransaction() | ||
tx.ProcessConnection("127.0.0.1", 8080, "127.0.0.1", 8080) | ||
tx.ProcessURI("POST", "/some_path/with?parameters=and&other=Stuff", "HTTP/1.1") | ||
tx.AddRequestHeader("Host", "localhost") | ||
tx.AddRequestHeader("User-Agent", "Mozilla/5.0 (Macintosh; Intel Mac OS X 10_14_5) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/75.0.3770.100 Safari/537.36") | ||
tx.AddRequestHeader("Accept", "application/json") | ||
tx.AddRequestHeader("Content-Type", "application/x-www-form-urlencoded") | ||
tx.ProcessRequestHeaders() | ||
if _, err := tx.RequestBodyWriter().Write(postPayload); err != nil { | ||
b.Error(err) | ||
} | ||
if _, err := tx.ProcessRequestBody(); err != nil { | ||
|
@@ -280,8 +315,30 @@ func crsWAF(t testing.TB) coraza.WAF { | |
if err != nil { | ||
t.Fatal(err) | ||
} | ||
customTestingConfig := ` | ||
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. I added this so the benchmark runs the same rules as FTW (mostly for paranoia level), I'm not sure if there is a practical difference though |
||
SecResponseBodyMimeType text/plain | ||
SecDefaultAction "phase:3,log,auditlog,pass" | ||
SecDefaultAction "phase:4,log,auditlog,pass" | ||
|
||
SecAction "id:900005,\ | ||
phase:1,\ | ||
nolog,\ | ||
pass,\ | ||
ctl:ruleEngine=DetectionOnly,\ | ||
ctl:ruleRemoveById=910000,\ | ||
setvar:tx.paranoia_level=4,\ | ||
setvar:tx.crs_validate_utf8_encoding=1,\ | ||
setvar:tx.arg_name_length=100,\ | ||
setvar:tx.arg_length=400,\ | ||
setvar:tx.total_arg_length=64000,\ | ||
setvar:tx.max_num_args=255,\ | ||
setvar:tx.max_file_size=64100,\ | ||
setvar:tx.combined_file_sizes=65535" | ||
` | ||
conf := coraza.NewWAFConfig(). | ||
WithRootFS(crsReader). | ||
WithDirectives(string(rec)). | ||
WithDirectives(customTestingConfig). | ||
WithDirectives("Include crs-setup.conf.example"). | ||
WithDirectives("Include rules/*.conf") | ||
|
||
|
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
This change makes me wonder if evaluate should be exported or not as it is only used inside this package. Does not need to happen in this PR but this is a good justification cc @jptosso @anuraaga @M4tteoP