Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Cache transformations within a phase #537

Merged
merged 10 commits into from
Dec 26, 2022
Merged
Show file tree
Hide file tree
Changes from 9 commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
3 changes: 3 additions & 0 deletions config_test.go
Original file line number Diff line number Diff line change
@@ -1,3 +1,6 @@
// Copyright 2022 Juan Pablo Tosso and the OWASP Coraza contributors
// SPDX-License-Identifier: Apache-2.0

package coraza

import (
Expand Down
66 changes: 58 additions & 8 deletions internal/corazawaf/rule.go
Original file line number Diff line number Diff line change
Expand Up @@ -8,6 +8,7 @@ import (
"regexp"
"strconv"
"strings"
"sync"

"github.com/corazawaf/coraza/v3/internal/corazarules"
"github.com/corazawaf/coraza/v3/macro"
Expand Down Expand Up @@ -102,6 +103,8 @@ type Rule struct {
// action itself, not sure yet
transformations []ruleTransformationParams

transformationsID int

// Slice of initialized actions to be evaluated during
// the rule evaluation process
actions []ruleActionParams
Expand Down Expand Up @@ -155,11 +158,11 @@ func (r *Rule) Status() int {
// Evaluate will evaluate the current rule for the indicated transaction
// If the operator matches, actions will be evaluated, and it will return
// the matched variables, keys and values (MatchData)
func (r *Rule) Evaluate(tx rules.TransactionState) []types.MatchData {
return r.doEvaluate(tx.(*Transaction))
func (r *Rule) Evaluate(tx rules.TransactionState, cache map[transformationKey]transformationValue) []types.MatchData {
Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

This change makes me wonder if evaluate should be exported or not as it is only used inside this package. Does not need to happen in this PR but this is a good justification cc @jptosso @anuraaga @M4tteoP

return r.doEvaluate(tx.(*Transaction), cache)
}

func (r *Rule) doEvaluate(tx *Transaction) []types.MatchData {
func (r *Rule) doEvaluate(tx *Transaction, cache map[transformationKey]transformationValue) []types.MatchData {
if r.Capture {
tx.Capture = true
}
Expand Down Expand Up @@ -201,7 +204,7 @@ func (r *Rule) doEvaluate(tx *Transaction) []types.MatchData {

values = tx.GetField(v)
tx.WAF.Logger.Debug("[%s] [%d] Expanding %d arguments for rule %d", tx.id, rid, len(values), r.ID_)
for _, arg := range values {
for i, arg := range values {
var args []string
tx.WAF.Logger.Debug("[%s] [%d] Transforming argument %q for rule %d", tx.id, rid, arg.Value(), r.ID_)
var errs []error
Expand All @@ -210,9 +213,34 @@ func (r *Rule) doEvaluate(tx *Transaction) []types.MatchData {
// We could try for each until found
args, errs = r.executeTransformationsMultimatch(arg.Value())
} else {
ars, es := r.executeTransformations(arg.Value())
args = []string{ars}
errs = es
switch {
Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

I think 'doEvaluate' method is complex enough already so maybe we could extract this in another method?

Copy link
Contributor Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Yup extracted

case len(r.transformations) == 0:
args = []string{arg.Value()}
case arg.VariableName() == "TX":
// no cache for TX
ars, es := r.executeTransformations(arg.Value())
args = []string{ars}
errs = es
default:
key := transformationKey{
argKey: arg.Key(),
argIndex: i,
argVariable: arg.Variable(),
transformationsID: r.transformationsID,
}
if cached, ok := cache[key]; ok {
args = cached.args
errs = cached.errs
} else {
ars, es := r.executeTransformations(arg.Value())
args = []string{ars}
errs = es
cache[key] = transformationValue{
args: args,
errs: es,
}
}
}
}
if len(errs) > 0 {
tx.WAF.Logger.Debug("[%s] [%d] Error transforming argument %q for rule %d: %v", tx.id, rid, arg.Value(), r.ID_, errs)
Expand Down Expand Up @@ -270,7 +298,7 @@ func (r *Rule) doEvaluate(tx *Transaction) []types.MatchData {
// we only run the chains for the parent rule
for nr := r.Chain; nr != nil; {
tx.WAF.Logger.Debug("[%s] [%d] Evaluating rule chain for %d", tx.id, rid, r.ID_)
matchedChainValues := nr.Evaluate(tx)
matchedChainValues := nr.Evaluate(tx, cache)
if len(matchedChainValues) == 0 {
return matchedChainValues
}
Expand Down Expand Up @@ -375,13 +403,35 @@ func (r *Rule) AddVariableNegation(v variables.RuleVariable, key string) error {
return nil
}

var transformationIDToName = []string{""}
var transformationNameToID = map[string]int{"": 0}
var transformationIDsLock = sync.Mutex{}

func transformationID(currentID int, transformationName string) int {
transformationIDsLock.Lock()
defer transformationIDsLock.Unlock()

currName := transformationIDToName[currentID]
nextName := fmt.Sprintf("%s+%s", currName, transformationName)
if id, ok := transformationNameToID[nextName]; ok {
return id
}

id := len(transformationIDToName)
transformationIDToName = append(transformationIDToName, nextName)
transformationIDToName[id] = nextName
M4tteoP marked this conversation as resolved.
Show resolved Hide resolved
transformationNameToID[nextName] = id
return id
}

// AddTransformation adds a transformation to the rule
// it fails if the transformation cannot be found
func (r *Rule) AddTransformation(name string, t rules.Transformation) error {
if t == nil || name == "" {
return fmt.Errorf("invalid transformation %q not found", name)
}
r.transformations = append(r.transformations, ruleTransformationParams{name, t})
r.transformationsID = transformationID(r.transformationsID, name)
return nil
}

Expand Down
19 changes: 18 additions & 1 deletion internal/corazawaf/rulegroup.go
Original file line number Diff line number Diff line change
Expand Up @@ -9,6 +9,7 @@ import (

"github.com/corazawaf/coraza/v3/internal/strings"
"github.com/corazawaf/coraza/v3/types"
"github.com/corazawaf/coraza/v3/types/variables"
)

// RuleGroup is a collection of rules
Expand Down Expand Up @@ -99,6 +100,10 @@ func (rg *RuleGroup) Eval(phase types.RulePhase, tx *Transaction) bool {
tx.LastPhase = phase
usedRules := 0
ts := time.Now().UnixNano()
transformationCache := tx.transformationCache
for k := range transformationCache {
delete(transformationCache, k)
}
RulesLoop:
for _, r := range tx.WAF.Rules.GetRules() {
if tx.interruption != nil && phase != types.PhaseLogging {
Expand Down Expand Up @@ -136,7 +141,7 @@ RulesLoop:
tx.variables.matchedVars.Reset()
tx.variables.matchedVarsNames.Reset()

r.Evaluate(tx)
r.Evaluate(tx, transformationCache)
tx.Capture = false // we reset captures
usedRules++
}
Expand All @@ -154,3 +159,15 @@ func NewRuleGroup() RuleGroup {
rules: []*Rule{},
}
}

type transformationKey struct {
Copy link
Contributor Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

@M4tteoP The main difference with your initial approach is using a struct instead of string for the key, which I failed to communicate well offline :) There is significant cost including allocation to concatenate strings, while filling a struct is much faster and all on the stack so no allocation

Copy link
Member

@M4tteoP M4tteoP Dec 23, 2022

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Rag, Many thanks for taking over it while explaining how you did it! Very appreciated.

argKey string
argIndex int
argVariable variables.RuleVariable
transformationsID int
}

type transformationValue struct {
args []string
errs []error
}
2 changes: 2 additions & 0 deletions internal/corazawaf/transaction.go
Original file line number Diff line number Diff line change
Expand Up @@ -104,6 +104,8 @@ type Transaction struct {
audit bool

variables TransactionVariables

transformationCache map[transformationKey]transformationValue
Copy link
Contributor Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

@M4tteoP Though besides that reusing maps is also important, adding to the transaction which is already pooled made it pretty simple

}

func (tx *Transaction) ID() string {
Expand Down
1 change: 1 addition & 0 deletions internal/corazawaf/waf.go
Original file line number Diff line number Diff line change
Expand Up @@ -190,6 +190,7 @@ func (w *WAF) newTransactionWithID(id string) *Transaction {
MemoryLimit: w.RequestBodyInMemoryLimit,
})
tx.variables = *NewTransactionVariables()
tx.transformationCache = map[transformationKey]transformationValue{}
}

// set capture variables
Expand Down
59 changes: 58 additions & 1 deletion testing/coreruleset/coreruleset_test.go
Original file line number Diff line number Diff line change
Expand Up @@ -99,6 +99,7 @@ func BenchmarkCRSSimpleGET(b *testing.B) {
func BenchmarkCRSSimplePOST(b *testing.B) {
waf := crsWAF(b)

b.ReportAllocs()
b.ResetTimer() // only benchmark execution, not compilation
for i := 0; i < b.N; i++ {
tx := waf.NewTransaction()
Expand All @@ -109,7 +110,41 @@ func BenchmarkCRSSimplePOST(b *testing.B) {
tx.AddRequestHeader("Accept", "application/json")
tx.AddRequestHeader("Content-Type", "application/x-www-form-urlencoded")
tx.ProcessRequestHeaders()
if _, err := tx.ResponseBodyWriter().Write([]byte("parameters2=and&other2=Stuff")); err != nil {
Copy link
Contributor Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

If anyone was trying to use this benchmark and confused like I was, it's because of the typo here preventing post body processing from happening at all (response body processing is disabled by default I guess)

Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

🙀

if _, err := tx.RequestBodyWriter().Write([]byte("parameters2=and&other2=Stuff")); err != nil {
b.Error(err)
}
if _, err := tx.ProcessRequestBody(); err != nil {
b.Error(err)
}
tx.AddResponseHeader("Content-Type", "application/json")
tx.ProcessResponseHeaders(200, "OK")
if _, err := tx.ProcessResponseBody(); err != nil {
b.Error(err)
}
tx.ProcessLogging()
if err := tx.Close(); err != nil {
b.Error(err)
}
}
}

func BenchmarkCRSLargePOST(b *testing.B) {
waf := crsWAF(b)

postPayload := []byte(fmt.Sprintf("parameters2=and&other2=%s", strings.Repeat("a", 10000)))

b.ReportAllocs()
b.ResetTimer() // only benchmark execution, not compilation
for i := 0; i < b.N; i++ {
tx := waf.NewTransaction()
tx.ProcessConnection("127.0.0.1", 8080, "127.0.0.1", 8080)
tx.ProcessURI("POST", "/some_path/with?parameters=and&other=Stuff", "HTTP/1.1")
tx.AddRequestHeader("Host", "localhost")
tx.AddRequestHeader("User-Agent", "Mozilla/5.0 (Macintosh; Intel Mac OS X 10_14_5) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/75.0.3770.100 Safari/537.36")
tx.AddRequestHeader("Accept", "application/json")
tx.AddRequestHeader("Content-Type", "application/x-www-form-urlencoded")
tx.ProcessRequestHeaders()
if _, err := tx.RequestBodyWriter().Write(postPayload); err != nil {
b.Error(err)
}
if _, err := tx.ProcessRequestBody(); err != nil {
Expand Down Expand Up @@ -280,8 +315,30 @@ func crsWAF(t testing.TB) coraza.WAF {
if err != nil {
t.Fatal(err)
}
customTestingConfig := `
Copy link
Contributor Author

@anuraaga anuraaga Dec 22, 2022

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

I added this so the benchmark runs the same rules as FTW (mostly for paranoia level), I'm not sure if there is a practical difference though

SecResponseBodyMimeType text/plain
SecDefaultAction "phase:3,log,auditlog,pass"
SecDefaultAction "phase:4,log,auditlog,pass"

SecAction "id:900005,\
phase:1,\
nolog,\
pass,\
ctl:ruleEngine=DetectionOnly,\
ctl:ruleRemoveById=910000,\
setvar:tx.paranoia_level=4,\
setvar:tx.crs_validate_utf8_encoding=1,\
setvar:tx.arg_name_length=100,\
setvar:tx.arg_length=400,\
setvar:tx.total_arg_length=64000,\
setvar:tx.max_num_args=255,\
setvar:tx.max_file_size=64100,\
setvar:tx.combined_file_sizes=65535"
`
conf := coraza.NewWAFConfig().
WithRootFS(crsReader).
WithDirectives(string(rec)).
WithDirectives(customTestingConfig).
WithDirectives("Include crs-setup.conf.example").
WithDirectives("Include rules/*.conf")

Expand Down