corazawaf · anuraaga · Dec 26, 2022 · Dec 22, 2022 · Dec 22, 2022 · Dec 22, 2022
diff --git a/config_test.go b/config_test.go
@@ -1,3 +1,6 @@
+// Copyright 2022 Juan Pablo Tosso and the OWASP Coraza contributors
+// SPDX-License-Identifier: Apache-2.0
+
 package coraza
 
 import (

diff --git a/internal/corazawaf/rule.go b/internal/corazawaf/rule.go
@@ -8,6 +8,7 @@ import (
 	"regexp"
 	"strconv"
 	"strings"
+	"sync"
 
 	"github.com/corazawaf/coraza/v3/internal/corazarules"
 	"github.com/corazawaf/coraza/v3/macro"
@@ -102,6 +103,8 @@ type Rule struct {
 	// action itself, not sure yet
 	transformations []ruleTransformationParams
 
+	transformationsID int
+
 	// Slice of initialized actions to be evaluated during
 	// the rule evaluation process
 	actions []ruleActionParams
@@ -155,11 +158,11 @@ func (r *Rule) Status() int {
 // Evaluate will evaluate the current rule for the indicated transaction
 // If the operator matches, actions will be evaluated, and it will return
 // the matched variables, keys and values (MatchData)
-func (r *Rule) Evaluate(tx rules.TransactionState) []types.MatchData {
-	return r.doEvaluate(tx.(*Transaction))
+func (r *Rule) Evaluate(tx rules.TransactionState, cache map[transformationKey]transformationValue) []types.MatchData {
+	return r.doEvaluate(tx.(*Transaction), cache)
 }
 
-func (r *Rule) doEvaluate(tx *Transaction) []types.MatchData {
+func (r *Rule) doEvaluate(tx *Transaction, cache map[transformationKey]transformationValue) []types.MatchData {
 	if r.Capture {
 		tx.Capture = true
 	}
@@ -201,7 +204,7 @@ func (r *Rule) doEvaluate(tx *Transaction) []types.MatchData {
 
 			values = tx.GetField(v)
 			tx.WAF.Logger.Debug("[%s] [%d] Expanding %d arguments for rule %d", tx.id, rid, len(values), r.ID_)
-			for _, arg := range values {
+			for i, arg := range values {
 				var args []string
 				tx.WAF.Logger.Debug("[%s] [%d] Transforming argument %q for rule %d", tx.id, rid, arg.Value(), r.ID_)
 				var errs []error
@@ -210,9 +213,34 @@ func (r *Rule) doEvaluate(tx *Transaction) []types.MatchData {
 					// We could try for each until found
 					args, errs = r.executeTransformationsMultimatch(arg.Value())
 				} else {
-					ars, es := r.executeTransformations(arg.Value())
-					args = []string{ars}
-					errs = es
+					switch {
+					case len(r.transformations) == 0:
+						args = []string{arg.Value()}
+					case arg.VariableName() == "TX":
+						// no cache for TX
+						ars, es := r.executeTransformations(arg.Value())
+						args = []string{ars}
+						errs = es
+					default:
+						key := transformationKey{
+							argKey:            arg.Key(),
+							argIndex:          i,
+							argVariable:       arg.Variable(),
+							transformationsID: r.transformationsID,
+						}
+						if cached, ok := cache[key]; ok {
+							args = cached.args
+							errs = cached.errs
+						} else {
+							ars, es := r.executeTransformations(arg.Value())
+							args = []string{ars}
+							errs = es
+							cache[key] = transformationValue{
+								args: args,
+								errs: es,
+							}
+						}
+					}
 				}
 				if len(errs) > 0 {
 					tx.WAF.Logger.Debug("[%s] [%d] Error transforming argument %q for rule %d: %v", tx.id, rid, arg.Value(), r.ID_, errs)
@@ -270,7 +298,7 @@ func (r *Rule) doEvaluate(tx *Transaction) []types.MatchData {
 		// we only run the chains for the parent rule
 		for nr := r.Chain; nr != nil; {
 			tx.WAF.Logger.Debug("[%s] [%d] Evaluating rule chain for %d", tx.id, rid, r.ID_)
-			matchedChainValues := nr.Evaluate(tx)
+			matchedChainValues := nr.Evaluate(tx, cache)
 			if len(matchedChainValues) == 0 {
 				return matchedChainValues
 			}
@@ -375,13 +403,35 @@ func (r *Rule) AddVariableNegation(v variables.RuleVariable, key string) error {
 	return nil
 }
 
+var transformationIDToName = []string{""}
+var transformationNameToID = map[string]int{"": 0}
+var transformationIDsLock = sync.Mutex{}
+
+func transformationID(currentID int, transformationName string) int {
+	transformationIDsLock.Lock()
+	defer transformationIDsLock.Unlock()
+
+	currName := transformationIDToName[currentID]
+	nextName := fmt.Sprintf("%s+%s", currName, transformationName)
+	if id, ok := transformationNameToID[nextName]; ok {
+		return id
+	}
+
+	id := len(transformationIDToName)
+	transformationIDToName = append(transformationIDToName, nextName)
+	transformationIDToName[id] = nextName
+	transformationNameToID[nextName] = id
+	return id
+}
+
 // AddTransformation adds a transformation to the rule
 // it fails if the transformation cannot be found
 func (r *Rule) AddTransformation(name string, t rules.Transformation) error {
 	if t == nil || name == "" {
 		return fmt.Errorf("invalid transformation %q not found", name)
 	}
 	r.transformations = append(r.transformations, ruleTransformationParams{name, t})
+	r.transformationsID = transformationID(r.transformationsID, name)
 	return nil
 }
 

diff --git a/internal/corazawaf/rulegroup.go b/internal/corazawaf/rulegroup.go
@@ -9,6 +9,7 @@ import (
 
 	"github.com/corazawaf/coraza/v3/internal/strings"
 	"github.com/corazawaf/coraza/v3/types"
+	"github.com/corazawaf/coraza/v3/types/variables"
 )
 
 // RuleGroup is a collection of rules
@@ -99,6 +100,10 @@ func (rg *RuleGroup) Eval(phase types.RulePhase, tx *Transaction) bool {
 	tx.LastPhase = phase
 	usedRules := 0
 	ts := time.Now().UnixNano()
+	transformationCache := tx.transformationCache
+	for k := range transformationCache {
+		delete(transformationCache, k)
+	}
 RulesLoop:
 	for _, r := range tx.WAF.Rules.GetRules() {
 		if tx.interruption != nil && phase != types.PhaseLogging {
@@ -136,7 +141,7 @@ RulesLoop:
 		tx.variables.matchedVars.Reset()
 		tx.variables.matchedVarsNames.Reset()
 
-		r.Evaluate(tx)
+		r.Evaluate(tx, transformationCache)
 		tx.Capture = false // we reset captures
 		usedRules++
 	}
@@ -154,3 +159,15 @@ func NewRuleGroup() RuleGroup {
 		rules: []*Rule{},
 	}
 }
+
+type transformationKey struct {
+	argKey            string
+	argIndex          int
+	argVariable       variables.RuleVariable
+	transformationsID int
+}
+
+type transformationValue struct {
+	args []string
+	errs []error
+}
diff --git a/internal/corazawaf/transaction.go b/internal/corazawaf/transaction.go
@@ -104,6 +104,8 @@ type Transaction struct {
 	audit bool
 
 	variables TransactionVariables
+
+	transformationCache map[transformationKey]transformationValue
 }
 
 func (tx *Transaction) ID() string {

diff --git a/internal/corazawaf/waf.go b/internal/corazawaf/waf.go
@@ -190,6 +190,7 @@ func (w *WAF) newTransactionWithID(id string) *Transaction {
 			MemoryLimit: w.RequestBodyInMemoryLimit,
 		})
 		tx.variables = *NewTransactionVariables()
+		tx.transformationCache = map[transformationKey]transformationValue{}
 	}
 
 	// set capture variables

diff --git a/testing/coreruleset/coreruleset_test.go b/testing/coreruleset/coreruleset_test.go
@@ -99,6 +99,7 @@ func BenchmarkCRSSimpleGET(b *testing.B) {
 func BenchmarkCRSSimplePOST(b *testing.B) {
 	waf := crsWAF(b)
 
+	b.ReportAllocs()
 	b.ResetTimer() // only benchmark execution, not compilation
 	for i := 0; i < b.N; i++ {
 		tx := waf.NewTransaction()
@@ -109,7 +110,41 @@ func BenchmarkCRSSimplePOST(b *testing.B) {
 		tx.AddRequestHeader("Accept", "application/json")
 		tx.AddRequestHeader("Content-Type", "application/x-www-form-urlencoded")
 		tx.ProcessRequestHeaders()
-		if _, err := tx.ResponseBodyWriter().Write([]byte("parameters2=and&other2=Stuff")); err != nil {
+		if _, err := tx.RequestBodyWriter().Write([]byte("parameters2=and&other2=Stuff")); err != nil {
+			b.Error(err)
+		}
+		if _, err := tx.ProcessRequestBody(); err != nil {
+			b.Error(err)
+		}
+		tx.AddResponseHeader("Content-Type", "application/json")
+		tx.ProcessResponseHeaders(200, "OK")
+		if _, err := tx.ProcessResponseBody(); err != nil {
+			b.Error(err)
+		}
+		tx.ProcessLogging()
+		if err := tx.Close(); err != nil {
+			b.Error(err)
+		}
+	}
+}
+
+func BenchmarkCRSLargePOST(b *testing.B) {
+	waf := crsWAF(b)
+
+	postPayload := []byte(fmt.Sprintf("parameters2=and&other2=%s", strings.Repeat("a", 10000)))
+
+	b.ReportAllocs()
+	b.ResetTimer() // only benchmark execution, not compilation
+	for i := 0; i < b.N; i++ {
+		tx := waf.NewTransaction()
+		tx.ProcessConnection("127.0.0.1", 8080, "127.0.0.1", 8080)
+		tx.ProcessURI("POST", "/some_path/with?parameters=and&other=Stuff", "HTTP/1.1")
+		tx.AddRequestHeader("Host", "localhost")
+		tx.AddRequestHeader("User-Agent", "Mozilla/5.0 (Macintosh; Intel Mac OS X 10_14_5) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/75.0.3770.100 Safari/537.36")
+		tx.AddRequestHeader("Accept", "application/json")
+		tx.AddRequestHeader("Content-Type", "application/x-www-form-urlencoded")
+		tx.ProcessRequestHeaders()
+		if _, err := tx.RequestBodyWriter().Write(postPayload); err != nil {
 			b.Error(err)
 		}
 		if _, err := tx.ProcessRequestBody(); err != nil {
@@ -280,8 +315,30 @@ func crsWAF(t testing.TB) coraza.WAF {
 	if err != nil {
 		t.Fatal(err)
 	}
+	customTestingConfig := `
+SecResponseBodyMimeType text/plain
+SecDefaultAction "phase:3,log,auditlog,pass"
+SecDefaultAction "phase:4,log,auditlog,pass"
+
+SecAction "id:900005,\
+  phase:1,\
+  nolog,\
+  pass,\
+  ctl:ruleEngine=DetectionOnly,\
+  ctl:ruleRemoveById=910000,\
+  setvar:tx.paranoia_level=4,\
+  setvar:tx.crs_validate_utf8_encoding=1,\
+  setvar:tx.arg_name_length=100,\
+  setvar:tx.arg_length=400,\
+  setvar:tx.total_arg_length=64000,\
+  setvar:tx.max_num_args=255,\
+  setvar:tx.max_file_size=64100,\
+  setvar:tx.combined_file_sizes=65535"
+`
 	conf := coraza.NewWAFConfig().
+		WithRootFS(crsReader).
 		WithDirectives(string(rec)).
+		WithDirectives(customTestingConfig).
 		WithDirectives("Include crs-setup.conf.example").
 		WithDirectives("Include rules/*.conf")