From fb03c310600836e0ab92995bd0190eb2e5bb3866 Mon Sep 17 00:00:00 2001 From: Mzack9999 Date: Tue, 17 Dec 2024 21:42:31 +0100 Subject: [PATCH 1/2] adding memoirze + short repeat groups --- go.mod | 9 +++++ go.sum | 24 ++++++++++++ patterns.go | 4 +- patterns_test.go | 6 +-- tech.go | 49 ++++++++++++++++++++++- wappalyzer_bench_test.go | 85 ++++++++++++++++++++++++++++++++++++++++ 6 files changed, 171 insertions(+), 6 deletions(-) create mode 100644 wappalyzer_bench_test.go diff --git a/go.mod b/go.mod index f5405e7..13c7e99 100644 --- a/go.mod +++ b/go.mod @@ -3,12 +3,21 @@ module github.com/projectdiscovery/wappalyzergo go 1.21 require ( + github.com/projectdiscovery/utils v0.4.2 github.com/stretchr/testify v1.10.0 golang.org/x/net v0.32.0 ) require ( + github.com/Mzack9999/gcache v0.0.0-20230410081825-519e28eab057 // indirect + github.com/aymerick/douceur v0.2.0 // indirect + github.com/cespare/xxhash v1.1.0 // indirect github.com/davecgh/go-spew v1.1.1 // indirect + github.com/gorilla/css v1.0.1 // indirect + github.com/microcosm-cc/bluemonday v1.0.27 // indirect github.com/pmezard/go-difflib v1.0.0 // indirect + github.com/saintfish/chardet v0.0.0-20230101081208-5e3ef4b5456d // indirect + golang.org/x/mod v0.17.0 // indirect + golang.org/x/tools v0.21.1-0.20240508182429-e35e4ccd0d2d // indirect gopkg.in/yaml.v3 v3.0.1 // indirect ) diff --git a/go.sum b/go.sum index 0083fa2..aaeb709 100644 --- a/go.sum +++ b/go.sum @@ -1,11 +1,35 @@ +github.com/Mzack9999/gcache v0.0.0-20230410081825-519e28eab057 h1:KFac3SiGbId8ub47e7kd2PLZeACxc1LkiiNoDOFRClE= +github.com/Mzack9999/gcache v0.0.0-20230410081825-519e28eab057/go.mod h1:iLB2pivrPICvLOuROKmlqURtFIEsoJZaMidQfCG1+D4= +github.com/OneOfOne/xxhash v1.2.2 h1:KMrpdQIwFcEqXDklaen+P1axHaj9BSKzvpUUfnHldSE= +github.com/OneOfOne/xxhash v1.2.2/go.mod h1:HSdplMjZKSmBqAxg5vPj2TmRDmfkzw+cTzAElWljhcU= +github.com/aymerick/douceur v0.2.0 h1:Mv+mAeH1Q+n9Fr+oyamOlAkUNPWPlA8PPGR0QAaYuPk= +github.com/aymerick/douceur v0.2.0/go.mod h1:wlT5vV2O3h55X9m7iVYN0TBM0NH/MmbLnd30/FjWUq4= +github.com/cespare/xxhash v1.1.0 h1:a6HrQnmkObjyL+Gs60czilIUGqrzKutQD6XZog3p+ko= +github.com/cespare/xxhash v1.1.0/go.mod h1:XrSqR1VqqWfGrhpAt58auRo0WTKS1nRRg3ghfAqPWnc= github.com/davecgh/go-spew v1.1.1 h1:vj9j/u1bqnvCEfJOwUhtlOARqs3+rkHYY13jYWTU97c= github.com/davecgh/go-spew v1.1.1/go.mod h1:J7Y8YcW2NihsgmVo/mv3lAwl/skON4iLHjSsI+c5H38= +github.com/gorilla/css v1.0.1 h1:ntNaBIghp6JmvWnxbZKANoLyuXTPZ4cAMlo6RyhlbO8= +github.com/gorilla/css v1.0.1/go.mod h1:BvnYkspnSzMmwRK+b8/xgNPLiIuNZr6vbZBTPQ2A3b0= +github.com/microcosm-cc/bluemonday v1.0.27 h1:MpEUotklkwCSLeH+Qdx1VJgNqLlpY2KXwXFM08ygZfk= +github.com/microcosm-cc/bluemonday v1.0.27/go.mod h1:jFi9vgW+H7c3V0lb6nR74Ib/DIB5OBs92Dimizgw2cA= github.com/pmezard/go-difflib v1.0.0 h1:4DBwDE0NGyQoBHbLQYPwSUPoCMWR5BEzIk/f1lZbAQM= github.com/pmezard/go-difflib v1.0.0/go.mod h1:iKH77koFhYxTK1pcRnkKkqfTogsbg7gZNVY4sRDYZ/4= +github.com/projectdiscovery/utils v0.4.2 h1:O/G7vaH1XFxC5W/XdGnTvRkM4NpMnzVmRo8wNLSsbM4= +github.com/projectdiscovery/utils v0.4.2/go.mod h1:xqbN5BcMwIYK7dU0VObVPiyNrpp5UEgt/1OyzqPIXmU= +github.com/saintfish/chardet v0.0.0-20230101081208-5e3ef4b5456d h1:hrujxIzL1woJ7AwssoOcM/tq5JjjG2yYOc8odClEiXA= +github.com/saintfish/chardet v0.0.0-20230101081208-5e3ef4b5456d/go.mod h1:uugorj2VCxiV1x+LzaIdVa9b4S4qGAcH6cbhh4qVxOU= +github.com/spaolacci/murmur3 v0.0.0-20180118202830-f09979ecbc72 h1:qLC7fQah7D6K1B0ujays3HV9gkFtllcxhzImRR7ArPQ= +github.com/spaolacci/murmur3 v0.0.0-20180118202830-f09979ecbc72/go.mod h1:JwIasOWyU6f++ZhiEuf87xNszmSA2myDM2Kzu9HwQUA= github.com/stretchr/testify v1.10.0 h1:Xv5erBjTwe/5IxqUQTdXv5kgmIvbHo3QQyRwhJsOfJA= github.com/stretchr/testify v1.10.0/go.mod h1:r2ic/lqez/lEtzL7wO/rwa5dbSLXVDPFyf8C91i36aY= +golang.org/x/mod v0.17.0 h1:zY54UmvipHiNd+pm+m0x9KhZ9hl1/7QNMyxXbc6ICqA= +golang.org/x/mod v0.17.0/go.mod h1:hTbmBsO62+eylJbnUtE2MGJUyE7QWk4xUqPFrRgJ+7c= golang.org/x/net v0.32.0 h1:ZqPmj8Kzc+Y6e0+skZsuACbx+wzMgo5MQsJh9Qd6aYI= golang.org/x/net v0.32.0/go.mod h1:CwU0IoeOlnQQWJ6ioyFrfRuomB8GKF6KbYXZVyeXNfs= +golang.org/x/sync v0.10.0 h1:3NQrjDixjgGwUOCaF8w2+VYHv0Ve/vGYSbdkTa98gmQ= +golang.org/x/sync v0.10.0/go.mod h1:Czt+wKu1gCyEFDUtn0jG5QVvpJ6rzVqr5aXyt9drQfk= +golang.org/x/tools v0.21.1-0.20240508182429-e35e4ccd0d2d h1:vU5i/LfpvrRCpgM/VPfJLg5KjxD3E+hfT1SH+d9zLwg= +golang.org/x/tools v0.21.1-0.20240508182429-e35e4ccd0d2d/go.mod h1:aiJjzUbINMkxbQROHiO6hDPo2LHcIPhhQsa9DLh0yGk= gopkg.in/check.v1 v0.0.0-20161208181325-20d25e280405 h1:yhCVgyC4o1eVCa2tZl7eS0r+SDo693bJlVdllGtEeKM= gopkg.in/check.v1 v0.0.0-20161208181325-20d25e280405/go.mod h1:Co6ibVJAznAaIkqp8huTwlJQCZ016jof/cbN4VW5Yz0= gopkg.in/yaml.v3 v3.0.1 h1:fxVm/GzAzEWqLHuvctI91KS9hhNmmWOoWu0XTYJS7CA= diff --git a/patterns.go b/patterns.go index a67f857..01fba57 100644 --- a/patterns.go +++ b/patterns.go @@ -34,8 +34,8 @@ func ParsePattern(pattern string) (*ParsedPattern, error) { regexPattern = strings.ReplaceAll(regexPattern, "/", "\\/") regexPattern = strings.ReplaceAll(regexPattern, "\\+", "__escapedPlus__") - regexPattern = strings.ReplaceAll(regexPattern, "+", "{1,250}") - regexPattern = strings.ReplaceAll(regexPattern, "*", "{0,250}") + regexPattern = strings.ReplaceAll(regexPattern, "+", "{1,100}") + regexPattern = strings.ReplaceAll(regexPattern, "*", "{0,100}") regexPattern = strings.ReplaceAll(regexPattern, "__escapedPlus__", "\\+") var err error diff --git a/patterns_test.go b/patterns_test.go index 9ee02a6..5f04473 100644 --- a/patterns_test.go +++ b/patterns_test.go @@ -14,19 +14,19 @@ func TestParsePattern(t *testing.T) { { name: "Basic pattern", input: "Mage.*", - expectedRegex: "(?i)Mage.{0,250}", + expectedRegex: "(?i)Mage.{0,100}", expectedConf: 100, }, { name: "With confidence", input: "Mage.*\\;confidence:50", - expectedRegex: "(?i)Mage.{0,250}", + expectedRegex: "(?i)Mage.{0,100}", expectedConf: 50, }, { name: "With version", input: "jquery-([0-9.]+)\\.js\\;version:\\1", - expectedRegex: "(?i)jquery-([0-9.]{1,250})\\.js", + expectedRegex: "(?i)jquery-([0-9.]{1,100})\\.js", expectedConf: 100, expectedVer: "\\1", }, diff --git a/tech.go b/tech.go index 12d40bc..7bd32f3 100644 --- a/tech.go +++ b/tech.go @@ -3,13 +3,17 @@ package wappalyzer import ( "bytes" "encoding/json" + "fmt" "strings" + + "github.com/projectdiscovery/utils/memoize" ) // Wappalyze is a client for working with tech detection type Wappalyze struct { original *Fingerprints fingerprints *CompiledFingerprints + memocache *memoize.Memoizer } // New creates a new tech detection instance @@ -24,6 +28,12 @@ func New() (*Wappalyze, error) { if err != nil { return nil, err } + + wappalyze.memocache, err = memoize.New(memoize.WithMaxSize(1024)) + if err != nil { + return nil, err + } + return wappalyze, nil } @@ -58,6 +68,40 @@ func (s *Wappalyze) loadFingerprints() error { // Body should not be mutated while this function is being called, or it may // lead to unexpected things. func (s *Wappalyze) Fingerprint(headers map[string][]string, body []byte) map[string]struct{} { + result, _, _ := s.memocache.Do(fmt.Sprintf("%s-%s", headers, body), func() (interface{}, error) { + uniqueFingerprints := NewUniqueFingerprints() + + // Lowercase everything that we have received to check + normalizedBody := bytes.ToLower(body) + normalizedHeaders := s.normalizeHeaders(headers) + + // Run header based fingerprinting if the number + // of header checks if more than 0. + for _, app := range s.checkHeaders(normalizedHeaders) { + uniqueFingerprints.SetIfNotExists(app.application, app.version, app.confidence) + } + + cookies := s.findSetCookie(normalizedHeaders) + // Run cookie based fingerprinting if we have a set-cookie header + if len(cookies) > 0 { + for _, app := range s.checkCookies(cookies) { + uniqueFingerprints.SetIfNotExists(app.application, app.version, app.confidence) + } + } + + // Check for stuff in the body finally + bodyTech := s.checkBody(normalizedBody) + for _, app := range bodyTech { + uniqueFingerprints.SetIfNotExists(app.application, app.version, app.confidence) + } + return uniqueFingerprints.GetValues(), nil + }) + + return result.(map[string]struct{}) +} + +func (s *Wappalyze) Fingerprint1(headers map[string][]string, body []byte) map[string]struct{} { + //result, _, _ := s.memocache.Do(fmt.Sprintf("%s-%s", headers, body), func() (interface{}, error) { uniqueFingerprints := NewUniqueFingerprints() // Lowercase everything that we have received to check @@ -83,7 +127,10 @@ func (s *Wappalyze) Fingerprint(headers map[string][]string, body []byte) map[st for _, app := range bodyTech { uniqueFingerprints.SetIfNotExists(app.application, app.version, app.confidence) } - return uniqueFingerprints.GetValues() + return uniqueFingerprints.GetValues() //, nil + // }) + + // return result.(map[string]struct{}) } type UniqueFingerprints struct { diff --git a/wappalyzer_bench_test.go b/wappalyzer_bench_test.go new file mode 100644 index 0000000..73e6e20 --- /dev/null +++ b/wappalyzer_bench_test.go @@ -0,0 +1,85 @@ +package wappalyzer + +import ( + "crypto/rand" + "fmt" + "testing" + + randutil "github.com/projectdiscovery/utils/rand" + "github.com/stretchr/testify/require" +) + +// generateRandomData creates random HTTP headers and body for testing +func generateRandomData() (map[string][]string, []byte, error) { + headers := make(map[string][]string) + headerCount, err := randutil.IntN(50) + if err != nil { + return nil, nil, err + } + + headerCount += 50 + + for i := 0; i < headerCount; i++ { + headerNameI, err := randutil.IntN(20) + if err != nil { + return nil, nil, err + } + headerName := make([]byte, headerNameI+20) + headerValueI, err := randutil.IntN(80) + if err != nil { + return nil, nil, err + } + headerValue := make([]byte, headerValueI+60) + if _, err := rand.Read(headerName); err != nil { + return nil, nil, err + } + if _, err := rand.Read(headerValue); err != nil { + return nil, nil, err + } + headers[fmt.Sprintf("X-Header-%x", headerName)] = []string{fmt.Sprintf("%x", headerValue)} + } + + commonHeaderServerI, err := randutil.IntN(3) + if err != nil { + return nil, nil, err + } + headers["Server"] = []string{[]string{"Apache", "nginx", "IIS"}[commonHeaderServerI]} + headers["Content-Type"] = []string{"text/html"} + commonHeaderPoweredByI, err := randutil.IntN(3) + if err != nil { + return nil, nil, err + } + headers["X-Powered-By"] = []string{[]string{"PHP/7.4", "ASP.NET", "Express"}[commonHeaderPoweredByI]} + + // Generate large random body + bodySizeI, err := randutil.IntN(1024 * 1024) + if err != nil { + return nil, nil, err + } + bodySize := 1024*1024 + bodySizeI + randomBody := make([]byte, bodySize) + if _, err := rand.Read(randomBody); err != nil { + return nil, nil, err + } + + html := fmt.Sprintf(`%x%x`, + randomBody[:100], randomBody[100:]) + + return headers, []byte(html), nil +} + +func BenchmarkWappalyzer(b *testing.B) { + wappalyzer, err := New() + require.Nil(b, err, "could not create wappalyzer") + + // Generate test data once before starting the benchmark + headers, body, err := generateRandomData() + require.Nil(b, err, "could not generate random data") + + b.ResetTimer() + b.SetParallelism(1) // Ensure single thread for cleaner profile + + for i := 0; i < b.N; i++ { + wappalyzer.Fingerprint(headers, body) + } +} From 225a790b5d9b65de5bc0a552f2ff03c7f85fd688 Mon Sep 17 00:00:00 2001 From: Mzack9999 Date: Tue, 17 Dec 2024 21:44:45 +0100 Subject: [PATCH 2/2] upd actions --- .github/workflows/build-test.yml | 1 - .github/workflows/lint-test.yml | 1 - 2 files changed, 2 deletions(-) diff --git a/.github/workflows/build-test.yml b/.github/workflows/build-test.yml index b7d5862..39bef08 100644 --- a/.github/workflows/build-test.yml +++ b/.github/workflows/build-test.yml @@ -1,6 +1,5 @@ name: 🔨 Build Test on: - push: pull_request: workflow_dispatch: diff --git a/.github/workflows/lint-test.yml b/.github/workflows/lint-test.yml index f233e88..1beb403 100644 --- a/.github/workflows/lint-test.yml +++ b/.github/workflows/lint-test.yml @@ -1,6 +1,5 @@ name: 🙏🏻 Lint Test on: - push: pull_request: workflow_dispatch: