Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Adding memoize + Short repeat groups #114

Open
wants to merge 2 commits into
base: main
Choose a base branch
from
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
1 change: 0 additions & 1 deletion .github/workflows/build-test.yml
Original file line number Diff line number Diff line change
@@ -1,6 +1,5 @@
name: 🔨 Build Test
on:
push:
pull_request:
workflow_dispatch:

Expand Down
1 change: 0 additions & 1 deletion .github/workflows/lint-test.yml
Original file line number Diff line number Diff line change
@@ -1,6 +1,5 @@
name: 🙏🏻 Lint Test
on:
push:
pull_request:
workflow_dispatch:

Expand Down
9 changes: 9 additions & 0 deletions go.mod
Original file line number Diff line number Diff line change
Expand Up @@ -3,12 +3,21 @@ module github.com/projectdiscovery/wappalyzergo
go 1.21

require (
github.com/projectdiscovery/utils v0.4.2
github.com/stretchr/testify v1.10.0
golang.org/x/net v0.32.0
)

require (
github.com/Mzack9999/gcache v0.0.0-20230410081825-519e28eab057 // indirect
github.com/aymerick/douceur v0.2.0 // indirect
github.com/cespare/xxhash v1.1.0 // indirect
github.com/davecgh/go-spew v1.1.1 // indirect
github.com/gorilla/css v1.0.1 // indirect
github.com/microcosm-cc/bluemonday v1.0.27 // indirect
github.com/pmezard/go-difflib v1.0.0 // indirect
github.com/saintfish/chardet v0.0.0-20230101081208-5e3ef4b5456d // indirect
golang.org/x/mod v0.17.0 // indirect
golang.org/x/tools v0.21.1-0.20240508182429-e35e4ccd0d2d // indirect
gopkg.in/yaml.v3 v3.0.1 // indirect
)
24 changes: 24 additions & 0 deletions go.sum
Original file line number Diff line number Diff line change
@@ -1,11 +1,35 @@
github.com/Mzack9999/gcache v0.0.0-20230410081825-519e28eab057 h1:KFac3SiGbId8ub47e7kd2PLZeACxc1LkiiNoDOFRClE=
github.com/Mzack9999/gcache v0.0.0-20230410081825-519e28eab057/go.mod h1:iLB2pivrPICvLOuROKmlqURtFIEsoJZaMidQfCG1+D4=
github.com/OneOfOne/xxhash v1.2.2 h1:KMrpdQIwFcEqXDklaen+P1axHaj9BSKzvpUUfnHldSE=
github.com/OneOfOne/xxhash v1.2.2/go.mod h1:HSdplMjZKSmBqAxg5vPj2TmRDmfkzw+cTzAElWljhcU=
github.com/aymerick/douceur v0.2.0 h1:Mv+mAeH1Q+n9Fr+oyamOlAkUNPWPlA8PPGR0QAaYuPk=
github.com/aymerick/douceur v0.2.0/go.mod h1:wlT5vV2O3h55X9m7iVYN0TBM0NH/MmbLnd30/FjWUq4=
github.com/cespare/xxhash v1.1.0 h1:a6HrQnmkObjyL+Gs60czilIUGqrzKutQD6XZog3p+ko=
github.com/cespare/xxhash v1.1.0/go.mod h1:XrSqR1VqqWfGrhpAt58auRo0WTKS1nRRg3ghfAqPWnc=
github.com/davecgh/go-spew v1.1.1 h1:vj9j/u1bqnvCEfJOwUhtlOARqs3+rkHYY13jYWTU97c=
github.com/davecgh/go-spew v1.1.1/go.mod h1:J7Y8YcW2NihsgmVo/mv3lAwl/skON4iLHjSsI+c5H38=
github.com/gorilla/css v1.0.1 h1:ntNaBIghp6JmvWnxbZKANoLyuXTPZ4cAMlo6RyhlbO8=
github.com/gorilla/css v1.0.1/go.mod h1:BvnYkspnSzMmwRK+b8/xgNPLiIuNZr6vbZBTPQ2A3b0=
github.com/microcosm-cc/bluemonday v1.0.27 h1:MpEUotklkwCSLeH+Qdx1VJgNqLlpY2KXwXFM08ygZfk=
github.com/microcosm-cc/bluemonday v1.0.27/go.mod h1:jFi9vgW+H7c3V0lb6nR74Ib/DIB5OBs92Dimizgw2cA=
github.com/pmezard/go-difflib v1.0.0 h1:4DBwDE0NGyQoBHbLQYPwSUPoCMWR5BEzIk/f1lZbAQM=
github.com/pmezard/go-difflib v1.0.0/go.mod h1:iKH77koFhYxTK1pcRnkKkqfTogsbg7gZNVY4sRDYZ/4=
github.com/projectdiscovery/utils v0.4.2 h1:O/G7vaH1XFxC5W/XdGnTvRkM4NpMnzVmRo8wNLSsbM4=
github.com/projectdiscovery/utils v0.4.2/go.mod h1:xqbN5BcMwIYK7dU0VObVPiyNrpp5UEgt/1OyzqPIXmU=
github.com/saintfish/chardet v0.0.0-20230101081208-5e3ef4b5456d h1:hrujxIzL1woJ7AwssoOcM/tq5JjjG2yYOc8odClEiXA=
github.com/saintfish/chardet v0.0.0-20230101081208-5e3ef4b5456d/go.mod h1:uugorj2VCxiV1x+LzaIdVa9b4S4qGAcH6cbhh4qVxOU=
github.com/spaolacci/murmur3 v0.0.0-20180118202830-f09979ecbc72 h1:qLC7fQah7D6K1B0ujays3HV9gkFtllcxhzImRR7ArPQ=
github.com/spaolacci/murmur3 v0.0.0-20180118202830-f09979ecbc72/go.mod h1:JwIasOWyU6f++ZhiEuf87xNszmSA2myDM2Kzu9HwQUA=
github.com/stretchr/testify v1.10.0 h1:Xv5erBjTwe/5IxqUQTdXv5kgmIvbHo3QQyRwhJsOfJA=
github.com/stretchr/testify v1.10.0/go.mod h1:r2ic/lqez/lEtzL7wO/rwa5dbSLXVDPFyf8C91i36aY=
golang.org/x/mod v0.17.0 h1:zY54UmvipHiNd+pm+m0x9KhZ9hl1/7QNMyxXbc6ICqA=
golang.org/x/mod v0.17.0/go.mod h1:hTbmBsO62+eylJbnUtE2MGJUyE7QWk4xUqPFrRgJ+7c=
golang.org/x/net v0.32.0 h1:ZqPmj8Kzc+Y6e0+skZsuACbx+wzMgo5MQsJh9Qd6aYI=
golang.org/x/net v0.32.0/go.mod h1:CwU0IoeOlnQQWJ6ioyFrfRuomB8GKF6KbYXZVyeXNfs=
golang.org/x/sync v0.10.0 h1:3NQrjDixjgGwUOCaF8w2+VYHv0Ve/vGYSbdkTa98gmQ=
golang.org/x/sync v0.10.0/go.mod h1:Czt+wKu1gCyEFDUtn0jG5QVvpJ6rzVqr5aXyt9drQfk=
golang.org/x/tools v0.21.1-0.20240508182429-e35e4ccd0d2d h1:vU5i/LfpvrRCpgM/VPfJLg5KjxD3E+hfT1SH+d9zLwg=
golang.org/x/tools v0.21.1-0.20240508182429-e35e4ccd0d2d/go.mod h1:aiJjzUbINMkxbQROHiO6hDPo2LHcIPhhQsa9DLh0yGk=
gopkg.in/check.v1 v0.0.0-20161208181325-20d25e280405 h1:yhCVgyC4o1eVCa2tZl7eS0r+SDo693bJlVdllGtEeKM=
gopkg.in/check.v1 v0.0.0-20161208181325-20d25e280405/go.mod h1:Co6ibVJAznAaIkqp8huTwlJQCZ016jof/cbN4VW5Yz0=
gopkg.in/yaml.v3 v3.0.1 h1:fxVm/GzAzEWqLHuvctI91KS9hhNmmWOoWu0XTYJS7CA=
Expand Down
4 changes: 2 additions & 2 deletions patterns.go
Original file line number Diff line number Diff line change
Expand Up @@ -34,8 +34,8 @@ func ParsePattern(pattern string) (*ParsedPattern, error) {

regexPattern = strings.ReplaceAll(regexPattern, "/", "\\/")
regexPattern = strings.ReplaceAll(regexPattern, "\\+", "__escapedPlus__")
regexPattern = strings.ReplaceAll(regexPattern, "+", "{1,250}")
regexPattern = strings.ReplaceAll(regexPattern, "*", "{0,250}")
regexPattern = strings.ReplaceAll(regexPattern, "+", "{1,100}")
regexPattern = strings.ReplaceAll(regexPattern, "*", "{0,100}")
regexPattern = strings.ReplaceAll(regexPattern, "__escapedPlus__", "\\+")

var err error
Expand Down
6 changes: 3 additions & 3 deletions patterns_test.go
Original file line number Diff line number Diff line change
Expand Up @@ -14,19 +14,19 @@ func TestParsePattern(t *testing.T) {
{
name: "Basic pattern",
input: "Mage.*",
expectedRegex: "(?i)Mage.{0,250}",
expectedRegex: "(?i)Mage.{0,100}",
expectedConf: 100,
},
{
name: "With confidence",
input: "Mage.*\\;confidence:50",
expectedRegex: "(?i)Mage.{0,250}",
expectedRegex: "(?i)Mage.{0,100}",
expectedConf: 50,
},
{
name: "With version",
input: "jquery-([0-9.]+)\\.js\\;version:\\1",
expectedRegex: "(?i)jquery-([0-9.]{1,250})\\.js",
expectedRegex: "(?i)jquery-([0-9.]{1,100})\\.js",
expectedConf: 100,
expectedVer: "\\1",
},
Expand Down
49 changes: 48 additions & 1 deletion tech.go
Original file line number Diff line number Diff line change
Expand Up @@ -3,13 +3,17 @@ package wappalyzer
import (
	"bytes"
	"crypto/sha256"
	"encoding/json"
	"fmt"
	"strings"

	"github.com/projectdiscovery/utils/memoize"
)

// Wappalyze is a client for working with tech detection
type Wappalyze struct {
// original holds a *Fingerprints value; presumably the fingerprint
// definitions as loaded, before compilation — TODO confirm against
// loadFingerprints.
original *Fingerprints
// fingerprints holds a *CompiledFingerprints value; presumably the compiled
// form consulted by the check* methods — TODO confirm.
fingerprints *CompiledFingerprints
// memocache is created in New with memoize.WithMaxSize(1024) and is used by
// Fingerprint to memoize results keyed on the supplied headers and body.
memocache *memoize.Memoizer
}

// New creates a new tech detection instance
Expand All @@ -24,6 +28,12 @@ func New() (*Wappalyze, error) {
if err != nil {
return nil, err
}

wappalyze.memocache, err = memoize.New(memoize.WithMaxSize(1024))
if err != nil {
return nil, err
}

return wappalyze, nil
}

Expand Down Expand Up @@ -58,6 +68,40 @@ func (s *Wappalyze) loadFingerprints() error {
// Body should not be mutated while this function is being called, or it may
// lead to unexpected things.
//
// Results are memoized per (headers, body) pair. The cache key is a SHA-256
// digest of the inputs rather than the inputs themselves, so the cache does
// not hold on to a copy of every body it has seen. The returned map is always
// a fresh copy, so callers may modify it without affecting later cache hits.
// If the memoizer is unavailable or reports an error, detection is simply run
// without caching.
func (s *Wappalyze) Fingerprint(headers map[string][]string, body []byte) map[string]struct{} {
	// detect runs the header, cookie and body checks and returns the set of
	// detected technologies.
	detect := func() map[string]struct{} {
		uniqueFingerprints := NewUniqueFingerprints()

		// Lowercase everything that we have received to check
		normalizedBody := bytes.ToLower(body)
		normalizedHeaders := s.normalizeHeaders(headers)

		// Run header based fingerprinting
		for _, app := range s.checkHeaders(normalizedHeaders) {
			uniqueFingerprints.SetIfNotExists(app.application, app.version, app.confidence)
		}

		// Run cookie based fingerprinting if we have a set-cookie header
		if cookies := s.findSetCookie(normalizedHeaders); len(cookies) > 0 {
			for _, app := range s.checkCookies(cookies) {
				uniqueFingerprints.SetIfNotExists(app.application, app.version, app.confidence)
			}
		}

		// Check for stuff in the body finally
		for _, app := range s.checkBody(normalizedBody) {
			uniqueFingerprints.SetIfNotExists(app.application, app.version, app.confidence)
		}
		return uniqueFingerprints.GetValues()
	}

	// A Wappalyze value built without New has no cache; fall back to plain
	// detection instead of dereferencing a nil memoizer.
	if s.memocache == nil {
		return detect()
	}

	// Build a fixed-size key. fmt prints maps in sorted key order, and %q
	// quotes every header name and value, so the rendering is deterministic
	// and unambiguous. Writes to a hash.Hash never return an error.
	digest := sha256.New()
	_, _ = fmt.Fprintf(digest, "%q\n", headers)
	_, _ = digest.Write(body)
	key := fmt.Sprintf("%x", digest.Sum(nil))

	result, err, _ := s.memocache.Do(key, func() (interface{}, error) {
		return detect(), nil
	})

	// Never trust the cached value blindly: an unchecked assertion on a nil
	// or unexpected value would panic.
	cached, ok := result.(map[string]struct{})
	if err != nil || !ok {
		return detect()
	}

	// Hand out a copy so the cached map is never shared with callers.
	out := make(map[string]struct{}, len(cached))
	for tech := range cached {
		out[tech] = struct{}{}
	}
	return out
}

func (s *Wappalyze) Fingerprint1(headers map[string][]string, body []byte) map[string]struct{} {
//result, _, _ := s.memocache.Do(fmt.Sprintf("%s-%s", headers, body), func() (interface{}, error) {
uniqueFingerprints := NewUniqueFingerprints()

// Lowercase everything that we have received to check
Expand All @@ -83,7 +127,10 @@ func (s *Wappalyze) Fingerprint(headers map[string][]string, body []byte) map[st
for _, app := range bodyTech {
uniqueFingerprints.SetIfNotExists(app.application, app.version, app.confidence)
}
return uniqueFingerprints.GetValues()
return uniqueFingerprints.GetValues() //, nil
// })

// return result.(map[string]struct{})
}

type UniqueFingerprints struct {
Expand Down
85 changes: 85 additions & 0 deletions wappalyzer_bench_test.go
Original file line number Diff line number Diff line change
@@ -0,0 +1,85 @@
package wappalyzer

import (
"crypto/rand"
"fmt"
"testing"

randutil "github.com/projectdiscovery/utils/rand"
"github.com/stretchr/testify/require"
)

// generateRandomData builds a randomized HTTP fixture for benchmarks: a header
// map and an HTML body.
//
// The header map contains 50 + randutil.IntN(50) hex-encoded "X-Header-*"
// entries plus Server, Content-Type and X-Powered-By. The body is an HTML
// document whose title and content are the hex encoding of
// 1 MiB + randutil.IntN(1 MiB) random bytes. Any failure of the random sources
// is returned as the error, with nil headers and body.
func generateRandomData() (map[string][]string, []byte, error) {
	// sizeBetween returns base plus a random offset obtained from
	// randutil.IntN(spread).
	sizeBetween := func(base, spread int) (int, error) {
		extra, err := randutil.IntN(spread)
		if err != nil {
			return 0, err
		}
		return base + extra, nil
	}
	// pickOne returns a randomly selected element of choices.
	pickOne := func(choices ...string) (string, error) {
		idx, err := randutil.IntN(len(choices))
		if err != nil {
			return "", err
		}
		return choices[idx], nil
	}

	headers := make(map[string][]string)

	total, err := sizeBetween(50, 50)
	if err != nil {
		return nil, nil, err
	}

	for n := 0; n < total; n++ {
		nameLen, err := sizeBetween(20, 20)
		if err != nil {
			return nil, nil, err
		}
		valueLen, err := sizeBetween(60, 80)
		if err != nil {
			return nil, nil, err
		}

		rawName, rawValue := make([]byte, nameLen), make([]byte, valueLen)
		if _, err := rand.Read(rawName); err != nil {
			return nil, nil, err
		}
		if _, err := rand.Read(rawValue); err != nil {
			return nil, nil, err
		}

		key := fmt.Sprintf("X-Header-%x", rawName)
		headers[key] = []string{fmt.Sprintf("%x", rawValue)}
	}

	// A few realistic headers on top of the random noise.
	server, err := pickOne("Apache", "nginx", "IIS")
	if err != nil {
		return nil, nil, err
	}
	headers["Server"] = []string{server}
	headers["Content-Type"] = []string{"text/html"}

	poweredBy, err := pickOne("PHP/7.4", "ASP.NET", "Express")
	if err != nil {
		return nil, nil, err
	}
	headers["X-Powered-By"] = []string{poweredBy}

	// Large random payload: the first 100 bytes become the title, the rest
	// the document content.
	payloadSize, err := sizeBetween(1024*1024, 1024*1024)
	if err != nil {
		return nil, nil, err
	}
	payload := make([]byte, payloadSize)
	if _, err := rand.Read(payload); err != nil {
		return nil, nil, err
	}
	title, content := payload[:100], payload[100:]

	html := fmt.Sprintf(`<!DOCTYPE html><html><head><title>%x</title></head><body>%x</body></html>`,
		title, content)

	return headers, []byte(html), nil
}

// BenchmarkWappalyzer times repeated Fingerprint calls on one randomly
// generated header/body pair. The client and the fixture are created before
// the timer is reset, so only the Fingerprint calls are measured.
func BenchmarkWappalyzer(b *testing.B) {
	client, err := New()
	require.Nil(b, err, "could not create wappalyzer")

	// Build the fixture a single time; every iteration reuses it.
	hdrs, payload, err := generateRandomData()
	require.Nil(b, err, "could not generate random data")

	b.ResetTimer()
	// Intended to keep the run single-threaded for a cleaner profile.
	b.SetParallelism(1)

	for remaining := b.N; remaining > 0; remaining-- {
		client.Fingerprint(hdrs, payload)
	}
}
Loading