Skip to content

Commit

Permalink
cleanups
Browse files Browse the repository at this point in the history
  • Loading branch information
dropwhile committed Sep 5, 2019
1 parent 236ccdd commit dfc8268
Show file tree
Hide file tree
Showing 7 changed files with 85 additions and 48 deletions.
6 changes: 4 additions & 2 deletions pkg/camo/proxy.go
Original file line number Diff line number Diff line change
Expand Up @@ -253,8 +253,10 @@ func (p *Proxy) ServeHTTP(w http.ResponseWriter, req *http.Request) {

func (p *Proxy) checkURL(reqURL *url.URL) error {
// reject localhost urls
uHostname := strings.ToLower(reqURL.Hostname())
if uHostname == "" || localhostDomainProxyFilter.CheckHostname(uHostname) {
// lower case for matching is done by CheckHostname, so no need to
// ToLower here also
uHostname := reqURL.Hostname()
if uHostname == "" || localsFilter.CheckHostname(uHostname) {
return errors.New("Bad url host")
}

Expand Down
4 changes: 2 additions & 2 deletions pkg/camo/vars.go
Original file line number Diff line number Diff line change
Expand Up @@ -77,8 +77,8 @@ var rejectIPv6Networks = mustParseNetmasks(
},
)

// match for localhost
var localhostDomainProxyFilter = htrie.MustNewURLMatcherWithRules(
// match for localhost, localdomain
var localsFilter = htrie.MustNewURLMatcherWithRules(
[]string{
"|s|localhost||",
"|s|localdomain||",
Expand Down
8 changes: 1 addition & 7 deletions pkg/htrie/glob_path_chk.go
Original file line number Diff line number Diff line change
Expand Up @@ -132,13 +132,7 @@ func (gpc *GlobPathChecker) AddRule(rule string) error {
// Note: CheckPathString requires that the url path component is already escaped,
// in a similar way to `(*url.URL).EscapePath()`, as well as TrimSpace'd.
func (gpc *GlobPathChecker) CheckPath(url string) bool {
return gpc.CheckPathPtr(&url)
}

// CheckPathPtr operates like CheckPath, but takes a *string
// (to optionally avoid extra copies)
func (gpc *GlobPathChecker) CheckPathPtr(url *string) bool {
ulen := len(*url)
ulen := len(url)

// if we have a case sensitive checker, check that one first
if gpc.csNode != nil && gpc.csNode.checkPath(url, 0, ulen) {
Expand Down
51 changes: 51 additions & 0 deletions pkg/htrie/glob_path_chk_test.go
Original file line number Diff line number Diff line change
Expand Up @@ -111,3 +111,54 @@ func TestGlobPathCheckerPathsMisc(t *testing.T) {
assert.False(t, gpc.CheckPath(u), fmt.Sprintf("should NOT have matched: %s", u))
}
}

// BenchmarkGlobPathChecker measures GlobPathChecker.CheckPath over a mix of
// case-insensitive, exact, and glob rules against representative URL paths.
func BenchmarkGlobPathChecker(b *testing.B) {
	rules := []string{
		"|i|*/test.png",
		"||/hodor/test.png",
		"||/hodor/test.png.longer",
		"||/hodor/bar*",
		"||/hodor/ütest.png",
		"||/no*/to/s*/here",
		"||/i/can/s*/it*",
		"||/play/*/ball/img.png",
		"||/yalp*llab/img.png",
	}

	testMatch := []string{
		"http://bar.example.com/foo/TEST.png",
		"http://example.org/foo/test.png",
		"http://example.org/hodor/test.png",
		"http://example.org/hodor/test.png.longer",
		"http://example.org/hodor/bartholemew",
		"http://example.org/hodor/bart/homer.png",
		"http://example.net/nothing/to/see/here",
		"http://example.net/i/can/see/it/in/the/clouds/file.png",
		"http://example.org/play/base/ball/img.png",
		"http://example.org/yalp/base/llab/base/llab/base/llab/base/llab/base/llab/base/llab/base/llab/base/llab/base/llab/base/llab/base/llab/base/llab/base/llab/base/llab/base/llab/base/llab/base/llab/base/llab/base/llab/base/llab/base/llab/base/llab/base/llab/base/llab/base/llab/base/llab/base/llab/base/llab/base/llab/base/llab/base/llab/base/llab/base/llab/base/llab/base/llab/base/llab/base/llab/base/llab/base/llab/base/llab/base/llab/base/llab/base/llab/base/llab/base/llab/base/llab/img.png",
		"http://example.org/yalpllab/img.png",
	}

	gpc := NewGlobPathChecker()
	for _, rule := range rules {
		err := gpc.AddRule(rule)
		assert.Nil(b, err)
	}

	// parse and escape the paths up front so URL parsing does not
	// pollute the timed region below
	paths := make([]string, 0, len(testMatch))
	for _, raw := range testMatch {
		u, err := url.Parse(raw)
		assert.Nil(b, err)
		paths = append(paths, u.EscapedPath())
	}

	// sink for the result, to keep the compiler from eliminating
	// the CheckPath call as dead code
	var x bool

	// drive the timed loop with b.N so the testing framework controls
	// the iteration count and reports a meaningful ns/op
	b.ResetTimer()
	for i := 0; i < b.N; i++ {
		for _, p := range paths {
			x = gpc.CheckPath(p)
		}
	}
	_ = x
}
20 changes: 8 additions & 12 deletions pkg/htrie/glob_path_node.go
Original file line number Diff line number Diff line change
Expand Up @@ -81,7 +81,7 @@ func (gpn *globPathNode) addPath(s string) error {
return nil
}

func (gpn *globPathNode) globConsume(s *string, index, mlen int) bool {
func (gpn *globPathNode) globConsume(s string, index, mlen int) bool {
// we have a glob and no follow-on chars, so we can consume
// till the end and then match. early return
if gpn.canMatch {
Expand All @@ -101,7 +101,7 @@ func (gpn *globPathNode) globConsume(s *string, index, mlen int) bool {
curnode := gpn
// don't need to iter runes since we have ascii
for i := index; i < mlen; i++ {
part := (*s)[i]
part := s[i]
// if icase, use lowercase letters for comparisons
if gpn.icase && 'A' <= part && part <= 'Z' {
part = part + 32
Expand Down Expand Up @@ -151,11 +151,11 @@ func (gpn *globPathNode) globConsume(s *string, index, mlen int) bool {
return false
}

func (gpn *globPathNode) checkPath(s *string, index, mlen int) bool {
func (gpn *globPathNode) checkPath(s string, index, mlen int) bool {
curnode := gpn
// don't need to iter runes since we have ascii
for i := index; i < mlen; i++ {
part := (*s)[i]
part := s[i]

// if icase, use lowercase letters for comparisons
if gpn.icase && 'A' <= part && part <= 'Z' {
Expand Down Expand Up @@ -230,16 +230,12 @@ func newGlobPathNode(icase bool) *globPathNode {
// 0x0061...0x007A 97-122
// 0x007E 126
// so a total possible of 85 chars, but spread out over 94 slots
// since there are quite a few slots, let's use a map for now...
// since there are quite a few possible slots, let's use a map for now...
// web searches say a map is faster in go above a certain size. benchmark later...

// for now, just use the top bounds minus low bound for array size: 126-32=94
// plus one for our globbing

// NOTE: since realloc cost is paid at creation, and we want to reduce size
// and we only care about lookup costs, just start with 0 and let it grow
// as needed.
// return &globPathNode{subtrees: make(map[int]*globPathNode, 95)}
// for now, since realloc cost is paid at creation, and we want to reduce RSS size
// and since we only /really/ care about lookup costs, just start with 0 initial
// map size and let it grow as needed
return &globPathNode{
subtrees: make(map[byte]*globPathNode, 0),
icase: icase,
Expand Down
12 changes: 8 additions & 4 deletions pkg/htrie/htrie.go
Original file line number Diff line number Diff line change
Expand Up @@ -220,7 +220,6 @@ func (dt *URLMatcher) AddRule(rule string) error {

func (dt *URLMatcher) walkFind(s string) []*URLMatcher {
// hostname should already be lowercase. avoid work by not doing it.
// hostname := strings.ToLower(s)
matches := *getURLMatcherSlice()
labels := reverse(strings.Split(s, "."))
plen := len(labels)
Expand Down Expand Up @@ -265,7 +264,8 @@ func (dt *URLMatcher) walkFind(s string) []*URLMatcher {
// If the url matches (a "hit"), it returns true.
// If the url does not match (a "miss"), it return false.
func (dt *URLMatcher) CheckURL(u *url.URL) bool {
hostname := u.Hostname()
// alas, (*url.URL).Hostname() does not ToLower
hostname := strings.ToLower(u.Hostname())
matches := dt.walkFind(hostname)
defer putURLMatcherSlice(&matches)

Expand All @@ -291,10 +291,14 @@ func (dt *URLMatcher) CheckURL(u *url.URL) bool {
}

// CheckHostname checks the supplied hostname (as a string).
// Note: CheckHostnameString requires that the hostname is already escaped,
// Note: CheckHostname requires that the hostname is already escaped,
// sanitized, space trimmed, and lowercased...
// Basically sanitized in a way similar to `(*url.URL).Hostname()`
// Basically sanitized in a way similar to:
//
// strings.ToLower((*url.URL).Hostname())
//
func (dt *URLMatcher) CheckHostname(hostname string) bool {
hostname = strings.ToLower(hostname)
matches := dt.walkFind(hostname)
defer putURLMatcherSlice(&matches)
return len(matches) > 0
Expand Down
32 changes: 11 additions & 21 deletions pkg/htrie/htrie_test.go
Original file line number Diff line number Diff line change
Expand Up @@ -156,22 +156,17 @@ func BenchmarkHTrieMatch(b *testing.B) {
"|s|example.org|i|*/test.png",
}

testURLin := []string{
testURLs := []string{
"http://example.com/foo/test.png",
"http://bar.example.com/foo/test.png",
"http://bar.example.com/foo/testx.png",
"http://bar.example.com/foo/foo/foo/foo/foo/foo/foo/foo/foo/foo/foo/test.png",
"http://bar.example.com/foo/foo/foo/foo/foo/foo/foo/foo/foo/foo/foo/testx.png",
// this one kills the regex pretty bad.
"bar.example.com/foo/foo/foo/foo/foo/foo/foo/foo/foo/foo/foo/foo/foo/foo/foo/foo/foo/foo/foo/foo/foo/foo/foo/foo/foo/foo/foo/foo/foo/foo/foo/foo/foo/foo/foo/foo/foo/foo/foo/foo/foo/foo/foo/foo/foo/foo/foo/foo/foo/foo/foo/foo/foo/foo/foo/foo/foo/foo/foo/foo/foo/foo/foo/foo/foo/foo/foo/foo/foo/foo/foo/foo/foo/foo/foo/foo/foo/foo/foo/foo/foo/foo/foo/foo/foo/foo/foo/foo/foo/foo/foo/foo/foo/foo/foo/foo/foo/foo/foo/foo/foo/foo/foo/foo/foo/foo/foo/foo/foo/foo/foo/foo/foo/foo/foo/foo/foo/foo/foo/foo/foo/testx.png",
}

var (
testIters = 10
testURLSize = 10000
)
testURLs := make([]string, 0)
for i := 0; i < testURLSize; i++ {
testURLs = append(testURLs, testURLin[i%3])
}
testIters := 10000

dt := NewURLMatcher()
for _, rule := range rules {
Expand All @@ -183,7 +178,6 @@ func BenchmarkHTrieMatch(b *testing.B) {
for _, u := range testURLs {
u, _ := url.Parse(u)
parsed = append(parsed, u)

}

// avoid inlining optimization
Expand All @@ -200,30 +194,26 @@ func BenchmarkHTrieMatch(b *testing.B) {

func BenchmarkRegexMatch(b *testing.B) {
rules := []string{
`^foo.example.net/test.png`,
// giving regex lots of help here, putting this rule first
`^.*\.example.com/.*/test.png`,
`^bar.example.net/test.png`,
`^foo.example.net/test.png`,
`^.*\.bar.example.net/test.png`,
`^.*\.hodor.example.net/.*/test.png`,
`^.*\.example.com/.*/test.png`,
`^(.*\.)?example.org/(?:i.*/test.png)`,
}

testURLin := []string{
testURLs := []string{
"example.com/foo/test.png",
"bar.example.com/foo/test.png",
"bar.example.com/foo/testx.png",
"bar.example.com/foo/foo/foo/foo/foo/foo/foo/foo/foo/foo/foo/test.png",
"bar.example.com/foo/foo/foo/foo/foo/foo/foo/foo/foo/foo/foo/testx.png",
// this one kills the regex pretty bad. :(
//"bar.example.com/foo/foo/foo/foo/foo/foo/foo/foo/foo/foo/foo/foo/foo/foo/foo/foo/foo/foo/foo/foo/foo/foo/foo/foo/foo/foo/foo/foo/foo/foo/foo/foo/foo/foo/foo/foo/foo/foo/foo/foo/foo/foo/foo/foo/foo/foo/foo/foo/foo/foo/foo/foo/foo/foo/foo/foo/foo/foo/foo/foo/foo/foo/foo/foo/foo/foo/foo/foo/foo/foo/foo/foo/foo/foo/foo/foo/foo/foo/foo/foo/foo/foo/foo/foo/foo/foo/foo/foo/foo/foo/foo/foo/foo/foo/foo/foo/foo/foo/foo/foo/foo/foo/foo/foo/foo/foo/foo/foo/foo/foo/foo/foo/foo/foo/foo/foo/foo/foo/foo/foo/foo/testx.png",
}

var (
testIters = 10
testURLSize = 10000
)
testURLs := make([]string, 0)
for i := 0; i < testURLSize; i++ {
testURLs = append(testURLs, testURLin[i%3])
}
testIters := 10000

rexes := make([]*regexp.Regexp, 0)
for _, r := range rules {
Expand Down

0 comments on commit dfc8268

Please sign in to comment.