Skip to content

Commit

Permalink
cleanups
Browse files Browse the repository at this point in the history
  • Loading branch information
dropwhile committed Sep 5, 2019
1 parent 236ccdd commit dfc8268
Show file tree
Hide file tree
Showing 7 changed files with 85 additions and 48 deletions.
6 changes: 4 additions & 2 deletions pkg/camo/proxy.go
Original file line number Diff line number Diff line change
Expand Up @@ -253,8 +253,10 @@ func (p *Proxy) ServeHTTP(w http.ResponseWriter, req *http.Request) {

func (p *Proxy) checkURL(reqURL *url.URL) error {
// reject localhost urls
uHostname := strings.ToLower(reqURL.Hostname())
if uHostname == "" || localhostDomainProxyFilter.CheckHostname(uHostname) {
// lower case for matching is done by CheckHostname, so no need to
// ToLower here also
uHostname := reqURL.Hostname()
if uHostname == "" || localsFilter.CheckHostname(uHostname) {
return errors.New("Bad url host")
}

Expand Down
4 changes: 2 additions & 2 deletions pkg/camo/vars.go
Original file line number Diff line number Diff line change
Expand Up @@ -77,8 +77,8 @@ var rejectIPv6Networks = mustParseNetmasks(
},
)

// match for localhost
var localhostDomainProxyFilter = htrie.MustNewURLMatcherWithRules(
// match for localhost, localdomain
var localsFilter = htrie.MustNewURLMatcherWithRules(
[]string{
"|s|localhost||",
"|s|localdomain||",
Expand Down
8 changes: 1 addition & 7 deletions pkg/htrie/glob_path_chk.go
Original file line number Diff line number Diff line change
Expand Up @@ -132,13 +132,7 @@ func (gpc *GlobPathChecker) AddRule(rule string) error {
// Note: CheckPathString requires that the url path component is already escaped,
// in a similar way to `(*url.URL).EscapePath()`, as well as TrimSpace'd.
func (gpc *GlobPathChecker) CheckPath(url string) bool {
return gpc.CheckPathPtr(&url)
}

// CheckPathPtr operates like CheckPath, but takes a *string
// (to optionally avoid extra copies)
func (gpc *GlobPathChecker) CheckPathPtr(url *string) bool {
ulen := len(*url)
ulen := len(url)

// if we have a case sensitive checker, check that one first
if gpc.csNode != nil && gpc.csNode.checkPath(url, 0, ulen) {
Expand Down
51 changes: 51 additions & 0 deletions pkg/htrie/glob_path_chk_test.go
Original file line number Diff line number Diff line change
Expand Up @@ -111,3 +111,54 @@ func TestGlobPathCheckerPathsMisc(t *testing.T) {
assert.False(t, gpc.CheckPath(u), fmt.Sprintf("should NOT have matched: %s", u))
}
}

// BenchmarkGlobPathChecker measures GlobPathChecker.CheckPath over a mix of
// case-insensitive, exact, and glob rules against representative URL paths.
func BenchmarkGlobPathChecker(b *testing.B) {
	rules := []string{
		"|i|*/test.png",
		"||/hodor/test.png",
		"||/hodor/test.png.longer",
		"||/hodor/bar*",
		"||/hodor/ütest.png",
		"||/no*/to/s*/here",
		"||/i/can/s*/it*",
		"||/play/*/ball/img.png",
		"||/yalp*llab/img.png",
	}

	testMatch := []string{
		"http://bar.example.com/foo/TEST.png",
		"http://example.org/foo/test.png",
		"http://example.org/hodor/test.png",
		"http://example.org/hodor/test.png.longer",
		"http://example.org/hodor/bartholemew",
		"http://example.org/hodor/bart/homer.png",
		"http://example.net/nothing/to/see/here",
		"http://example.net/i/can/see/it/in/the/clouds/file.png",
		"http://example.org/play/base/ball/img.png",
		"http://example.org/yalp/base/llab/base/llab/base/llab/base/llab/base/llab/base/llab/base/llab/base/llab/base/llab/base/llab/base/llab/base/llab/base/llab/base/llab/base/llab/base/llab/base/llab/base/llab/base/llab/base/llab/base/llab/base/llab/base/llab/base/llab/base/llab/base/llab/base/llab/base/llab/base/llab/base/llab/base/llab/base/llab/base/llab/base/llab/base/llab/base/llab/base/llab/base/llab/base/llab/base/llab/base/llab/base/llab/base/llab/base/llab/base/llab/base/llab/img.png",
		"http://example.org/yalpllab/img.png",
	}

	gpc := NewGlobPathChecker()
	for _, rule := range rules {
		err := gpc.AddRule(rule)
		assert.Nil(b, err)
	}

	// parse and escape the paths up front so URL parsing does not
	// pollute the timed region below
	paths := make([]string, 0, len(testMatch))
	for _, raw := range testMatch {
		u, err := url.Parse(raw)
		assert.Nil(b, err)
		paths = append(paths, u.EscapedPath())
	}

	// sink for the result, to keep the compiler from eliminating
	// the CheckPath call as dead code
	var x bool

	// drive the timed loop with b.N so the testing framework controls
	// the iteration count and reports a meaningful ns/op
	b.ResetTimer()
	for i := 0; i < b.N; i++ {
		for _, p := range paths {
			x = gpc.CheckPath(p)
		}
	}
	_ = x
}
20 changes: 8 additions & 12 deletions pkg/htrie/glob_path_node.go
Original file line number Diff line number Diff line change
Expand Up @@ -81,7 +81,7 @@ func (gpn *globPathNode) addPath(s string) error {
return nil
}

func (gpn *globPathNode) globConsume(s *string, index, mlen int) bool {
func (gpn *globPathNode) globConsume(s string, index, mlen int) bool {
// we have a glob and no follow-on chars, so we can consume
// till the end and then match. early return
if gpn.canMatch {
Expand All @@ -101,7 +101,7 @@ func (gpn *globPathNode) globConsume(s *string, index, mlen int) bool {
curnode := gpn
// don't need to iter runes since we have ascii
for i := index; i < mlen; i++ {
part := (*s)[i]
part := s[i]
// if icase, use lowercase letters for comparisons
if gpn.icase && 'A' <= part && part <= 'Z' {
part = part + 32
Expand Down Expand Up @@ -151,11 +151,11 @@ func (gpn *globPathNode) globConsume(s *string, index, mlen int) bool {
return false
}

func (gpn *globPathNode) checkPath(s *string, index, mlen int) bool {
func (gpn *globPathNode) checkPath(s string, index, mlen int) bool {
curnode := gpn
// don't need to iter runes since we have ascii
for i := index; i < mlen; i++ {
part := (*s)[i]
part := s[i]

// if icase, use lowercase letters for comparisons
if gpn.icase && 'A' <= part && part <= 'Z' {
Expand Down Expand Up @@ -230,16 +230,12 @@ func newGlobPathNode(icase bool) *globPathNode {
// 0x0061...0x007A 97-122
// 0x007E 126
// so a total possible of 85 chars, but spread out over 94 slots
// since there are quite a few slots, let's use a map for now...
// since there are quite a few possible slots, let's use a map for now...
// web searches say a map is faster in go above a certain size. benchmark later...

// for now, just use the top bounds minus low bound for array size: 126-32=94
// plus one for our globbing

// NOTE: since realloc cost is paid at creation, and we want to reduce size
// and we only care about lookup costs, just start with 0 and let it grow
// as needed.
// return &globPathNode{subtrees: make(map[int]*globPathNode, 95)}
// for now, since realloc cost is paid at creation, and we want to reduce RSS size
// and since we only /really/ care about lookup costs, just start with 0 initial
// map size and let it grow as needed
return &globPathNode{
subtrees: make(map[byte]*globPathNode, 0),
icase: icase,
Expand Down
12 changes: 8 additions & 4 deletions pkg/htrie/htrie.go
Original file line number Diff line number Diff line change
Expand Up @@ -220,7 +220,6 @@ func (dt *URLMatcher) AddRule(rule string) error {

func (dt *URLMatcher) walkFind(s string) []*URLMatcher {
// hostname should already be lowercase. avoid work by not doing it.
// hostname := strings.ToLower(s)
matches := *getURLMatcherSlice()
labels := reverse(strings.Split(s, "."))
plen := len(labels)
Expand Down Expand Up @@ -265,7 +264,8 @@ func (dt *URLMatcher) walkFind(s string) []*URLMatcher {
// If the url matches (a "hit"), it returns true.
// If the url does not match (a "miss"), it return false.
func (dt *URLMatcher) CheckURL(u *url.URL) bool {
hostname := u.Hostname()
// alas, (*url.URL).Hostname() does not ToLower
hostname := strings.ToLower(u.Hostname())
matches := dt.walkFind(hostname)
defer putURLMatcherSlice(&matches)

Expand All @@ -291,10 +291,14 @@ func (dt *URLMatcher) CheckURL(u *url.URL) bool {
}

// CheckHostname checks the supplied hostname (as a string).
// Note: CheckHostnameString requires that the hostname is already escaped,
// Note: CheckHostname requires that the hostname is already escaped,
// sanitized, space trimmed, and lowercased...
// Basically sanitized in a way similar to `(*url.URL).Hostname()`
// Basically sanitized in a way similar to:
//
// strings.ToLower((*url.URL).Hostname())
//
func (dt *URLMatcher) CheckHostname(hostname string) bool {
hostname = strings.ToLower(hostname)
matches := dt.walkFind(hostname)
defer putURLMatcherSlice(&matches)
return len(matches) > 0
Expand Down
32 changes: 11 additions & 21 deletions pkg/htrie/htrie_test.go
Original file line number Diff line number Diff line change
Expand Up @@ -156,22 +156,17 @@ func BenchmarkHTrieMatch(b *testing.B) {
"|s|example.org|i|*/test.png",
}

testURLin := []string{
testURLs := []string{
"http://example.com/foo/test.png",
"http://bar.example.com/foo/test.png",
"http://bar.example.com/foo/testx.png",
"http://bar.example.com/foo/foo/foo/foo/foo/foo/foo/foo/foo/foo/foo/test.png",
"http://bar.example.com/foo/foo/foo/foo/foo/foo/foo/foo/foo/foo/foo/testx.png",
// this one kills the regex pretty bad.
"bar.example.com/foo/foo/foo/foo/foo/foo/foo/foo/foo/foo/foo/foo/foo/foo/foo/foo/foo/foo/foo/foo/foo/foo/foo/foo/foo/foo/foo/foo/foo/foo/foo/foo/foo/foo/foo/foo/foo/foo/foo/foo/foo/foo/foo/foo/foo/foo/foo/foo/foo/foo/foo/foo/foo/foo/foo/foo/foo/foo/foo/foo/foo/foo/foo/foo/foo/foo/foo/foo/foo/foo/foo/foo/foo/foo/foo/foo/foo/foo/foo/foo/foo/foo/foo/foo/foo/foo/foo/foo/foo/foo/foo/foo/foo/foo/foo/foo/foo/foo/foo/foo/foo/foo/foo/foo/foo/foo/foo/foo/foo/foo/foo/foo/foo/foo/foo/foo/foo/foo/foo/foo/foo/testx.png",
}

var (
testIters = 10
testURLSize = 10000
)
testURLs := make([]string, 0)
for i := 0; i < testURLSize; i++ {
testURLs = append(testURLs, testURLin[i%3])
}
testIters := 10000

dt := NewURLMatcher()
for _, rule := range rules {
Expand All @@ -183,7 +178,6 @@ func BenchmarkHTrieMatch(b *testing.B) {
for _, u := range testURLs {
u, _ := url.Parse(u)
parsed = append(parsed, u)

}

// avoid inlining optimization
Expand All @@ -200,30 +194,26 @@ func BenchmarkHTrieMatch(b *testing.B) {

func BenchmarkRegexMatch(b *testing.B) {
rules := []string{
`^foo.example.net/test.png`,
// giving regex lots of help here, putting this rule first
`^.*\.example.com/.*/test.png`,
`^bar.example.net/test.png`,
`^foo.example.net/test.png`,
`^.*\.bar.example.net/test.png`,
`^.*\.hodor.example.net/.*/test.png`,
`^.*\.example.com/.*/test.png`,
`^(.*\.)?example.org/(?:i.*/test.png)`,
}

testURLin := []string{
testURLs := []string{
"example.com/foo/test.png",
"bar.example.com/foo/test.png",
"bar.example.com/foo/testx.png",
"bar.example.com/foo/foo/foo/foo/foo/foo/foo/foo/foo/foo/foo/test.png",
"bar.example.com/foo/foo/foo/foo/foo/foo/foo/foo/foo/foo/foo/testx.png",
// this one kills the regex pretty bad. :(
//"bar.example.com/foo/foo/foo/foo/foo/foo/foo/foo/foo/foo/foo/foo/foo/foo/foo/foo/foo/foo/foo/foo/foo/foo/foo/foo/foo/foo/foo/foo/foo/foo/foo/foo/foo/foo/foo/foo/foo/foo/foo/foo/foo/foo/foo/foo/foo/foo/foo/foo/foo/foo/foo/foo/foo/foo/foo/foo/foo/foo/foo/foo/foo/foo/foo/foo/foo/foo/foo/foo/foo/foo/foo/foo/foo/foo/foo/foo/foo/foo/foo/foo/foo/foo/foo/foo/foo/foo/foo/foo/foo/foo/foo/foo/foo/foo/foo/foo/foo/foo/foo/foo/foo/foo/foo/foo/foo/foo/foo/foo/foo/foo/foo/foo/foo/foo/foo/foo/foo/foo/foo/foo/foo/testx.png",
}

var (
testIters = 10
testURLSize = 10000
)
testURLs := make([]string, 0)
for i := 0; i < testURLSize; i++ {
testURLs = append(testURLs, testURLin[i%3])
}
testIters := 10000

rexes := make([]*regexp.Regexp, 0)
for _, r := range rules {
Expand Down

0 comments on commit dfc8268

Please sign in to comment.