fix: Include whitespaces in extracted tokens (#13738)
Co-authored-by: Travis Patterson <travis.patterson@grafana.com>
na-- and MasslessParticle authored Aug 6, 2024
1 parent da63ca7 commit 7683a79
Showing 2 changed files with 54 additions and 56 deletions.
19 changes: 8 additions & 11 deletions pkg/pattern/tokenization/tokenization.go
@@ -1,7 +1,6 @@
 package tokenization
 
 import (
-	"bytes"
 	"unsafe"
 )
 
@@ -29,15 +28,15 @@ type tokenizer struct {
 	tokens []string
 }
 
-func (t *tokenizer) countOrSaveToken(endTokenPos, skip int) {
+func (t *tokenizer) countOrSaveToken(endTokenPos int) {
 	if t.tokens != nil {
 		// Intentionally written like this and not with append(), so this can
 		// panic if we ever exceed the preallocated slice size, since that means
 		// we have a nasty bug in handleNextToken() below.
 		t.tokens[t.tokenCount] = t.line[t.tpos:endTokenPos]
 	}
 	t.tokenCount++
-	t.tpos = endTokenPos + skip
+	t.tpos = endTokenPos
 }
 
 func (t *tokenizer) handleNextToken() bool {
@@ -55,7 +54,7 @@ func (t *tokenizer) handleNextToken() bool {
 		// outside of a quoted string.
 		case escaped:
 			if curQuotePos < 0 && delimiters[c] {
-				t.countOrSaveToken(p, 1)
+				t.countOrSaveToken(p + 1)
 				return true
 			} else {
 				escaped = false
@@ -89,7 +88,7 @@ func (t *tokenizer) handleNextToken() bool {
 		// If we encounter a delimiter outside of a quote, count or save the
 		// token and skip the delimiter.
 		case delimiters[c]:
-			t.countOrSaveToken(p, 1)
+			t.countOrSaveToken(p + 1)
 			return true
 
 		// Handle likely JSON object keys that have been serialized without
@@ -108,11 +107,11 @@ func (t *tokenizer) handleNextToken() bool {
 		// wasn't a delimiter right before the comma.
 		case t.maybeJSON && p > t.tpos && (c == ':' || c == ',') && p+1 < lineLen:
 			if c == ':' && t.line[p-1] == '"' && !delimiters[t.line[p+1]] {
-				t.countOrSaveToken(p+1, 0)
+				t.countOrSaveToken(p + 1)
 				return true
 			}
 			if c == ',' && t.line[p+1] == '"' {
-				t.countOrSaveToken(p, 0)
+				t.countOrSaveToken(p)
 				return true
 			}
 		}
@@ -126,12 +125,12 @@ func (t *tokenizer) handleNextToken() bool {
 	// unterminated quote and the quote itself as a single token, and continue
 	// fairly normally from there.
 	if curQuotePos > 0 {
-		t.countOrSaveToken(curQuotePos+1, 0)
+		t.countOrSaveToken(curQuotePos + 1)
 		return true
 	}
 
 	if t.tpos < len(t.line) {
-		t.countOrSaveToken(len(t.line), 0)
+		t.countOrSaveToken(len(t.line))
 		return true
 	}
 
@@ -193,8 +192,6 @@ func (t *tokenizer) tokenize() []string {
 }
 
 func PreprocessAndTokenize(content []byte) []string {
-	content = bytes.TrimSpace(content)
-
 	t := tokenizer{rawLine: content, maxTokens: 100} // TODO: parametrize maxTokens
 
 	return t.tokenize()
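The user-visible effect of dropping the `skip` argument is easiest to see side by side. The sketch below is not the Loki tokenizer; it is a minimal stand-in that treats a single space as the only delimiter. It shows the boundary change: `countOrSaveToken(p, 1)` used to end a token at `p` and then skip over the delimiter, while `countOrSaveToken(p + 1)` ends the token one position later, so the whitespace stays inside the token.

```go
package main

import (
	"fmt"
	"strings"
)

func main() {
	line := "ts=2024-08-06 level=info msg=done"

	// Old behavior: the delimiter was skipped, so it appeared in no token.
	oldTokens := strings.Split(line, " ")

	// New behavior: each token keeps its trailing delimiter.
	newTokens := strings.SplitAfter(line, " ")

	fmt.Printf("%q\n", oldTokens) // ["ts=2024-08-06" "level=info" "msg=done"]
	fmt.Printf("%q\n", newTokens) // ["ts=2024-08-06 " "level=info " "msg=done"]
}
```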
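Removing `bytes.TrimSpace` from `PreprocessAndTokenize` points the same way: leading and trailing whitespace now reaches the tokenizer instead of being stripped up front. A plausible motivation (an assumption here, not something the diff states) is that keeping every delimiter inside some token makes tokenization lossless, so the tokens concatenate back to the original line. With the same `strings.SplitAfter` stand-in:

```go
package main

import (
	"fmt"
	"strings"
)

func main() {
	line := "  padded line  "

	// Stand-in tokenizer again: SplitAfter keeps each delimiter attached
	// to the token before it and emits leading/trailing pieces as their
	// own (possibly empty) tokens rather than dropping them.
	tokens := strings.SplitAfter(line, " ")

	// Joining with an empty separator restores the input byte-for-byte,
	// including the leading and trailing spaces TrimSpace used to remove.
	fmt.Println(strings.Join(tokens, "") == line) // true
}
```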
