From 76926c794793141df4c94023715fd2de0cef7c4e Mon Sep 17 00:00:00 2001
From: Janik Rabe <janik@cloudflare.com>
Date: Thu, 13 Jun 2024 15:13:25 +0100
Subject: [PATCH] Fix possible out-of-bounds read in endingToTxtSlice (#1557)

* Update escapedStringOffset to improve readability

This function was, admittedly, a little difficult to follow. This new
version is slightly more verbose, but, in my opinion, easier to
understand.

* Fix possible out-of-bounds read in endingToTxtSlice caused by escapedStringOffset

If the input had a trailing backslash (normally the start of an escape
sequence) with nothing following it, `escapedStringOffset` would return
the length of the input, plus one (!), as the result index, causing an
out-of-bounds read and panic in `endingToTxtSlice`.

Consistent with, e.g., commit 2230854ba97edcf29ac55a1f274e49cec11bf9bb,
I've decided to make this an error since it definitely indicates that
the string isn't valid.

Credit to OSS-Fuzz -- thank you!
---
 scan_rr.go   | 38 ++++++++++++++++++++++++--------------
 scan_test.go | 34 +++++++++++++++++++++++-----------
 2 files changed, 47 insertions(+), 25 deletions(-)

diff --git a/scan_rr.go b/scan_rr.go
index 7d1ade7d87..c1a76995e7 100644
--- a/scan_rr.go
+++ b/scan_rr.go
@@ -55,7 +55,10 @@ func endingToTxtSlice(c *zlexer, errstr string) ([]string, *ParseError) {
 			sx := []string{}
 			p := 0
 			for {
-				i := escapedStringOffset(l.token[p:], 255)
+				i, ok := escapedStringOffset(l.token[p:], 255)
+				if !ok {
+					return nil, &ParseError{err: errstr, lex: l}
+				}
 				if i != -1 && p+i != len(l.token) {
 					sx = append(sx, l.token[p:p+i])
 				} else {
@@ -1919,29 +1922,36 @@ func (rr *APL) parse(c *zlexer, o string) *ParseError {
 
 // escapedStringOffset finds the offset within a string (which may contain escape
 // sequences) that corresponds to a certain byte offset. If the input offset is
-// out of bounds, -1 is returned.
-func escapedStringOffset(s string, byteOffset int) int {
-	if byteOffset == 0 {
-		return 0
+// out of bounds, -1 is returned (which is *not* considered an error).
+func escapedStringOffset(s string, desiredByteOffset int) (int, bool) {
+	if desiredByteOffset == 0 {
+		return 0, true
 	}
 
-	offset := 0
-	for i := 0; i < len(s); i++ {
-		offset += 1
+	currentByteOffset, i := 0, 0
+
+	for i < len(s) {
+		currentByteOffset += 1
 
 		// Skip escape sequences
 		if s[i] != '\\' {
-			// Not an escape sequence; nothing to do.
+			// Single plain byte, not an escape sequence.
+			i++
 		} else if isDDD(s[i+1:]) {
-			i += 3
+			// Skip backslash and DDD.
+			i += 4
+		} else if len(s[i+1:]) < 1 {
+			// No character following the backslash; that's an error.
+			return 0, false
 		} else {
-			i++
+			// Skip backslash and following byte.
+			i += 2
 		}
 
-		if offset >= byteOffset {
-			return i + 1
+		if currentByteOffset >= desiredByteOffset {
+			return i, true
 		}
 	}
 
-	return -1
+	return -1, true
 }
diff --git a/scan_test.go b/scan_test.go
index 580236ea26..c4f7e7f4a9 100644
--- a/scan_test.go
+++ b/scan_test.go
@@ -433,25 +433,37 @@ func TestEscapedStringOffset(t *testing.T) {
 		input          string
 		inputOffset    int
 		expectedOffset int
+		expectedOK     bool
 	}{
-		{"simple string with no escape sequences", 20, 20},
-		{"simple string with no escape sequences", 500, -1},
-		{`\;\088\\\;\120\\`, 0, 0},
-		{`\;\088\\\;\120\\`, 1, 2},
-		{`\;\088\\\;\120\\`, 2, 6},
-		{`\;\088\\\;\120\\`, 3, 8},
-		{`\;\088\\\;\120\\`, 4, 10},
-		{`\;\088\\\;\120\\`, 5, 14},
-		{`\;\088\\\;\120\\`, 6, 16},
-		{`\;\088\\\;\120\\`, 7, -1},
+		{"simple string with no escape sequences", 20, 20, true},
+		{"simple string with no escape sequences", 500, -1, true},
+		{`\;\088\\\;\120\\`, 0, 0, true},
+		{`\;\088\\\;\120\\`, 1, 2, true},
+		{`\;\088\\\;\120\\`, 2, 6, true},
+		{`\;\088\\\;\120\\`, 3, 8, true},
+		{`\;\088\\\;\120\\`, 4, 10, true},
+		{`\;\088\\\;\120\\`, 5, 14, true},
+		{`\;\088\\\;\120\\`, 6, 16, true},
+		{`\;\088\\\;\120\\`, 7, -1, true},
+		{`\`, 3, 0, false},
+		{`a\`, 3, 0, false},
+		{`aa\`, 3, 0, false},
+		{`aaa\`, 3, 3, true},
+		{`aaaa\`, 3, 3, true},
 	}
 	for i, test := range cases {
-		outputOffset := escapedStringOffset(test.input, test.inputOffset)
+		outputOffset, outputOK := escapedStringOffset(test.input, test.inputOffset)
 		if outputOffset != test.expectedOffset {
 			t.Errorf(
 				"Test %d (input %#q offset %d) returned offset %d but expected %d",
 				i, test.input, test.inputOffset, outputOffset, test.expectedOffset,
 			)
 		}
+		if outputOK != test.expectedOK {
+			t.Errorf(
+				"Test %d (input %#q offset %d) returned ok=%t but expected %t",
+				i, test.input, test.inputOffset, outputOK, test.expectedOK,
+			)
+		}
 	}
 }