moby · thaJeztah · Sep 9, 2024 · Jul 21, 2024 · Jul 26, 2024 · thaJeztah
diff --git a/mountinfo/mountinfo_linux.go b/mountinfo/mountinfo_linux.go
@@ -24,8 +24,6 @@ func GetMountsFromReader(r io.Reader, filter FilterFunc) ([]*Info, error) {
 	s := bufio.NewScanner(r)
 	out := []*Info{}
 	for s.Scan() {
-		var err error
-
 		/*
 		   See http://man7.org/linux/man-pages/man5/proc.5.html
 
@@ -85,29 +83,15 @@ func GetMountsFromReader(r io.Reader, filter FilterFunc) ([]*Info, error) {
 			Parent:     toInt(fields[1]),
 			Major:      toInt(major),
 			Minor:      toInt(minor),
+			Root:       unescape(fields[3]),
+			Mountpoint: unescape(fields[4]),
 			Options:    fields[5],
 			Optional:   strings.Join(fields[6:sepIdx], " "), // zero or more optional fields
+			FSType:     unescape(fields[sepIdx+1]),
+			Source:     unescape(fields[sepIdx+2]),
 			VFSOptions: fields[sepIdx+3],
 		}
 
-		p.Mountpoint, err = unescape(fields[4])
-		if err != nil {
-			return nil, fmt.Errorf("parsing '%s' failed: mount point: %w", fields[4], err)
-		}
-		p.FSType, err = unescape(fields[sepIdx+1])
-		if err != nil {
-			return nil, fmt.Errorf("parsing '%s' failed: fstype: %w", fields[sepIdx+1], err)
-		}
-		p.Source, err = unescape(fields[sepIdx+2])
-		if err != nil {
-			return nil, fmt.Errorf("parsing '%s' failed: source: %w", fields[sepIdx+2], err)
-		}
-
-		p.Root, err = unescape(fields[3])
-		if err != nil {
-			return nil, fmt.Errorf("parsing '%s' failed: root: %w", fields[3], err)
-		}
-
 		// Run the filter after parsing all fields.
 		var skip, stop bool
 		if filter != nil {
@@ -188,62 +172,51 @@ func PidMountInfo(pid int) ([]*Info, error) {
 	return GetMountsFromReader(f, nil)
 }
 
-// A few specific characters in mountinfo path entries (root and mountpoint)
-// are escaped using a backslash followed by a character's ascii code in octal.
+// Some characters in some mountinfo fields may be escaped using a backslash
+// followed by the three octal digits of the character's ASCII code \NNN, where
+// N is 0-7, for example:
 //
 //	space              -- as \040
 //	tab (aka \t)       -- as \011
 //	newline (aka \n)   -- as \012
 //	backslash (aka \\) -- as \134
+//	hash (aka #)       -- as \043
 //
-// This function converts path from mountinfo back, i.e. it unescapes the above sequences.
-func unescape(path string) (string, error) {
-	// try to avoid copying
+// This function converts all such escape sequences back to ASCII, and returns
+// the unescaped string.
+func unescape(path string) string {
+	// Try to avoid copying.
 	if strings.IndexByte(path, '\\') == -1 {
-		return path, nil
+		return path
 	}
 
 	// The following code is UTF-8 transparent as it only looks for some
-	// specific characters (backslash and 0..7) with values < utf8.RuneSelf,
-	// and everything else is passed through as is.
+	// specific characters (backslash and 0..7) with values less than
+	// utf8.RuneSelf, and everything else is passed through as is.
 	buf := make([]byte, len(path))
 	bufLen := 0
 	for i := 0; i < len(path); i++ {
-		if path[i] != '\\' {
-			buf[bufLen] = path[i]
-			bufLen++
-			continue
-		}
-		s := path[i:]
-		if len(s) < 4 {
-			// too short
-			return "", fmt.Errorf("bad escape sequence %q: too short", s)
-		}
-		c := s[1]
-		switch c {
-		case '0', '1', '2', '3', '4', '5', '6', '7':
-			v := c - '0'
-			for j := 2; j < 4; j++ { // one digit already; two more
-				if s[j] < '0' || s[j] > '7' {
-					return "", fmt.Errorf("bad escape sequence %q: not a digit", s[:3])
-				}
-				x := s[j] - '0'
-				v = (v << 3) | x
-			}
-			if v > 255 {
-				return "", fmt.Errorf("bad escape sequence %q: out of range" + s[:3])
-			}
-			buf[bufLen] = v
-			bufLen++
+		c := path[i]
+		// Look for \NNN, i.e. a backslash followed by three octal
+		// digits. Maximum value is octal 177 (equals utf8.RuneSelf-1).
+		if c == '\\' && i+3 < len(path) &&
+			(path[i+1] == '0' || path[i+1] == '1') &&
+			(path[i+2] >= '0' && path[i+2] <= '7') &&
+			(path[i+3] >= '0' && path[i+3] <= '7') {
+			// Convert from ASCII to numeric values.
+			c1 := path[i+1] - '0'
+			c2 := path[i+2] - '0'
+			c3 := path[i+3] - '0'
+			// Each octal digit is three bits, thus the shift value.
+			c = c1<<6 | c2<<3 | c3
+			// We read three extra bytes of input.
 			i += 3
-			continue
-		default:
-			return "", fmt.Errorf("bad escape sequence %q: not a digit" + s[:3])
-
 		}
+		buf[bufLen] = c
+		bufLen++
 	}
 
-	return string(buf[:bufLen]), nil
+	return string(buf[:bufLen])
 }
 
 // toInt converts a string to an int, and ignores any numbers parsing errors,

diff --git a/mountinfo/mountinfo_linux_test.go b/mountinfo/mountinfo_linux_test.go
@@ -731,43 +731,70 @@ func TestParseMountinfoExtraCases(t *testing.T) {
 }
 
 func TestUnescape(t *testing.T) {
+	// When adding test cases below, be aware that Go interprets \NNN
+	// inside strings enclosed in double quotes in the same way as the
+	// function being tested, so:
+	//  - for input: either escape every backslash character (i.e. \\), or
+	//    enclose the whole string in `backticks` so \NNN is passed as-is;
+	//  - for output: write it like "\040", which is identical to " ".
 	testCases := []struct {
 		input, output string
-		isErr         bool
 	}{
-		{"", "", false},
-		{"/", "/", false},
-		{"/some/longer/path", "/some/longer/path", false},
-		{"/path\\040with\\040spaces", "/path with spaces", false},
-		{"/path/with\\134backslash", "/path/with\\backslash", false},
-		{"/tab\\011in/path", "/tab\tin/path", false},
-		{`/path/"with'quotes`, `/path/"with'quotes`, false},
-		{`/path/"with'quotes,\040space,\011tab`, `/path/"with'quotes, space,	tab`, false},
-		{`\12`, "", true},
-		{`\134`, `\`, false},
-		{`"'"'"'`, `"'"'"'`, false},
-		{`/\1345`, `/\5`, false},
-		{`/\12x`, "", true},
-		{`\0`, "", true},
-		{`\x`, "", true},
-		{"\\\\", "", true},
+		{"", ""},
+		{"/", "/"},
+		{"/some/longer/path", "/some/longer/path"},
+		{`/path\040with\040spaces`, "/path\040with\040spaces"},
+		{"/path/with\\134backslash", "/path/with\\backslash"},
+		{"/tab\\011in/path", "/tab\011in/path"},
+		{`/path/"with'quotes`, `/path/"with'quotes`},
+		{`/path/"with'quotes,\040space,\011tab`, `/path/"with'quotes, space,	tab`},
+		{`\12`, `\12`}, // Not enough digits.
+		{`\134`, `\`},  // Backslash.
+		{`"'"'"'`, `"'"'"'`},
+		{`/\1345`, `/\5`}, // Backslash with extra digit.
+		{`/\12x`, `/\12x`},
+		{`\0`, `\0`},             // Not enough digits.
+		{`\000\000`, "\000\000"}, // NUL (min allowed ASCII value).
+		{`\x`, `\x`},
+		{"\\\\", "\\\\"},
+		{`\177`, "\177"}, // Max allowed ASCII value.
+		{`\222`, `\222`}, // Too large value -- not unescaped.
+		{`Это\040комон\040какой-то`, "Это комон какой-то"}, // UTF-8 is passed through as is; \040 is still unescaped.
 	}
 
 	for _, tc := range testCases {
-		res, err := unescape(tc.input)
-		if tc.isErr == true {
-			if err == nil {
-				t.Errorf("Input %q, want error, got nil", tc.input)
-			}
-			// no more checks
-			continue
-		}
+		res := unescape(tc.input)
 		if res != tc.output {
 			t.Errorf("Input %q, want %q, got %q", tc.input, tc.output, res)
 		}
-		if err != nil {
-			t.Errorf("Input %q, want nil, got error %v", tc.input, err)
-			continue
+	}
+}
+
+func BenchmarkUnescape(b *testing.B) {
+	testCases := []string{
+		"",
+		"/",
+		"/some/longer/path",
+		"/path\\040with\\040spaces",
+		"/path/with\\134backslash",
+		"/tab\\011in/path",
+		`/path/"with'quotes`,
+		`/path/"with'quotes,\040space,\011tab`,
+		`\12`,
+		`\134`,
+		`"'"'"'`,
+		`/\1345`,
+		`/\12x`,
+		`\0`,
+		`\x`,
+		"\\\\",
+	}
+
+	b.ResetTimer()
+	b.ReportAllocs()
+	for i := 0; i < b.N; i++ {
+		for x := 0; x < len(testCases); x++ {
+			_ = unescape(testCases[x])
 		}
 	}
 }