Skip to content

Commit

Permalink
mountinfo: simplify/speedup unescape
Browse files Browse the repository at this point in the history
It has been pointed out that unescape implementation is unnecessarily
complex, and I tend to agree. Plus, we never expect any errors during
unescaping, and if there are any, we can always return the string as is.

So, drop the error return, and simplify the algorithm. Modify the test
case and the benchmark accordingly.

NOTE this also fixes the issue of trying to convert escape sequences
where the character code is >= utf8.RuneSelf (128 decimal, 0200 octal).
Adding those to the output may result in invalid utf-8 strings. The new
code doesn't try to unescape those (and the tests are amended to check
that).

Finally, add some more test cases.

Here's benchstat output:
name               old time/op    new time/op    delta
Unescape-20           851ns ± 1%     358ns ± 3%  -57.94%  (p=0.008 n=5+5)

name               old alloc/op   new alloc/op   delta
Unescape-20            640B ± 0%      255B ± 0%  -60.16%  (p=0.000 n=5+4)

name               old allocs/op  new allocs/op  delta
Unescape-20            31.0 ± 0%      21.0 ± 0%  -32.26%  (p=0.008 n=5+5)

Signed-off-by: Kir Kolyshkin <kolyshkin@gmail.com>
  • Loading branch information
kolyshkin committed Jul 26, 2024
1 parent 0015f62 commit 4733520
Show file tree
Hide file tree
Showing 2 changed files with 57 additions and 89 deletions.
88 changes: 29 additions & 59 deletions mountinfo/mountinfo_linux.go
Original file line number Diff line number Diff line change
Expand Up @@ -24,8 +24,6 @@ func GetMountsFromReader(r io.Reader, filter FilterFunc) ([]*Info, error) {
s := bufio.NewScanner(r)
out := []*Info{}
for s.Scan() {
var err error

/*
See http://man7.org/linux/man-pages/man5/proc.5.html
Expand Down Expand Up @@ -85,29 +83,15 @@ func GetMountsFromReader(r io.Reader, filter FilterFunc) ([]*Info, error) {
Parent: toInt(fields[1]),
Major: toInt(major),
Minor: toInt(minor),
Root: unescape(fields[3]),
Mountpoint: unescape(fields[4]),
Options: fields[5],
Optional: strings.Join(fields[6:sepIdx], " "), // zero or more optional fields
FSType: unescape(fields[sepIdx+1]),
Source: unescape(fields[sepIdx+2]),
VFSOptions: fields[sepIdx+3],
}

p.Mountpoint, err = unescape(fields[4])
if err != nil {
return nil, fmt.Errorf("parsing '%s' failed: mount point: %w", fields[4], err)
}
p.FSType, err = unescape(fields[sepIdx+1])
if err != nil {
return nil, fmt.Errorf("parsing '%s' failed: fstype: %w", fields[sepIdx+1], err)
}
p.Source, err = unescape(fields[sepIdx+2])
if err != nil {
return nil, fmt.Errorf("parsing '%s' failed: source: %w", fields[sepIdx+2], err)
}

p.Root, err = unescape(fields[3])
if err != nil {
return nil, fmt.Errorf("parsing '%s' failed: root: %w", fields[3], err)
}

// Run the filter after parsing all fields.
var skip, stop bool
if filter != nil {
Expand Down Expand Up @@ -188,62 +172,48 @@ func PidMountInfo(pid int) ([]*Info, error) {
return GetMountsFromReader(f, nil)
}

// A few specific characters in mountinfo path entries (root and mountpoint)
// are escaped using a backslash followed by a character's ascii code in octal.
// Some characters in some mountinfo fields may be escaped using a backslash
// followed by a three octal digits of the character's ASCII code \NNN, where
// N is 0-7, for example:
//
// space -- as \040
// tab (aka \t) -- as \011
// newline (aka \n) -- as \012
// backslash (aka \\) -- as \134
// hash (aka #) -- as \043
//
// This function converts path from mountinfo back, i.e. it unescapes the above sequences.
func unescape(path string) (string, error) {
// try to avoid copying
// This function converts all such escape sequences back to ASCII, and returns
// the unescaped string.
func unescape(path string) string {
// Try to avoid copying.
if strings.IndexByte(path, '\\') == -1 {
return path, nil
return path
}

// The following code is UTF-8 transparent as it only looks for some
// specific characters (backslash and 0..7) with values < utf8.RuneSelf,
// and everything else is passed through as is.
// specific characters (backslash and 0..7) with values less than
// utf8.RuneSelf, and everything else is passed through as is.
buf := make([]byte, len(path))
bufLen := 0
for i := 0; i < len(path); i++ {
if path[i] != '\\' {
buf[bufLen] = path[i]
bufLen++
continue
}
s := path[i:]
if len(s) < 4 {
// too short
return "", fmt.Errorf("bad escape sequence %q: too short", s)
}
c := s[1]
switch c {
case '0', '1', '2', '3', '4', '5', '6', '7':
v := c - '0'
for j := 2; j < 4; j++ { // one digit already; two more
if s[j] < '0' || s[j] > '7' {
return "", fmt.Errorf("bad escape sequence %q: not a digit", s[:3])
}
x := s[j] - '0'
v = (v << 3) | x
}
if v > 255 {
return "", fmt.Errorf("bad escape sequence %q: out of range" + s[:3])
}
buf[bufLen] = v
bufLen++
c := path[i]
// Looking for \\ followed by three octal digits,
// maximum value is 177 (less than utf8.RuneSelf).
if c == '\\' && i+3 < len(path) &&
(path[i+1] == '0' || path[i+1] == '1') &&
(path[i+2] >= '0' && path[i+2] <= '7') &&
(path[i+3] >= '0' && path[i+3] <= '7') {
c1 := path[i+1] - '0'
c2 := path[i+2] - '0'
c3 := path[i+3] - '0'
c = c1<<6 | c2<<3 | c3
i += 3
continue
default:
return "", fmt.Errorf("bad escape sequence %q: not a digit" + s[:3])

}
buf[bufLen] = c
bufLen++
}

return string(buf[:bufLen]), nil
return string(buf[:bufLen])
}

// toInt converts a string to an int, and ignores any numbers parsing errors,
Expand Down
58 changes: 28 additions & 30 deletions mountinfo/mountinfo_linux_test.go
Original file line number Diff line number Diff line change
Expand Up @@ -733,42 +733,40 @@ func TestParseMountinfoExtraCases(t *testing.T) {
func TestUnescape(t *testing.T) {
testCases := []struct {
input, output string
isErr bool
}{
{"", "", false},
{"/", "/", false},
{"/some/longer/path", "/some/longer/path", false},
{"/path\\040with\\040spaces", "/path with spaces", false},
{"/path/with\\134backslash", "/path/with\\backslash", false},
{"/tab\\011in/path", "/tab\tin/path", false},
{`/path/"with'quotes`, `/path/"with'quotes`, false},
{`/path/"with'quotes,\040space,\011tab`, `/path/"with'quotes, space, tab`, false},
{`\12`, "", true},
{`\134`, `\`, false},
{`"'"'"'`, `"'"'"'`, false},
{`/\1345`, `/\5`, false},
{`/\12x`, "", true},
{`\0`, "", true},
{`\x`, "", true},
{"\\\\", "", true},
// NOTE Go interprets \NNN inside double quotes exactly
// the same way as the function being tested, so:
// - for input strings, we must either escape all backslash
// characters ("\\") or use backticks `\`;
// - for output strings, we can write something like "\040",
// which is identical to " ".
{"", ""},
{"/", "/"},
{"/some/longer/path", "/some/longer/path"},
{`/path\040with\040spaces`, "/path\040with\040spaces"},
{"/path/with\\134backslash", "/path/with\\backslash"},
{"/tab\\011in/path", "/tab\011in/path"},
{`/path/"with'quotes`, `/path/"with'quotes`},
{`/path/"with'quotes,\040space,\011tab`, `/path/"with'quotes, space, tab`},
{`\12`, `\12`}, // Not enough digits.
{`\134`, `\`}, // Backslash.
{`"'"'"'`, `"'"'"'`},
{`/\1345`, `/\5`}, // Backslash with extra digit.
{`/\12x`, `/\12x`},
{`\0`, `\0`}, // Not enough digits.
{`\000\000`, "\000\000"}, // NUL (min allowed ASCII value).
{`\x`, `\x`},
{"\\\\", "\\\\"},
{`\177`, "\177"}, // Max allowed ASCII value.
{`\222`, `\222`}, // Too large value.
{`Это\040камон\040какой-то`, "Это камон какой-то"}, // Some UTF-8.
}

for _, tc := range testCases {
res, err := unescape(tc.input)
if tc.isErr == true {
if err == nil {
t.Errorf("Input %q, want error, got nil", tc.input)
}
// no more checks
continue
}
res := unescape(tc.input)
if res != tc.output {
t.Errorf("Input %q, want %q, got %q", tc.input, tc.output, res)
}
if err != nil {
t.Errorf("Input %q, want nil, got error %v", tc.input, err)
continue
}
}
}

Expand Down Expand Up @@ -796,7 +794,7 @@ func BenchmarkUnescape(b *testing.B) {
b.ReportAllocs()
for i := 0; i < b.N; i++ {
for x := 0; x < len(testCases); x++ {
_, _ = unescape(testCases[x])
_ = unescape(testCases[x])
}
}
}

0 comments on commit 4733520

Please sign in to comment.