Skip to content

Commit

Permalink
Merge pull request #144 from kolyshkin/unescape-speedup
Browse files Browse the repository at this point in the history
mountinfo: simpify, speedup, and fix unescape
  • Loading branch information
thaJeztah authored Sep 9, 2024
2 parents 5447519 + 4446de2 commit a9393fd
Show file tree
Hide file tree
Showing 2 changed files with 87 additions and 87 deletions.
91 changes: 32 additions & 59 deletions mountinfo/mountinfo_linux.go
Original file line number Diff line number Diff line change
Expand Up @@ -24,8 +24,6 @@ func GetMountsFromReader(r io.Reader, filter FilterFunc) ([]*Info, error) {
s := bufio.NewScanner(r)
out := []*Info{}
for s.Scan() {
var err error

/*
See http://man7.org/linux/man-pages/man5/proc.5.html
Expand Down Expand Up @@ -85,29 +83,15 @@ func GetMountsFromReader(r io.Reader, filter FilterFunc) ([]*Info, error) {
Parent: toInt(fields[1]),
Major: toInt(major),
Minor: toInt(minor),
Root: unescape(fields[3]),
Mountpoint: unescape(fields[4]),
Options: fields[5],
Optional: strings.Join(fields[6:sepIdx], " "), // zero or more optional fields
FSType: unescape(fields[sepIdx+1]),
Source: unescape(fields[sepIdx+2]),
VFSOptions: fields[sepIdx+3],
}

p.Mountpoint, err = unescape(fields[4])
if err != nil {
return nil, fmt.Errorf("parsing '%s' failed: mount point: %w", fields[4], err)
}
p.FSType, err = unescape(fields[sepIdx+1])
if err != nil {
return nil, fmt.Errorf("parsing '%s' failed: fstype: %w", fields[sepIdx+1], err)
}
p.Source, err = unescape(fields[sepIdx+2])
if err != nil {
return nil, fmt.Errorf("parsing '%s' failed: source: %w", fields[sepIdx+2], err)
}

p.Root, err = unescape(fields[3])
if err != nil {
return nil, fmt.Errorf("parsing '%s' failed: root: %w", fields[3], err)
}

// Run the filter after parsing all fields.
var skip, stop bool
if filter != nil {
Expand Down Expand Up @@ -188,62 +172,51 @@ func PidMountInfo(pid int) ([]*Info, error) {
return GetMountsFromReader(f, nil)
}

// A few specific characters in mountinfo path entries (root and mountpoint)
// are escaped using a backslash followed by a character's ascii code in octal.
// Some characters in some mountinfo fields may be escaped using a backslash
// followed by a three octal digits of the character's ASCII code \NNN, where
// N is 0-7, for example:
//
// space -- as \040
// tab (aka \t) -- as \011
// newline (aka \n) -- as \012
// backslash (aka \\) -- as \134
// hash (aka #) -- as \043
//
// This function converts path from mountinfo back, i.e. it unescapes the above sequences.
func unescape(path string) (string, error) {
// try to avoid copying
// This function converts all such escape sequences back to ASCII, and returns
// the unescaped string.
func unescape(path string) string {
// Try to avoid copying.
if strings.IndexByte(path, '\\') == -1 {
return path, nil
return path
}

// The following code is UTF-8 transparent as it only looks for some
// specific characters (backslash and 0..7) with values < utf8.RuneSelf,
// and everything else is passed through as is.
// specific characters (backslash and 0..7) with values less than
// utf8.RuneSelf, and everything else is passed through as is.
buf := make([]byte, len(path))
bufLen := 0
for i := 0; i < len(path); i++ {
if path[i] != '\\' {
buf[bufLen] = path[i]
bufLen++
continue
}
s := path[i:]
if len(s) < 4 {
// too short
return "", fmt.Errorf("bad escape sequence %q: too short", s)
}
c := s[1]
switch c {
case '0', '1', '2', '3', '4', '5', '6', '7':
v := c - '0'
for j := 2; j < 4; j++ { // one digit already; two more
if s[j] < '0' || s[j] > '7' {
return "", fmt.Errorf("bad escape sequence %q: not a digit", s[:3])
}
x := s[j] - '0'
v = (v << 3) | x
}
if v > 255 {
return "", fmt.Errorf("bad escape sequence %q: out of range" + s[:3])
}
buf[bufLen] = v
bufLen++
c := path[i]
// Look for \NNN, i.e. a backslash followed by three octal
// digits. Maximum value is 177 (equals utf8.RuneSelf-1).
if c == '\\' && i+3 < len(path) &&
(path[i+1] == '0' || path[i+1] == '1') &&
(path[i+2] >= '0' && path[i+2] <= '7') &&
(path[i+3] >= '0' && path[i+3] <= '7') {
// Convert from ASCII to numeric values.
c1 := path[i+1] - '0'
c2 := path[i+2] - '0'
c3 := path[i+3] - '0'
// Each octal digit is three bits, thus the shift value.
c = c1<<6 | c2<<3 | c3
// We read three extra bytes of input.
i += 3
continue
default:
return "", fmt.Errorf("bad escape sequence %q: not a digit" + s[:3])

}
buf[bufLen] = c
bufLen++
}

return string(buf[:bufLen]), nil
return string(buf[:bufLen])
}

// toInt converts a string to an int, and ignores any numbers parsing errors,
Expand Down
83 changes: 55 additions & 28 deletions mountinfo/mountinfo_linux_test.go
Original file line number Diff line number Diff line change
Expand Up @@ -731,43 +731,70 @@ func TestParseMountinfoExtraCases(t *testing.T) {
}

func TestUnescape(t *testing.T) {
// When adding test cases below, be aware that Go interprets \NNN
// inside strings enclosed in double quotes in the same way as the
// function being tested, so:
// - for input: either escape every backslash character (i.e. \\), or
// enclose the whole string in `backticks` so \NNN is passed as-is;
// - for output: write it like "\040", which is identical to " ".
testCases := []struct {
input, output string
isErr bool
}{
{"", "", false},
{"/", "/", false},
{"/some/longer/path", "/some/longer/path", false},
{"/path\\040with\\040spaces", "/path with spaces", false},
{"/path/with\\134backslash", "/path/with\\backslash", false},
{"/tab\\011in/path", "/tab\tin/path", false},
{`/path/"with'quotes`, `/path/"with'quotes`, false},
{`/path/"with'quotes,\040space,\011tab`, `/path/"with'quotes, space, tab`, false},
{`\12`, "", true},
{`\134`, `\`, false},
{`"'"'"'`, `"'"'"'`, false},
{`/\1345`, `/\5`, false},
{`/\12x`, "", true},
{`\0`, "", true},
{`\x`, "", true},
{"\\\\", "", true},
{"", ""},
{"/", "/"},
{"/some/longer/path", "/some/longer/path"},
{`/path\040with\040spaces`, "/path\040with\040spaces"},
{"/path/with\\134backslash", "/path/with\\backslash"},
{"/tab\\011in/path", "/tab\011in/path"},
{`/path/"with'quotes`, `/path/"with'quotes`},
{`/path/"with'quotes,\040space,\011tab`, `/path/"with'quotes, space, tab`},
{`\12`, `\12`}, // Not enough digits.
{`\134`, `\`}, // Backslash.
{`"'"'"'`, `"'"'"'`},
{`/\1345`, `/\5`}, // Backslash with extra digit.
{`/\12x`, `/\12x`},
{`\0`, `\0`}, // Not enough digits.
{`\000\000`, "\000\000"}, // NUL (min allowed ASCII value).
{`\x`, `\x`},
{"\\\\", "\\\\"},
{`\177`, "\177"}, // Max allowed ASCII value.
{`\222`, `\222`}, // Too large value -- not unescaped.
{`Это\040комон\040какой-то`, "Это комон какой-то"}, // Some UTF-8 -- not unescaped.
}

for _, tc := range testCases {
res, err := unescape(tc.input)
if tc.isErr == true {
if err == nil {
t.Errorf("Input %q, want error, got nil", tc.input)
}
// no more checks
continue
}
res := unescape(tc.input)
if res != tc.output {
t.Errorf("Input %q, want %q, got %q", tc.input, tc.output, res)
}
if err != nil {
t.Errorf("Input %q, want nil, got error %v", tc.input, err)
continue
}
}

func BenchmarkUnescape(b *testing.B) {
testCases := []string{
"",
"/",
"/some/longer/path",
"/path\\040with\\040spaces",
"/path/with\\134backslash",
"/tab\\011in/path",
`/path/"with'quotes`,
`/path/"with'quotes,\040space,\011tab`,
`\12`,
`\134`,
`"'"'"'`,
`/\1345`,
`/\12x`,
`\0`,
`\x`,
"\\\\",
}

b.ResetTimer()
b.ReportAllocs()
for i := 0; i < b.N; i++ {
for x := 0; x < len(testCases); x++ {
_ = unescape(testCases[x])
}
}
}

0 comments on commit a9393fd

Please sign in to comment.