Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Improve heredoc parsing to allow more generic shell-words #2213

Merged
merged 1 commit into from
Jul 1, 2021
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
57 changes: 43 additions & 14 deletions frontend/dockerfile/parser/parser.go
Original file line number Diff line number Diff line change
Expand Up @@ -117,7 +117,7 @@ var (
reWhitespace = regexp.MustCompile(`[\t\v\f\r ]+`)
reDirectives = regexp.MustCompile(`^#\s*([a-zA-Z][a-zA-Z0-9]*)\s*=\s*(.+?)\s*$`)
reComment = regexp.MustCompile(`^#.*$`)
reHeredoc = regexp.MustCompile(`^(\d*)<<(-?)(['"]?)([a-zA-Z][a-zA-Z0-9]*)(['"]?)$`)
reHeredoc = regexp.MustCompile(`^(\d*)<<(-?)([^<]*)$`)
reLeadingTabs = regexp.MustCompile(`(?m)^\t+`)
)

Expand Down Expand Up @@ -399,30 +399,57 @@ func Parse(rwc io.Reader) (*Result, error) {
}, withLocation(handleScannerError(scanner.Err()), currentLine, 0)
}

// heredocFromMatch builds a Heredoc from the submatches of a heredoc regex
// match.
//
// It returns (nil, nil) when match is empty, when the text following the
// heredoc operator is empty, or when that text does not lex to exactly one
// shell word. Lexer errors are returned to the caller unchanged.
//
// NOTE(review): this span appears to be a rendered diff with removed and added
// lines interleaved (e.g. both fileDescriptor and fd, both `Name: name` and
// `Name: word`, and an old quote-matching `if` that is never closed). It is
// not compilable as shown; confirm against the real parser.go before relying
// on any single line.
func heredocFromMatch(match []string) (*Heredoc, error) {
// An empty match slice means the input was not a heredoc at all.
if len(match) == 0 {
return nil, nil
}

// The parse error is discarded on purpose: the digit group may be empty
// (the visible reHeredoc patterns use `(\d*)`), which leaves the descriptor
// at 0.
fileDescriptor, _ := strconv.ParseUint(match[1], 10, 0)
fd, _ := strconv.ParseUint(match[1], 10, 0)
// A "-" right after "<<" sets Chomp on the result.
chomp := match[2] == "-"
// NOTE(review): the next three assignments index five capture groups, which
// matches the older reHeredoc pattern; the newer pattern has only three.
quoteOpen := match[3]
name := match[4]
quoteClose := match[5]

// Older logic: any quote around the name disables expansion, and the
// opening and closing quotes must be identical.
expand := true
if quoteOpen != "" || quoteClose != "" {
if quoteOpen != quoteClose {
return nil, errors.New("quoted heredoc quotes do not match")
}
expand = false
// Newer logic: with `^(\d*)<<(-?)([^<]*)$`, group 3 is everything after the
// heredoc operator.
rest := match[3]

// Nothing after the operator (e.g. "<<" or "<<-"): not a heredoc.
if len(rest) == 0 {
return nil, nil
}

shlex := shell.NewLex('\\')
shlex.SkipUnsetEnv = true

// Attempt to parse the heredoc word both with *and* without quotes.
// If there are quotes in one but not the other, then we know that some
// part of the heredoc word is quoted, so we shouldn't expand the content.
shlex.RawQuotes = false
words, err := shlex.ProcessWords(rest, []string{})
if err != nil {
return nil, err
}
// Quick sanity check that rest is a single word; anything else is treated
// as "not a heredoc" rather than as an error.
if len(words) != 1 {
return nil, nil
}

// Second pass over the same input, this time with RawQuotes enabled.
shlex.RawQuotes = true
wordsRaw, err := shlex.ProcessWords(rest, []string{})
if err != nil {
return nil, err
}
// Both passes lex the same input, so a differing word count is reported as
// an internal inconsistency.
if len(wordsRaw) != len(words) {
return nil, fmt.Errorf("internal lexing of heredoc produced inconsistent results: %s", rest)
}

// Count quote characters in each form of the word.
word := words[0]
wordQuoteCount := strings.Count(word, `'`) + strings.Count(word, `"`)
wordRaw := wordsRaw[0]
wordRawQuoteCount := strings.Count(wordRaw, `'`) + strings.Count(wordRaw, `"`)

// Equal counts mean the two passes kept the same number of quote
// characters, i.e. no part of the word was quoted, so the content is
// expanded.
expand := wordQuoteCount == wordRawQuoteCount

return &Heredoc{
Name: name,
Name: word,
Expand: expand,
Chomp: chomp,
FileDescriptor: uint(fileDescriptor),
FileDescriptor: uint(fd),
}, nil
}

Expand All @@ -437,6 +464,8 @@ func MustParseHeredoc(src string) *Heredoc {
func heredocsFromLine(line string) ([]Heredoc, error) {
shlex := shell.NewLex('\\')
shlex.RawQuotes = true
shlex.RawEscapes = true
shlex.SkipUnsetEnv = true
words, _ := shlex.ProcessWords(line, []string{})

var docs []Heredoc
Expand Down
85 changes: 85 additions & 0 deletions frontend/dockerfile/parser/parser_heredoc_test.go
Original file line number Diff line number Diff line change
Expand Up @@ -77,6 +77,33 @@ Y
X
X
Y

RUN <<COMPLEX python3
print('hello world')
COMPLEX

COPY <<file.txt /dest
hello world
file.txt

RUN <<eo'f'
echo foo
eof

RUN <<eo\'f
echo foo
eo'f

RUN <<'e'o\'f
echo foo
eo'f

RUN <<'one two'
echo bar
one two

RUN <<$EOF
$EOF
`)

tests := [][]Heredoc{
Expand Down Expand Up @@ -196,6 +223,62 @@ Y
Expand: true,
},
},
{
// RUN <<COMPLEX python3
{
Name: "COMPLEX",
Content: "print('hello world')\n",
Expand: true,
},
},
{
// COPY <<file.txt /dest
{
Name: "file.txt",
Content: "hello world\n",
Expand: true,
},
},
{
// RUN <<eo'f'
{
Name: "eof",
Content: "echo foo\n",
Expand: false,
},
},
{
// RUN <<eo\'f
{
Name: "eo'f",
Content: "echo foo\n",
Expand: true,
},
},
{
// RUN <<'e'o\'f
{
Name: "eo'f",
Content: "echo foo\n",
Expand: false,
},
},
{
// RUN <<'one two'
{
Name: "one two",
Content: "echo bar\n",
Expand: false,
},
},
{
// RUN <<$EOF
{
Name: "$EOF",
Content: "",
Expand: true,
},
},
}

result, err := Parse(dockerfile)
Expand Down Expand Up @@ -238,6 +321,7 @@ func TestParseHeredocHelpers(t *testing.T) {
"<<-EOF",
"<<-'EOF'",
`<<-"EOF"`,
`<<EO"F"`,
}
invalidHeredocs := []string{
"<<'EOF",
Expand All @@ -252,6 +336,7 @@ func TestParseHeredocHelpers(t *testing.T) {
"<<-",
"<EOF",
"<<<EOF",
"<<EOF sh",
}
for _, src := range notHeredocs {
heredoc, err := ParseHeredoc(src)
Expand Down
11 changes: 11 additions & 0 deletions frontend/dockerfile/shell/lex.go
Original file line number Diff line number Diff line change
Expand Up @@ -20,6 +20,7 @@ import (
type Lex struct {
// escapeToken is the escape character; process copies it into each
// shellWord, where it is compared against scanned runes.
escapeToken rune
// RawQuotes is copied to shellWord.rawQuotes by process. Presumably it
// keeps quote characters in the lexed output — TODO confirm against the
// quote-handling code, which is not visible here.
RawQuotes bool
// RawEscapes is copied to shellWord.rawEscapes by process. When set, the
// escape token itself is emitted into the output as it is scanned, both
// outside quotes and inside double quotes.
RawEscapes bool
// SkipUnsetEnv is copied to shellWord.skipUnsetEnv by process.
// NOTE(review): looks like it leaves references to unset variables
// untouched (the heredoc tests expect the name "$EOF" to survive) — confirm.
SkipUnsetEnv bool
}

Expand Down Expand Up @@ -65,6 +66,7 @@ func (s *Lex) process(word string, env map[string]string) (string, []string, err
escapeToken: s.escapeToken,
skipUnsetEnv: s.SkipUnsetEnv,
rawQuotes: s.RawQuotes,
rawEscapes: s.RawEscapes,
}
sw.scanner.Init(strings.NewReader(word))
return sw.process(word)
Expand All @@ -75,6 +77,7 @@ type shellWord struct {
envs map[string]string
escapeToken rune
rawQuotes bool
rawEscapes bool
skipUnsetEnv bool
}

Expand Down Expand Up @@ -168,6 +171,10 @@ func (sw *shellWord) processStopOn(stopChar rune) (string, []string, error) {
ch = sw.scanner.Next()

if ch == sw.escapeToken {
if sw.rawEscapes {
words.addRawChar(ch)
}

// '\' (default escape token, but ` allowed) escapes, except end of line
ch = sw.scanner.Next()

Expand Down Expand Up @@ -260,6 +267,10 @@ func (sw *shellWord) processDoubleQuote() (string, error) {
default:
ch := sw.scanner.Next()
if ch == sw.escapeToken {
if sw.rawEscapes {
result.WriteRune(ch)
}

switch sw.scanner.Peek() {
case scanner.EOF:
// Ignore \ at end of word
Expand Down