diff --git a/internal/txtar/archive.go b/internal/txtar/archive.go new file mode 100644 index 0000000..c384f33 --- /dev/null +++ b/internal/txtar/archive.go @@ -0,0 +1,140 @@ +// Copyright 2018 The Go Authors. All rights reserved. +// Use of this source code is governed by a BSD-style +// license that can be found in the LICENSE file. + +// Package txtar implements a trivial text-based file archive format. +// +// The goals for the format are: +// +// - be trivial enough to create and edit by hand. +// - be able to store trees of text files describing go command test cases. +// - diff nicely in git history and code reviews. +// +// Non-goals include being a completely general archive format, +// storing binary data, storing file modes, storing special files like +// symbolic links, and so on. +// +// Txtar format +// +// A txtar archive is zero or more comment lines and then a sequence of file entries. +// Each file entry begins with a file marker line of the form "-- FILENAME --" +// and is followed by zero or more file content lines making up the file data. +// The comment or file content ends at the next file marker line. +// The file marker line must begin with the three-byte sequence "-- " +// and end with the three-byte sequence " --", but the enclosed +// file name can be surrounded by additional white space, +// all of which is stripped. +// +// If the txtar file is missing a trailing newline on the final line, +// parsers should consider a final newline to be present anyway. +// +// There are no possible syntax errors in a txtar archive. +package txtar + +import ( + "bytes" + "fmt" + "io/ioutil" + "strings" +) + +// An Archive is a collection of files. +type Archive struct { + Comment []byte + Files []File +} + +// A File is a single file in an archive. +type File struct { + Name string // name of file ("foo/bar.txt") + Data []byte // text content of file +} + +// Format returns the serialized form of an Archive. 
+// It is assumed that the Archive data structure is well-formed: +// a.Comment and all a.Files[i].Data contain no file marker lines, +// and every a.Files[i].Name is non-empty. +func Format(a *Archive) []byte { + var buf bytes.Buffer + buf.Write(fixNL(a.Comment)) + for _, f := range a.Files { + fmt.Fprintf(&buf, "-- %s --\n", f.Name) + buf.Write(fixNL(f.Data)) + } + return buf.Bytes() +} + +// ParseFile parses the named file as an archive. +func ParseFile(file string) (*Archive, error) { + data, err := ioutil.ReadFile(file) + if err != nil { + return nil, err + } + return Parse(data), nil +} + +// Parse parses the serialized form of an Archive. +// The returned Archive holds slices of data. +func Parse(data []byte) *Archive { + a := new(Archive) + var name string + a.Comment, name, data = findFileMarker(data) + for name != "" { + f := File{name, nil} + f.Data, name, data = findFileMarker(data) + a.Files = append(a.Files, f) + } + return a +} + +var ( + newlineMarker = []byte("\n-- ") + marker = []byte("-- ") + markerEnd = []byte(" --") +) + +// findFileMarker finds the next file marker in data, +// extracts the file name, and returns the data before the marker, +// the file name, and the data after the marker. +// If there is no next marker, findFileMarker returns before = fixNL(data), name = "", after = nil. +func findFileMarker(data []byte) (before []byte, name string, after []byte) { + var i int + for { + if name, after = isMarker(data[i:]); name != "" { + return data[:i], name, after + } + j := bytes.Index(data[i:], newlineMarker) + if j < 0 { + return fixNL(data), "", nil + } + i += j + 1 // positioned at start of new possible marker + } +} + +// isMarker checks whether data begins with a file marker line. +// If so, it returns the name from the line and the data after the line. +// Otherwise it returns name == "" with an unspecified after. 
+func isMarker(data []byte) (name string, after []byte) { + if !bytes.HasPrefix(data, marker) { + return "", nil + } + if i := bytes.IndexByte(data, '\n'); i >= 0 { + data, after = data[:i], data[i+1:] + } + if !bytes.HasSuffix(data, markerEnd) { + return "", nil + } + return strings.TrimSpace(string(data[len(marker) : len(data)-len(markerEnd)])), after +} + +// If data is empty or ends in \n, fixNL returns data. +// Otherwise fixNL returns a new slice consisting of data with a final \n added. +func fixNL(data []byte) []byte { + if len(data) == 0 || data[len(data)-1] == '\n' { + return data + } + d := make([]byte, len(data)+1) + copy(d, data) + d[len(data)] = '\n' + return d +} diff --git a/internal/txtar/archive_test.go b/internal/txtar/archive_test.go new file mode 100644 index 0000000..3f734f6 --- /dev/null +++ b/internal/txtar/archive_test.go @@ -0,0 +1,67 @@ +// Copyright 2018 The Go Authors. All rights reserved. +// Use of this source code is governed by a BSD-style +// license that can be found in the LICENSE file. + +package txtar + +import ( + "bytes" + "fmt" + "reflect" + "testing" +) + +var tests = []struct { + name string + text string + parsed *Archive +}{ + { + name: "basic", + text: `comment1 +comment2 +-- file1 -- +File 1 text. +-- foo --- +More file 1 text. +-- file 2 -- +File 2 text. 
+-- empty -- +-- noNL -- +hello world`, + parsed: &Archive{ + Comment: []byte("comment1\ncomment2\n"), + Files: []File{ + {"file1", []byte("File 1 text.\n-- foo ---\nMore file 1 text.\n")}, + {"file 2", []byte("File 2 text.\n")}, + {"empty", []byte{}}, + {"noNL", []byte("hello world\n")}, + }, + }, + }, +} + +func Test(t *testing.T) { + for _, tt := range tests { + t.Run(tt.name, func(t *testing.T) { + a := Parse([]byte(tt.text)) + if !reflect.DeepEqual(a, tt.parsed) { + t.Fatalf("Parse: wrong output:\nhave:\n%s\nwant:\n%s", shortArchive(a), shortArchive(tt.parsed)) + } + text := Format(a) + a = Parse(text) + if !reflect.DeepEqual(a, tt.parsed) { + t.Fatalf("Parse after Format: wrong output:\nhave:\n%s\nwant:\n%s", shortArchive(a), shortArchive(tt.parsed)) + } + }) + } +} + +func shortArchive(a *Archive) string { + var buf bytes.Buffer + fmt.Fprintf(&buf, "comment: %q\n", a.Comment) + for _, f := range a.Files { + fmt.Fprintf(&buf, "file %q: %q\n", f.Name, f.Data) + } + return buf.String() +} diff --git a/zip/testdata/create/bad_file_path.txt b/zip/testdata/create/bad_file_path.txt new file mode 100644 index 0000000..f905e1f --- /dev/null +++ b/zip/testdata/create/bad_file_path.txt @@ -0,0 +1,5 @@ +path=example.com/m +version=v1.0.0 +wantErr=malformed file path "bad.go'": invalid char '\'' +-- bad.go' -- +package bad diff --git a/zip/testdata/create/bad_gomod_case.txt b/zip/testdata/create/bad_gomod_case.txt new file mode 100644 index 0000000..0a05278 --- /dev/null +++ b/zip/testdata/create/bad_gomod_case.txt @@ -0,0 +1,5 @@ +path=example.com/m +version=v1.0.0 +wantErr=found file named GO.MOD, want all lower-case go.mod +-- GO.MOD -- +module example.com/m diff --git a/zip/testdata/create/bad_mod_path.txt b/zip/testdata/create/bad_mod_path.txt new file mode 100644 index 0000000..6d2b8a5 --- /dev/null +++ b/zip/testdata/create/bad_mod_path.txt @@ -0,0 +1,3 @@ +path=cache +version=v1.0.0 +wantErr=missing dot in first path element diff --git 
a/zip/testdata/create/bad_mod_path_version_suffix.txt b/zip/testdata/create/bad_mod_path_version_suffix.txt new file mode 100644 index 0000000..bcc18e4 --- /dev/null +++ b/zip/testdata/create/bad_mod_path_version_suffix.txt @@ -0,0 +1,3 @@ +path=example.com/m +version=v2.0.0 +wantErr=invalid version: should be v0 or v1, not v2 diff --git a/zip/testdata/create/bad_version.txt b/zip/testdata/create/bad_version.txt new file mode 100644 index 0000000..f18ca44 --- /dev/null +++ b/zip/testdata/create/bad_version.txt @@ -0,0 +1,3 @@ +path=example.com/m +version=v1.0.0+bad +wantErr=version "v1.0.0+bad" is not canonical (should be "v1.0.0") diff --git a/zip/testdata/create/dup_file.txt b/zip/testdata/create/dup_file.txt new file mode 100644 index 0000000..a97eda9 --- /dev/null +++ b/zip/testdata/create/dup_file.txt @@ -0,0 +1,7 @@ +path=example.com/m +version=v1.0.0 +wantErr=multiple entries for file "dup.go" +-- dup.go -- +package d1 +-- dup.go -- +package d2 diff --git a/zip/testdata/create/dup_file_and_dir.txt b/zip/testdata/create/dup_file_and_dir.txt new file mode 100644 index 0000000..9e73705 --- /dev/null +++ b/zip/testdata/create/dup_file_and_dir.txt @@ -0,0 +1,7 @@ +path=example.com/m +version=v1.0.0 +wantErr=entry "a.go" is both a file and a directory +-- a.go -- +package a +-- a.go/b.go -- +package b diff --git a/zip/testdata/create/empty.txt b/zip/testdata/create/empty.txt new file mode 100644 index 0000000..3e939c1 --- /dev/null +++ b/zip/testdata/create/empty.txt @@ -0,0 +1,3 @@ +path=example.com/empty +version=v1.0.0 +hash=h1:47DEQpj8HBSa+/TImW+5JCeuQeRkm5NMpJWZG3hSuFU= diff --git a/zip/testdata/create/exclude_cap_go_mod_submodule.txt b/zip/testdata/create/exclude_cap_go_mod_submodule.txt new file mode 100644 index 0000000..954f7be --- /dev/null +++ b/zip/testdata/create/exclude_cap_go_mod_submodule.txt @@ -0,0 +1,9 @@ +path=example.com/m +version=v1.0.0 +hash=h1:xctQQey8/y7IcBjFZDP/onWLSXhlqcsC3i1fgSdpMHk= +-- a.go -- +package a +-- b/GO.MOD -- +MODULE 
EXAMPLE.COM/M/B +-- b/b.go -- +package b diff --git a/zip/testdata/create/exclude_submodule.txt b/zip/testdata/create/exclude_submodule.txt new file mode 100644 index 0000000..91069da --- /dev/null +++ b/zip/testdata/create/exclude_submodule.txt @@ -0,0 +1,11 @@ +path=example.com/m +version=v1.0.0 +hash=h1:XduFAgX/GaspZa8Jv4pfzoGEzNaU/r88PiCunijw5ok= +-- go.mod -- +module example.com/m + +go 1.13 +-- sub/go.mod -- +module example.com/m/sub +-- sub/x.go' -- +invalid name, but this shouldn't be read diff --git a/zip/testdata/create/exclude_vendor.txt b/zip/testdata/create/exclude_vendor.txt new file mode 100644 index 0000000..79b2c08 --- /dev/null +++ b/zip/testdata/create/exclude_vendor.txt @@ -0,0 +1,14 @@ +path=example.com/m +version=v1.0.0 +hash=h1:5u93LDLN0Me+NGfZtRpA5mHxY8svfykHpq4CMSaBZyc= +-- go.mod -- +module example.com/m + +go 1.13 +-- vendor/modules.txt -- +included +see comment in isVendoredPackage and golang.org/issue/31562. +-- vendor/example.com/x/x.go -- +excluded +-- sub/vendor/sub.txt -- +excluded diff --git a/zip/testdata/create/file_case_conflict.txt b/zip/testdata/create/file_case_conflict.txt new file mode 100644 index 0000000..68623e8 --- /dev/null +++ b/zip/testdata/create/file_case_conflict.txt @@ -0,0 +1,7 @@ +path=example.com/m +version=v1.0.0 +wantErr=case-insensitive file name collision: "m.go" and "M.GO" +-- m.go -- +package m +-- M.GO -- +package m diff --git a/zip/testdata/create/go_mod_dir.txt b/zip/testdata/create/go_mod_dir.txt new file mode 100644 index 0000000..4bce7f6 --- /dev/null +++ b/zip/testdata/create/go_mod_dir.txt @@ -0,0 +1,5 @@ +path=example.com/m +version=v1.0.0 +hash=h1:Mun5l9cBlDnnV6JasTpio2aZJSbFj++h+814mnKC/OM= +-- go.mod/a.go -- +package a diff --git a/zip/testdata/create/invalid_utf8_mod_path.txt b/zip/testdata/create/invalid_utf8_mod_path.txt new file mode 100644 index 0000000..425fb36 --- /dev/null +++ b/zip/testdata/create/invalid_utf8_mod_path.txt @@ -0,0 +1,3 @@ +path=ÿ # 0xFF +version=v1.0.0 
+wantErr=malformed module path "\xff": invalid UTF-8 diff --git a/zip/testdata/create/simple.txt b/zip/testdata/create/simple.txt new file mode 100644 index 0000000..52e83c2 --- /dev/null +++ b/zip/testdata/create/simple.txt @@ -0,0 +1,22 @@ +path=example.com/m +version=v1.0.0 +hash=h1:tpqYOOmuilagXzyqoJ3roUjp8gneQeTv5YVpL6BG7/k= +-- go.mod -- +module example.com/m + +go 1.13 +-- m.go -- +package m + +func Foo() int { return 42 } +-- cmd/hello/hello.go -- +package main + +import ( + "fmt" + "example.com/m" +) + +func main() { + fmt.Println(m.Foo()) +} diff --git a/zip/testdata/create_from_dir/bad_file_path.txt b/zip/testdata/create_from_dir/bad_file_path.txt new file mode 100644 index 0000000..f905e1f --- /dev/null +++ b/zip/testdata/create_from_dir/bad_file_path.txt @@ -0,0 +1,5 @@ +path=example.com/m +version=v1.0.0 +wantErr=malformed file path "bad.go'": invalid char '\'' +-- bad.go' -- +package bad diff --git a/zip/testdata/create_from_dir/bad_gomod_case.txt b/zip/testdata/create_from_dir/bad_gomod_case.txt new file mode 100644 index 0000000..0a05278 --- /dev/null +++ b/zip/testdata/create_from_dir/bad_gomod_case.txt @@ -0,0 +1,5 @@ +path=example.com/m +version=v1.0.0 +wantErr=found file named GO.MOD, want all lower-case go.mod +-- GO.MOD -- +module example.com/m diff --git a/zip/testdata/create_from_dir/bad_mod_path.txt b/zip/testdata/create_from_dir/bad_mod_path.txt new file mode 100644 index 0000000..6d2b8a5 --- /dev/null +++ b/zip/testdata/create_from_dir/bad_mod_path.txt @@ -0,0 +1,3 @@ +path=cache +version=v1.0.0 +wantErr=missing dot in first path element diff --git a/zip/testdata/create_from_dir/bad_mod_path_version_suffix.txt b/zip/testdata/create_from_dir/bad_mod_path_version_suffix.txt new file mode 100644 index 0000000..bcc18e4 --- /dev/null +++ b/zip/testdata/create_from_dir/bad_mod_path_version_suffix.txt @@ -0,0 +1,3 @@ +path=example.com/m +version=v2.0.0 +wantErr=invalid version: should be v0 or v1, not v2 diff --git 
a/zip/testdata/create_from_dir/bad_version.txt b/zip/testdata/create_from_dir/bad_version.txt new file mode 100644 index 0000000..f18ca44 --- /dev/null +++ b/zip/testdata/create_from_dir/bad_version.txt @@ -0,0 +1,3 @@ +path=example.com/m +version=v1.0.0+bad +wantErr=version "v1.0.0+bad" is not canonical (should be "v1.0.0") diff --git a/zip/testdata/create_from_dir/empty.txt b/zip/testdata/create_from_dir/empty.txt new file mode 100644 index 0000000..3e939c1 --- /dev/null +++ b/zip/testdata/create_from_dir/empty.txt @@ -0,0 +1,3 @@ +path=example.com/empty +version=v1.0.0 +hash=h1:47DEQpj8HBSa+/TImW+5JCeuQeRkm5NMpJWZG3hSuFU= diff --git a/zip/testdata/create_from_dir/exclude_submodule.txt b/zip/testdata/create_from_dir/exclude_submodule.txt new file mode 100644 index 0000000..91069da --- /dev/null +++ b/zip/testdata/create_from_dir/exclude_submodule.txt @@ -0,0 +1,11 @@ +path=example.com/m +version=v1.0.0 +hash=h1:XduFAgX/GaspZa8Jv4pfzoGEzNaU/r88PiCunijw5ok= +-- go.mod -- +module example.com/m + +go 1.13 +-- sub/go.mod -- +module example.com/m/sub +-- sub/x.go' -- +invalid name, but this shouldn't be read diff --git a/zip/testdata/create_from_dir/exclude_vendor.txt b/zip/testdata/create_from_dir/exclude_vendor.txt new file mode 100644 index 0000000..79b2c08 --- /dev/null +++ b/zip/testdata/create_from_dir/exclude_vendor.txt @@ -0,0 +1,14 @@ +path=example.com/m +version=v1.0.0 +hash=h1:5u93LDLN0Me+NGfZtRpA5mHxY8svfykHpq4CMSaBZyc= +-- go.mod -- +module example.com/m + +go 1.13 +-- vendor/modules.txt -- +included +see comment in isVendoredPackage and golang.org/issue/31562. 
+-- vendor/example.com/x/x.go -- +excluded +-- sub/vendor/sub.txt -- +excluded diff --git a/zip/testdata/create_from_dir/go_mod_dir.txt b/zip/testdata/create_from_dir/go_mod_dir.txt new file mode 100644 index 0000000..4bce7f6 --- /dev/null +++ b/zip/testdata/create_from_dir/go_mod_dir.txt @@ -0,0 +1,5 @@ +path=example.com/m +version=v1.0.0 +hash=h1:Mun5l9cBlDnnV6JasTpio2aZJSbFj++h+814mnKC/OM= +-- go.mod/a.go -- +package a diff --git a/zip/testdata/create_from_dir/invalid_utf8_mod_path.txt b/zip/testdata/create_from_dir/invalid_utf8_mod_path.txt new file mode 100644 index 0000000..425fb36 --- /dev/null +++ b/zip/testdata/create_from_dir/invalid_utf8_mod_path.txt @@ -0,0 +1,3 @@ +path=ÿ # 0xFF +version=v1.0.0 +wantErr=malformed module path "\xff": invalid UTF-8 diff --git a/zip/testdata/create_from_dir/simple.txt b/zip/testdata/create_from_dir/simple.txt new file mode 100644 index 0000000..52e83c2 --- /dev/null +++ b/zip/testdata/create_from_dir/simple.txt @@ -0,0 +1,22 @@ +path=example.com/m +version=v1.0.0 +hash=h1:tpqYOOmuilagXzyqoJ3roUjp8gneQeTv5YVpL6BG7/k= +-- go.mod -- +module example.com/m + +go 1.13 +-- m.go -- +package m + +func Foo() int { return 42 } +-- cmd/hello/hello.go -- +package main + +import ( + "fmt" + "example.com/m" +) + +func main() { + fmt.Println(m.Foo()) +} diff --git a/zip/testdata/unzip/bad_file_path.txt b/zip/testdata/unzip/bad_file_path.txt new file mode 100644 index 0000000..df1d1be --- /dev/null +++ b/zip/testdata/unzip/bad_file_path.txt @@ -0,0 +1,5 @@ +path=example.com/m +version=v1.0.0 +wantErr=malformed file path "bad.go'": invalid char '\'' +-- example.com/m@v1.0.0/bad.go' -- +package bad diff --git a/zip/testdata/unzip/bad_gomod_case.txt b/zip/testdata/unzip/bad_gomod_case.txt new file mode 100644 index 0000000..9c40d51 --- /dev/null +++ b/zip/testdata/unzip/bad_gomod_case.txt @@ -0,0 +1,5 @@ +path=example.com/m +version=v1.0.0 +wantErr=found file named example.com/m@v1.0.0/GO.MOD, want all lower-case go.mod +-- 
example.com/m@v1.0.0/GO.MOD -- +module example.com/m diff --git a/zip/testdata/unzip/bad_mod_path.txt b/zip/testdata/unzip/bad_mod_path.txt new file mode 100644 index 0000000..6d2b8a5 --- /dev/null +++ b/zip/testdata/unzip/bad_mod_path.txt @@ -0,0 +1,3 @@ +path=cache +version=v1.0.0 +wantErr=missing dot in first path element diff --git a/zip/testdata/unzip/bad_mod_path_version_suffix.txt b/zip/testdata/unzip/bad_mod_path_version_suffix.txt new file mode 100644 index 0000000..bcc18e4 --- /dev/null +++ b/zip/testdata/unzip/bad_mod_path_version_suffix.txt @@ -0,0 +1,3 @@ +path=example.com/m +version=v2.0.0 +wantErr=invalid version: should be v0 or v1, not v2 diff --git a/zip/testdata/unzip/bad_submodule.txt b/zip/testdata/unzip/bad_submodule.txt new file mode 100644 index 0000000..3d35010 --- /dev/null +++ b/zip/testdata/unzip/bad_submodule.txt @@ -0,0 +1,9 @@ +path=example.com/m +version=v1.0.0 +wantErr=found go.mod file not in module root directory +-- example.com/m@v1.0.0/go.mod -- +module example.com/m + +go 1.13 +-- example.com/m@v1.0.0/sub/go.mod -- +module example.com/m/sub diff --git a/zip/testdata/unzip/bad_version.txt b/zip/testdata/unzip/bad_version.txt new file mode 100644 index 0000000..f18ca44 --- /dev/null +++ b/zip/testdata/unzip/bad_version.txt @@ -0,0 +1,3 @@ +path=example.com/m +version=v1.0.0+bad +wantErr=version "v1.0.0+bad" is not canonical (should be "v1.0.0") diff --git a/zip/testdata/unzip/cap_go_mod_not_submodule.txt b/zip/testdata/unzip/cap_go_mod_not_submodule.txt new file mode 100644 index 0000000..a02bee4 --- /dev/null +++ b/zip/testdata/unzip/cap_go_mod_not_submodule.txt @@ -0,0 +1,9 @@ +path=example.com/m +version=v1.0.0 +wantErr=found go.mod file not in module root directory +-- example.com/m@v1.0.0/a.go -- +package a +-- example.com/m@v1.0.0/b/GO.MOD -- +MODULE EXAMPLE.COM/M/B +-- example.com/m@v1.0.0/b/b.go -- +package b diff --git a/zip/testdata/unzip/dup_file.txt b/zip/testdata/unzip/dup_file.txt new file mode 100644 index 
0000000..7997e2b --- /dev/null +++ b/zip/testdata/unzip/dup_file.txt @@ -0,0 +1,7 @@ +path=example.com/m +version=v1.0.0 +wantErr=multiple entries for file "dup.go" +-- example.com/m@v1.0.0/dup.go -- +package d1 +-- example.com/m@v1.0.0/dup.go -- +package d2 diff --git a/zip/testdata/unzip/dup_file_and_dir.txt b/zip/testdata/unzip/dup_file_and_dir.txt new file mode 100644 index 0000000..f16556e --- /dev/null +++ b/zip/testdata/unzip/dup_file_and_dir.txt @@ -0,0 +1,7 @@ +path=example.com/m +version=v1.0.0 +wantErr=entry "a.go" is both a file and a directory +-- example.com/m@v1.0.0/a.go -- +package a +-- example.com/m@v1.0.0/a.go/b.go -- +package b diff --git a/zip/testdata/unzip/empty.txt b/zip/testdata/unzip/empty.txt new file mode 100644 index 0000000..3e939c1 --- /dev/null +++ b/zip/testdata/unzip/empty.txt @@ -0,0 +1,3 @@ +path=example.com/empty +version=v1.0.0 +hash=h1:47DEQpj8HBSa+/TImW+5JCeuQeRkm5NMpJWZG3hSuFU= diff --git a/zip/testdata/unzip/file_case_conflict.txt b/zip/testdata/unzip/file_case_conflict.txt new file mode 100644 index 0000000..44d4c14 --- /dev/null +++ b/zip/testdata/unzip/file_case_conflict.txt @@ -0,0 +1,7 @@ +path=example.com/m +version=v1.0.0 +wantErr=case-insensitive file name collision: "m.go" and "M.GO" +-- example.com/m@v1.0.0/m.go -- +package m +-- example.com/m@v1.0.0/M.GO -- +package m diff --git a/zip/testdata/unzip/go_mod_dir.txt b/zip/testdata/unzip/go_mod_dir.txt new file mode 100644 index 0000000..c0616c5 --- /dev/null +++ b/zip/testdata/unzip/go_mod_dir.txt @@ -0,0 +1,5 @@ +path=example.com/m +version=v1.0.0 +hash=h1:Mun5l9cBlDnnV6JasTpio2aZJSbFj++h+814mnKC/OM= +-- example.com/m@v1.0.0/go.mod/a.go -- +package a diff --git a/zip/testdata/unzip/invalid_utf8_mod_path.txt b/zip/testdata/unzip/invalid_utf8_mod_path.txt new file mode 100644 index 0000000..425fb36 --- /dev/null +++ b/zip/testdata/unzip/invalid_utf8_mod_path.txt @@ -0,0 +1,3 @@ +path=ÿ # 0xFF +version=v1.0.0 +wantErr=malformed module path "\xff": invalid UTF-8 diff 
--git a/zip/testdata/unzip/prefix_only.txt b/zip/testdata/unzip/prefix_only.txt new file mode 100644 index 0000000..7c9f252 --- /dev/null +++ b/zip/testdata/unzip/prefix_only.txt @@ -0,0 +1,6 @@ +path=example.com/m +version=v1.0.0 +wantErr=unexpected file name example.com/m@v1.0.0 +-- example.com/m@v1.0.0 -- +-- example.com/m@v1.0.0/go.mod -- +module example.com/m diff --git a/zip/testdata/unzip/simple.txt b/zip/testdata/unzip/simple.txt new file mode 100644 index 0000000..c92b394 --- /dev/null +++ b/zip/testdata/unzip/simple.txt @@ -0,0 +1,22 @@ +path=example.com/m +version=v1.0.0 +hash=h1:tpqYOOmuilagXzyqoJ3roUjp8gneQeTv5YVpL6BG7/k= +-- example.com/m@v1.0.0/go.mod -- +module example.com/m + +go 1.13 +-- example.com/m@v1.0.0/m.go -- +package m + +func Foo() int { return 42 } +-- example.com/m@v1.0.0/cmd/hello/hello.go -- +package main + +import ( + "fmt" + "example.com/m" +) + +func main() { + fmt.Println(m.Foo()) +} diff --git a/zip/zip.go b/zip/zip.go new file mode 100644 index 0000000..9d45016 --- /dev/null +++ b/zip/zip.go @@ -0,0 +1,560 @@ +// Copyright 2019 The Go Authors. All rights reserved. +// Use of this source code is governed by a BSD-style +// license that can be found in the LICENSE file. + +// Package zip provides functions for creating and extracting module zip files. +// +// Module zip files have several restrictions listed below. These are necessary +// to ensure that module zip files can be extracted consistently on supported +// platforms and file systems. +// +// - All file paths within a zip file must start with "<module>@<version>/", +// where "<module>" is the module path and "<version>" is the version. +// The module path must be valid (see golang.org/x/mod/module.CheckPath). +// The version must be valid and canonical (see +// golang.org/x/mod/module.CanonicalVersion). The path must have a major +// version suffix consistent with the version (see +// golang.org/x/mod/module.Check). 
The part of the file path after the +// "<module>@<version>/" prefix must be valid (see +// golang.org/x/mod/module.CheckFilePath). +// +// - No two file paths may be equal under Unicode case-folding (see +// strings.EqualFold). +// +// - A go.mod file may or may not appear in the top-level directory. If present, +// it must be named "go.mod", not any other case. Files named "go.mod" +// are not allowed in any other directory. +// +// - The total size in bytes of a module zip file may be at most MaxZipFile +// bytes (500 MiB). The total uncompressed size of the files within the +// zip may also be at most MaxZipFile bytes. +// +// - Each file's uncompressed size must match its declared 64-bit uncompressed +// size in the zip file header. +// +// - If the zip contains files named "<module>@<version>/go.mod" or +// "<module>@<version>/LICENSE", their sizes in bytes may be at most +// MaxGoMod or MaxLICENSE, respectively (both are 16 MiB). +// +// - Empty directories are ignored. File permissions and timestamps are also +// ignored. +// +// - Symbolic links and other irregular files are not allowed. +// +// Note that this package does not provide hashing functionality. See +// golang.org/x/mod/sumdb/dirhash. +package zip + +import ( + "archive/zip" + "bytes" + "fmt" + "io" + "io/ioutil" + "os" + "path" + "path/filepath" + "strings" + "unicode" + "unicode/utf8" + + "golang.org/x/mod/module" +) + +const ( + // MaxZipFile is the maximum size in bytes of a module zip file. The + // go command will report an error if either the zip file or its extracted + // content is larger than this. + MaxZipFile = 500 << 20 + + // MaxGoMod is the maximum size in bytes of a go.mod file within a + // module zip file. + MaxGoMod = 16 << 20 + + // MaxLICENSE is the maximum size in bytes of a LICENSE file within a + // module zip file. + MaxLICENSE = 16 << 20 +) + +// File provides an abstraction for a file in a directory, zip, or anything +// else that looks like a file. 
+type File interface { + // Path returns a clean slash-separated relative path from the module root + // directory to the file. + Path() string + + // Lstat returns information about the file. If the file is a symbolic link, + // Lstat returns information about the link itself, not the file it points to. + Lstat() (os.FileInfo, error) + + // Open provides access to the data within a regular file. Open may return + // an error if called on a directory or symbolic link. + Open() (io.ReadCloser, error) +} + +// Create builds a zip archive for module m from an abstract list of files +// and writes it to w. +// +// Create verifies the restrictions described in the package documentation +// and should not produce an archive that Unzip cannot extract. Create does not +// include files in the output archive if they don't belong in the module zip. +// In particular, Create will not include files in modules found in +// subdirectories, most files in vendor directories, or irregular files (such +// as symbolic links) in the output archive. +func Create(w io.Writer, m module.Version, files []File) (err error) { + defer func() { + if err != nil { + err = &zipError{verb: "create zip", err: err} + } + }() + + // Check that the version is canonical, the module path is well-formed, and + // the major version suffix matches the major version. + if vers := module.CanonicalVersion(m.Version); vers != m.Version { + return fmt.Errorf("version %q is not canonical (should be %q)", m.Version, vers) + } + if err := module.Check(m.Path, m.Version); err != nil { + return err + } + + // Find directories containing go.mod files (other than the root). + // These directories will not be included in the output zip. 
+ haveGoMod := make(map[string]bool) + for _, f := range files { + dir, base := path.Split(f.Path()) + if strings.EqualFold(base, "go.mod") { + info, err := f.Lstat() + if err != nil { + return err + } + if info.Mode().IsRegular() { + haveGoMod[dir] = true + } + } + } + + inSubmodule := func(p string) bool { + for { + dir, _ := path.Split(p) + if dir == "" { + return false + } + if haveGoMod[dir] { + return true + } + p = dir[:len(dir)-1] + } + } + + // Create the module zip file. + zw := zip.NewWriter(w) + prefix := fmt.Sprintf("%s@%s/", m.Path, m.Version) + + addFile := func(f File, path string, size int64) error { + rc, err := f.Open() + if err != nil { + return err + } + defer rc.Close() + w, err := zw.Create(prefix + path) + if err != nil { + return err + } + lr := &io.LimitedReader{R: rc, N: size + 1} + if _, err := io.Copy(w, lr); err != nil { + return err + } + if lr.N <= 0 { + return fmt.Errorf("file %q is larger than declared size", path) + } + return nil + } + + collisions := make(collisionChecker) + maxSize := int64(MaxZipFile) + for _, f := range files { + p := f.Path() + if p != path.Clean(p) { + return fmt.Errorf("file path %s is not clean", p) + } + if path.IsAbs(p) { + return fmt.Errorf("file path %s is not relative", p) + } + if isVendoredPackage(p) || inSubmodule(p) { + continue + } + if p == ".hg_archival.txt" { + // Inserted by hg archive. + // The go command drops this regardless of the VCS being used. + continue + } + if err := module.CheckFilePath(p); err != nil { + return err + } + if strings.ToLower(p) == "go.mod" && p != "go.mod" { + return fmt.Errorf("found file named %s, want all lower-case go.mod", p) + } + info, err := f.Lstat() + if err != nil { + return err + } + if err := collisions.check(p, info.IsDir()); err != nil { + return err + } + if !info.Mode().IsRegular() { + // Skip symbolic links (golang.org/issue/27093). 
+ continue + } + size := info.Size() + if size < 0 || maxSize < size { + return fmt.Errorf("module source tree too large (max size is %d bytes)", MaxZipFile) + } + maxSize -= size + if p == "go.mod" && size > MaxGoMod { + return fmt.Errorf("go.mod file too large (max size is %d bytes)", MaxGoMod) + } + if p == "LICENSE" && size > MaxLICENSE { + return fmt.Errorf("LICENSE file too large (max size is %d bytes)", MaxLICENSE) + } + + if err := addFile(f, p, size); err != nil { + return err + } + } + + return zw.Close() +} + +// CreateFromDir creates a module zip file for module m from the contents of +// a directory, dir. The zip content is written to w. +// +// CreateFromDir verifies the restrictions described in the package +// documentation and should not produce an archive that Unzip cannot extract. +// CreateFromDir does not include files in the output archive if they don't +// belong in the module zip. In particular, CreateFromDir will not include +// files in modules found in subdirectories, most files in vendor directories, +// or irregular files (such as symbolic links) in the output archive. +func CreateFromDir(w io.Writer, m module.Version, dir string) (err error) { + defer func() { + if zerr, ok := err.(*zipError); ok { + zerr.path = dir + } else if err != nil { + err = &zipError{verb: "create zip", path: dir, err: err} + } + }() + + var files []File + err = filepath.Walk(dir, func(filePath string, info os.FileInfo, err error) error { + relPath, err := filepath.Rel(dir, filePath) + if err != nil { + return err + } + slashPath := filepath.ToSlash(relPath) + + if info.IsDir() { + if filePath == dir { + // Don't skip the top-level directory. + return nil + } + + // Skip some subdirectories inside vendor, but maintain bug + // golang.org/issue/31562, described in isVendoredPackage. + // We would like Create and CreateFromDir to produce the same result + // for a set of files, whether expressed as a directory tree or zip. 
+ if isVendoredPackage(slashPath) { + return filepath.SkipDir + } + + // Skip submodules (directories containing go.mod files). + if goModInfo, err := os.Lstat(filepath.Join(filePath, "go.mod")); err == nil && !goModInfo.IsDir() { + return filepath.SkipDir + } + return nil + } + + if info.Mode().IsRegular() { + if !isVendoredPackage(slashPath) { + files = append(files, dirFile{ + filePath: filePath, + slashPath: slashPath, + info: info, + }) + } + return nil + } + + // Not a regular file or a directory. Probably a symbolic link. + // Irregular files are ignored, so skip it. + return nil + }) + if err != nil { + return err + } + + return Create(w, m, files) +} + +type dirFile struct { + filePath, slashPath string + info os.FileInfo +} + +func (f dirFile) Path() string { return f.slashPath } +func (f dirFile) Lstat() (os.FileInfo, error) { return f.info, nil } +func (f dirFile) Open() (io.ReadCloser, error) { return os.Open(f.filePath) } + +func isVendoredPackage(name string) bool { + var i int + if strings.HasPrefix(name, "vendor/") { + i += len("vendor/") + } else if j := strings.Index(name, "/vendor/"); j >= 0 { + // This offset looks incorrect; this should probably be + // + // i = j + len("/vendor/") + // + // (See https://golang.org/issue/31562.) + // + // Unfortunately, we can't fix it without invalidating checksums. + // Fortunately, the error appears to be strictly conservative: we'll retain + // vendored packages that we should have pruned, but we won't prune + // non-vendored packages that we should have retained. + // + // Since this defect doesn't seem to break anything, it's not worth fixing + // for now. + i += len("/vendor/") + } else { + return false + } + return strings.Contains(name[i:], "/") +} + +// Unzip extracts the contents of a module zip file to a directory. +// +// Unzip checks all restrictions listed in the package documentation and returns +// an error if the zip archive is not valid. 
// Unzip extracts the contents of a module zip file to a directory.
//
// Unzip checks all restrictions listed in the package documentation and returns
// an error if the zip archive is not valid. In some cases, files may be written
// to dir before an error is returned (for example, if a file's uncompressed
// size does not match its declared size).
//
// dir may or may not exist: Unzip will create it and any missing parent
// directories if it doesn't exist. If dir exists, it must be empty.
//
// m is the module version the archive is expected to contain; every entry in
// the zip must be named with the prefix "<m.Path>@<m.Version>/". zipFile is the
// path of the archive on disk. Any non-nil error is returned wrapped in a
// *zipError with verb "unzip" and path zipFile.
func Unzip(dir string, m module.Version, zipFile string) (err error) {
	// Wrap whatever error is eventually returned so that it names the zip file.
	defer func() {
		if err != nil {
			err = &zipError{verb: "unzip", path: zipFile, err: err}
		}
	}()

	// Reject non-canonical versions and invalid path/version pairs up front.
	if vers := module.CanonicalVersion(m.Version); vers != m.Version {
		return fmt.Errorf("version %q is not canonical (should be %q)", m.Version, vers)
	}
	if err := module.Check(m.Path, m.Version); err != nil {
		return err
	}

	// Check that the directory is empty. Don't create it yet in case there's
	// an error reading the zip.
	// The ReadDir error is deliberately discarded: dir is allowed not to exist
	// (it is created further down), so only a non-empty listing is rejected.
	files, _ := ioutil.ReadDir(dir)
	if len(files) > 0 {
		return fmt.Errorf("target directory %v exists and is not empty", dir)
	}

	// Open the zip file and ensure it's under the size limit.
	f, err := os.Open(zipFile)
	if err != nil {
		return err
	}
	defer f.Close()
	info, err := f.Stat()
	if err != nil {
		return err
	}
	zipSize := info.Size()
	if zipSize > MaxZipFile {
		return fmt.Errorf("module zip file is too large (%d bytes; limit is %d bytes)", zipSize, MaxZipFile)
	}

	z, err := zip.NewReader(f, zipSize)
	if err != nil {
		return err
	}

	// Check total size, valid file names.
	// This first pass only validates; nothing is written to disk until every
	// entry has been accepted.
	collisions := make(collisionChecker)
	prefix := fmt.Sprintf("%s@%s/", m.Path, m.Version)
	var size int64
	for _, zf := range z.File {
		if !strings.HasPrefix(zf.Name, prefix) {
			return fmt.Errorf("unexpected file name %s", zf.Name)
		}
		name := zf.Name[len(prefix):]
		if name == "" {
			// The entry is the module root directory itself.
			continue
		}
		// A trailing slash marks a directory entry; strip it before validating.
		isDir := strings.HasSuffix(name, "/")
		if isDir {
			name = name[:len(name)-1]
		}
		if path.Clean(name) != name {
			return fmt.Errorf("invalid file name %s", zf.Name)
		}
		if err := module.CheckFilePath(name); err != nil {
			return err
		}
		if err := collisions.check(name, isDir); err != nil {
			return err
		}
		if isDir {
			continue
		}
		// Any case variant of "go.mod" must be exactly "go.mod" at the root.
		if base := path.Base(name); strings.EqualFold(base, "go.mod") {
			if base != name {
				return fmt.Errorf("found go.mod file not in module root directory (%s)", zf.Name)
			} else if name != "go.mod" {
				return fmt.Errorf("found file named %s, want all lower-case go.mod", zf.Name)
			}
		}
		// s is negative when the declared uint64 size does not fit in int64.
		// Comparing against the remaining budget (MaxZipFile-size) avoids
		// overflowing the running total.
		s := int64(zf.UncompressedSize64)
		if s < 0 || MaxZipFile-size < s {
			return fmt.Errorf("total uncompressed size of module contents too large (max size is %d bytes)", MaxZipFile)
		}
		size += s
		if name == "go.mod" && s > MaxGoMod {
			return fmt.Errorf("go.mod file too large (max size is %d bytes)", MaxGoMod)
		}
		if name == "LICENSE" && s > MaxLICENSE {
			return fmt.Errorf("LICENSE file too large (max size is %d bytes)", MaxLICENSE)
		}
	}

	// Unzip, enforcing sizes checked earlier.
	if err := os.MkdirAll(dir, 0777); err != nil {
		return err
	}
	for _, zf := range z.File {
		name := zf.Name[len(prefix):]
		if name == "" || strings.HasSuffix(name, "/") {
			// Directory entries are not extracted; parents are created on
			// demand for each file below.
			continue
		}
		dst := filepath.Join(dir, name)
		if err := os.MkdirAll(filepath.Dir(dst), 0777); err != nil {
			return err
		}
		// O_EXCL makes extraction fail rather than overwrite an existing file.
		w, err := os.OpenFile(dst, os.O_WRONLY|os.O_CREATE|os.O_EXCL, 0444)
		if err != nil {
			return err
		}
		r, err := zf.Open()
		if err != nil {
			w.Close()
			return err
		}
		// Allow one byte more than the declared size: if the reader's budget
		// is exhausted (N <= 0) the entry held more data than it declared.
		lr := &io.LimitedReader{R: r, N: int64(zf.UncompressedSize64) + 1}
		_, err = io.Copy(w, lr)
		r.Close()
		if err != nil {
			w.Close()
			return err
		}
		if err := w.Close(); err != nil {
			return err
		}
		// Checked after the file has been written and closed, which is why a
		// partially extracted tree can remain in dir when this error is returned.
		if lr.N <= 0 {
			return fmt.Errorf("uncompressed size of file %s is larger than declared size (%d bytes)", zf.Name, zf.UncompressedSize64)
		}
	}

	return nil
}
// strToFold maps s to a canonical representative of its case-folding class:
// strings.EqualFold(s, t) holds iff strToFold(s) == strToFold(t).
// Comparing (or indexing by) the folded form lets callers detect
// fold-equivalent duplicates in a large set of strings without a quadratic
// number of EqualFold calls. strings.ToUpper and strings.ToLower cannot be
// used for this because they lack that property in some corner cases.
func strToFold(s string) string {
	// Fast path: a string made only of ASCII bytes with no upper-case letters
	// is already in folded form. Most paths look like this.
	plain := true
	for i := 0; i < len(s) && plain; i++ {
		b := s[i]
		plain = b < utf8.RuneSelf && (b < 'A' || 'Z' < b)
	}
	if plain {
		return s
	}

	var out bytes.Buffer
	for _, r := range s {
		// SimpleFold steps to the next larger equivalent rune and wraps
		// around to the smallest one after the largest. Climb until the wrap;
		// the value produced by the wrap is the minimum of the orbit.
		next := unicode.SimpleFold(r)
		for next > r {
			r, next = next, unicode.SimpleFold(next)
		}
		r = next

		// The minimum of an ASCII letter's orbit is its upper-case form; use
		// the lower-case form instead so the fast path above stays valid.
		if 'A' <= r && r <= 'Z' {
			r = r - 'A' + 'a'
		}
		out.WriteRune(r)
	}
	return out.String()
}
+ +package zip_test + +import ( + "archive/zip" + "bytes" + "crypto/sha256" + "encoding/hex" + "fmt" + "io" + "io/ioutil" + "os" + "os/exec" + "path" + "path/filepath" + "runtime" + "strings" + "testing" + "time" + + "golang.org/x/mod/internal/txtar" + "golang.org/x/mod/module" + "golang.org/x/mod/sumdb/dirhash" + modzip "golang.org/x/mod/zip" +) + +const emptyHash = "h1:47DEQpj8HBSa+/TImW+5JCeuQeRkm5NMpJWZG3hSuFU=" + +type testParams struct { + path, version, wantErr, hash string + archive *txtar.Archive +} + +// readTest loads a test from a txtar file. The comment section of the file +// should contain lines with key=value pairs. Valid keys are the field names +// from testParams. +func readTest(file string) (testParams, error) { + var test testParams + var err error + test.archive, err = txtar.ParseFile(file) + if err != nil { + return testParams{}, err + } + + lines := strings.Split(string(test.archive.Comment), "\n") + for n, line := range lines { + n++ // report line numbers starting with 1 + if i := strings.IndexByte(line, '#'); i >= 0 { + line = line[:i] + } + line = strings.TrimSpace(line) + if line == "" { + continue + } + eq := strings.IndexByte(line, '=') + if eq < 0 { + return testParams{}, fmt.Errorf("%s:%d: missing = separator", file, n) + } + key, value := strings.TrimSpace(line[:eq]), strings.TrimSpace(line[eq+1:]) + switch key { + case "path": + test.path = value + case "version": + test.version = value + case "wantErr": + test.wantErr = value + case "hash": + test.hash = value + default: + return testParams{}, fmt.Errorf("%s:%d: unknown key %q", file, n, key) + } + } + + return test, nil +} + +type fakeFile struct { + name string + size uint64 + data []byte // if nil, Open will access a sequence of 0-bytes +} + +func (f fakeFile) Path() string { return f.name } +func (f fakeFile) Lstat() (os.FileInfo, error) { return fakeFileInfo{f}, nil } +func (f fakeFile) Open() (io.ReadCloser, error) { + if f.data != nil { + return 
// zeroReader is a reader that yields zero bytes for as long as it is read.
type zeroReader struct{}

// Read sets every byte of b to zero and reports the whole buffer as read.
// It always returns a nil error.
func (zeroReader) Read(b []byte) (int, error) {
	n := len(b)
	for i := 0; i < n; i++ {
		b[i] = 0
	}
	return n, nil
}
+ tmpZip, err := ioutil.TempFile("", "TestCreate-*.zip") + if err != nil { + t.Fatal(err) + } + tmpZipPath := tmpZip.Name() + defer func() { + tmpZip.Close() + os.Remove(tmpZipPath) + }() + m := module.Version{Path: test.path, Version: test.version} + files := make([]modzip.File, len(test.archive.Files)) + for i, tf := range test.archive.Files { + files[i] = fakeFile{ + name: tf.Name, + size: uint64(len(tf.Data)), + data: tf.Data, + } + } + if err := modzip.Create(tmpZip, m, files); err != nil { + if test.wantErr == "" { + t.Fatalf("unexpected error: %v", err) + } else if !strings.Contains(err.Error(), test.wantErr) { + t.Fatalf("got error %q; want error containing %q", err.Error(), test.wantErr) + } else { + return + } + } else if test.wantErr != "" { + t.Fatalf("unexpected success; wanted error containing %q", test.wantErr) + } + if err := tmpZip.Close(); err != nil { + t.Fatal(err) + } + + // Hash zip file, compare with known value. + if hash, err := dirhash.HashZip(tmpZipPath, dirhash.Hash1); err != nil { + t.Fatal(err) + } else if hash != test.hash { + t.Fatalf("got hash: %q\nwant: %q", hash, test.hash) + } + }) + } +} + +func TestCreateFromDir(t *testing.T) { + testDir := filepath.FromSlash("testdata/create_from_dir") + testInfos, err := ioutil.ReadDir(testDir) + if err != nil { + t.Fatal(err) + } + for _, testInfo := range testInfos { + testInfo := testInfo + base := filepath.Base(testInfo.Name()) + if filepath.Ext(base) != ".txt" { + continue + } + t.Run(base[:len(base)-len(".txt")], func(t *testing.T) { + t.Parallel() + + // Load the test. + testPath := filepath.Join(testDir, testInfo.Name()) + test, err := readTest(testPath) + if err != nil { + t.Fatal(err) + } + + // Write files to a temporary directory. 
+ tmpDir, err := ioutil.TempDir("", "TestCreateFromDir") + if err != nil { + t.Fatal(err) + } + defer os.RemoveAll(tmpDir) + for _, f := range test.archive.Files { + filePath := filepath.Join(tmpDir, f.Name) + if err := os.MkdirAll(filepath.Dir(filePath), 0777); err != nil { + t.Fatal(err) + } + if err := ioutil.WriteFile(filePath, f.Data, 0666); err != nil { + t.Fatal(err) + } + } + + // Create zip from the directory. + tmpZip, err := ioutil.TempFile("", "TestCreateFromDir-*.zip") + if err != nil { + t.Fatal(err) + } + tmpZipPath := tmpZip.Name() + defer func() { + tmpZip.Close() + os.Remove(tmpZipPath) + }() + m := module.Version{Path: test.path, Version: test.version} + if err := modzip.CreateFromDir(tmpZip, m, tmpDir); err != nil { + if test.wantErr == "" { + t.Fatalf("unexpected error: %v", err) + } else if !strings.Contains(err.Error(), test.wantErr) { + t.Fatalf("got error %q; want error containing %q", err, test.wantErr) + } else { + return + } + } else if test.wantErr != "" { + t.Fatalf("unexpected success; want error containing %q", test.wantErr) + } + + // Hash zip file, compare with known value. 
+ if hash, err := dirhash.HashZip(tmpZipPath, dirhash.Hash1); err != nil { + t.Fatal(err) + } else if hash != test.hash { + t.Fatalf("got hash: %q\nwant: %q", hash, test.hash) + } + }) + } +} + +func TestCreateFromDirSpecial(t *testing.T) { + for _, test := range []struct { + desc string + setup func(t *testing.T, tmpDir string) string + wantHash string + }{ + { + desc: "ignore_empty_dir", + setup: func(t *testing.T, tmpDir string) string { + if err := os.Mkdir(filepath.Join(tmpDir, "empty"), 0777); err != nil { + t.Fatal(err) + } + return tmpDir + }, + wantHash: emptyHash, + }, { + desc: "ignore_symlink", + setup: func(t *testing.T, tmpDir string) string { + if err := os.Symlink(tmpDir, filepath.Join(tmpDir, "link")); err != nil { + if runtime.GOOS == "windows" { + t.Skipf("could not create symlink: %v", err) + } else { + t.Fatal(err) + } + } + return tmpDir + }, + wantHash: emptyHash, + }, { + desc: "dir_is_vendor", + setup: func(t *testing.T, tmpDir string) string { + vendorDir := filepath.Join(tmpDir, "vendor") + if err := os.Mkdir(vendorDir, 0777); err != nil { + t.Fatal(err) + } + goModData := []byte("module example.com/m\n\ngo 1.13\n") + if err := ioutil.WriteFile(filepath.Join(vendorDir, "go.mod"), goModData, 0666); err != nil { + t.Fatal(err) + } + return vendorDir + }, + wantHash: "h1:XduFAgX/GaspZa8Jv4pfzoGEzNaU/r88PiCunijw5ok=", + }, + } { + t.Run(test.desc, func(t *testing.T) { + tmpDir, err := ioutil.TempDir("", "TestCreateFromDirSpecial-"+test.desc) + if err != nil { + t.Fatal(err) + } + defer os.RemoveAll(tmpDir) + dir := test.setup(t, tmpDir) + + tmpZipFile, err := ioutil.TempFile("", "TestCreateFromDir-*.zip") + if err != nil { + t.Fatal(err) + } + tmpZipPath := tmpZipFile.Name() + defer func() { + tmpZipFile.Close() + os.Remove(tmpZipPath) + }() + + m := module.Version{Path: "example.com/m", Version: "v1.0.0"} + if err := modzip.CreateFromDir(tmpZipFile, m, dir); err != nil { + t.Fatal(err) + } + if err := tmpZipFile.Close(); err != nil { + 
t.Fatal(err) + } + + if hash, err := dirhash.HashZip(tmpZipPath, dirhash.Hash1); err != nil { + t.Fatal(err) + } else if hash != test.wantHash { + t.Fatalf("got hash %q; want %q", hash, emptyHash) + } + }) + } +} + +func TestUnzip(t *testing.T) { + testDir := filepath.FromSlash("testdata/unzip") + testInfos, err := ioutil.ReadDir(testDir) + if err != nil { + t.Fatal(err) + } + for _, testInfo := range testInfos { + base := filepath.Base(testInfo.Name()) + if filepath.Ext(base) != ".txt" { + continue + } + t.Run(base[:len(base)-len(".txt")], func(t *testing.T) { + // Load the test. + testPath := filepath.Join(testDir, testInfo.Name()) + test, err := readTest(testPath) + if err != nil { + t.Fatal(err) + } + + // Convert txtar to temporary zip file. + tmpZipFile, err := ioutil.TempFile("", "TestUnzip-*.zip") + if err != nil { + t.Fatal(err) + } + tmpZipPath := tmpZipFile.Name() + defer func() { + tmpZipFile.Close() + os.Remove(tmpZipPath) + }() + zw := zip.NewWriter(tmpZipFile) + for _, f := range test.archive.Files { + zf, err := zw.Create(f.Name) + if err != nil { + t.Fatal(err) + } + if _, err := zf.Write(f.Data); err != nil { + t.Fatal(err) + } + } + if err := zw.Close(); err != nil { + t.Fatal(err) + } + if err := tmpZipFile.Close(); err != nil { + t.Fatal(err) + } + + // Extract to a temporary directory. + tmpDir, err := ioutil.TempDir("", "TestUnzip") + if err != nil { + t.Fatal(err) + } + defer os.RemoveAll(tmpDir) + m := module.Version{Path: test.path, Version: test.version} + if err := modzip.Unzip(tmpDir, m, tmpZipPath); err != nil { + if test.wantErr == "" { + t.Fatalf("unexpected error: %v", err) + } else if !strings.Contains(err.Error(), test.wantErr) { + t.Fatalf("got error %q; want error containing %q", err.Error(), test.wantErr) + } else { + return + } + } else if test.wantErr != "" { + t.Fatalf("unexpected success; wanted error containing %q", test.wantErr) + } + + // Hash the directory, compare to known value. 
+ prefix := fmt.Sprintf("%s@%s/", test.path, test.version) + if hash, err := dirhash.HashDir(tmpDir, prefix, dirhash.Hash1); err != nil { + t.Fatal(err) + } else if hash != test.hash { + t.Fatalf("got hash %q\nwant: %q", hash, test.hash) + } + }) + } +} + +type sizeLimitTest struct { + desc string + files []modzip.File + wantErr, wantCreateErr, wantUnzipErr string +} + +// sizeLimitTests is shared by TestCreateSizeLimits and TestUnzipSizeLimits. +var sizeLimitTests = [...]sizeLimitTest{ + { + desc: "one_large", + files: []modzip.File{fakeFile{ + name: "large.go", + size: modzip.MaxZipFile, + }}, + }, { + desc: "one_too_large", + files: []modzip.File{fakeFile{ + name: "large.go", + size: modzip.MaxZipFile + 1, + }}, + wantCreateErr: "module source tree too large", + wantUnzipErr: "total uncompressed size of module contents too large", + }, { + desc: "total_large", + files: []modzip.File{ + fakeFile{ + name: "small.go", + size: 10, + }, + fakeFile{ + name: "large.go", + size: modzip.MaxZipFile - 10, + }, + }, + }, { + desc: "total_too_large", + files: []modzip.File{ + fakeFile{ + name: "small.go", + size: 10, + }, + fakeFile{ + name: "large.go", + size: modzip.MaxZipFile - 9, + }, + }, + wantCreateErr: "module source tree too large", + wantUnzipErr: "total uncompressed size of module contents too large", + }, { + desc: "large_gomod", + files: []modzip.File{fakeFile{ + name: "go.mod", + size: modzip.MaxGoMod, + }}, + }, { + desc: "too_large_gomod", + files: []modzip.File{fakeFile{ + name: "go.mod", + size: modzip.MaxGoMod + 1, + }}, + wantErr: "go.mod file too large", + }, { + desc: "large_license", + files: []modzip.File{fakeFile{ + name: "LICENSE", + size: modzip.MaxLICENSE, + }}, + }, { + desc: "too_large_license", + files: []modzip.File{fakeFile{ + name: "LICENSE", + size: modzip.MaxLICENSE + 1, + }}, + wantErr: "LICENSE file too large", + }, +} + +var sizeLimitVersion = module.Version{Path: "example.com/large", Version: "v1.0.0"} + +func TestCreateSizeLimits(t 
*testing.T) { + if testing.Short() { + t.Skip("creating large files takes time") + } + tests := append(sizeLimitTests[:], sizeLimitTest{ + // negative file size may happen when size is represented as uint64 + // but is cast to int64, as is the case in zip files. + desc: "negative", + files: []modzip.File{fakeFile{ + name: "neg.go", + size: 0x8000000000000000, + }}, + wantErr: "module source tree too large", + }, sizeLimitTest{ + desc: "size_is_a_lie", + files: []modzip.File{fakeFile{ + name: "lie.go", + size: 1, + data: []byte(`package large`), + }}, + wantErr: "larger than declared size", + }) + + for _, test := range tests { + test := test + t.Run(test.desc, func(t *testing.T) { + t.Parallel() + wantErr := test.wantCreateErr + if wantErr == "" { + wantErr = test.wantErr + } + if err := modzip.Create(ioutil.Discard, sizeLimitVersion, test.files); err == nil && wantErr != "" { + t.Fatalf("unexpected success; want error containing %q", wantErr) + } else if err != nil && wantErr == "" { + t.Fatalf("got error %q; want success", err) + } else if err != nil && !strings.Contains(err.Error(), wantErr) { + t.Fatalf("got error %q; want error containing %q", err, wantErr) + } + }) + } +} + +func TestUnzipSizeLimits(t *testing.T) { + if testing.Short() { + t.Skip("creating large files takes time") + } + for _, test := range sizeLimitTests { + test := test + t.Run(test.desc, func(t *testing.T) { + t.Parallel() + tmpZipFile, err := ioutil.TempFile("", "TestUnzipSizeLimits-*.zip") + if err != nil { + t.Fatal(err) + } + tmpZipPath := tmpZipFile.Name() + defer func() { + tmpZipFile.Close() + os.Remove(tmpZipPath) + }() + + zw := zip.NewWriter(tmpZipFile) + prefix := fmt.Sprintf("%s@%s/", sizeLimitVersion.Path, sizeLimitVersion.Version) + for _, tf := range test.files { + zf, err := zw.Create(prefix + tf.Path()) + if err != nil { + t.Fatal(err) + } + rc, err := tf.Open() + if err != nil { + t.Fatal(err) + } + _, err = io.Copy(zf, rc) + rc.Close() + if err != nil { + t.Fatal(err) + 
// TestUnzipSizeLimitsSpecial checks Unzip's size limits using zip files that
// cannot be produced by writing ordinary entries: files that are merely
// truncated to a given length, and an archive whose recorded uncompressed size
// has been patched to be smaller than the data it holds.
//
// Each case's writeZip function writes the archive into the provided temporary
// file; Unzip is then run on it and its error is matched against wantErr.
// The test is skipped in short mode.
func TestUnzipSizeLimitsSpecial(t *testing.T) {
	if testing.Short() {
		t.Skip("skipping test; creating large files takes time")
	}

	for _, test := range []struct {
		desc, wantErr string
		m             module.Version
		writeZip      func(t *testing.T, zipFile *os.File)
	}{
		{
			desc: "large_zip",
			m:    module.Version{Path: "example.com/m", Version: "v1.0.0"},
			writeZip: func(t *testing.T, zipFile *os.File) {
				// A file of exactly the maximum size passes the size check.
				if err := zipFile.Truncate(modzip.MaxZipFile); err != nil {
					t.Fatal(err)
				}
			},
			// this is not an error we care about; we're just testing whether
			// Unzip checks the size of the file before opening.
			// It's harder to create a valid zip file of exactly the right size.
			wantErr: "not a valid zip file",
		}, {
			desc: "too_large_zip",
			m:    module.Version{Path: "example.com/m", Version: "v1.0.0"},
			writeZip: func(t *testing.T, zipFile *os.File) {
				// One byte over the limit must be rejected by the size check.
				if err := zipFile.Truncate(modzip.MaxZipFile + 1); err != nil {
					t.Fatal(err)
				}
			},
			wantErr: "module zip file is too large",
		}, {
			desc: "size_is_a_lie",
			m:    module.Version{Path: "example.com/m", Version: "v1.0.0"},
			writeZip: func(t *testing.T, zipFile *os.File) {
				// Create a normal zip file in memory containing one file full of zero
				// bytes. Use a distinctive size so we can find it later.
				zipBuf := &bytes.Buffer{}
				zw := zip.NewWriter(zipBuf)
				f, err := zw.Create("example.com/m@v1.0.0/go.mod")
				if err != nil {
					t.Fatal(err)
				}
				realSize := 0x0BAD
				buf := make([]byte, realSize)
				if _, err := f.Write(buf); err != nil {
					t.Fatal(err)
				}
				if err := zw.Close(); err != nil {
					t.Fatal(err)
				}

				// Replace the uncompressed size of the file. As a shortcut, we just
				// search-and-replace the byte sequence. It is expected to occur
				// exactly twice (enforced by the n != 2 check below). All multi-byte
				// values are little-endian.
				// NOTE(review): the earlier wording attributed the two occurrences
				// to "32- and 64-byte sizes" being stored separately; "byte" was
				// presumably meant to be "bit". Confirm against the zip format
				// which two records actually carry the size.
				zipData := zipBuf.Bytes()
				realSizeData := []byte{0xAD, 0x0B}
				fakeSizeData := []byte{0xAC, 0x00}
				s := zipData
				n := 0
				for {
					// Advance s to the next occurrence, then overwrite it in place.
					// s aliases zipData, so the copy patches the archive bytes.
					if i := bytes.Index(s, realSizeData); i < 0 {
						break
					} else {
						s = s[i:]
					}
					copy(s[:len(fakeSizeData)], fakeSizeData)
					n++
				}
				if n != 2 {
					t.Fatalf("replaced size %d times; expected 2", n)
				}

				// Write the modified zip to the actual file.
				if _, err := zipFile.Write(zipData); err != nil {
					t.Fatal(err)
				}
			},
			wantErr: "uncompressed size of file example.com/m@v1.0.0/go.mod is larger than declared size",
		},
	} {
		test := test // capture a per-iteration copy for the parallel subtest
		t.Run(test.desc, func(t *testing.T) {
			t.Parallel()
			tmpZipFile, err := ioutil.TempFile("", "TestUnzipSizeLimitsSpecial-*.zip")
			if err != nil {
				t.Fatal(err)
			}
			tmpZipPath := tmpZipFile.Name()
			defer func() {
				tmpZipFile.Close()
				os.Remove(tmpZipPath)
			}()

			test.writeZip(t, tmpZipFile)
			if err := tmpZipFile.Close(); err != nil {
				t.Fatal(err)
			}

			tmpDir, err := ioutil.TempDir("", "TestUnzipSizeLimitsSpecial")
			if err != nil {
				t.Fatal(err)
			}
			defer os.RemoveAll(tmpDir)
			// Three failure modes: missing error, unexpected error, wrong error.
			if err := modzip.Unzip(tmpDir, test.m, tmpZipPath); err == nil && test.wantErr != "" {
				t.Fatalf("unexpected success; want error containing %q", test.wantErr)
			} else if err != nil && test.wantErr == "" {
				t.Fatalf("got error %q; want success", err)
			} else if err != nil && !strings.Contains(err.Error(), test.wantErr) {
				t.Fatalf("got error %q; want error containing %q", err, test.wantErr)
			}
		})
	}
}
+func TestVCS(t *testing.T) { + if testing.Short() { + t.Skip() + } + + haveVCS := make(map[string]bool) + for _, vcs := range []string{"git", "hg"} { + _, err := exec.LookPath(vcs) + haveVCS[vcs] = err == nil + } + + for _, test := range []struct { + m module.Version + vcs, url, subdir, rev string + wantContentHash, wantZipHash string + }{ + // Simple tests: all versions of rsc.io/quote + newer major versions + { + m: module.Version{Path: "rsc.io/quote", Version: "v1.0.0"}, + vcs: "git", + url: "https://github.com/rsc/quote", + rev: "v1.0.0", + wantContentHash: "h1:haUSojyo3j2M9g7CEUFG8Na09dtn7QKxvPGaPVQdGwM=", + wantZipHash: "5c08ba2c09a364f93704aaa780e7504346102c6ef4fe1333a11f09904a732078", + }, + { + m: module.Version{Path: "rsc.io/quote", Version: "v1.1.0"}, + vcs: "git", + url: "https://github.com/rsc/quote", + rev: "v1.1.0", + wantContentHash: "h1:n/ElL9GOlVEwL0mVjzaYj0UxTI/TX9aQ7lR5LHqP/Rw=", + wantZipHash: "730a5ae6e5c4e216e4f84bb93aa9785a85630ad73f96954ebb5f9daa123dcaa9", + }, + { + m: module.Version{Path: "rsc.io/quote", Version: "v1.2.0"}, + vcs: "git", + url: "https://github.com/rsc/quote", + rev: "v1.2.0", + wantContentHash: "h1:fFMCNi0A97hfNrtUZVQKETbuc3h7bmfFQHnjutpPYCg=", + wantZipHash: "fe1bd62652e9737a30d6b7fd396ea13e54ad13fb05f295669eb63d6d33290b06", + }, + { + m: module.Version{Path: "rsc.io/quote", Version: "v1.2.1"}, + vcs: "git", + url: "https://github.com/rsc/quote", + rev: "v1.2.1", + wantContentHash: "h1:l+HtgC05eds8qgXNApuv6g1oK1q3B144BM5li1akqXY=", + wantZipHash: "9f0e74de55a6bd20c1567a81e707814dc221f07df176af2a0270392c6faf32fd", + }, + { + m: module.Version{Path: "rsc.io/quote", Version: "v1.3.0"}, + vcs: "git", + url: "https://github.com/rsc/quote", + rev: "v1.3.0", + wantContentHash: "h1:aPUoHx/0Cd7BTZs4SAaknT4TaKryH766GcFTvJjVbHU=", + wantZipHash: "03872ee7d6747bc2ee0abadbd4eb09e60f6df17d0a6142264abe8a8a00af50e7", + }, + { + m: module.Version{Path: "rsc.io/quote", Version: "v1.4.0"}, + vcs: "git", + url: 
"https://github.com/rsc/quote", + rev: "v1.4.0", + wantContentHash: "h1:tYuJspOzwTRMUOX6qmSDRTEKFVV80GM0/l89OLZuVNg=", + wantZipHash: "f60be8193c607bf197da01da4bedb3d683fe84c30de61040eb5d7afaf7869f2e", + }, + { + m: module.Version{Path: "rsc.io/quote", Version: "v1.5.0"}, + vcs: "git", + url: "https://github.com/rsc/quote", + rev: "v1.5.0", + wantContentHash: "h1:mVjf/WMWxfIw299sOl/O3EXn5qEaaJPMDHMsv7DBDlw=", + wantZipHash: "a2d281834ce159703540da94425fa02c7aec73b88b560081ed0d3681bfe9cd1f", + }, + { + m: module.Version{Path: "rsc.io/quote", Version: "v1.5.1"}, + vcs: "git", + url: "https://github.com/rsc/quote", + rev: "v1.5.1", + wantContentHash: "h1:ptSemFtffEBvMed43o25vSUpcTVcqxfXU8Jv0sfFVJs=", + wantZipHash: "4ecd78a6d9f571e84ed2baac1688fd150400db2c5b017b496c971af30aaece02", + }, + { + m: module.Version{Path: "rsc.io/quote", Version: "v1.5.2"}, + vcs: "git", + url: "https://github.com/rsc/quote", + rev: "v1.5.2", + wantContentHash: "h1:w5fcysjrx7yqtD/aO+QwRjYZOKnaM9Uh2b40tElTs3Y=", + wantZipHash: "643fcf8ef4e4cbb8f910622c42df3f9a81f3efe8b158a05825a81622c121ca0a", + }, + { + m: module.Version{Path: "rsc.io/quote", Version: "v1.5.3-pre1"}, + vcs: "git", + url: "https://github.com/rsc/quote", + rev: "v1.5.3-pre1", + wantContentHash: "h1:c3EJ21kn75/hyrOL/Dvj45+ifxGFSY8Wf4WBcoWTxF0=", + wantZipHash: "24106f0f15384949df51fae5d34191bf120c3b80c1c904721ca2872cf83126b2", + }, + { + m: module.Version{Path: "rsc.io/quote/v2", Version: "v2.0.1"}, + vcs: "git", + url: "https://github.com/rsc/quote", + rev: "v2.0.1", + wantContentHash: "h1:DF8hmGbDhgiIa2tpqLjHLIKkJx6WjCtLEqZBAU+hACI=", + wantZipHash: "009ed42474a59526fe56a14a9dd02bd7f977d1bd3844398bd209d0da0484aade", + }, + { + m: module.Version{Path: "rsc.io/quote/v3", Version: "v3.0.0"}, + vcs: "git", + url: "https://github.com/rsc/quote", + rev: "v3.0.0", + subdir: "v3", + wantContentHash: "h1:OEIXClZHFMyx5FdatYfxxpNEvxTqHlu5PNdla+vSYGg=", + wantZipHash: "cf3ff89056b785d7b3ef3a10e984efd83b47d9e65eabe8098b927b3370d5c3eb", + 
}, + + // Test cases from vcs-test.golang.org + { + m: module.Version{Path: "vcs-test.golang.org/git/v3pkg.git/v3", Version: "v3.0.0"}, + vcs: "git", + url: "https://vcs-test.golang.org/git/v3pkg", + rev: "v3.0.0", + wantContentHash: "h1:mZhljS1BaiW8lODR6wqY5pDxbhXja04rWPFXPwRAtvA=", + wantZipHash: "9c65f0d235e531008dc04e977f6fa5d678febc68679bb63d4148dadb91d3fe57", + }, + { + m: module.Version{Path: "vcs-test.golang.org/go/custom-hg-hello", Version: "v0.0.0-20171010233936-a8c8e7a40da9"}, + vcs: "hg", + url: "https://vcs-test.golang.org/hg/custom-hg-hello", + rev: "a8c8e7a40da9", + wantContentHash: "h1:LU6jFCbwn5VVgTcj+y4LspOpJHLZvl5TGPE+LwwpMw4=", + wantZipHash: "a1b12047da979d618c639ee98f370767a13d0507bd77785dc2f8dad66b40e2e6", + }, + + // Latest versions of selected golang.org/x repos + { + m: module.Version{Path: "golang.org/x/arch", Version: "v0.0.0-20190927153633-4e8777c89be4"}, + vcs: "git", + url: "https://go.googlesource.com/arch", + rev: "4e8777c89be4d9e61691fbe5d4e6c8838a7806f3", + wantContentHash: "h1:QlVATYS7JBoZMVaf+cNjb90WD/beKVHnIxFKT4QaHVI=", + wantZipHash: "d17551a0c4957180ec1507065d13dcdd0f5cd8bfd7dd735fb81f64f3e2b31b68", + }, + { + m: module.Version{Path: "golang.org/x/blog", Version: "v0.0.0-20191017104857-0cd0cdff05c2"}, + vcs: "git", + url: "https://go.googlesource.com/blog", + rev: "0cd0cdff05c251ad0c796cc94d7059e013311fc6", + wantContentHash: "h1:IKGICrORhR1aH2xG/WqrnpggSNolSj5urQxggCfmj28=", + wantZipHash: "0fed6b400de54da34b52b464ef2cdff45167236aaaf9a99ba8eba8855036faff", + }, + { + m: module.Version{Path: "golang.org/x/crypto", Version: "v0.0.0-20191011191535-87dc89f01550"}, + vcs: "git", + url: "https://go.googlesource.com/crypto", + rev: "87dc89f01550277dc22b74ffcf4cd89fa2f40f4c", + wantContentHash: "h1:ObdrDkeb4kJdCP557AjRjq69pTHfNouLtWZG7j9rPN8=", + wantZipHash: "88e47aa05eb25c6abdad7387ccccfc39e74541896d87b7b1269e9dd2fa00100d", + }, + { + m: module.Version{Path: "golang.org/x/net", Version: "v0.0.0-20191014212845-da9a3fd4c582"}, + 
vcs: "git", + url: "https://go.googlesource.com/net", + rev: "da9a3fd4c5820e74b24a6cb7fb438dc9b0dd377c", + wantContentHash: "h1:p9xBe/w/OzkeYVKm234g55gMdD1nSIooTir5kV11kfA=", + wantZipHash: "34901a85e6c15475a40457c2393ce66fb0999accaf2d6aa5b64b4863751ddbde", + }, + { + m: module.Version{Path: "golang.org/x/sync", Version: "v0.0.0-20190911185100-cd5d95a43a6e"}, + vcs: "git", + url: "https://go.googlesource.com/sync", + rev: "cd5d95a43a6e21273425c7ae415d3df9ea832eeb", + wantContentHash: "h1:vcxGaoTs7kV8m5Np9uUNQin4BrLOthgV7252N8V+FwY=", + wantZipHash: "9c63fe51b0c533b258d3acc30d9319fe78679ce1a051109c9dea3105b93e2eef", + }, + { + m: module.Version{Path: "golang.org/x/sys", Version: "v0.0.0-20191010194322-b09406accb47"}, + vcs: "git", + url: "https://go.googlesource.com/sys", + rev: "b09406accb4736d857a32bf9444cd7edae2ffa79", + wantContentHash: "h1:/XfQ9z7ib8eEJX2hdgFTZJ/ntt0swNk5oYBziWeTCvY=", + wantZipHash: "f26f2993757670b4d1fee3156d331513259757f17133a36966c158642c3f61df", + }, + { + m: module.Version{Path: "golang.org/x/talks", Version: "v0.0.0-20191010201600-067e0d331fee"}, + vcs: "git", + url: "https://go.googlesource.com/talks", + rev: "067e0d331feee4f8d0fa17d47444db533bd904e7", + wantContentHash: "h1:8fnBMBUwliuiHuzfFw6kSSx79AzQpqkjZi3FSNIoqYs=", + wantZipHash: "fab2129f3005f970dbf2247378edb3220f6bd36726acdc7300ae3bb0f129e2f2", + }, + { + m: module.Version{Path: "golang.org/x/tools", Version: "v0.0.0-20191017205301-920acffc3e65"}, + vcs: "git", + url: "https://go.googlesource.com/tools", + rev: "920acffc3e65862cb002dae6b227b8d9695e3d29", + wantContentHash: "h1:GwXwgmbrvlcHLDsENMqrQTTIC2C0kIPszsq929NruKI=", + wantZipHash: "7f0ab7466448190f8ad1b8cfb05787c3fb08f4a8f9953cd4b40a51c76ddebb28", + }, + { + m: module.Version{Path: "golang.org/x/tour", Version: "v0.0.0-20191002171047-6bb846ce41cd"}, + vcs: "git", + url: "https://go.googlesource.com/tour", + rev: "6bb846ce41cdca087b14c8e3560a679691c424b6", + wantContentHash: 
"h1:EUlK3Rq8iTkQERnCnveD654NvRJ/ZCM9XCDne+S5cJ8=", + wantZipHash: "d6a7e03e02e5f7714bd12653d319a3b0f6e1099c01b1f9a17bc3613fb31c9170", + }, + } { + test := test + testName := strings.ReplaceAll(test.m.String(), "/", "_") + t.Run(testName, func(t *testing.T) { + if have, ok := haveVCS[test.vcs]; !ok { + t.Fatalf("unknown vcs: %s", test.vcs) + } else if !have { + t.Skip() + } + t.Parallel() + + repo, dl, cleanup, err := downloadVCSZip(test.vcs, test.url, test.rev, test.subdir) + defer cleanup() + if err != nil { + t.Fatal(err) + } + + // Create a module zip from that archive. + // (adapted from cmd/go/internal/modfetch.codeRepo.Zip) + info, err := dl.Stat() + if err != nil { + t.Fatal(err) + } + zr, err := zip.NewReader(dl, info.Size()) + if err != nil { + t.Fatal(err) + } + + var files []modzip.File + topPrefix := "" + subdir := test.subdir + if subdir != "" && !strings.HasSuffix(subdir, "/") { + subdir += "/" + } + haveLICENSE := false + for _, f := range zr.File { + if !f.FileInfo().Mode().IsRegular() { + continue + } + if topPrefix == "" { + i := strings.Index(f.Name, "/") + if i < 0 { + t.Fatal("missing top-level directory prefix") + } + topPrefix = f.Name[:i+1] + } + if strings.HasSuffix(f.Name, "/") { // drop directory dummy entries + continue + } + if !strings.HasPrefix(f.Name, topPrefix) { + t.Fatal("zip file contains more than one top-level directory") + } + name := strings.TrimPrefix(f.Name, topPrefix) + if !strings.HasPrefix(name, subdir) { + continue + } + name = strings.TrimPrefix(name, subdir) + if name == ".hg_archival.txt" { + // Inserted by hg archive. + // Not correct to drop from other version control systems, but too bad. 
+ continue + } + if name == "LICENSE" { + haveLICENSE = true + } + files = append(files, zipFile{name: name, f: f}) + } + if !haveLICENSE && subdir != "" { + license, err := downloadVCSFile(test.vcs, repo, test.rev, "LICENSE") + if err != nil { + t.Fatal(err) + } + files = append(files, fakeFile{ + name: "LICENSE", + size: uint64(len(license)), + data: license, + }) + } + + tmpModZipFile, err := ioutil.TempFile("", "TestVCS-*.zip") + if err != nil { + t.Fatal(err) + } + tmpModZipPath := tmpModZipFile.Name() + defer func() { + tmpModZipFile.Close() + os.Remove(tmpModZipPath) + }() + h := sha256.New() + w := io.MultiWriter(tmpModZipFile, h) + if err := modzip.Create(w, test.m, files); err != nil { + t.Fatal(err) + } + if err := tmpModZipFile.Close(); err != nil { + t.Fatal(err) + } + + gotZipHash := hex.EncodeToString(h.Sum(nil)) + if test.wantZipHash != gotZipHash { + // If the test fails because the hash of the zip file itself differs, + // that may be okay as long as the hash of the data within the zip file + // does not change. For example, we might change the compression, + // order, or alignment of files without affecting the extracted output. + // We shouldn't make such a change unintentionally though, so this + // test will fail either way. 
// downloadVCSZip fetches revision rev of the repository at url using the
// named version control system ("git" or "hg") and writes a zip archive of
// that revision, limited to subdir when subdir is non-empty, to a temporary
// file. Every archive entry is placed under the "prefix/" directory.
//
// It returns the temporary directory holding the local repository, the
// archive opened for reading from its start, and a cleanup function.
// cleanup is never nil, even when err is non-nil, and must always be called:
// it removes the repository directory and the archive, undoing the steps in
// reverse order. On error, repoDir is "" and dl is nil.
func downloadVCSZip(vcs, url, rev, subdir string) (repoDir string, dl *os.File, cleanup func(), err error) {
	var cleanups []func()
	cleanup = func() {
		// Undo in reverse order of registration, like deferred calls.
		for i := len(cleanups) - 1; i >= 0; i-- {
			cleanups[i]()
		}
	}
	repoDir, err = ioutil.TempDir("", "downloadVCSZip")
	if err != nil {
		return "", nil, cleanup, err
	}
	cleanups = append(cleanups, func() { os.RemoveAll(repoDir) })

	switch vcs {
	case "git":
		// Create a repository and download the revision we want.
		if err := run(repoDir, "git", "init", "--bare"); err != nil {
			return "", nil, cleanup, err
		}
		if err := os.MkdirAll(filepath.Join(repoDir, "info"), 0777); err != nil {
			return "", nil, cleanup, err
		}
		// Unset the export-subst and export-ignore attributes for every path
		// so that repository attributes cannot alter what "git archive" emits.
		attrFile, err := os.OpenFile(filepath.Join(repoDir, "info", "attributes"), os.O_CREATE|os.O_APPEND|os.O_RDWR, 0666)
		if err != nil {
			return "", nil, cleanup, err
		}
		if _, err := attrFile.Write([]byte("\n* -export-subst -export-ignore\n")); err != nil {
			attrFile.Close()
			return "", nil, cleanup, err
		}
		if err := attrFile.Close(); err != nil {
			return "", nil, cleanup, err
		}
		if err := run(repoDir, "git", "remote", "add", "origin", "--", url); err != nil {
			return "", nil, cleanup, err
		}
		// A revision beginning with "v" is fetched as a tag of the same name;
		// anything else is fetched into a throwaway ref.
		var refSpec string
		if strings.HasPrefix(rev, "v") {
			refSpec = fmt.Sprintf("refs/tags/%[1]s:refs/tags/%[1]s", rev)
		} else {
			refSpec = fmt.Sprintf("%s:refs/dummy", rev)
		}
		if err := run(repoDir, "git", "fetch", "-f", "--depth=1", "origin", refSpec); err != nil {
			return "", nil, cleanup, err
		}

		// Create an archive.
		tmpZipFile, err := ioutil.TempFile("", "downloadVCSZip-*.zip")
		if err != nil {
			return "", nil, cleanup, err
		}
		cleanups = append(cleanups, func() {
			name := tmpZipFile.Name()
			tmpZipFile.Close()
			os.Remove(name)
		})
		subdirArg := subdir
		if subdir == "" {
			subdirArg = "."
		}
		cmd := exec.Command("git", "-c", "core.autocrlf=input", "-c", "core.eol=lf", "archive", "--format=zip", "--prefix=prefix/", rev, "--", subdirArg)
		cmd.Dir = repoDir
		cmd.Stdout = tmpZipFile
		// Stdout carries the archive itself, so only stderr is captured for
		// the error report.
		var stderr strings.Builder
		cmd.Stderr = &stderr
		if err := cmd.Run(); err != nil {
			return "", nil, cleanup, cmdError(cmd.Args, err, []byte(stderr.String()))
		}
		// Rewind so the caller reads the archive from the beginning.
		if _, err := tmpZipFile.Seek(0, io.SeekStart); err != nil {
			return "", nil, cleanup, err
		}
		return repoDir, tmpZipFile, cleanup, nil

	case "hg":
		// Clone the whole repository.
		if err := run(repoDir, "hg", "clone", "-U", "--", url, "."); err != nil {
			return "", nil, cleanup, err
		}

		// Create an archive. The temporary file only reserves a unique path:
		// it is closed right away, hg writes the archive to that path, and
		// the result is reopened for reading below.
		placeholder, err := ioutil.TempFile("", "downloadVCSZip-*.zip")
		if err != nil {
			return "", nil, cleanup, err
		}
		tmpZipPath := placeholder.Name()
		cleanups = append(cleanups, func() { os.Remove(tmpZipPath) })
		if err := placeholder.Close(); err != nil {
			return "", nil, cleanup, err
		}
		args := []string{"archive", "-t", "zip", "--no-decode", "-r", rev, "--prefix=prefix/"}
		if subdir != "" {
			args = append(args, "-I", subdir+"/**")
		}
		args = append(args, "--", tmpZipPath)
		if err := run(repoDir, "hg", args...); err != nil {
			return "", nil, cleanup, err
		}
		tmpZipFile, err := os.Open(tmpZipPath)
		if err != nil {
			return "", nil, cleanup, err
		}
		cleanups = append(cleanups, func() { tmpZipFile.Close() })
		return repoDir, tmpZipFile, cleanup, nil

	default:
		return "", nil, cleanup, fmt.Errorf("vcs %q not supported", vcs)
	}
}

// downloadVCSFile returns the contents of file at revision rev in the local
// repository stored in directory repo. Only vcs "git" is supported; any
// other value yields an error. On failure the returned data is nil and the
// error names the command that failed along with whatever it wrote to
// standard error.
func downloadVCSFile(vcs, repo, rev, file string) ([]byte, error) {
	switch vcs {
	case "git":
		cmd := exec.Command("git", "cat-file", "blob", rev+":"+file)
		cmd.Dir = repo
		data, err := cmd.Output()
		if err != nil {
			// Output stores the command's stderr in ExitError.Stderr when
			// cmd.Stderr was left unset.
			var stderr []byte
			if ee, ok := err.(*exec.ExitError); ok {
				stderr = ee.Stderr
			}
			return nil, cmdError(cmd.Args, err, stderr)
		}
		return data, nil
	default:
		return nil, fmt.Errorf("vcs %q not supported", vcs)
	}
}

// run executes the command name with arguments args in directory dir and
// waits for it to finish. On failure the returned error wraps the underlying
// error and reports the full command line, including name, together with
// the command's combined standard output and standard error.
func run(dir string, name string, args ...string) error {
	cmd := exec.Command(name, args...)
	cmd.Dir = dir
	if out, err := cmd.CombinedOutput(); err != nil {
		return cmdError(cmd.Args, err, out)
	}
	return nil
}

// cmdError annotates err, produced by running the command line args, with
// that command line and, when non-empty, the output the command produced.
func cmdError(args []string, err error, output []byte) error {
	cmdline := strings.Join(args, " ")
	if msg := strings.TrimSpace(string(output)); msg != "" {
		return fmt.Errorf("%s: %w\n%s", cmdline, err, msg)
	}
	return fmt.Errorf("%s: %w", cmdline, err)
}

// zipFile presents an entry of a zip archive under a caller-chosen path, so
// that an entry of a downloaded archive can be listed under a name that
// differs from the one stored in the archive.
type zipFile struct {
	name string    // path reported by Path
	f    *zip.File // underlying archive entry
}

// Path returns the name the file was registered under.
func (f zipFile) Path() string { return f.name }

// Lstat returns the file information recorded in the archive entry's header.
// It never fails.
func (f zipFile) Lstat() (os.FileInfo, error) { return f.f.FileInfo(), nil }

// Open returns a reader for the entry's contents.
func (f zipFile) Open() (io.ReadCloser, error) { return f.f.Open() }