Skip to content

Commit

Permalink
zip: add package for creating and extracting module zip files
Browse files Browse the repository at this point in the history
zip provides three new functions:

* Create - build a zip from an abstract list of files, filtering out
  files in submodules and vendor directories. This is useful for
  filtering a zip produced by a VCS tool (as the go command does).
* CreateFromDir - build a zip from a directory. This is a convenience
  wrapper for Create.
* Unzip - extract a zip file, checking various restrictions.

A list of restrictions on module paths, versions, files within zips,
and size limits is included in the package documentation. Both Create
and Unzip enforce these restrictions.

Also: copied cmd/go/internal/txtar to internal/txtar for testing.

Updates golang/go#31302
Updates golang/go#33312
Updates golang/go#33778

Change-Id: I6fedb8b839a0cd991c9b210e73bafedc4b286ec5
Reviewed-on: https://go-review.googlesource.com/c/mod/+/202042
Run-TryBot: Jay Conrod <jayconrod@google.com>
TryBot-Result: Gobot Gobot <gobot@golang.org>
Reviewed-by: Bryan C. Mills <bcmills@google.com>
  • Loading branch information
Jay Conrod committed Nov 1, 2019
1 parent 18c3998 commit 8827845
Show file tree
Hide file tree
Showing 45 changed files with 2,223 additions and 0 deletions.
140 changes: 140 additions & 0 deletions internal/txtar/archive.go
Original file line number Diff line number Diff line change
@@ -0,0 +1,140 @@
// Copyright 2018 The Go Authors. All rights reserved.
// Use of this source code is governed by a BSD-style
// license that can be found in the LICENSE file.

// Package txtar implements a trivial text-based file archive format.
//
// The goals for the format are:
//
// - be trivial enough to create and edit by hand.
// - be able to store trees of text files describing go command test cases.
// - diff nicely in git history and code reviews.
//
// Non-goals include being a completely general archive format,
// storing binary data, storing file modes, storing special files like
// symbolic links, and so on.
//
// Txtar format
//
// A txtar archive is zero or more comment lines and then a sequence of file entries.
// Each file entry begins with a file marker line of the form "-- FILENAME --"
// and is followed by zero or more file content lines making up the file data.
// The comment or file content ends at the next file marker line.
// The file marker line must begin with the three-byte sequence "-- "
// and end with the three-byte sequence " --", but the enclosed
// file name can be surrounded by additional white space,
// all of which is stripped.
//
// If the txtar file is missing a trailing newline on the final line,
// parsers should consider a final newline to be present anyway.
//
// There are no possible syntax errors in a txtar archive.
package txtar

import (
"bytes"
"fmt"
"io/ioutil"
"strings"
)

// An Archive is a collection of files.
// It is the in-memory form of a txtar archive: Parse produces one from
// serialized text and Format turns one back into text.
type Archive struct {
Comment []byte // text that precedes the first file marker line
Files []File // file entries, in the order Parse encountered them
}

// A File is a single file in an archive.
// Name is taken from the file marker line and Data is the text that
// follows it, up to the next marker line or the end of the archive.
type File struct {
Name string // name of file ("foo/bar.txt")
Data []byte // text content of file
}

// Format serializes a into txtar text: the comment first, then, for each
// file in order, a "-- NAME --" marker line followed by the file's data.
// A newline is appended to the comment and to any file data that is
// non-empty and does not already end in one.
//
// Format does not validate its input. The caller must ensure that
// a.Comment and every a.Files[i].Data contain no file marker lines and
// that every a.Files[i].Name is non-empty.
func Format(a *Archive) []byte {
	out := new(bytes.Buffer)
	out.Write(fixNL(a.Comment))
	for i := range a.Files {
		entry := &a.Files[i]
		fmt.Fprintf(out, "-- %s --\n", entry.Name)
		out.Write(fixNL(entry.Data))
	}
	return out.Bytes()
}

// ParseFile reads the file at the given path and parses its entire
// contents as a txtar archive. The only errors it reports are those
// returned by reading the file; parsing itself cannot fail.
func ParseFile(file string) (*Archive, error) {
	contents, err := ioutil.ReadFile(file)
	if err != nil {
		return nil, err
	}
	arc := Parse(contents)
	return arc, nil
}

// Parse converts the serialized txtar text in data into an Archive.
// Everything before the first file marker line becomes the comment;
// each marker line starts a new file whose data runs to the next
// marker line or the end of the input.
//
// The returned Archive holds slices of data rather than copies, so the
// caller should not modify data while the Archive is in use.
func Parse(data []byte) *Archive {
	comment, next, rest := findFileMarker(data)
	arc := &Archive{Comment: comment}
	for next != "" {
		current := next
		var body []byte
		body, next, rest = findFileMarker(rest)
		arc.Files = append(arc.Files, File{Name: current, Data: body})
	}
	return arc
}

// Byte sequences used to recognize file marker lines ("-- FILENAME --").
var (
// newlineMarker is a newline followed by the marker prefix; findFileMarker
// searches for it to locate the next line that could be a file marker.
newlineMarker = []byte("\n-- ")
// marker is the prefix every file marker line must begin with.
marker = []byte("-- ")
// markerEnd is the suffix every file marker line must end with.
markerEnd = []byte(" --")
)

// findFileMarker scans data for the first file marker line.
// On success it returns the bytes preceding the marker line, the file
// name taken from the marker, and the bytes following the marker line.
// When data contains no marker line it returns before = fixNL(data),
// name = "", after = nil.
func findFileMarker(data []byte) (before []byte, name string, after []byte) {
	pos := 0 // always the offset of the start of a line within data
	for {
		tail := data[pos:]
		if fileName, rest := isMarker(tail); fileName != "" {
			return data[:pos], fileName, rest
		}
		// Not a marker here: jump to the next line that starts with "-- ".
		off := bytes.Index(tail, newlineMarker)
		if off < 0 {
			return fixNL(data), "", nil
		}
		pos += off + 1 // skip the newline itself; pos is now at the "-- "
	}
}

// isMarker checks whether data begins with a file marker line, that is,
// a line of the form "-- FILENAME --".
// If so, it returns the name from the line (with surrounding white space
// trimmed) and the data after the line.
// Otherwise it returns name == "" with an unspecified after.
// A line whose name is empty or consists only of white space is not
// treated as a marker, because its trimmed name is "".
func isMarker(data []byte) (name string, after []byte) {
	if !bytes.HasPrefix(data, marker) {
		return "", nil
	}
	// Restrict data to its first line; a missing newline means the whole
	// remainder is the line and after stays nil.
	if i := bytes.IndexByte(data, '\n'); i >= 0 {
		data, after = data[:i], data[i+1:]
	}
	// The prefix and suffix must not overlap. Without the length check a
	// line such as "-- --" satisfies both HasPrefix and HasSuffix (they
	// share the middle space) and the slice expression below would panic.
	if len(data) < len(marker)+len(markerEnd) || !bytes.HasSuffix(data, markerEnd) {
		return "", nil
	}
	return strings.TrimSpace(string(data[len(marker) : len(data)-len(markerEnd)])), after
}

// fixNL guarantees that non-empty data ends in a newline.
// If data is empty or already ends in \n, fixNL returns data itself.
// Otherwise it returns a freshly allocated copy of data with a final \n
// appended; the input slice is never modified.
func fixNL(data []byte) []byte {
	if n := len(data); n > 0 && data[n-1] != '\n' {
		fixed := make([]byte, 0, n+1)
		fixed = append(fixed, data...)
		return append(fixed, '\n')
	}
	return data
}
67 changes: 67 additions & 0 deletions internal/txtar/archive_test.go
Original file line number Diff line number Diff line change
@@ -0,0 +1,67 @@
// Copyright 2018 The Go Authors. All rights reserved.
// Use of this source code is governed by a BSD-style
// license that can be found in the LICENSE file.

package txtar

import (
"bytes"
"fmt"
"reflect"
"testing"
)

// tests is the table of cases run by Test. Each case supplies txtar text
// and the Archive that Parse is expected to produce from it.
var tests = []struct {
name string // subtest name passed to t.Run
text string // serialized txtar input handed to Parse
parsed *Archive // expected result of parsing text
}{
{
name: "basic",
// The input covers: a two-line comment, a line ("-- foo ---") that
// resembles a marker but does not end in " --" and so stays in the
// data of file1, a file name containing a space, a file with no
// content, and a final file lacking a trailing newline.
text: `comment1
comment2
-- file1 --
File 1 text.
-- foo ---
More file 1 text.
-- file 2 --
File 2 text.
-- empty --
-- noNL --
hello world`,
// The expected data for "noNL" ends in a newline even though the
// input does not, and "empty" is an empty non-nil slice.
parsed: &Archive{
Comment: []byte("comment1\ncomment2\n"),
Files: []File{
{"file1", []byte("File 1 text.\n-- foo ---\nMore file 1 text.\n")},
{"file 2", []byte("File 2 text.\n")},
{"empty", []byte{}},
{"noNL", []byte("hello world\n")},
},
},
},
}

// Test runs every case in tests as a subtest. It checks that Parse yields
// the expected Archive and that the result survives a Format/Parse round
// trip unchanged.
func Test(t *testing.T) {
	for _, tc := range tests {
		t.Run(tc.name, func(t *testing.T) {
			want := tc.parsed

			got := Parse([]byte(tc.text))
			if !reflect.DeepEqual(got, want) {
				t.Fatalf("Parse: wrong output:\nhave:\n%s\nwant:\n%s", shortArchive(got), shortArchive(want))
			}

			// Serialize what was parsed and parse it again.
			got = Parse(Format(got))
			if !reflect.DeepEqual(got, want) {
				t.Fatalf("Parse after Format: wrong output:\nhave:\n%s\nwant:\n%s", shortArchive(got), shortArchive(want))
			}
		})
	}
}

// shortArchive renders a as quoted, one-entry-per-line text for use in
// test failure messages: a "comment:" line followed by one "file" line
// per entry giving its name and data.
func shortArchive(a *Archive) string {
	out := new(bytes.Buffer)
	fmt.Fprintf(out, "comment: %q\n", a.Comment)
	for i := range a.Files {
		entry := &a.Files[i]
		fmt.Fprintf(out, "file %q: %q\n", entry.Name, entry.Data)
	}
	return out.String()
}
5 changes: 5 additions & 0 deletions zip/testdata/create/bad_file_path.txt
Original file line number Diff line number Diff line change
@@ -0,0 +1,5 @@
path=example.com/m
version=v1.0.0
wantErr=malformed file path "bad.go'": invalid char '\''
-- bad.go' --
package bad
5 changes: 5 additions & 0 deletions zip/testdata/create/bad_gomod_case.txt
Original file line number Diff line number Diff line change
@@ -0,0 +1,5 @@
path=example.com/m
version=v1.0.0
wantErr=found file named GO.MOD, want all lower-case go.mod
-- GO.MOD --
module example.com/m
3 changes: 3 additions & 0 deletions zip/testdata/create/bad_mod_path.txt
Original file line number Diff line number Diff line change
@@ -0,0 +1,3 @@
path=cache
version=v1.0.0
wantErr=missing dot in first path element
3 changes: 3 additions & 0 deletions zip/testdata/create/bad_mod_path_version_suffix.txt
Original file line number Diff line number Diff line change
@@ -0,0 +1,3 @@
path=example.com/m
version=v2.0.0
wantErr=invalid version: should be v0 or v1, not v2
3 changes: 3 additions & 0 deletions zip/testdata/create/bad_version.txt
Original file line number Diff line number Diff line change
@@ -0,0 +1,3 @@
path=example.com/m
version=v1.0.0+bad
wantErr=version "v1.0.0+bad" is not canonical (should be "v1.0.0")
7 changes: 7 additions & 0 deletions zip/testdata/create/dup_file.txt
Original file line number Diff line number Diff line change
@@ -0,0 +1,7 @@
path=example.com/m
version=v1.0.0
wantErr=multiple entries for file "dup.go"
-- dup.go --
package d1
-- dup.go --
package d2
7 changes: 7 additions & 0 deletions zip/testdata/create/dup_file_and_dir.txt
Original file line number Diff line number Diff line change
@@ -0,0 +1,7 @@
path=example.com/m
version=v1.0.0
wantErr=entry "a.go" is both a file and a directory
-- a.go --
package a
-- a.go/b.go --
package b
3 changes: 3 additions & 0 deletions zip/testdata/create/empty.txt
Original file line number Diff line number Diff line change
@@ -0,0 +1,3 @@
path=example.com/empty
version=v1.0.0
hash=h1:47DEQpj8HBSa+/TImW+5JCeuQeRkm5NMpJWZG3hSuFU=
9 changes: 9 additions & 0 deletions zip/testdata/create/exclude_cap_go_mod_submodule.txt
Original file line number Diff line number Diff line change
@@ -0,0 +1,9 @@
path=example.com/m
version=v1.0.0
hash=h1:xctQQey8/y7IcBjFZDP/onWLSXhlqcsC3i1fgSdpMHk=
-- a.go --
package a
-- b/GO.MOD --
MODULE EXAMPLE.COM/M/B
-- b/b.go --
package b
11 changes: 11 additions & 0 deletions zip/testdata/create/exclude_submodule.txt
Original file line number Diff line number Diff line change
@@ -0,0 +1,11 @@
path=example.com/m
version=v1.0.0
hash=h1:XduFAgX/GaspZa8Jv4pfzoGEzNaU/r88PiCunijw5ok=
-- go.mod --
module example.com/m

go 1.13
-- sub/go.mod --
module example.com/m/sub
-- sub/x.go' --
invalid name, but this shouldn't be read
14 changes: 14 additions & 0 deletions zip/testdata/create/exclude_vendor.txt
Original file line number Diff line number Diff line change
@@ -0,0 +1,14 @@
path=example.com/m
version=v1.0.0
hash=h1:5u93LDLN0Me+NGfZtRpA5mHxY8svfykHpq4CMSaBZyc=
-- go.mod --
module example.com/m

go 1.13
-- vendor/modules.txt --
included
see comment in isVendoredPackage and golang.org/issue/31562.
-- vendor/example.com/x/x.go --
excluded
-- sub/vendor/sub.txt --
excluded
7 changes: 7 additions & 0 deletions zip/testdata/create/file_case_conflict.txt
Original file line number Diff line number Diff line change
@@ -0,0 +1,7 @@
path=example.com/m
version=v1.0.0
wantErr=case-insensitive file name collision: "m.go" and "M.GO"
-- m.go --
package m
-- M.GO --
package m
5 changes: 5 additions & 0 deletions zip/testdata/create/go_mod_dir.txt
Original file line number Diff line number Diff line change
@@ -0,0 +1,5 @@
path=example.com/m
version=v1.0.0
hash=h1:Mun5l9cBlDnnV6JasTpio2aZJSbFj++h+814mnKC/OM=
-- go.mod/a.go --
package a
3 changes: 3 additions & 0 deletions zip/testdata/create/invalid_utf8_mod_path.txt
Original file line number Diff line number Diff line change
@@ -0,0 +1,3 @@
path=� # 0xFF
version=v1.0.0
wantErr=malformed module path "\xff": invalid UTF-8
22 changes: 22 additions & 0 deletions zip/testdata/create/simple.txt
Original file line number Diff line number Diff line change
@@ -0,0 +1,22 @@
path=example.com/m
version=v1.0.0
hash=h1:tpqYOOmuilagXzyqoJ3roUjp8gneQeTv5YVpL6BG7/k=
-- go.mod --
module example.com/m

go 1.13
-- m.go --
package m

func Foo() int { return 42 }
-- cmd/hello/hello.go --
package main

import (
"fmt"
"example.com/m"
)

func main() {
fmt.Println(m.Foo())
}
5 changes: 5 additions & 0 deletions zip/testdata/create_from_dir/bad_file_path.txt
Original file line number Diff line number Diff line change
@@ -0,0 +1,5 @@
path=example.com/m
version=v1.0.0
wantErr=malformed file path "bad.go'": invalid char '\''
-- bad.go' --
package bad
5 changes: 5 additions & 0 deletions zip/testdata/create_from_dir/bad_gomod_case.txt
Original file line number Diff line number Diff line change
@@ -0,0 +1,5 @@
path=example.com/m
version=v1.0.0
wantErr=found file named GO.MOD, want all lower-case go.mod
-- GO.MOD --
module example.com/m
3 changes: 3 additions & 0 deletions zip/testdata/create_from_dir/bad_mod_path.txt
Original file line number Diff line number Diff line change
@@ -0,0 +1,3 @@
path=cache
version=v1.0.0
wantErr=missing dot in first path element
3 changes: 3 additions & 0 deletions zip/testdata/create_from_dir/bad_mod_path_version_suffix.txt
Original file line number Diff line number Diff line change
@@ -0,0 +1,3 @@
path=example.com/m
version=v2.0.0
wantErr=invalid version: should be v0 or v1, not v2
3 changes: 3 additions & 0 deletions zip/testdata/create_from_dir/bad_version.txt
Original file line number Diff line number Diff line change
@@ -0,0 +1,3 @@
path=example.com/m
version=v1.0.0+bad
wantErr=version "v1.0.0+bad" is not canonical (should be "v1.0.0")
3 changes: 3 additions & 0 deletions zip/testdata/create_from_dir/empty.txt
Original file line number Diff line number Diff line change
@@ -0,0 +1,3 @@
path=example.com/empty
version=v1.0.0
hash=h1:47DEQpj8HBSa+/TImW+5JCeuQeRkm5NMpJWZG3hSuFU=
11 changes: 11 additions & 0 deletions zip/testdata/create_from_dir/exclude_submodule.txt
Original file line number Diff line number Diff line change
@@ -0,0 +1,11 @@
path=example.com/m
version=v1.0.0
hash=h1:XduFAgX/GaspZa8Jv4pfzoGEzNaU/r88PiCunijw5ok=
-- go.mod --
module example.com/m

go 1.13
-- sub/go.mod --
module example.com/m/sub
-- sub/x.go' --
invalid name, but this shouldn't be read
14 changes: 14 additions & 0 deletions zip/testdata/create_from_dir/exclude_vendor.txt
Original file line number Diff line number Diff line change
@@ -0,0 +1,14 @@
path=example.com/m
version=v1.0.0
hash=h1:5u93LDLN0Me+NGfZtRpA5mHxY8svfykHpq4CMSaBZyc=
-- go.mod --
module example.com/m

go 1.13
-- vendor/modules.txt --
included
see comment in isVendoredPackage and golang.org/issue/31562.
-- vendor/example.com/x/x.go --
excluded
-- sub/vendor/sub.txt --
excluded
5 changes: 5 additions & 0 deletions zip/testdata/create_from_dir/go_mod_dir.txt
Original file line number Diff line number Diff line change
@@ -0,0 +1,5 @@
path=example.com/m
version=v1.0.0
hash=h1:Mun5l9cBlDnnV6JasTpio2aZJSbFj++h+814mnKC/OM=
-- go.mod/a.go --
package a
3 changes: 3 additions & 0 deletions zip/testdata/create_from_dir/invalid_utf8_mod_path.txt
Original file line number Diff line number Diff line change
@@ -0,0 +1,3 @@
path=� # 0xFF
version=v1.0.0
wantErr=malformed module path "\xff": invalid UTF-8
Loading

0 comments on commit 8827845

Please sign in to comment.