From 11a4b2a253f158a23165aa411fd742c64f51153f Mon Sep 17 00:00:00 2001 From: Ryan Moran Date: Wed, 30 Jun 2021 10:53:11 -0700 Subject: [PATCH] Reorganized vacation package - introduces vacation.NopArchive type for non-archive files - splits vacation implementation files up into type-named files - renames vacation test files to remove stutter - adds vacation.Archive.WithName option to allow NopArchives to have specified names - uses vacation.Archive.WithName option in postal.Service.Deliver to deliver file to location matching the dependency URI basename - includes application/jar mime-type as NopArchive type --- postal/service.go | 3 +- postal/service_test.go | 49 ++ vacation/archive.go | 89 +++ ...cation_archive_test.go => archive_test.go} | 95 +++- vacation/init_test.go | 16 +- vacation/nop_archive.go | 33 ++ vacation/nop_archive_test.go | 56 ++ ...orting_test.go => symlink_sorting_test.go} | 2 +- vacation/tar_archive.go | 176 ++++++ ...cation_tar_test.go => tar_archive_test.go} | 4 +- vacation/tar_bzip2_archive.go | 30 ++ ...zip2_test.go => tar_bzip2_archive_test.go} | 4 +- vacation/tar_gzip_archive.go | 36 ++ ..._gzip_test.go => tar_gzip_archive_test.go} | 4 +- vacation/tar_xz_archive.go | 37 ++ ..._tar_xz_test.go => tar_xz_archive_test.go} | 4 +- vacation/vacation.go | 508 ------------------ vacation/vacation_text_test.go | 49 -- vacation/zip_archive.go | 160 ++++++ ...cation_zip_test.go => zip_archive_test.go} | 6 +- vacation/zipslip.go | 24 + 21 files changed, 806 insertions(+), 579 deletions(-) create mode 100644 vacation/archive.go rename vacation/{vacation_archive_test.go => archive_test.go} (64%) create mode 100644 vacation/nop_archive.go create mode 100644 vacation/nop_archive_test.go rename vacation/{vacation_symlink_sorting_test.go => symlink_sorting_test.go} (98%) create mode 100644 vacation/tar_archive.go rename vacation/{vacation_tar_test.go => tar_archive_test.go} (98%) create mode 100644 vacation/tar_bzip2_archive.go rename 
vacation/{vacation_tar_bzip2_test.go => tar_bzip2_archive_test.go} (97%) create mode 100644 vacation/tar_gzip_archive.go rename vacation/{vacation_tar_gzip_test.go => tar_gzip_archive_test.go} (97%) create mode 100644 vacation/tar_xz_archive.go rename vacation/{vacation_tar_xz_test.go => tar_xz_archive_test.go} (97%) delete mode 100644 vacation/vacation_text_test.go create mode 100644 vacation/zip_archive.go rename vacation/{vacation_zip_test.go => zip_archive_test.go} (98%) create mode 100644 vacation/zipslip.go diff --git a/postal/service.go b/postal/service.go index 6fbb16e7..91aed8ba 100644 --- a/postal/service.go +++ b/postal/service.go @@ -156,7 +156,8 @@ func (s Service) Deliver(dependency Dependency, cnbPath, layerPath, platformPath validatedReader := cargo.NewValidatedReader(bundle, dependency.SHA256) - err = vacation.NewArchive(validatedReader).StripComponents(dependency.StripComponents).Decompress(layerPath) + name := filepath.Base(dependency.URI) + err = vacation.NewArchive(validatedReader).WithName(name).StripComponents(dependency.StripComponents).Decompress(layerPath) if err != nil { return err } diff --git a/postal/service_test.go b/postal/service_test.go index 7fe7d7cf..8dfaeebb 100644 --- a/postal/service_test.go +++ b/postal/service_test.go @@ -404,6 +404,7 @@ version = "this is super not semver" Expect(err).NotTo(HaveOccurred()) Expect(info.Mode()).To(Equal(os.FileMode(0755))) }) + context("when the dependency has a strip-components value set", func() { it.Before(func() { var err error @@ -484,7 +485,55 @@ version = "this is super not semver" Expect(err).NotTo(HaveOccurred()) Expect(info.Mode()).To(Equal(os.FileMode(0755))) }) + }) + + context("when the dependency should be a named file", func() { + it.Before(func() { + var err error + layerPath, err = os.MkdirTemp("", "path") + Expect(err).NotTo(HaveOccurred()) + + buffer := bytes.NewBuffer(nil) + buffer.WriteString("some-file-contents") + + sum := sha256.Sum256(buffer.Bytes()) + dependencySHA = 
hex.EncodeToString(sum[:]) + + transport.DropCall.Returns.ReadCloser = io.NopCloser(buffer) + + deliver = func() error { + return service.Deliver(postal.Dependency{ + ID: "some-entry", + Stacks: []string{"some-stack"}, + URI: "https://dependencies.example.com/dependencies/some-file-name.txt", + SHA256: dependencySHA, + Version: "1.2.3", + }, "some-cnb-path", + layerPath, + platformPath, + ) + } + }) + + it.After(func() { + Expect(os.RemoveAll(layerPath)).To(Succeed()) + }) + it("downloads the dependency and copies it into the path with the given name", func() { + err := deliver() + Expect(err).NotTo(HaveOccurred()) + + Expect(transport.DropCall.Receives.Root).To(Equal("some-cnb-path")) + Expect(transport.DropCall.Receives.Uri).To(Equal("https://dependencies.example.com/dependencies/some-file-name.txt")) + + files, err := filepath.Glob(fmt.Sprintf("%s/*", layerPath)) + Expect(err).NotTo(HaveOccurred()) + Expect(files).To(ConsistOf([]string{filepath.Join(layerPath, "some-file-name.txt")})) + + content, err := os.ReadFile(filepath.Join(layerPath, "some-file-name.txt")) + Expect(err).NotTo(HaveOccurred()) + Expect(string(content)).To(Equal("some-file-contents")) + }) }) context("when there is a dependency mapping via binding", func() { diff --git a/vacation/archive.go b/vacation/archive.go new file mode 100644 index 00000000..5c4b61a1 --- /dev/null +++ b/vacation/archive.go @@ -0,0 +1,89 @@ +package vacation + +import ( + "bufio" + "fmt" + "io" + "path/filepath" + + "github.com/gabriel-vasile/mimetype" +) + +type Decompressor interface { + Decompress(destination string) error +} + +// An Archive decompresses tar, gzip, xz, and bzip2 compressed tar, and zip files from +// an input stream. +type Archive struct { + reader io.Reader + components int + name string +} + +// NewArchive returns a new Archive that reads from inputReader. 
+func NewArchive(inputReader io.Reader) Archive { + return Archive{ + reader: inputReader, + name: "artifact", + } +} + +// Decompress reads from Archive, determines the archive type of the input +// stream, and writes files into the destination specified. +// +// Archive decompression will also handle files that are types "text/plain; +// charset=utf-8" and write the contents of the input stream to a file named +// "artifact" in the destination directory. +func (a Archive) Decompress(destination string) error { + // Convert reader into a buffered read so that the header can be peeked to + // determine the type. + bufferedReader := bufio.NewReader(a.reader) + + // The number 3072 is lifted from the mimetype library and the definition of + // the constant at the time of writing this functionality is listed below. + // https://github.com/gabriel-vasile/mimetype/blob/c64c025a7c2d8d45ba57d3cebb50a1dbedb3ed7e/internal/matchers/matchers.go#L6 + header, err := bufferedReader.Peek(3072) + if err != nil && err != io.EOF { + return err + } + + mime := mimetype.Detect(header) + + // This switch case is responsible for determining what the decompression + // strategy should be. 
+ var decompressor Decompressor + switch mime.String() { + case "application/x-tar": + decompressor = NewTarArchive(bufferedReader).StripComponents(a.components) + case "application/gzip": + decompressor = NewTarGzipArchive(bufferedReader).StripComponents(a.components) + case "application/x-xz": + decompressor = NewTarXZArchive(bufferedReader).StripComponents(a.components) + case "application/x-bzip2": + decompressor = NewTarBzip2Archive(bufferedReader).StripComponents(a.components) + case "application/zip": + decompressor = NewZipArchive(bufferedReader) + case "text/plain; charset=utf-8", "application/jar": + destination = filepath.Join(destination, a.name) + decompressor = NewNopArchive(bufferedReader) + default: + return fmt.Errorf("unsupported archive type: %s", mime.String()) + } + + return decompressor.Decompress(destination) +} + +// StripComponents behaves like the --strip-components flag on tar command +// removing the first n levels from the final decompression destination. +// Setting this is a no-op for archive types that do not use --strip-components +// (such as zip). +func (a Archive) StripComponents(components int) Archive { + a.components = components + return a +} + +func (a Archive) WithName(name string) Archive { + a.name = name + return a +} diff --git a/vacation/vacation_archive_test.go b/vacation/archive_test.go similarity index 64% rename from vacation/vacation_archive_test.go rename to vacation/archive_test.go index c075e467..f6ea0a9e 100644 --- a/vacation/vacation_archive_test.go +++ b/vacation/archive_test.go @@ -17,7 +17,7 @@ import ( . 
"github.com/onsi/gomega" ) -func testVacationArchive(t *testing.T, context spec.G, it spec.S) { +func testArchive(t *testing.T, context spec.G, it spec.S) { var ( Expect = NewWithT(t).Expect ) @@ -322,6 +322,99 @@ func testVacationArchive(t *testing.T, context spec.G, it spec.S) { }) }) + context("when passed the reader of a text file", func() { + var ( + archive vacation.Archive + tempDir string + ) + + it.Before(func() { + var err error + tempDir, err = os.MkdirTemp("", "vacation") + Expect(err).NotTo(HaveOccurred()) + + buffer := bytes.NewBuffer([]byte(`some contents`)) + + archive = vacation.NewArchive(buffer) + }) + + it.After(func() { + Expect(os.RemoveAll(tempDir)).To(Succeed()) + }) + + it("writes a text file onto the path", func() { + err := archive.Decompress(tempDir) + Expect(err).NotTo(HaveOccurred()) + + content, err := os.ReadFile(filepath.Join(tempDir, "artifact")) + Expect(err).NotTo(HaveOccurred()) + Expect(content).To(Equal([]byte(`some contents`))) + }) + + context("when given a name", func() { + it.Before(func() { + archive = archive.WithName("some-text-file") + }) + + it("writes a text file onto the path with that name", func() { + err := archive.Decompress(tempDir) + Expect(err).NotTo(HaveOccurred()) + + content, err := os.ReadFile(filepath.Join(tempDir, "some-text-file")) + Expect(err).NotTo(HaveOccurred()) + Expect(content).To(Equal([]byte(`some contents`))) + }) + }) + }) + + context("when passed the reader of a jar file", func() { + var ( + archive vacation.Archive + tempDir string + header []byte + ) + + it.Before(func() { + var err error + tempDir, err = os.MkdirTemp("", "vacation") + Expect(err).NotTo(HaveOccurred()) + + // JAR header copied from https://github.com/gabriel-vasile/mimetype/blob/c4c6791c993e7f509de8ef38f149a59533e30bbc/testdata/jar.jar + header = 
[]byte("\x50\x4b\x03\x04\x14\x00\x08\x08\x08\x00\x59\x71\xbf\x4c\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x09\x00\x04\x00\x4d\x45\x54\x41\x2d\x49\x4e\x46\x2f\xfe\xca\x00\x00\x03\x00\x50\x4b\x07\x08\x00\x00\x00\x00\x02\x00\x00\x00\x00\x00\x00\x00\x50\x4b\x03\x04\x14\x00\x08\x08\x08\x00\x59\x71\xbf\x4c\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x14\x00\x00\x00\x4d\x45\x54\x41\x2d\x49\x4e\x46\x2f\x4d\x41\x4e\x49\x46\x45\x53\x54\x2e\x4d\x46\xf3\x4d\xcc\xcb\x4c\x4b\x2d\x2e\xd1\x0d\x4b\x2d\x2a\xce\xcc\xcf\xb3\x52\x30\xd4\x33\xe0\xe5\x72\x2e\x4a\x4d\x2c\x49\x4d\xd1\x75\xaa\x04\x09\x58\xe8\x19\xc4\x1b\x9a\x1a\x2a\x68\xf8\x17\x25\x26\xe7\xa4\x2a\x38\xe7\x17\x15\xe4\x17\x25\x96\x00\xd5\x6b\xf2\x72\xf9\x26\x66\xe6\xe9\x3a\xe7\x24\x16\x17\x5b\x29\x78\xa4\xe6\xe4\xe4\x87\xe7\x17\xe5\xa4\xf0\x72\xf1\x72\x01\x00\x50\x4b\x07\x08\x86\x7d\x5d\xeb\x5c\x00\x00\x00\x5d\x00\x00\x00\x50\x4b\x03\x04\x14\x00\x08\x08\x08\x00\x12\x71\xbf\x4c\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x10\x00\x00\x00\x48\x65\x6c\x6c\x6f\x57\x6f\x72\x6c\x64\x2e\x63\x6c\x61\x73\x73\x6d\x50\x4d\x4b\xc3\x40\x10\x7d\xdb\xa6\x4d\x13\x53\x53\x5b\x53\x3f\x0b\xf6\x50\x88\x22\xe6\xe2\xad\xe2\x45\x10\x0f\x45\x85\x88\x1e\x3c\x6d\xda\xa5\x6c\xd9\x24\x12\x13\xc1\x9f\xa5\x07\x05\x0f\xfe\x00\x7f\x94\x38\x1b\x85\x20\x74\x0f\xb3\x3b\x6f\xde\x9b\x79\xb3\x5f\xdf\x1f\x9f\x00\x8e\x31\xb0\xd1\x84\x6b\xa1\x83\xb5\x16\xba\x36\x7a\x58\x37\xe1\x99\xe8\x33\x34\x4f\x64\x22\xf3\x53\x86\xba\xbf\x7f\xcb\x60\x9c\xa5\x33\xc1\xe0\x4e\x64\x22\x2e\x8b\x38\x12\xd9\x0d\x8f\x14\x21\x46\xcc\x65\xc2\xd0\xf7\xef\x27\x0b\xfe\xc4\x03\xc5\x93\x79\x10\xe6\x99\x4c\xe6\x63\x2d\xb4\xc3\xb4\xc8\xa6\xe2\x5c\x6a\xb2\x7b\x21\x94\x4a\xef\xd2\x4c\xcd\x8e\x34\xdb\x81\x89\x96\x89\x0d\x07\x9b\xd8\x62\x68\x97\xe5\xc3\xbd\x92\x30\x34\xb1\xed\x60\x07\xbb\xd4\xa3\x92\x31\x74\xaa\x31\x57\xd1\x42\x4c\xf3\x7f\x50\xf8\xfc\x98\x8b\x98\x5c\xa7\x05\x15\xbc\x5f\x4f\x32\x0d\xae\xc9\x50\x4e\xb6\x04\x8f\xc7\x0c\xbd\x25\x30\x83\xf9\xa0\x33\x45\xdb\x78\xfe\xb2\x65\x30
\x44\x83\xfe\x4b\x9f\x1a\x98\xb6\x4e\xd1\xa2\x6c\x40\x37\xa3\xbb\x71\xf0\x0e\xf6\x42\x0f\xb2\x4c\xb1\x59\x82\x9a\xb2\x02\xe7\x8f\x3a\x2a\xa5\x80\xf5\x8a\x5a\xb7\xfe\x06\xa3\xa2\xdb\x54\xa2\x1e\xd4\x55\x0b\xdb\xe5\x94\xd5\x1f\x50\x4b\x07\x08\xe5\x38\x99\x3f\x21\x01\x00\x00\xab\x01\x00\x00\x50\x4b\x01\x02\x14\x00\x14\x00\x08\x08\x08\x00\x59\x71\xbf\x4c\x00\x00\x00\x00\x02\x00\x00\x00\x00\x00\x00\x00\x09\x00\x04\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x4d\x45\x54\x41\x2d\x49\x4e\x46\x2f\xfe\xca\x00\x00\x50\x4b\x01\x02\x14\x00\x14\x00\x08\x08\x08\x00\x59\x71\xbf\x4c\x86\x7d\x5d\xeb\x5c\x00\x00\x00\x5d\x00\x00\x00\x14\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x3d\x00\x00\x00\x4d\x45\x54\x41\x2d\x49\x4e\x46\x2f\x4d\x41\x4e\x49\x46\x45\x53\x54\x2e\x4d\x46\x50\x4b\x01\x02\x14\x00\x14\x00\x08\x08\x08\x00\x12\x71\xbf\x4c\xe5\x38\x99\x3f\x21\x01\x00\x00\xab\x01\x00\x00\x10\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\xdb\x00\x00\x00\x48\x65\x6c\x6c\x6f\x57\x6f\x72\x6c\x64\x2e\x63\x6c\x61\x73\x73\x50\x4b\x05\x06\x00\x00\x00\x00\x03\x00\x03\x00\xbb\x00\x00\x00\x3a\x02\x00\x00\x00\x00") + buffer := bytes.NewBuffer(header) + + archive = vacation.NewArchive(buffer) + }) + + it.After(func() { + Expect(os.RemoveAll(tempDir)).To(Succeed()) + }) + + it("writes a jar file onto the path", func() { + err := archive.Decompress(tempDir) + Expect(err).NotTo(HaveOccurred()) + + content, err := os.ReadFile(filepath.Join(tempDir, "artifact")) + Expect(err).NotTo(HaveOccurred()) + Expect(content).To(Equal(header)) + }) + + context("when given a name", func() { + it.Before(func() { + archive = archive.WithName("some-jar-file") + }) + + it("writes a jar file onto the path with that name", func() { + err := archive.Decompress(tempDir) + Expect(err).NotTo(HaveOccurred()) + + content, err := os.ReadFile(filepath.Join(tempDir, "some-jar-file")) + Expect(err).NotTo(HaveOccurred()) + Expect(content).To(Equal(header)) + }) + }) + }) + context("failure cases", func() 
{ context("the buffer passed is of are unknown type", func() { var ( diff --git a/vacation/init_test.go b/vacation/init_test.go index 80c3fe81..ba503206 100644 --- a/vacation/init_test.go +++ b/vacation/init_test.go @@ -9,13 +9,13 @@ import ( func TestVacation(t *testing.T) { suite := spec.New("vacation", spec.Report(report.Terminal{})) - suite("VacationArchive", testVacationArchive) - suite("VacationTarBzip2", testVacationTarBzip2) - suite("VacationSymlinkSorting", testVacationSymlinkSorting) - suite("VacationTar", testVacationTar) - suite("VacationTarGzip", testVacationTarGzip) - suite("VacationTarXZ", testVacationTarXZ) - suite("VacationText", testVacationText) - suite("VacationZip", testVacationZip) + suite("Archive", testArchive) + suite("NopArchive", testNopArchive) + suite("SymlinkSorting", testSymlinkSorting) + suite("TarArchive", testTarArchive) + suite("TarBzip2Archive", testTarBzip2Archive) + suite("TarGzipArchive", testTarGzipArchive) + suite("TarXZArchive", testTarXZArchive) + suite("ZipArchive", testZipArchive) suite.Run(t) } diff --git a/vacation/nop_archive.go b/vacation/nop_archive.go new file mode 100644 index 00000000..3e972ee7 --- /dev/null +++ b/vacation/nop_archive.go @@ -0,0 +1,33 @@ +package vacation + +import ( + "io" + "os" +) + +// A NopArchive implements the common archive interface, but acts as a no-op, +// simply copying the reader to the destination. +type NopArchive struct { + reader io.Reader +} + +// NewNopArchive returns a new NopArchive +func NewNopArchive(r io.Reader) NopArchive { + return NopArchive{reader: r} +} + +// Decompress copies the reader contents into the destination specified. 
+func (na NopArchive) Decompress(destination string) error { + file, err := os.Create(destination) + if err != nil { + return err + } + defer file.Close() + + _, err = io.Copy(file, na.reader) + if err != nil { + return err + } + + return nil +} diff --git a/vacation/nop_archive_test.go b/vacation/nop_archive_test.go new file mode 100644 index 00000000..f9d208eb --- /dev/null +++ b/vacation/nop_archive_test.go @@ -0,0 +1,56 @@ +package vacation_test + +import ( + "bytes" + "os" + "path/filepath" + "testing" + + "github.com/paketo-buildpacks/packit/vacation" + "github.com/sclevine/spec" + + . "github.com/onsi/gomega" +) + +func testNopArchive(t *testing.T, context spec.G, it spec.S) { + var Expect = NewWithT(t).Expect + + context("Decompress", func() { + var ( + archive vacation.NopArchive + tempDir string + ) + + it.Before(func() { + var err error + tempDir, err = os.MkdirTemp("", "vacation") + Expect(err).NotTo(HaveOccurred()) + + buffer := bytes.NewBuffer([]byte(`some contents`)) + + archive = vacation.NewNopArchive(buffer) + }) + + it.After(func() { + Expect(os.RemoveAll(tempDir)).To(Succeed()) + }) + + it("copies the contents of the reader to the destination", func() { + err := archive.Decompress(filepath.Join(tempDir, "some-file")) + Expect(err).NotTo(HaveOccurred()) + + content, err := os.ReadFile(filepath.Join(tempDir, "some-file")) + Expect(err).NotTo(HaveOccurred()) + Expect(content).To(Equal([]byte(`some contents`))) + }) + + context("failure cases", func() { + context("when the destination file cannot be created", func() { + it("returns an error", func() { + err := archive.Decompress("/no/such/path") + Expect(err).To(MatchError(ContainSubstring("no such file or directory"))) + }) + }) + }) + }) +} diff --git a/vacation/vacation_symlink_sorting_test.go b/vacation/symlink_sorting_test.go similarity index 98% rename from vacation/vacation_symlink_sorting_test.go rename to vacation/symlink_sorting_test.go index f31b3904..ca0d6d0a 100644 --- 
a/vacation/vacation_symlink_sorting_test.go +++ b/vacation/symlink_sorting_test.go @@ -15,7 +15,7 @@ import ( . "github.com/onsi/gomega" ) -func testVacationSymlinkSorting(t *testing.T, context spec.G, it spec.S) { +func testSymlinkSorting(t *testing.T, context spec.G, it spec.S) { var ( Expect = NewWithT(t).Expect ) diff --git a/vacation/tar_archive.go b/vacation/tar_archive.go new file mode 100644 index 00000000..e0a30ca2 --- /dev/null +++ b/vacation/tar_archive.go @@ -0,0 +1,176 @@ +package vacation + +import ( + "archive/tar" + "fmt" + "io" + "os" + "path/filepath" + "sort" + "strings" +) + +// A TarArchive decompresses tar files from an input stream. +type TarArchive struct { + reader io.Reader + components int +} + +// NewTarArchive returns a new TarArchive that reads from inputReader. +func NewTarArchive(inputReader io.Reader) TarArchive { + return TarArchive{reader: inputReader} +} + +// Decompress reads from TarArchive and writes files into the +// destination specified. +func (ta TarArchive) Decompress(destination string) error { + // This map keeps track of what directories have been made already so that we + // only attempt to make them once for a cleaner interaction. This map is + // only necessary in cases where there are no directory headers in the + // tarball, which can be seen in the test around there being no directory + // metadata. + directories := map[string]interface{}{} + + // Struct and slice to collect symlinks and create them after all files have + // been created + type header struct { + name string + linkname string + path string + } + + var symlinkHeaders []header + + tarReader := tar.NewReader(ta.reader) + for { + hdr, err := tarReader.Next() + if err == io.EOF { + break + } + if err != nil { + return fmt.Errorf("failed to read tar response: %s", err) + } + + // Clean the name in the header to prevent './filename' being stripped to + // 'filename' also to skip if the destination is the destination directory + // itself i.e. 
'./' + var name string + if name = filepath.Clean(hdr.Name); name == "." { + continue + } + + err = checkExtractPath(name, destination) + if err != nil { + return err + } + + fileNames := strings.Split(name, "/") + + // Checks to see if file should be written when stripping components + if len(fileNames) <= ta.components { + continue + } + + // Constructs the path that conforms to the stripped components. + path := filepath.Join(append([]string{destination}, fileNames[ta.components:]...)...) + + // This switch case handles all cases for creating the directory structure + // this logic is needed to handle tarballs with no directory headers. + switch hdr.Typeflag { + case tar.TypeDir: + err = os.MkdirAll(path, os.ModePerm) + if err != nil { + return fmt.Errorf("failed to create archived directory: %s", err) + } + + directories[path] = nil + + default: + dir := filepath.Dir(path) + _, ok := directories[dir] + if !ok { + err = os.MkdirAll(dir, os.ModePerm) + if err != nil { + return fmt.Errorf("failed to create archived directory from file path: %s", err) + } + directories[dir] = nil + } + } + + // This switch case handles the creation of files during the untaring process. + switch hdr.Typeflag { + case tar.TypeReg: + file, err := os.OpenFile(path, os.O_RDWR|os.O_CREATE|os.O_TRUNC, hdr.FileInfo().Mode()) + if err != nil { + return fmt.Errorf("failed to create archived file: %s", err) + } + + _, err = io.Copy(file, tarReader) + if err != nil { + return err + } + + err = file.Close() + if err != nil { + return err + } + + case tar.TypeSymlink: + // Collect all of the headers for symlinks so that they can be verified + // after all other files are written + symlinkHeaders = append(symlinkHeaders, header{ + name: hdr.Name, + linkname: hdr.Linkname, + path: path, + }) + } + } + + // Sort the symlinks so that symlinks of symlinks have their base link + // created before they are created. 
+ // + // For example: + // b-sym -> a-sym/x + // a-sym -> z + // c-sym -> d-sym + // d-sym -> z + // + // Will sort to: + // a-sym -> z + // b-sym -> a-sym/x + // d-sym -> z + // c-sym -> d-sym + sort.Slice(symlinkHeaders, func(i, j int) bool { + if filepath.Clean(symlinkHeaders[i].name) == linknameFullPath(symlinkHeaders[j].name, symlinkHeaders[j].linkname) { + return true + } + + if filepath.Clean(symlinkHeaders[j].name) == linknameFullPath(symlinkHeaders[i].name, symlinkHeaders[i].linkname) { + return false + } + + return filepath.Clean(symlinkHeaders[i].name) < linknameFullPath(symlinkHeaders[j].name, symlinkHeaders[j].linkname) + }) + + for _, h := range symlinkHeaders { + // Check to see if the file that will be linked to is valid for symlinking + _, err := filepath.EvalSymlinks(linknameFullPath(h.path, h.linkname)) + if err != nil { + return fmt.Errorf("failed to evaluate symlink %s: %w", h.path, err) + } + + err = os.Symlink(h.linkname, h.path) + if err != nil { + return fmt.Errorf("failed to extract symlink: %s", err) + } + } + + return nil +} + +// StripComponents behaves like the --strip-components flag on tar command +// removing the first n levels from the final decompression destination. +func (ta TarArchive) StripComponents(components int) TarArchive { + ta.components = components + return ta +} diff --git a/vacation/vacation_tar_test.go b/vacation/tar_archive_test.go similarity index 98% rename from vacation/vacation_tar_test.go rename to vacation/tar_archive_test.go index 5d8eaa6e..f30149bd 100644 --- a/vacation/vacation_tar_test.go +++ b/vacation/tar_archive_test.go @@ -14,12 +14,12 @@ import ( . 
"github.com/onsi/gomega" ) -func testVacationTar(t *testing.T, context spec.G, it spec.S) { +func testTarArchive(t *testing.T, context spec.G, it spec.S) { var ( Expect = NewWithT(t).Expect ) - context("TarArchive.Decompress", func() { + context("Decompress", func() { var ( tempDir string tarArchive vacation.TarArchive diff --git a/vacation/tar_bzip2_archive.go b/vacation/tar_bzip2_archive.go new file mode 100644 index 00000000..cfe54636 --- /dev/null +++ b/vacation/tar_bzip2_archive.go @@ -0,0 +1,30 @@ +package vacation + +import ( + "compress/bzip2" + "io" +) + +// A TarBzip2Archive decompresses bzip2 files from an input stream. +type TarBzip2Archive struct { + reader io.Reader + components int +} + +// NewTarBzip2Archive returns a new Bzip2Archive that reads from inputReader. +func NewTarBzip2Archive(inputReader io.Reader) TarBzip2Archive { + return TarBzip2Archive{reader: inputReader} +} + +// Decompress reads from TarBzip2Archive and writes files into the destination +// specified. +func (tbz TarBzip2Archive) Decompress(destination string) error { + return NewTarArchive(bzip2.NewReader(tbz.reader)).StripComponents(tbz.components).Decompress(destination) +} + +// StripComponents behaves like the --strip-components flag on tar command +// removing the first n levels from the final decompression destination. +func (tbz TarBzip2Archive) StripComponents(components int) TarBzip2Archive { + tbz.components = components + return tbz +} diff --git a/vacation/vacation_tar_bzip2_test.go b/vacation/tar_bzip2_archive_test.go similarity index 97% rename from vacation/vacation_tar_bzip2_test.go rename to vacation/tar_bzip2_archive_test.go index 1d21f2d3..5cf4e3ea 100644 --- a/vacation/vacation_tar_bzip2_test.go +++ b/vacation/tar_bzip2_archive_test.go @@ -15,12 +15,12 @@ import ( . 
"github.com/onsi/gomega" ) -func testVacationTarBzip2(t *testing.T, context spec.G, it spec.S) { +func testTarBzip2Archive(t *testing.T, context spec.G, it spec.S) { var ( Expect = NewWithT(t).Expect ) - context("TarBzip2Archive.Decompress", func() { + context("Decompress", func() { var ( tempDir string tarBzip2Archive vacation.TarBzip2Archive diff --git a/vacation/tar_gzip_archive.go b/vacation/tar_gzip_archive.go new file mode 100644 index 00000000..f5a756a0 --- /dev/null +++ b/vacation/tar_gzip_archive.go @@ -0,0 +1,36 @@ +package vacation + +import ( + "compress/gzip" + "fmt" + "io" +) + +// A TarGzipArchive decompresses gziped tar files from an input stream. +type TarGzipArchive struct { + reader io.Reader + components int +} + +// NewTarGzipArchive returns a new TarGzipArchive that reads from inputReader. +func NewTarGzipArchive(inputReader io.Reader) TarGzipArchive { + return TarGzipArchive{reader: inputReader} +} + +// Decompress reads from TarGzipArchive and writes files into the destination +// specified. +func (gz TarGzipArchive) Decompress(destination string) error { + gzr, err := gzip.NewReader(gz.reader) + if err != nil { + return fmt.Errorf("failed to create gzip reader: %w", err) + } + + return NewTarArchive(gzr).StripComponents(gz.components).Decompress(destination) +} + +// StripComponents behaves like the --strip-components flag on tar command +// removing the first n levels from the final decompression destination. +func (gz TarGzipArchive) StripComponents(components int) TarGzipArchive { + gz.components = components + return gz +} diff --git a/vacation/vacation_tar_gzip_test.go b/vacation/tar_gzip_archive_test.go similarity index 97% rename from vacation/vacation_tar_gzip_test.go rename to vacation/tar_gzip_archive_test.go index 63d2e135..75503d5a 100644 --- a/vacation/vacation_tar_gzip_test.go +++ b/vacation/tar_gzip_archive_test.go @@ -15,12 +15,12 @@ import ( . 
"github.com/onsi/gomega" ) -func testVacationTarGzip(t *testing.T, context spec.G, it spec.S) { +func testTarGzipArchive(t *testing.T, context spec.G, it spec.S) { var ( Expect = NewWithT(t).Expect ) - context("TarGzipArchive.Decompress", func() { + context("Decompress", func() { var ( tempDir string tarGzipArchive vacation.TarGzipArchive diff --git a/vacation/tar_xz_archive.go b/vacation/tar_xz_archive.go new file mode 100644 index 00000000..73c308ad --- /dev/null +++ b/vacation/tar_xz_archive.go @@ -0,0 +1,37 @@ +package vacation + +import ( + "fmt" + "io" + + "github.com/ulikunitz/xz" +) + +// A TarXZArchive decompresses xz tar files from an input stream. +type TarXZArchive struct { + reader io.Reader + components int +} + +// NewTarXZArchive returns a new TarXZArchive that reads from inputReader. +func NewTarXZArchive(inputReader io.Reader) TarXZArchive { + return TarXZArchive{reader: inputReader} +} + +// Decompress reads from TarXZArchive and writes files into the destination +// specified. +func (txz TarXZArchive) Decompress(destination string) error { + xzr, err := xz.NewReader(txz.reader) + if err != nil { + return fmt.Errorf("failed to create xz reader: %w", err) + } + + return NewTarArchive(xzr).StripComponents(txz.components).Decompress(destination) +} + +// StripComponents behaves like the --strip-components flag on tar command +// removing the first n levels from the final decompression destination. +func (txz TarXZArchive) StripComponents(components int) TarXZArchive { + txz.components = components + return txz +} diff --git a/vacation/vacation_tar_xz_test.go b/vacation/tar_xz_archive_test.go similarity index 97% rename from vacation/vacation_tar_xz_test.go rename to vacation/tar_xz_archive_test.go index 1d4e3b7a..f9e9ea73 100644 --- a/vacation/vacation_tar_xz_test.go +++ b/vacation/tar_xz_archive_test.go @@ -15,12 +15,12 @@ import ( . 
"github.com/onsi/gomega" ) -func testVacationTarXZ(t *testing.T, context spec.G, it spec.S) { +func testTarXZArchive(t *testing.T, context spec.G, it spec.S) { var ( Expect = NewWithT(t).Expect ) - context("TarXZArchive.Decompress", func() { + context("Decompress", func() { var ( tempDir string tarXZArchive vacation.TarXZArchive diff --git a/vacation/vacation.go b/vacation/vacation.go index bd08113e..55373106 100644 --- a/vacation/vacation.go +++ b/vacation/vacation.go @@ -3,511 +3,3 @@ // from decompression from either a file or any other byte stream, which is // useful for decompressing files that are being downloaded. package vacation - -import ( - "archive/tar" - "archive/zip" - "bufio" - "compress/bzip2" - "compress/gzip" - "fmt" - "io" - "os" - "path/filepath" - "sort" - "strings" - - "github.com/gabriel-vasile/mimetype" - "github.com/ulikunitz/xz" -) - -// An Archive decompresses tar, gzip, xz, and bzip2 compressed tar, and zip files from -// an input stream. -type Archive struct { - reader io.Reader - components int -} - -// A TarArchive decompresses tar files from an input stream. -type TarArchive struct { - reader io.Reader - components int -} - -// A TarGzipArchive decompresses gziped tar files from an input stream. -type TarGzipArchive struct { - reader io.Reader - components int -} - -// A TarXZArchive decompresses xz tar files from an input stream. -type TarXZArchive struct { - reader io.Reader - components int -} - -// A TarBzip2Archive decompresses bzip2 files from an input stream. -type TarBzip2Archive struct { - reader io.Reader - components int -} - -// NewArchive returns a new Archive that reads from inputReader. -func NewArchive(inputReader io.Reader) Archive { - return Archive{reader: inputReader} -} - -// NewTarArchive returns a new TarArchive that reads from inputReader. 
-func NewTarArchive(inputReader io.Reader) TarArchive { - return TarArchive{reader: inputReader} -} - -// NewTarGzipArchive returns a new TarGzipArchive that reads from inputReader. -func NewTarGzipArchive(inputReader io.Reader) TarGzipArchive { - return TarGzipArchive{reader: inputReader} -} - -// NewTarXZArchive returns a new TarXZArchive that reads from inputReader. -func NewTarXZArchive(inputReader io.Reader) TarXZArchive { - return TarXZArchive{reader: inputReader} -} - -// NewTarBzip2Archive returns a new Bzip2Archive that reads from inputReader. -func NewTarBzip2Archive(inputReader io.Reader) TarBzip2Archive { - return TarBzip2Archive{reader: inputReader} -} - -// Decompress reads from TarArchive and writes files into the -// destination specified. -func (ta TarArchive) Decompress(destination string) error { - // This map keeps track of what directories have been made already so that we - // only attempt to make them once for a cleaner interaction. This map is - // only necessary in cases where there are no directory headers in the - // tarball, which can be seen in the test around there being no directory - // metadata. - directories := map[string]interface{}{} - - // Struct and slice to collect symlinks and create them after all files have - // been created - type header struct { - name string - linkname string - path string - } - - var symlinkHeaders []header - - tarReader := tar.NewReader(ta.reader) - for { - hdr, err := tarReader.Next() - if err == io.EOF { - break - } - if err != nil { - return fmt.Errorf("failed to read tar response: %s", err) - } - - // Clean the name in the header to prevent './filename' being stripped to - // 'filename' also to skip if the destination it the destination directory - // itself i.e. './' - var name string - if name = filepath.Clean(hdr.Name); name == "." 
{ - continue - } - - err = checkExtractPath(name, destination) - if err != nil { - return err - } - - fileNames := strings.Split(name, "/") - - // Checks to see if file should be written when stripping components - if len(fileNames) <= ta.components { - continue - } - - // Constructs the path that conforms to the stripped components. - path := filepath.Join(append([]string{destination}, fileNames[ta.components:]...)...) - - // This switch case handles all cases for creating the directory structure - // this logic is needed to handle tarballs with no directory headers. - switch hdr.Typeflag { - case tar.TypeDir: - err = os.MkdirAll(path, os.ModePerm) - if err != nil { - return fmt.Errorf("failed to create archived directory: %s", err) - } - - directories[path] = nil - - default: - dir := filepath.Dir(path) - _, ok := directories[dir] - if !ok { - err = os.MkdirAll(dir, os.ModePerm) - if err != nil { - return fmt.Errorf("failed to create archived directory from file path: %s", err) - } - directories[dir] = nil - } - } - - // This switch case handles the creation of files during the untaring process. - switch hdr.Typeflag { - case tar.TypeReg: - file, err := os.OpenFile(path, os.O_RDWR|os.O_CREATE|os.O_TRUNC, hdr.FileInfo().Mode()) - if err != nil { - return fmt.Errorf("failed to create archived file: %s", err) - } - - _, err = io.Copy(file, tarReader) - if err != nil { - return err - } - - err = file.Close() - if err != nil { - return err - } - - case tar.TypeSymlink: - // Collect all of the headers for symlinks so that they can be verified - // after all other files are written - symlinkHeaders = append(symlinkHeaders, header{ - name: hdr.Name, - linkname: hdr.Linkname, - path: path, - }) - } - } - - // Sort the symlinks so that symlinks of symlinks have their base link - // created before they are created. 
- // - // For example: - // b-sym -> a-sym/x - // a-sym -> z - // c-sym -> d-sym - // d-sym -> z - // - // Will sort to: - // a-sym -> z - // b-sym -> a-sym/x - // d-sym -> z - // c-sym -> d-sym - sort.Slice(symlinkHeaders, func(i, j int) bool { - if filepath.Clean(symlinkHeaders[i].name) == linknameFullPath(symlinkHeaders[j].name, symlinkHeaders[j].linkname) { - return true - } - - if filepath.Clean(symlinkHeaders[j].name) == linknameFullPath(symlinkHeaders[i].name, symlinkHeaders[i].linkname) { - return false - } - - return filepath.Clean(symlinkHeaders[i].name) < linknameFullPath(symlinkHeaders[j].name, symlinkHeaders[j].linkname) - }) - - for _, h := range symlinkHeaders { - // Check to see if the file that will be linked to is valid for symlinking - _, err := filepath.EvalSymlinks(linknameFullPath(h.path, h.linkname)) - if err != nil { - return fmt.Errorf("failed to evaluate symlink %s: %w", h.path, err) - } - - err = os.Symlink(h.linkname, h.path) - if err != nil { - return fmt.Errorf("failed to extract symlink: %s", err) - } - } - - return nil -} - -// Decompress reads from Archive, determines the archive type of the input -// stream, and writes files into the destination specified. -// -// Archive decompression will also handle files that are types "text/plain; -// charset=utf-8" and write the contents of the input stream to a file name -// "artifact" in the destination directory. -func (a Archive) Decompress(destination string) error { - // Convert reader into a buffered read so that the header can be peeked to - // determine the type. - bufferedReader := bufio.NewReader(a.reader) - - // The number 3072 is lifted from the mimetype library and the definition of - // the constant at the time of writing this functionality is listed below. 
- // https://github.com/gabriel-vasile/mimetype/blob/c64c025a7c2d8d45ba57d3cebb50a1dbedb3ed7e/internal/matchers/matchers.go#L6 - header, err := bufferedReader.Peek(3072) - if err != nil && err != io.EOF { - return err - } - - mime := mimetype.Detect(header) - - // This switch case is reponsible for determining what the decompression - // startegy should be. - switch mime.String() { - case "application/x-tar": - return NewTarArchive(bufferedReader).StripComponents(a.components).Decompress(destination) - case "application/gzip": - return NewTarGzipArchive(bufferedReader).StripComponents(a.components).Decompress(destination) - case "application/x-xz": - return NewTarXZArchive(bufferedReader).StripComponents(a.components).Decompress(destination) - case "application/x-bzip2": - return NewTarBzip2Archive(bufferedReader).StripComponents(a.components).Decompress(destination) - case "application/zip": - return NewZipArchive(bufferedReader).Decompress(destination) - case "text/plain; charset=utf-8": - // This function will write the contents of the reader to file called - // "artifact" in the destination directory - return writeTextFile(bufferedReader, destination) - default: - return fmt.Errorf("unsupported archive type: %s", mime.String()) - } -} - -// Decompress reads from TarGzipArchive and writes files into the destination -// specified. -func (gz TarGzipArchive) Decompress(destination string) error { - gzr, err := gzip.NewReader(gz.reader) - if err != nil { - return fmt.Errorf("failed to create gzip reader: %w", err) - } - - return NewTarArchive(gzr).StripComponents(gz.components).Decompress(destination) -} - -// Decompress reads from TarXZArchive and writes files into the destination -// specified. 
-func (txz TarXZArchive) Decompress(destination string) error { - xzr, err := xz.NewReader(txz.reader) - if err != nil { - return fmt.Errorf("failed to create xz reader: %w", err) - } - - return NewTarArchive(xzr).StripComponents(txz.components).Decompress(destination) -} - -// Decompress reads from TarBzip2Archive and writes files into the destination -// specified. -func (tbz TarBzip2Archive) Decompress(destination string) error { - return NewTarArchive(bzip2.NewReader(tbz.reader)).StripComponents(tbz.components).Decompress(destination) -} - -func writeTextFile(reader io.Reader, destination string) error { - file, err := os.Create(filepath.Join(destination, "artifact")) - if err != nil { - panic(err) - } - - _, err = io.Copy(file, reader) - if err != nil { - return err - } - - return nil -} - -// StripComponents behaves like the --strip-components flag on tar command -// removing the first n levels from the final decompression destination. -// Setting this is a no-op for archive types that do not use --strip-components -// (such as zip). -func (a Archive) StripComponents(components int) Archive { - a.components = components - return a -} - -// StripComponents behaves like the --strip-components flag on tar command -// removing the first n levels from the final decompression destination. -func (ta TarArchive) StripComponents(components int) TarArchive { - ta.components = components - return ta -} - -// StripComponents behaves like the --strip-components flag on tar command -// removing the first n levels from the final decompression destination. -func (gz TarGzipArchive) StripComponents(components int) TarGzipArchive { - gz.components = components - return gz -} - -// StripComponents behaves like the --strip-components flag on tar command -// removing the first n levels from the final decompression destination. 
-func (txz TarXZArchive) StripComponents(components int) TarXZArchive { - txz.components = components - return txz -} - -// StripComponents behaves like the --strip-components flag on tar command -// removing the first n levels from the final decompression destination. -func (tbz TarBzip2Archive) StripComponents(components int) TarBzip2Archive { - tbz.components = components - return tbz -} - -// A ZipArchive decompresses zip files from an input stream. -type ZipArchive struct { - reader io.Reader -} - -// NewZipArchive returns a new ZipArchive that reads from inputReader. -func NewZipArchive(inputReader io.Reader) ZipArchive { - return ZipArchive{reader: inputReader} -} - -// Decompress reads from ZipArchive and writes files into the destination -// specified. -func (z ZipArchive) Decompress(destination string) error { - // Struct and slice to collect symlinks and create them after all files have - // been created - type header struct { - name string - linkname string - path string - } - - var symlinkHeaders []header - - // Use an os.File to buffer the zip contents. This is needed because - // zip.NewReader requires an io.ReaderAt so that it can jump around within - // the file as it decompresses. - buffer, err := os.CreateTemp("", "") - if err != nil { - return err - } - defer os.Remove(buffer.Name()) - - size, err := io.Copy(buffer, z.reader) - if err != nil { - return err - } - - zr, err := zip.NewReader(buffer, size) - if err != nil { - return fmt.Errorf("failed to create zip reader: %w", err) - } - - for _, f := range zr.File { - // Clean the name in the header to prevent './filename' being stripped to - // 'filename' also to skip if the destination it the destination directory - // itself i.e. './' - var name string - if name = filepath.Clean(f.Name); name == "." 
{ - continue - } - - err = checkExtractPath(name, destination) - if err != nil { - return err - } - - path := filepath.Join(destination, name) - - switch { - case f.FileInfo().IsDir(): - err = os.MkdirAll(path, os.ModePerm) - if err != nil { - return fmt.Errorf("failed to unzip directory: %w", err) - } - case f.FileInfo().Mode()&os.ModeSymlink != 0: - fd, err := f.Open() - if err != nil { - return err - } - - linkname, err := io.ReadAll(fd) - if err != nil { - return err - } - - // Collect all of the headers for symlinks so that they can be verified - // after all other files are written - symlinkHeaders = append(symlinkHeaders, header{ - name: f.Name, - linkname: string(linkname), - path: path, - }) - - default: - err = os.MkdirAll(filepath.Dir(path), os.ModePerm) - if err != nil { - return fmt.Errorf("failed to unzip directory that was part of file path: %w", err) - } - - dst, err := os.OpenFile(path, os.O_WRONLY|os.O_CREATE|os.O_TRUNC, f.Mode()) - if err != nil { - return fmt.Errorf("failed to unzip file: %w", err) - } - defer dst.Close() - - src, err := f.Open() - if err != nil { - return err - } - defer src.Close() - - _, err = io.Copy(dst, src) - if err != nil { - return err - } - } - } - - // Sort the symlinks so that symlinks of symlinks have their base link - // created before they are created. 
- // - // For example: - // b-sym -> a-sym/x - // a-sym -> z - // c-sym -> d-sym - // d-sym -> z - // - // Will sort to: - // a-sym -> z - // b-sym -> a-sym/x - // d-sym -> z - // c-sym -> d-sym - sort.Slice(symlinkHeaders, func(i, j int) bool { - if filepath.Clean(symlinkHeaders[i].name) == linknameFullPath(symlinkHeaders[j].name, symlinkHeaders[j].linkname) { - return true - } - - if filepath.Clean(symlinkHeaders[j].name) == linknameFullPath(symlinkHeaders[i].name, symlinkHeaders[i].linkname) { - return false - } - - return filepath.Clean(symlinkHeaders[i].name) < linknameFullPath(symlinkHeaders[j].name, symlinkHeaders[j].linkname) - }) - - for _, h := range symlinkHeaders { - // Check to see if the file that will be linked to is valid for symlinking - _, err := filepath.EvalSymlinks(linknameFullPath(h.path, h.linkname)) - if err != nil { - return fmt.Errorf("failed to evaluate symlink %s: %w", h.path, err) - } - - err = os.Symlink(h.linkname, h.path) - if err != nil { - return fmt.Errorf("failed to unzip symlink: %w", err) - } - } - - return nil -} - -// This function checks to see that the given path is within the destination -// directory -func checkExtractPath(tarFilePath string, destination string) error { - osPath := filepath.FromSlash(tarFilePath) - destpath := filepath.Join(destination, osPath) - if !strings.HasPrefix(destpath, filepath.Clean(destination)+string(os.PathSeparator)) { - return fmt.Errorf("illegal file path %q: the file path does not occur within the destination directory", tarFilePath) - } - return nil -} - -// Generates the full path for a symlink from the linkname and the symlink path -func linknameFullPath(path, linkname string) string { - return filepath.Clean(filepath.Join(filepath.Dir(path), linkname)) -} diff --git a/vacation/vacation_text_test.go b/vacation/vacation_text_test.go deleted file mode 100644 index 24496a46..00000000 --- a/vacation/vacation_text_test.go +++ /dev/null @@ -1,49 +0,0 @@ -package vacation_test - -import ( - 
"bytes" - "os" - "path/filepath" - "testing" - - "github.com/paketo-buildpacks/packit/vacation" - "github.com/sclevine/spec" - - . "github.com/onsi/gomega" -) - -func testVacationText(t *testing.T, context spec.G, it spec.S) { - var ( - Expect = NewWithT(t).Expect - ) - - context("when passed the reader of a text file", func() { - var ( - archive vacation.Archive - tempDir string - ) - - it.Before(func() { - var err error - tempDir, err = os.MkdirTemp("", "vacation") - Expect(err).NotTo(HaveOccurred()) - - buffer := bytes.NewBuffer([]byte(`some contents`)) - - archive = vacation.NewArchive(buffer) - }) - - it.After(func() { - Expect(os.RemoveAll(tempDir)).To(Succeed()) - }) - - it("writes a text file onto the path", func() { - err := archive.Decompress(tempDir) - Expect(err).NotTo(HaveOccurred()) - - content, err := os.ReadFile(filepath.Join(tempDir, "artifact")) - Expect(err).NotTo(HaveOccurred()) - Expect(content).To(Equal([]byte(`some contents`))) - }) - }) -} diff --git a/vacation/zip_archive.go b/vacation/zip_archive.go new file mode 100644 index 00000000..cc504cfb --- /dev/null +++ b/vacation/zip_archive.go @@ -0,0 +1,160 @@ +package vacation + +import ( + "archive/zip" + "fmt" + "io" + "os" + "path/filepath" + "sort" +) + +// A ZipArchive decompresses zip files from an input stream. +type ZipArchive struct { + reader io.Reader +} + +// NewZipArchive returns a new ZipArchive that reads from inputReader. +func NewZipArchive(inputReader io.Reader) ZipArchive { + return ZipArchive{reader: inputReader} +} + +// Decompress reads from ZipArchive and writes files into the destination +// specified. +func (z ZipArchive) Decompress(destination string) error { + // Struct and slice to collect symlinks and create them after all files have + // been created + type header struct { + name string + linkname string + path string + } + + var symlinkHeaders []header + + // Use an os.File to buffer the zip contents. 
This is needed because + // zip.NewReader requires an io.ReaderAt so that it can jump around within + // the file as it decompresses. + buffer, err := os.CreateTemp("", "") + if err != nil { + return err + } + defer os.Remove(buffer.Name()) + + size, err := io.Copy(buffer, z.reader) + if err != nil { + return err + } + + zr, err := zip.NewReader(buffer, size) + if err != nil { + return fmt.Errorf("failed to create zip reader: %w", err) + } + + for _, f := range zr.File { + // Clean the name in the header to prevent './filename' being stripped to + // 'filename' also to skip if the destination it the destination directory + // itself i.e. './' + var name string + if name = filepath.Clean(f.Name); name == "." { + continue + } + + err = checkExtractPath(name, destination) + if err != nil { + return err + } + + path := filepath.Join(destination, name) + + switch { + case f.FileInfo().IsDir(): + err = os.MkdirAll(path, os.ModePerm) + if err != nil { + return fmt.Errorf("failed to unzip directory: %w", err) + } + case f.FileInfo().Mode()&os.ModeSymlink != 0: + fd, err := f.Open() + if err != nil { + return err + } + + linkname, err := io.ReadAll(fd) + if err != nil { + return err + } + + // Collect all of the headers for symlinks so that they can be verified + // after all other files are written + symlinkHeaders = append(symlinkHeaders, header{ + name: f.Name, + linkname: string(linkname), + path: path, + }) + + default: + err = os.MkdirAll(filepath.Dir(path), os.ModePerm) + if err != nil { + return fmt.Errorf("failed to unzip directory that was part of file path: %w", err) + } + + dst, err := os.OpenFile(path, os.O_WRONLY|os.O_CREATE|os.O_TRUNC, f.Mode()) + if err != nil { + return fmt.Errorf("failed to unzip file: %w", err) + } + defer dst.Close() + + src, err := f.Open() + if err != nil { + return err + } + defer src.Close() + + _, err = io.Copy(dst, src) + if err != nil { + return err + } + } + } + + // Sort the symlinks so that symlinks of symlinks have their base 
link + // created before they are created. + // + // For example: + // b-sym -> a-sym/x + // a-sym -> z + // c-sym -> d-sym + // d-sym -> z + // + // Will sort to: + // a-sym -> z + // b-sym -> a-sym/x + // d-sym -> z + // c-sym -> d-sym + sort.Slice(symlinkHeaders, func(i, j int) bool { + if filepath.Clean(symlinkHeaders[i].name) == linknameFullPath(symlinkHeaders[j].name, symlinkHeaders[j].linkname) { + return true + } + + if filepath.Clean(symlinkHeaders[j].name) == linknameFullPath(symlinkHeaders[i].name, symlinkHeaders[i].linkname) { + return false + } + + return filepath.Clean(symlinkHeaders[i].name) < linknameFullPath(symlinkHeaders[j].name, symlinkHeaders[j].linkname) + }) + + for _, h := range symlinkHeaders { + // Check to see if the file that will be linked to is valid for symlinking + _, err := filepath.EvalSymlinks(linknameFullPath(h.path, h.linkname)) + if err != nil { + return fmt.Errorf("failed to evaluate symlink %s: %w", h.path, err) + } + + err = os.Symlink(h.linkname, h.path) + if err != nil { + return fmt.Errorf("failed to unzip symlink: %w", err) + } + } + + return nil +} diff --git a/vacation/vacation_zip_test.go b/vacation/zip_archive_test.go similarity index 98% rename from vacation/vacation_zip_test.go rename to vacation/zip_archive_test.go index a0718eb6..03a1784b 100644 --- a/vacation/vacation_zip_test.go +++ b/vacation/zip_archive_test.go @@ -14,12 +14,12 @@ import ( . 
"github.com/onsi/gomega" ) -func testVacationZip(t *testing.T, context spec.G, it spec.S) { +func testZipArchive(t *testing.T, context spec.G, it spec.S) { var ( Expect = NewWithT(t).Expect ) - context("ZipArchive.Decompress", func() { + context("Decompress", func() { var ( tempDir string zipArchive vacation.ZipArchive @@ -248,7 +248,7 @@ func testVacationZip(t *testing.T, context spec.G, it spec.S) { // Create a symlink in the target to force the new symlink create to // fail - Expect(os.WriteFile(filepath.Join(tempDir, "some-file"), nil, 0644)).To(Succeed()) + Expect(os.WriteFile(filepath.Join(tempDir, "some-file"), nil, 0600)).To(Succeed()) Expect(os.Symlink("some-file", filepath.Join(tempDir, "symlink"))).To(Succeed()) }) diff --git a/vacation/zipslip.go b/vacation/zipslip.go new file mode 100644 index 00000000..88448857 --- /dev/null +++ b/vacation/zipslip.go @@ -0,0 +1,24 @@ +package vacation + +import ( + "fmt" + "os" + "path/filepath" + "strings" +) + +// This function checks to see that the given path is within the destination +// directory +func checkExtractPath(tarFilePath string, destination string) error { + osPath := filepath.FromSlash(tarFilePath) + destpath := filepath.Join(destination, osPath) + if !strings.HasPrefix(destpath, filepath.Clean(destination)+string(os.PathSeparator)) { + return fmt.Errorf("illegal file path %q: the file path does not occur within the destination directory", tarFilePath) + } + return nil +} + +// Generates the full path for a symlink from the linkname and the symlink path +func linknameFullPath(path, linkname string) string { + return filepath.Clean(filepath.Join(filepath.Dir(path), linkname)) +}