From f920a042722d261e6197893ebbf9b732a0841a85 Mon Sep 17 00:00:00 2001 From: Wayne Starr Date: Mon, 19 Feb 2024 08:04:18 -0700 Subject: [PATCH 1/7] fix: multi-part tarballs being mismatched sizes --- src/pkg/utils/io.go | 28 ++++++++++++++++------------ src/test/e2e/05_tarball_test.go | 27 +++++++++++++++++++++++++++ 2 files changed, 43 insertions(+), 12 deletions(-) diff --git a/src/pkg/utils/io.go b/src/pkg/utils/io.go index 7bda78ff7f..bb6ae79db2 100755 --- a/src/pkg/utils/io.go +++ b/src/pkg/utils/io.go @@ -339,7 +339,7 @@ func ReadFileByChunks(path string, chunkSizeBytes int) (chunks [][]byte, sha256s // - fileNames: list of file paths srcFile was split across // - sha256sum: sha256sum of the srcFile before splitting // - err: any errors encountered -func SplitFile(srcFile string, chunkSizeBytes int) (err error) { +func SplitFile(srcPath string, chunkSizeBytes int) (err error) { var fileNames []string var sha256sum string hash := sha256.New() @@ -353,7 +353,7 @@ func SplitFile(srcFile string, chunkSizeBytes int) (err error) { buf := make([]byte, bufferSize) // get file size - fi, err := os.Stat(srcFile) + fi, err := os.Stat(srcPath) if err != nil { return err } @@ -364,15 +364,15 @@ func SplitFile(srcFile string, chunkSizeBytes int) (err error) { progressBar := message.NewProgressBar(fileSize, title) defer progressBar.Stop() - // open file - file, err := os.Open(srcFile) - defer file.Close() + // open srcFile + srcFile, err := os.Open(srcPath) if err != nil { return err } + defer srcFile.Close() // create file path starting from part 001 - path := fmt.Sprintf("%s.part001", srcFile) + path := fmt.Sprintf("%s.part001", srcPath) chunkFile, err := os.OpenFile(path, os.O_CREATE|os.O_WRONLY, 0644) if err != nil { return err @@ -384,7 +384,7 @@ func SplitFile(srcFile string, chunkSizeBytes int) (err error) { chunkBytesRemaining := chunkSizeBytes // Loop over the tarball hashing as we go and breaking it into chunks based on the chunkSizeBytes for { - bytesRead, err := file.Read(buf) + bytesRead, err := srcFile.Read(buf) if err != nil { if err == io.EOF { @@ -404,10 +404,14 @@ func SplitFile(srcFile string, chunkSizeBytes int) (err error) { if err != nil { return err } + err = chunkFile.Close() + if err != nil { + return err + } // create new file - path = fmt.Sprintf("%s.part%03d", srcFile, len(fileNames)+1) - chunkFile, err := os.OpenFile(path, os.O_CREATE|os.O_WRONLY, 0644) + path = fmt.Sprintf("%s.part%03d", srcPath, len(fileNames)+1) + chunkFile, err = os.OpenFile(path, os.O_CREATE|os.O_WRONLY, 0644) if err != nil { return err } @@ -435,8 +439,8 @@ func SplitFile(srcFile string, chunkSizeBytes int) (err error) { title := fmt.Sprintf("[%d/%d] MB bytes written", progressBar.GetCurrent()/1000/1000, fileSize/1000/1000) progressBar.UpdateTitle(title) } - file.Close() - _ = os.RemoveAll(srcFile) + srcFile.Close() + _ = os.RemoveAll(srcPath) // calculate sha256 sum sha256sum = fmt.Sprintf("%x", hash.Sum(nil)) @@ -452,7 +456,7 @@ func SplitFile(srcFile string, chunkSizeBytes int) (err error) { } // write header file - path = fmt.Sprintf("%s.part000", srcFile) + path = fmt.Sprintf("%s.part000", srcPath) if err := os.WriteFile(path, jsonData, 0644); err != nil { return fmt.Errorf("unable to write the file %s: %w", path, err) } diff --git a/src/test/e2e/05_tarball_test.go b/src/test/e2e/05_tarball_test.go index 1e0e3c8473..caf2cf90ba 100644 --- a/src/test/e2e/05_tarball_test.go +++ b/src/test/e2e/05_tarball_test.go @@ -5,6 +5,7 @@ package test import ( + "encoding/json" "fmt" "os" "path/filepath" @@ -36,6 +37,21 @@ func TestMultiPartPackage(t *testing.T) { require.NoError(t, err) // Length is 7 because there are 6 parts and 1 manifest require.Len(t, parts, 7) + // Check the file sizes are even + part1FileInfo, err := os.Stat(parts[1]) + require.NoError(t, err) + require.Equal(t, int64(1000000), part1FileInfo.Size()) + part2FileInfo, err := os.Stat(parts[2]) + require.NoError(t, err) + require.Equal(t, int64(1000000), part2FileInfo.Size()) + // Check the package data is correct + pkgData := types.ZarfSplitPackageData{} + part0File, err := os.ReadFile(parts[0]) + require.NoError(t, err) + err = json.Unmarshal(part0File, &pkgData) + require.NoError(t, err) + require.Equal(t, pkgData.Count, 6) + fmt.Printf("%#v", pkgData) stdOut, stdErr, err = e2e.Zarf("package", "deploy", deployPath, "--confirm") require.NoError(t, err, stdOut, stdErr) @@ -45,6 +61,17 @@ func TestMultiPartPackage(t *testing.T) { // deploying package combines parts back into single archive, check dir again to find all files parts, err = filepath.Glob("zarf-package-multi-part-*") + require.NoError(t, err) + // Length is 1 because `zarf package deploy` will recombine the file + require.Len(t, parts, 1) + // Ensure that the number of pkgData bytes was correct + fullFileInfo, err := os.Stat(parts[0]) + require.NoError(t, err) + require.Equal(t, pkgData.Bytes, fullFileInfo.Size()) + // Ensure that the pkgData shasum was correct (should be checked during deploy as well, but this is to double check) + err = utils.SHAsMatch(parts[0], pkgData.Sha256Sum) + require.NoError(t, err) + e2e.CleanFiles(parts...) e2e.CleanFiles(outputFile) } From 29cc6e2f349e10b30d9d960cd5e2761346b632e7 Mon Sep 17 00:00:00 2001 From: Wayne Starr Date: Mon, 19 Feb 2024 14:07:36 -0700 Subject: [PATCH 2/7] update test to overcome 16MB buffer --- src/test/e2e/05_tarball_test.go | 12 ++++++------ 1 file changed, 6 insertions(+), 6 deletions(-) diff --git a/src/test/e2e/05_tarball_test.go b/src/test/e2e/05_tarball_test.go index caf2cf90ba..f5bb5f2020 100644 --- a/src/test/e2e/05_tarball_test.go +++ b/src/test/e2e/05_tarball_test.go @@ -30,27 +30,27 @@ func TestMultiPartPackage(t *testing.T) { e2e.CleanFiles(deployPath, outputFile) // Create the package with a max size of 1MB - stdOut, stdErr, err := e2e.Zarf("package", "create", createPath, "--max-package-size=1", "--confirm") + stdOut, stdErr, err := e2e.Zarf("package", "create", createPath, "--max-package-size=2", "--confirm") require.NoError(t, err, stdOut, stdErr) parts, err := filepath.Glob("zarf-package-multi-part-*") require.NoError(t, err) - // Length is 7 because there are 6 parts and 1 manifest - require.Len(t, parts, 7) + // Length is 4 because there are 3 parts and 1 manifest + require.Len(t, parts, 4) // Check the file sizes are even part1FileInfo, err := os.Stat(parts[1]) require.NoError(t, err) - require.Equal(t, int64(1000000), part1FileInfo.Size()) + require.Equal(t, int64(2000000), part1FileInfo.Size()) part2FileInfo, err := os.Stat(parts[2]) require.NoError(t, err) - require.Equal(t, int64(1000000), part2FileInfo.Size()) + require.Equal(t, int64(2000000), part2FileInfo.Size()) // Check the package data is correct pkgData := types.ZarfSplitPackageData{} part0File, err := os.ReadFile(parts[0]) require.NoError(t, err) err = json.Unmarshal(part0File, &pkgData) require.NoError(t, err) - require.Equal(t, pkgData.Count, 6) + require.Equal(t, pkgData.Count, 3) fmt.Printf("%#v", pkgData) stdOut, stdErr, err = e2e.Zarf("package", "deploy", deployPath, "--confirm") From 9e7e080335588ff662ffe65f5d9ff401898ce294 Mon Sep 17 00:00:00 2001 From: Wayne Starr Date: Mon, 19 Feb 2024 15:28:28 -0700 Subject: [PATCH 3/7] dynamically make a 50MiB file (more performant than a download) --- .gitignore | 1 + src/test/e2e/05_tarball_test.go | 8 ++++---- src/test/packages/05-multi-part/zarf.yaml | 9 ++++++--- 3 files changed, 11 insertions(+), 7 deletions(-) diff --git a/.gitignore b/.gitignore index 4270dbb1fb..3d96988028 100644 --- a/.gitignore +++ b/.gitignore @@ -14,6 +14,7 @@ *.bak *.key *.crt +*.dat *.run.zstd *.tar *.tar.gz diff --git a/src/test/e2e/05_tarball_test.go b/src/test/e2e/05_tarball_test.go index f5bb5f2020..02715455cd 100644 --- a/src/test/e2e/05_tarball_test.go +++ b/src/test/e2e/05_tarball_test.go @@ -24,13 +24,13 @@ func TestMultiPartPackage(t *testing.T) { var ( createPath = "src/test/packages/05-multi-part" deployPath = fmt.Sprintf("zarf-package-multi-part-%s.tar.zst.part000", e2e.Arch) - outputFile = "multi-part-demo.dat" + outputFile = "devops.stackexchange.com_en_all_2023-05.zim" ) e2e.CleanFiles(deployPath, outputFile) // Create the package with a max size of 1MB - stdOut, stdErr, err := e2e.Zarf("package", "create", createPath, "--max-package-size=2", "--confirm") + stdOut, stdErr, err := e2e.Zarf("package", "create", createPath, "--max-package-size=20", "--confirm") require.NoError(t, err, stdOut, stdErr) parts, err := filepath.Glob("zarf-package-multi-part-*") @@ -40,10 +40,10 @@ func TestMultiPartPackage(t *testing.T) { // Check the file sizes are even part1FileInfo, err := os.Stat(parts[1]) require.NoError(t, err) - require.Equal(t, int64(2000000), part1FileInfo.Size()) + require.Equal(t, int64(20000000), part1FileInfo.Size()) part2FileInfo, err := os.Stat(parts[2]) require.NoError(t, err) - require.Equal(t, int64(2000000), part2FileInfo.Size()) + require.Equal(t, int64(20000000), part2FileInfo.Size()) // Check the package data is correct pkgData := types.ZarfSplitPackageData{} part0File, err := os.ReadFile(parts[0]) diff --git a/src/test/packages/05-multi-part/zarf.yaml b/src/test/packages/05-multi-part/zarf.yaml index 78b47ba606..0fb4a75652 100644 --- a/src/test/packages/05-multi-part/zarf.yaml +++ b/src/test/packages/05-multi-part/zarf.yaml @@ -6,8 +6,11 @@ metadata: components: - name: big-ol-file required: true - description: Single 5 MB file needed to demonstrate a multi-part package + description: Include a 50 MB file needed to demonstrate a multi-part package + actions: + onCreate: + before: + - cmd: dd if=/dev/urandom of=multi-part-demo.dat bs=1048576 count=50 files: - - source: https://zarf-public.s3-us-gov-west-1.amazonaws.com/examples/multi-part-demo.dat - shasum: 22ebd38c2f5e04821c87c924c910be57d2169c292f85b2936d53cae24ebf8055 + - source: multi-part-demo.dat target: multi-part-demo.dat From 3157403d32e658616a36cc9abb792bdd03c3aba1 Mon Sep 17 00:00:00 2001 From: Wayne Starr Date: Mon, 19 Feb 2024 15:29:10 -0700 Subject: [PATCH 4/7] dynamically make a 50MiB file (more performant than a download) --- src/test/e2e/05_tarball_test.go | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/test/e2e/05_tarball_test.go b/src/test/e2e/05_tarball_test.go index 02715455cd..22d52d848a 100644 --- a/src/test/e2e/05_tarball_test.go +++ b/src/test/e2e/05_tarball_test.go @@ -24,7 +24,7 @@ func TestMultiPartPackage(t *testing.T) { var ( createPath = "src/test/packages/05-multi-part" deployPath = fmt.Sprintf("zarf-package-multi-part-%s.tar.zst.part000", e2e.Arch) - outputFile = "devops.stackexchange.com_en_all_2023-05.zim" + outputFile = "multi-part-demo.dat" ) e2e.CleanFiles(deployPath, outputFile) From 87794df9d101bde92b01e2d4fa5e3d1aca5ddff8 Mon Sep 17 00:00:00 2001 From: Wayne Starr Date: Mon, 19 Feb 2024 15:52:08 -0700 Subject: [PATCH 5/7] close the file for windows file handlers --- src/pkg/packager/sources/split.go | 6 ++++++ 1 file changed, 6 insertions(+) diff --git a/src/pkg/packager/sources/split.go b/src/pkg/packager/sources/split.go index 1aade2eee5..152e28a17b 100644 --- a/src/pkg/packager/sources/split.go +++ b/src/pkg/packager/sources/split.go @@ -85,6 +85,12 @@ func (s *SplitTarballSource) Collect(dir string) (string, error) { if _, err = io.Copy(pkgFile, f); err != nil { return "", fmt.Errorf("unable to copy file %s: %w", file, err) } + + // Close the file when done copying + err = f.Close() + if err != nil { + return "", fmt.Errorf("unable to close file %s: %w", file, err) + } } if err := utils.SHAsMatch(reassembled, pkgData.Sha256Sum); err != nil { From 36b5b8871b96d8731c83a492a07344356f26f8ca Mon Sep 17 00:00:00 2001 From: Wayne Starr Date: Tue, 20 Feb 2024 11:08:06 -0700 Subject: [PATCH 6/7] Update src/pkg/packager/sources/split.go Co-authored-by: Austin Abro <37223396+AustinAbro321@users.noreply.github.com> --- src/pkg/packager/sources/split.go | 3 +-- 1 file changed, 1 insertion(+), 2 deletions(-) diff --git a/src/pkg/packager/sources/split.go b/src/pkg/packager/sources/split.go index 152e28a17b..34d736fabb 100644 --- a/src/pkg/packager/sources/split.go +++ b/src/pkg/packager/sources/split.go @@ -87,8 +87,7 @@ func (s *SplitTarballSource) Collect(dir string) (string, error) { } // Close the file when done copying - err = f.Close() - if err != nil { + if err := f.Close(); err != nil { return "", fmt.Errorf("unable to close file %s: %w", file, err) } } From 21f1d8af1616a1683e19b3f1d204032f95d0fa78 Mon Sep 17 00:00:00 2001 From: Wayne Starr Date: Tue, 20 Feb 2024 11:08:12 -0700 Subject: [PATCH 7/7] Update src/test/e2e/05_tarball_test.go Co-authored-by: Lucas Rodriguez --- src/test/e2e/05_tarball_test.go | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/test/e2e/05_tarball_test.go b/src/test/e2e/05_tarball_test.go index 22d52d848a..f6df5f7e1b 100644 --- a/src/test/e2e/05_tarball_test.go +++ b/src/test/e2e/05_tarball_test.go @@ -29,7 +29,7 @@ func TestMultiPartPackage(t *testing.T) { e2e.CleanFiles(deployPath, outputFile) - // Create the package with a max size of 1MB + // Create the package with a max size of 20MB stdOut, stdErr, err := e2e.Zarf("package", "create", createPath, "--max-package-size=20", "--confirm") require.NoError(t, err, stdOut, stdErr)