From 4905c3351a88b4da83ce562769d9128391e7ff1b Mon Sep 17 00:00:00 2001 From: Vladimir Palevich Date: Wed, 13 Mar 2024 16:36:35 +0100 Subject: [PATCH 1/3] use host module cache and verify it --- cmd/fetch_repo/module.go | 19 ++- internal/go_repository.bzl | 4 +- internal/go_repository_cache.bzl | 6 + repository.md | 4 +- vendor/golang.org/x/mod/sumdb/dirhash/hash.go | 135 ++++++++++++++++++ vendor/modules.txt | 1 + 6 files changed, 166 insertions(+), 3 deletions(-) create mode 100644 vendor/golang.org/x/mod/sumdb/dirhash/hash.go diff --git a/cmd/fetch_repo/module.go b/cmd/fetch_repo/module.go index 123db3610..3ff2fedc9 100644 --- a/cmd/fetch_repo/module.go +++ b/cmd/fetch_repo/module.go @@ -27,6 +27,8 @@ import ( "path/filepath" "runtime" "strings" + + "golang.org/x/mod/sumdb/dirhash" ) func fetchModule(dest, importpath, version, sum string) error { @@ -114,7 +116,22 @@ func fetchModule(dest, importpath, version, sum string) error { } // Copy the module to the destination. - return copyTree(dest, dl.Dir) + err = copyTree(dest, dl.Dir) + if err != nil { + return fmt.Errorf("failed copying repo: %w", err) + } + + // Verify sum + repoSum, err := dirhash.HashDir(dest, importpath+"@"+version, dirhash.Hash1) + if err != nil { + return fmt.Errorf("failed computing sum: %w", err) + } + + if repoSum != sum { + return fmt.Errorf("resulting module with sum %s; expected sum %s", repoSum, sum) + } + + return nil } func copyTree(destRoot, srcRoot string) error { diff --git a/internal/go_repository.bzl b/internal/go_repository.bzl index 3d0b41a67..82754265a 100644 --- a/internal/go_repository.bzl +++ b/internal/go_repository.bzl @@ -25,7 +25,9 @@ When `go_repository` is in module mode, it saves downloaded modules in a shared, internal cache within Bazel's cache. It may be cleared with `bazel clean --expunge`. By setting the environment variable `GO_REPOSITORY_USE_HOST_CACHE=1`, you can force `go_repository` to use the module cache on the host system in the location -returned by `go env GOPATH`. +returned by `go env GOPATH`. Alternatively, by setting the environment variable +`GO_REPOSITORY_USE_HOST_CACHE=1`, you can force `go_repository` to use only +the module cache on the host system in the location returned by `go env GOMODCACHE`. **Example** diff --git a/internal/go_repository_cache.bzl b/internal/go_repository_cache.bzl index 5f01ccce3..cb3368537 100644 --- a/internal/go_repository_cache.bzl +++ b/internal/go_repository_cache.bzl @@ -39,6 +39,12 @@ def _go_repository_cache_impl(ctx): go_path = str(ctx.path(".")) go_cache = str(ctx.path("gocache")) go_mod_cache = "" + if ctx.os.environ.get("GO_REPOSITORY_USE_HOST_MODCACHE", "") == "1": + extension = executable_extension(ctx) + go_tool = go_root + "/bin/go" + extension + go_mod_cache = read_go_env(ctx, go_tool, "GOMODCACHE") + if not go_mod_cache: + fail("GOMODCACHE must be set when GO_REPOSITORY_USE_HOST_MODCACHE is enabled.") if ctx.os.environ.get("GO_REPOSITORY_USE_HOST_CACHE", "") == "1": extension = executable_extension(ctx) go_tool = go_root + "/bin/go" + extension diff --git a/repository.md b/repository.md index ea7ea2175..31e116555 100644 --- a/repository.md +++ b/repository.md @@ -115,7 +115,9 @@ When `go_repository` is in module mode, it saves downloaded modules in a shared, internal cache within Bazel's cache. It may be cleared with `bazel clean --expunge`. By setting the environment variable `GO_REPOSITORY_USE_HOST_CACHE=1`, you can force `go_repository` to use the module cache on the host system in the location -returned by `go env GOPATH`. +returned by `go env GOPATH`. Alternatively, by setting the environment variable +`GO_REPOSITORY_USE_HOST_CACHE=1`, you can force `go_repository` to use only +the module cache on the host system in the location returned by `go env GOMODCACHE`. **Example** diff --git a/vendor/golang.org/x/mod/sumdb/dirhash/hash.go b/vendor/golang.org/x/mod/sumdb/dirhash/hash.go new file mode 100644 index 000000000..51ec4db87 --- /dev/null +++ b/vendor/golang.org/x/mod/sumdb/dirhash/hash.go @@ -0,0 +1,135 @@ +// Copyright 2018 The Go Authors. All rights reserved. +// Use of this source code is governed by a BSD-style +// license that can be found in the LICENSE file. + +// Package dirhash defines hashes over directory trees. +// These hashes are recorded in go.sum files and in the Go checksum database, +// to allow verifying that a newly-downloaded module has the expected content. +package dirhash + +import ( + "archive/zip" + "crypto/sha256" + "encoding/base64" + "errors" + "fmt" + "io" + "os" + "path/filepath" + "sort" + "strings" +) + +// DefaultHash is the default hash function used in new go.sum entries. +var DefaultHash Hash = Hash1 + +// A Hash is a directory hash function. +// It accepts a list of files along with a function that opens the content of each file. +// It opens, reads, hashes, and closes each file and returns the overall directory hash. +type Hash func(files []string, open func(string) (io.ReadCloser, error)) (string, error) + +// Hash1 is the "h1:" directory hash function, using SHA-256. +// +// Hash1 is "h1:" followed by the base64-encoded SHA-256 hash of a summary +// prepared as if by the Unix command: +// +// sha256sum $(find . -type f | sort) | sha256sum +// +// More precisely, the hashed summary contains a single line for each file in the list, +// ordered by sort.Strings applied to the file names, where each line consists of +// the hexadecimal SHA-256 hash of the file content, +// two spaces (U+0020), the file name, and a newline (U+000A). +// +// File names with newlines (U+000A) are disallowed. +func Hash1(files []string, open func(string) (io.ReadCloser, error)) (string, error) { + h := sha256.New() + files = append([]string(nil), files...) + sort.Strings(files) + for _, file := range files { + if strings.Contains(file, "\n") { + return "", errors.New("dirhash: filenames with newlines are not supported") + } + r, err := open(file) + if err != nil { + return "", err + } + hf := sha256.New() + _, err = io.Copy(hf, r) + r.Close() + if err != nil { + return "", err + } + fmt.Fprintf(h, "%x %s\n", hf.Sum(nil), file) + } + return "h1:" + base64.StdEncoding.EncodeToString(h.Sum(nil)), nil +} + +// HashDir returns the hash of the local file system directory dir, +// replacing the directory name itself with prefix in the file names +// used in the hash function. +func HashDir(dir, prefix string, hash Hash) (string, error) { + files, err := DirFiles(dir, prefix) + if err != nil { + return "", err + } + osOpen := func(name string) (io.ReadCloser, error) { + return os.Open(filepath.Join(dir, strings.TrimPrefix(name, prefix))) + } + return hash(files, osOpen) +} + +// DirFiles returns the list of files in the tree rooted at dir, +// replacing the directory name dir with prefix in each name. +// The resulting names always use forward slashes. +func DirFiles(dir, prefix string) ([]string, error) { + var files []string + dir = filepath.Clean(dir) + err := filepath.Walk(dir, func(file string, info os.FileInfo, err error) error { + if err != nil { + return err + } + if info.IsDir() { + return nil + } else if file == dir { + return fmt.Errorf("%s is not a directory", dir) + } + + rel := file + if dir != "." { + rel = file[len(dir)+1:] + } + f := filepath.Join(prefix, rel) + files = append(files, filepath.ToSlash(f)) + return nil + }) + if err != nil { + return nil, err + } + return files, nil +} + +// HashZip returns the hash of the file content in the named zip file. +// Only the file names and their contents are included in the hash: +// the exact zip file format encoding, compression method, +// per-file modification times, and other metadata are ignored. +func HashZip(zipfile string, hash Hash) (string, error) { + z, err := zip.OpenReader(zipfile) + if err != nil { + return "", err + } + defer z.Close() + var files []string + zfiles := make(map[string]*zip.File) + for _, file := range z.File { + files = append(files, file.Name) + zfiles[file.Name] = file + } + zipOpen := func(name string) (io.ReadCloser, error) { + f := zfiles[name] + if f == nil { + return nil, fmt.Errorf("file %q not found in zip", name) // should never happen + } + return f.Open() + } + return hash(files, zipOpen) +} diff --git a/vendor/modules.txt b/vendor/modules.txt index 55409795f..a38a8c202 100644 --- a/vendor/modules.txt +++ b/vendor/modules.txt @@ -29,6 +29,7 @@ golang.org/x/mod/internal/lazyregexp golang.org/x/mod/modfile golang.org/x/mod/module golang.org/x/mod/semver +golang.org/x/mod/sumdb/dirhash # golang.org/x/sync v0.5.0 ## explicit; go 1.18 golang.org/x/sync/errgroup From d1577854bd9e415dac7202f5c6cd14a5d752a95f Mon Sep 17 00:00:00 2001 From: Vladimir Palevich Date: Wed, 13 Mar 2024 16:49:53 +0100 Subject: [PATCH 2/3] fix BUILD file --- cmd/fetch_repo/BUILD.bazel | 5 ++++- 1 file changed, 4 insertions(+), 1 deletion(-) diff --git a/cmd/fetch_repo/BUILD.bazel b/cmd/fetch_repo/BUILD.bazel index be0864aff..c488904c4 100644 --- a/cmd/fetch_repo/BUILD.bazel +++ b/cmd/fetch_repo/BUILD.bazel @@ -9,7 +9,10 @@ go_library( ], importpath = "github.com/bazelbuild/bazel-gazelle/cmd/fetch_repo", visibility = ["//visibility:private"], - deps = ["@org_golang_x_tools_go_vcs//:vcs"], + deps = [ + "@org_golang_x_mod//sumdb/dirhash", + "@org_golang_x_tools_go_vcs//:vcs", + ], ) go_binary( From b9ad8ede4bcbbb980a23bff0123f5f6f3a316648 Mon Sep 17 00:00:00 2001 From: scaiper Date: Wed, 13 Mar 2024 17:05:24 +0100 Subject: [PATCH 3/3] Apply suggestions from code review Co-authored-by: Fabian Meumertzheim --- internal/go_repository.bzl | 2 +- repository.md | 2 +- 2 files changed, 2 insertions(+), 2 deletions(-) diff --git a/internal/go_repository.bzl b/internal/go_repository.bzl index 82754265a..d7e74f90e 100644 --- a/internal/go_repository.bzl +++ b/internal/go_repository.bzl @@ -26,7 +26,7 @@ internal cache within Bazel's cache. It may be cleared with `bazel clean --expun By setting the environment variable `GO_REPOSITORY_USE_HOST_CACHE=1`, you can force `go_repository` to use the module cache on the host system in the location returned by `go env GOPATH`. Alternatively, by setting the environment variable -`GO_REPOSITORY_USE_HOST_CACHE=1`, you can force `go_repository` to use only +`GO_REPOSITORY_USE_HOST_MODCACHE=1`, you can force `go_repository` to use only the module cache on the host system in the location returned by `go env GOMODCACHE`. **Example** diff --git a/repository.md b/repository.md index 31e116555..b5c2eb926 100644 --- a/repository.md +++ b/repository.md @@ -116,7 +116,7 @@ internal cache within Bazel's cache. It may be cleared with `bazel clean --expun By setting the environment variable `GO_REPOSITORY_USE_HOST_CACHE=1`, you can force `go_repository` to use the module cache on the host system in the location returned by `go env GOPATH`. Alternatively, by setting the environment variable -`GO_REPOSITORY_USE_HOST_CACHE=1`, you can force `go_repository` to use only +`GO_REPOSITORY_USE_HOST_MODCACHE=1`, you can force `go_repository` to use only the module cache on the host system in the location returned by `go env GOMODCACHE`. **Example**