From ba66945126da8d897b7204c2db5ab13315003425 Mon Sep 17 00:00:00 2001 From: Alex Eagle Date: Tue, 3 Oct 2023 13:00:58 -0700 Subject: [PATCH] feat: add a tar toolchain (#468) * feat: add a BSD tar toolchain @thesayyn discovered that it has a feature which should make it a drop-in replacement for pkg_tar including fine-grained file permissions and symlinks: https://man.freebsd.org/cgi/man.cgi?mtree(8) * show example of mtree usage * feat: introduce tar rule * cleanup and get test passing * more cleanup * chore: add support for compress flags * chore: add docs * chore: add docs * feat: implement linux bsdtar toolchain (#566) * chore: improve target naming * WIP: args * feat: generate mtree spec Also allow arbitrary args * refactor: mtree is required * refactor: style nits * fix: support mix of source and generated artifacts * feat: demonstrate strip_prefix * chore: regen docs * fix: make host toolchain a fallback toolchain * fix: include libarchive13.so when installing BSD tar * chore: buildifier * fix: aarch64 cpu constraint * fix(ci): include libarchive13.so when running tar * chore: add libnettle * refactor: inputs mutated less * refactor: remove unneeded substitution arg * refactor: don't advertise unsupported modes * fix: hack enough to make it run on my machine * chore: dynamic libraries included in sh_binary under toolchain * make sh_binary work * refactor: drop arm64 for now * fix toolchain * fix test * chore: improve test naming scheme --------- Co-authored-by: Sahin Yort --- docs/BUILD.bazel | 5 + docs/repositories.md | 19 +++ docs/tar.md | 112 +++++++++++++++ internal_deps.bzl | 3 +- lib/BUILD.bazel | 15 ++ lib/extensions.bzl | 13 ++ lib/private/docs/BUILD.bazel | 13 ++ lib/private/tar.bzl | 131 +++++++++++++++++ lib/private/tar_toolchain.bzl | 263 ++++++++++++++++++++++++++++++++++ lib/repositories.bzl | 27 +++- lib/tar.bzl | 87 +++++++++++ lib/tests/tar/BUILD.bazel | 143 ++++++++++++++++++ lib/tests/tar/asserts.bzl | 29 ++++ lib/tests/tar/src_file | 1 + 
14 files changed, 859 insertions(+), 2 deletions(-) create mode 100644 docs/tar.md create mode 100644 lib/private/tar.bzl create mode 100644 lib/private/tar_toolchain.bzl create mode 100644 lib/tar.bzl create mode 100644 lib/tests/tar/BUILD.bazel create mode 100644 lib/tests/tar/asserts.bzl create mode 100644 lib/tests/tar/src_file diff --git a/docs/BUILD.bazel b/docs/BUILD.bazel index fe8d7d201..e3f37fb6f 100644 --- a/docs/BUILD.bazel +++ b/docs/BUILD.bazel @@ -59,6 +59,11 @@ stardoc_with_diff_test( bzl_library_target = "//lib:lists", ) +stardoc_with_diff_test( + name = "tar", + bzl_library_target = "//lib:tar", +) + stardoc_with_diff_test( name = "utils", bzl_library_target = "//lib:utils", diff --git a/docs/repositories.md b/docs/repositories.md index 18714d107..e760b9ad9 100644 --- a/docs/repositories.md +++ b/docs/repositories.md @@ -117,6 +117,25 @@ Registers jq toolchain and repositories | register | whether to call through to native.register_toolchains. Should be True for WORKSPACE users, but false when used under bzlmod extension | True | + + +## register_tar_toolchains + +
+register_tar_toolchains(name, register)
+
+ +Registers bsdtar toolchain and repositories + +**PARAMETERS** + + +| Name | Description | Default Value | +| :------------- | :------------- | :------------- | +| name | override the prefix for the generated toolchain repositories | "bsd_tar" | +| register | whether to call through to native.register_toolchains. Should be True for WORKSPACE users, but false when used under bzlmod extension | True | + + ## register_yq_toolchains diff --git a/docs/tar.md b/docs/tar.md new file mode 100644 index 000000000..e33da2457 --- /dev/null +++ b/docs/tar.md @@ -0,0 +1,112 @@ + + +General-purpose rule to create tar archives. + +Unlike [pkg_tar from rules_pkg](https://github.com/bazelbuild/rules_pkg/blob/main/docs/latest.md#pkg_tar) +this: + +- Does not depend on any Python interpreter setup +- The "manifest" specification is a mature public API and uses a compact tabular format, fixing + https://github.com/bazelbuild/rules_pkg/pull/238 +- Does not have any custom program to produce the output, instead + we rely on a well-known C++ program called "tar". + Specifically, we use the BSD variant of tar since it provides a means + of controlling mtimes, uid, symlinks, etc. + +We also provide full control for tar'ring binaries including their runfiles. + +TODO: +- Ensure we are reproducible, see https://reproducible-builds.org/docs/archives/ +- Provide convenience for rules_pkg users to re-use or replace pkg_files trees + + + + +## mtree_spec + +
+mtree_spec(name, out, srcs)
+
+ +Create an mtree specification to map a directory hierarchy. See https://man.freebsd.org/cgi/man.cgi?mtree(8) + +**ATTRIBUTES** + + +| Name | Description | Type | Mandatory | Default | +| :------------- | :------------- | :------------- | :------------- | :------------- | +| name | A unique name for this target. | Name | required | | +| out | Resulting specification file to write | Label | optional | | +| srcs | Files that are placed into the tar | List of labels | required | | + + + + +## tar_rule + +
+tar_rule(name, args, compress, mode, mtree, out, srcs)
+
+ +Rule that executes BSD `tar`. Most users should use the [`tar`](#tar) macro, rather than load this directly. + +**ATTRIBUTES** + + +| Name | Description | Type | Mandatory | Default | +| :------------- | :------------- | :------------- | :------------- | :------------- | +| name | A unique name for this target. | Name | required | | +| args | Additional flags permitted by BSD tar; see the man page. | List of strings | optional | [] | +| compress | Compress the archive file with a supported algorithm. | String | optional | "" | +| mode | A mode indicator from the following list, copied from the tar manpage:

- create: Create a new archive containing the specified items. - append: Like create, but new entries are appended to the archive. Note that this only works on uncompressed archives stored in regular files. The -f option is required. - list: List archive contents to stdout. - update: Like append, but new entries are added only if they have a modification date newer than the corresponding entry in the archive. Note that this only works on uncompressed archives stored in regular files. The -f option is required. - extract: Extract to disk from the archive. If a file with the same name appears more than once in the archive, each copy will be extracted, with later copies overwriting (replacing) earlier copies. | String | optional | "create" | +| mtree | An mtree specification file | Label | required | | +| out | Resulting tar file to write. If absent, [name].tar is written. | Label | optional | | +| srcs | Files and directories that are placed into the tar | List of labels | required | | + + + + +## tar + +
+tar(name, mtree, kwargs)
+
+ +Wrapper macro around [`tar_rule`](#tar_rule). + +Options for mtree +----------------- + +mtree provides the "specification" or manifest of a tar file. +See https://man.freebsd.org/cgi/man.cgi?mtree(8) +Because BSD tar doesn't have a flag to set modification times to a constant, +we must always supply an mtree input to get reproducible builds. +See https://reproducible-builds.org/docs/archives/ for more explanation. + +1. By default, mtree is "auto" which causes the macro to create an `mtree` rule. + +2. `mtree` may also be supplied as an array literal of lines, e.g. + +``` +mtree =[ + "usr/bin uid=0 gid=0 mode=0755 type=dir", + "usr/bin/ls uid=0 gid=0 mode=0755 time=0 type=file content={}/a".format(package_name()), +], +``` + +For the format of a line, see "There are four types of lines in a specification" on the man page for BSD mtree, +https://man.freebsd.org/cgi/man.cgi?mtree(8) + +3. `mtree` may be a label of a file containing the specification lines. + + +**PARAMETERS** + + +| Name | Description | Default Value | +| :------------- | :------------- | :------------- | +| name | name of resulting tar_rule | none | +| mtree | "auto", or an array of specification lines, or a label of a file that contains the lines. | "auto" | +| kwargs | additional named parameters to pass to tar_rule | none | + + diff --git a/internal_deps.bzl b/internal_deps.bzl index 6e73a1f8f..f87af07ed 100644 --- a/internal_deps.bzl +++ b/internal_deps.bzl @@ -4,7 +4,7 @@ Users should *not* need to install these. If users see a load() statement from these, that's a bug in our distribution. 
""" -load("//lib:repositories.bzl", "register_coreutils_toolchains", "register_jq_toolchains", "register_yq_toolchains") +load("//lib:repositories.bzl", "register_coreutils_toolchains", "register_jq_toolchains", "register_tar_toolchains", "register_yq_toolchains") load("//lib:utils.bzl", http_archive = "maybe_http_archive") # buildifier: disable=unnamed-macro @@ -73,3 +73,4 @@ def bazel_lib_internal_deps(): register_jq_toolchains() register_yq_toolchains() register_coreutils_toolchains() + register_tar_toolchains() diff --git a/lib/BUILD.bazel b/lib/BUILD.bazel index 3a525cce7..4f29dc34f 100644 --- a/lib/BUILD.bazel +++ b/lib/BUILD.bazel @@ -79,6 +79,10 @@ toolchain_type( name = "expand_template_toolchain_type", ) +toolchain_type( + name = "tar_toolchain_type", +) + bzl_library( name = "docs", srcs = ["docs.bzl"], @@ -125,6 +129,16 @@ bzl_library( deps = ["//lib/private/docs:utils"], ) +bzl_library( + name = "tar", + srcs = ["tar.bzl"], + deps = [ + "//lib/private/docs:tar", + "@bazel_skylib//lib:types", + "@bazel_skylib//rules:write_file", + ], +) + bzl_library( name = "jq", srcs = ["jq.bzl"], @@ -260,6 +274,7 @@ bzl_library( "//lib/private/docs:jq_toolchain", "//lib/private/docs:local_config_platform", "//lib/private/docs:source_toolchains_repo", + "//lib/private/docs:tar_toolchain", "//lib/private/docs:yq_toolchain", ], ) diff --git a/lib/extensions.bzl b/lib/extensions.bzl index 45aac6ace..7a8dd077d 100644 --- a/lib/extensions.bzl +++ b/lib/extensions.bzl @@ -9,6 +9,7 @@ load( "DEFAULT_EXPAND_TEMPLATE_REPOSITORY", "DEFAULT_JQ_REPOSITORY", "DEFAULT_JQ_VERSION", + "DEFAULT_TAR_REPOSITORY", "DEFAULT_YQ_REPOSITORY", "DEFAULT_YQ_VERSION", "register_copy_directory_toolchains", @@ -16,6 +17,7 @@ load( "register_coreutils_toolchains", "register_expand_template_toolchains", "register_jq_toolchains", + "register_tar_toolchains", "register_yq_toolchains", ) load("//lib/private:extension_utils.bzl", "extension_utils") @@ -27,6 +29,7 @@ def _toolchain_extension(mctx): 
register_jq_toolchains(register = False) register_yq_toolchains(register = False) register_coreutils_toolchains(register = False) + register_tar_toolchains(register = False) register_expand_template_toolchains(register = False) create_host_repo = False @@ -101,6 +104,15 @@ def _toolchains_extension_impl(mctx): toolchain_repos_fn = lambda name, version: register_coreutils_toolchains(name = name, version = version, register = False), ) + extension_utils.toolchain_repos_bfs( + mctx = mctx, + get_tag_fn = lambda tags: tags.tar, + toolchain_name = "tar", + default_repository = DEFAULT_TAR_REPOSITORY, + toolchain_repos_fn = lambda name, version: register_tar_toolchains(name = name, register = False), + get_version_fn = lambda attr: None, + ) + extension_utils.toolchain_repos_bfs( mctx = mctx, get_tag_fn = lambda tags: tags.expand_template, @@ -117,6 +129,7 @@ toolchains = module_extension( "jq": tag_class(attrs = {"name": attr.string(default = DEFAULT_JQ_REPOSITORY), "version": attr.string(default = DEFAULT_JQ_VERSION)}), "yq": tag_class(attrs = {"name": attr.string(default = DEFAULT_YQ_REPOSITORY), "version": attr.string(default = DEFAULT_YQ_VERSION)}), "coreutils": tag_class(attrs = {"name": attr.string(default = DEFAULT_COREUTILS_REPOSITORY), "version": attr.string(default = DEFAULT_COREUTILS_VERSION)}), + "tar": tag_class(attrs = {"name": attr.string(default = DEFAULT_TAR_REPOSITORY)}), "expand_template": tag_class(attrs = {"name": attr.string(default = DEFAULT_EXPAND_TEMPLATE_REPOSITORY)}), }, ) diff --git a/lib/private/docs/BUILD.bazel b/lib/private/docs/BUILD.bazel index cbb505fdd..dfa64f475 100644 --- a/lib/private/docs/BUILD.bazel +++ b/lib/private/docs/BUILD.bazel @@ -121,6 +121,13 @@ bzl_library( ], ) +bzl_library( + name = "tar", + srcs = [ + "//lib/private:tar.bzl", + ], +) + bzl_library( name = "utils", srcs = [ @@ -211,6 +218,12 @@ bzl_library( deps = [":repo_utils"], ) +bzl_library( + name = "tar_toolchain", + srcs = ["//lib/private:tar_toolchain.bzl"], + 
deps = [":repo_utils"], +) + bzl_library( name = "repo_utils", srcs = ["//lib/private:repo_utils.bzl"], diff --git a/lib/private/tar.bzl b/lib/private/tar.bzl new file mode 100644 index 000000000..bc581e956 --- /dev/null +++ b/lib/private/tar.bzl @@ -0,0 +1,131 @@ +"Implementation of tar rule" +_tar_attrs = { + "args": attr.string_list( + doc = "Additional flags permitted by BSD tar; see the man page.", + ), + "srcs": attr.label_list( + doc = "Files and directories that are placed into the tar", + mandatory = True, + allow_files = True, + ), + "mode": attr.string( + doc = """A mode indicator from the following list, copied from the tar manpage: + + - create: Create a new archive containing the specified items. + - append: Like `create`, but new entries are appended to the archive. + Note that this only works on uncompressed archives stored in regular files. + The -f option is required. + - list: List archive contents to stdout. + - update: Like `append`, but new entries are added only if they have a + modification date newer than the corresponding entry in the archive. + Note that this only works on uncompressed archives stored in + regular files. The -f option is required. + - extract: Extract to disk from the archive. If a file with the same name + appears more than once in the archive, each copy will be extracted, + with later copies overwriting (replacing) earlier copies. + """, + values = ["create"], # TODO: support other modes: ["append", "list", "update", "extract"] + default = "create", + ), + "mtree": attr.label( + doc = "An mtree specification file", + allow_single_file = True, + # Mandatory since it's the only way to set constant timestamps + mandatory = True, + ), + "out": attr.output( + doc = "Resulting tar file to write. 
If absent, `[name].tar` is written.", + ), + "compress": attr.string( + doc = "Compress the archive file with a supported algorithm.", + values = ["bzip2", "compress", "gzip", "lrzip", "lz4", "lzma", "lzop", "xz", "zstd"], + ), +} + +_mtree_attrs = { + "srcs": attr.label_list(doc = "Files that are placed into the tar", mandatory = True, allow_files = True), + "out": attr.output(doc = "Resulting specification file to write"), +} + +def _add_compress_options(compress, args): + if compress == "bzip2": + args.add("--bzip2") + if compress == "compress": + args.add("--compress") + if compress == "gzip": + args.add("--gzip") + if compress == "lrzip": + args.add("--lrzip") + if compress == "lzma": + args.add("--lzma") + if compress == "lz4": + args.add("--lz4") + if compress == "lzop": + args.add("--lzop") + if compress == "xz": + args.add("--xz") + if compress == "zstd": + args.add("--zstd") + +def _tar_impl(ctx): + bsdtar = ctx.toolchains["@aspect_bazel_lib//lib:tar_toolchain_type"] + inputs = ctx.files.srcs[:] + args = ctx.actions.args() + + # Set mode + args.add("--" + ctx.attr.mode) + + # User-provided args first + args.add_all(ctx.attr.args) + + # Compression args + _add_compress_options(ctx.attr.compress, args) + + out = ctx.outputs.out or ctx.actions.declare_file(ctx.attr.name + ".tar") + args.add_all(["--file", out.path]) + + args.add("@" + ctx.file.mtree.path) + inputs.append(ctx.file.mtree) + + ctx.actions.run( + executable = bsdtar.tarinfo.binary, + inputs = depset(direct = inputs, transitive = [bsdtar.default.files]), + outputs = [out], + arguments = [args], + mnemonic = "Tar", + ) + + return DefaultInfo(files = depset([out]), runfiles = ctx.runfiles([out])) + +def _mtree_line(file, uid = "0", gid = "0", time = "1672560000", mode = "0755"): + return " ".join([ + file.short_path, + "uid=" + uid, + "gid=" + gid, + "time=" + time, + "mode=" + mode, + "type=" + ("dir" if file.is_directory else "file"), + "content=" + file.path, + ]) + +def _mtree_impl(ctx): + 
specification = [] + out = ctx.outputs.out or ctx.actions.declare_file(ctx.attr.name + ".spec") + for s in ctx.files.srcs: + specification.append(_mtree_line(s)) + ctx.actions.write(out, "\n".join(specification + [""])) + return DefaultInfo(files = depset([out]), runfiles = ctx.runfiles([out])) + +tar_lib = struct( + attrs = _tar_attrs, + implementation = _tar_impl, + mtree_attrs = _mtree_attrs, + mtree_implementation = _mtree_impl, +) + +tar = rule( + doc = "Rule that executes BSD `tar`. Most users should use the [`tar`](#tar) macro, rather than load this directly.", + implementation = tar_lib.implementation, + attrs = tar_lib.attrs, + toolchains = ["@aspect_bazel_lib//lib:tar_toolchain_type"], +) diff --git a/lib/private/tar_toolchain.bzl b/lib/private/tar_toolchain.bzl new file mode 100644 index 000000000..6ab88cf50 --- /dev/null +++ b/lib/private/tar_toolchain.bzl @@ -0,0 +1,263 @@ +"Provide access to a BSD tar" + +load(":repo_utils.bzl", "repo_utils") + +BSDTAR_PLATFORMS = { + "linux_amd64": struct( + compatible_with = [ + "@platforms//os:linux", + "@platforms//cpu:x86_64", + ], + ), + # TODO(alexeagle): download from somewhere + "linux_arm64": struct( + compatible_with = [ + "@platforms//os:linux", + "@platforms//cpu:aarch64", + ], + ), + # TODO(alexeagle): download from libarchive github releases. + "windows_amd64": struct( + release_platform = "win64", + compatible_with = [ + "@platforms//os:windows", + "@platforms//cpu:x86_64", + ], + ), + # WARNING: host toolchain should always come last to make it a fallback toolchain. + "host": struct( + # loaded by the macro + compatible_with = "HOST_CONSTRAINTS", + ), +} + +# note, using Ubuntu Focal packages as they link with older glibc versions. 
+# Ubuntu Jammy packages will fail on Ubuntu 20.04 with +# bsdtar: /lib/x86_64-linux-gnu/libc.so.6: version `GLIBC_2.33' not found +# bsdtar: /lib/x86_64-linux-gnu/libc.so.6: version `GLIBC_2.34' not found +# +# TODO: this is only a partial listing of the transitive deps of libarchive-tools +# so we expect a bunch of compress modes are broken, for example. + +LINUX_LIB_DEPS = { + "linux_arm64": [ + ( + "6d18525e248e84b8a4ee39a226fd1195ca9b9d0d5a1c7909ae4f997d46378848", + "http://ports.ubuntu.com/pool/main/n/nettle/libnettle7_3.5.1+really3.5.1-2ubuntu0.2_arm64.deb", + ), + ( + "aa5e31d05a9d6bde8093137bd1c82b5a20a5f470bd5109642014f895c20f323a", + "http://ports.ubuntu.com/pool/main/liba/libarchive/libarchive13_3.4.0-2ubuntu1_arm64.deb", + ), + ( + "6d089f878507b536d8ca51b1ad80a80706a1dd7dbbcce7600800d3f9f98be2ab", + "http://ports.ubuntu.com/pool/main/liba/libarchive/libarchive-tools_3.2.1-2~ubuntu16.04.1_arm64.deb", + ), + ( + "6242892cb032859044ddfcfbe61bac5678a95c585d8fff4525acaf45512e3d39", + "http://ports.ubuntu.com/pool/main/libx/libxml2/libxml2_2.9.10+dfsg-5_arm64.deb", + ), + ( + "6302e309ab002af30ddfa0d68de26c68f7c034ed2f45b1d97a712bff1a03999a", + "http://ports.ubuntu.com/pool/main/i/icu/libicu66_66.1-2ubuntu2_arm64.deb", + ), + ], + "linux_amd64": [ + # https://packages.ubuntu.com/focal/amd64/libarchive-tools/download + ( + "12a19878d34b407e6f4893d3b26b7758a26c5534a066d76184c8b764b2df1652", + "http://security.ubuntu.com/ubuntu/pool/universe/liba/libarchive/libarchive-tools_3.4.0-2ubuntu1.2_amd64.deb", + ), + # https://packages.ubuntu.com/focal/amd64/libarchive13/download + ( + "8ba7507f61bb3ea8da488702ec0badcbfb726d36ea6886e3421ac59082aaf2d1", + "http://security.ubuntu.com/ubuntu/pool/main/liba/libarchive/libarchive13_3.4.0-2ubuntu1.2_amd64.deb", + ), + # https://packages.ubuntu.com/focal/amd64/libnettle7/download + ( + "3496aed83407fde71e0dc5988b28e8fd7f07a2f27fcf3e0f214c7cd86667eecd", +
"http://security.ubuntu.com/ubuntu/pool/main/n/nettle/libnettle7_3.5.1+really3.5.1-2ubuntu0.2_amd64.deb", + ), + # https://packages.ubuntu.com/focal/amd64/libxml2/download + ( + "a8cbd10a0d74ff8ec43a7e6c09ad07629f20efea9972799d9ff7f63c4e82bfcf", + "http://security.ubuntu.com/ubuntu/pool/main/libx/libxml2/libxml2_2.9.10+dfsg-5ubuntu0.20.04.6_amd64.deb", + ), + # https://packages.ubuntu.com/focal/amd64/libicu66/download + ( + "00d0de456134668f41bd9ea308a076bc0a6a805180445af8a37209d433f41efe", + "http://security.ubuntu.com/ubuntu/pool/main/i/icu/libicu66_66.1-2ubuntu2.1_amd64.deb", + ), + ], +} + +def _find_usable_system_tar(rctx, tar_name): + tar = rctx.which(tar_name) + if not tar: + fail("tar not found on PATH, and we don't handle this case yet") + + # Run tar --version and see if we are satisfied to use it + tar_version = rctx.execute([tar, "--version"]).stdout.strip() + + # TODO: also check if it's really ancient or compiled without gzip support or something? + # TODO: document how users could fetch the source and compile it themselves + if tar_version.find("bsdtar") >= 0: + return tar + + fail("tar isn't a BSD tar") + +def _bsdtar_binary_repo(rctx): + tar_name = "tar.exe" if repo_utils.is_windows(rctx) else "tar" + build_header = """\ +# @generated by @aspect_bazel_lib//lib/private:tar_toolchain.bzl + +load("@aspect_bazel_lib//lib/private:tar_toolchain.bzl", "tar_toolchain") + +package(default_visibility = ["//visibility:public"]) + +""" + + # On MacOS, the system `tar` binary on the PATH should already work + if rctx.attr.platform == "host": + tar = _find_usable_system_tar(rctx, tar_name) + output = rctx.path(tar_name) + rctx.symlink(tar, output) + rctx.file("BUILD.bazel", build_header + """tar_toolchain(name = "bsdtar_toolchain", binary = "tar")""") + return + + # Other platforms, we have more work to do. 
+ libs_dir = "usr/lib/x86_64-linux-gnu" if rctx.attr.platform.endswith("amd64") else "usr/lib/aarch64-linux-gnu" + + # TODO: windows + + for lib in LINUX_LIB_DEPS[rctx.attr.platform]: + rctx.download_and_extract( + url = lib[1], + type = "deb", + sha256 = lib[0], + ) + rctx.extract("data.tar.xz") + + rctx.file("bsdtar.sh", """#!/usr/bin/env bash +readonly wksp="$(dirname "${{BASH_SOURCE[0]}}")" +LD_LIBRARY_PATH="$wksp/{libs_dir}" exec "$wksp/usr/bin/bsdtar" "$@" +""".format(name = rctx.name, libs_dir = libs_dir)) + + rctx.file("BUILD.bazel", build_header + """\ +tar_toolchain( + name = "bsdtar_toolchain", + files = glob(["{libs}/*.so.*"]) + ["usr/bin/bsdtar"], + binary = "bsdtar.sh", + visibility = ["//visibility:public"], +) +""".format(libs = libs_dir, name = rctx.name)) + +bsdtar_binary_repo = repository_rule( + implementation = _bsdtar_binary_repo, + attrs = { + "platform": attr.string(mandatory = True, values = BSDTAR_PLATFORMS.keys()), + }, +) + +TarInfo = provider( + doc = "Provide info for executing BSD tar", + fields = { + "binary": "bsdtar executable", + }, +) + +def _tar_toolchain_impl(ctx): + binary = ctx.executable.binary + + # Make the $(BSDTAR_BIN) variable available in places like genrules. + # See https://docs.bazel.build/versions/main/be/make-variables.html#custom_variables + template_variables = platform_common.TemplateVariableInfo({ + "BSDTAR_BIN": binary.path, + }) + + default_info = DefaultInfo( + files = depset(ctx.files.binary + ctx.files.files), + ) + tarinfo = TarInfo( + binary = binary, + ) + + # Export all the providers inside our ToolchainInfo + # so the resolved_toolchain rule can grab and re-export them.
+ toolchain_info = platform_common.ToolchainInfo( + tarinfo = tarinfo, + template_variables = template_variables, + default = default_info, + ) + + return [toolchain_info, template_variables, default_info] + +tar_toolchain = rule( + implementation = _tar_toolchain_impl, + attrs = { + "binary": attr.label( + doc = "a command to find on the system path", + allow_files = True, + executable = True, + cfg = "exec", + ), + "files": attr.label_list(allow_files = True), + }, +) + +def _tar_toolchains_repo_impl(rctx): + # Expose a concrete toolchain which is the result of Bazel resolving the toolchain + # for the execution or target platform. + # Workaround for https://github.com/bazelbuild/bazel/issues/14009 + starlark_content = """\ +# @generated by @aspect_bazel_lib//lib/private:tar_toolchain.bzl + +# Forward all the providers +def _resolved_toolchain_impl(ctx): + toolchain_info = ctx.toolchains["@aspect_bazel_lib//lib:tar_toolchain_type"] + return [ + toolchain_info, + toolchain_info.default, + toolchain_info.tarinfo, + toolchain_info.template_variables, + ] + +# Copied from java_toolchain_alias +# https://cs.opensource.google/bazel/bazel/+/master:tools/jdk/java_toolchain_alias.bzl +resolved_toolchain = rule( + implementation = _resolved_toolchain_impl, + toolchains = ["@aspect_bazel_lib//lib:tar_toolchain_type"], + incompatible_use_toolchain_transition = True, +) +""" + rctx.file("defs.bzl", starlark_content) + + build_content = """# @generated by @aspect_bazel_lib//lib/private:tar_toolchain.bzl +load(":defs.bzl", "resolved_toolchain") +load("@local_config_platform//:constraints.bzl", "HOST_CONSTRAINTS") + +resolved_toolchain(name = "resolved_toolchain", visibility = ["//visibility:public"])""" + + for [platform, meta] in BSDTAR_PLATFORMS.items(): + build_content += """ +toolchain( + name = "{platform}_toolchain", + exec_compatible_with = {compatible_with}, + toolchain = "@{user_repository_name}_{platform}//:bsdtar_toolchain", + toolchain_type = 
"@aspect_bazel_lib//lib:tar_toolchain_type", +) +""".format( + platform = platform, + user_repository_name = rctx.attr.user_repository_name, + compatible_with = meta.compatible_with, + ) + + rctx.file("BUILD.bazel", build_content) + +tar_toolchains_repo = repository_rule( + _tar_toolchains_repo_impl, + doc = """Creates a repository that exposes a tar_toolchain_type target.""", + attrs = { + "user_repository_name": attr.string(doc = "Base name for toolchains repository"), + }, +) diff --git a/lib/repositories.bzl b/lib/repositories.bzl index 9f013f68f..5409a4c56 100644 --- a/lib/repositories.bzl +++ b/lib/repositories.bzl @@ -8,6 +8,7 @@ load("//lib/private:expand_template_toolchain.bzl", "EXPAND_TEMPLATE_PLATFORMS", load("//lib/private:jq_toolchain.bzl", "JQ_PLATFORMS", "jq_host_alias_repo", "jq_platform_repo", "jq_toolchains_repo", _DEFAULT_JQ_VERSION = "DEFAULT_JQ_VERSION") load("//lib/private:local_config_platform.bzl", "local_config_platform") load("//lib/private:source_toolchains_repo.bzl", "source_toolchains_repo") +load("//lib/private:tar_toolchain.bzl", "BSDTAR_PLATFORMS", "bsdtar_binary_repo", "tar_toolchains_repo") load("//lib/private:yq_toolchain.bzl", "YQ_PLATFORMS", "yq_host_alias_repo", "yq_platform_repo", "yq_toolchains_repo", _DEFAULT_YQ_VERSION = "DEFAULT_YQ_VERSION") load("//tools:version.bzl", "VERSION") @@ -45,8 +46,9 @@ def aspect_bazel_lib_dependencies(override_local_config_platform = False): # Always register the expand_template toolchain register_expand_template_toolchains() - # Always register the coreutils toolchain + # Always register the coreutils toolchain and the tar toolchain register_coreutils_toolchains() + register_tar_toolchains() # Re-export the default versions DEFAULT_JQ_VERSION = _DEFAULT_JQ_VERSION @@ -107,6 +109,29 @@ def register_yq_toolchains(name = DEFAULT_YQ_REPOSITORY, version = DEFAULT_YQ_VE user_repository_name = name, ) +DEFAULT_TAR_REPOSITORY = "bsd_tar" + +def register_tar_toolchains(name = DEFAULT_TAR_REPOSITORY, 
register = True): + """Registers bsdtar toolchain and repositories + + Args: + name: override the prefix for the generated toolchain repositories + register: whether to call through to native.register_toolchains. + Should be True for WORKSPACE users, but false when used under bzlmod extension + """ + for [platform, meta] in BSDTAR_PLATFORMS.items(): + bsdtar_binary_repo( + name = "%s_%s" % (name, platform), + platform = platform, + ) + if register: + native.register_toolchains("@%s_toolchains//:%s_toolchain" % (name, platform)) + + tar_toolchains_repo( + name = "%s_toolchains" % name, + user_repository_name = name, + ) + DEFAULT_COREUTILS_REPOSITORY = "coreutils" def register_coreutils_toolchains(name = DEFAULT_COREUTILS_REPOSITORY, version = DEFAULT_COREUTILS_VERSION, register = True): diff --git a/lib/tar.bzl b/lib/tar.bzl new file mode 100644 index 000000000..3c169f85c --- /dev/null +++ b/lib/tar.bzl @@ -0,0 +1,87 @@ +"""General-purpose rule to create tar archives. + +Unlike [pkg_tar from rules_pkg](https://github.com/bazelbuild/rules_pkg/blob/main/docs/latest.md#pkg_tar) +this: + +- Does not depend on any Python interpreter setup +- The "manifest" specification is a mature public API and uses a compact tabular format, fixing + https://github.com/bazelbuild/rules_pkg/pull/238 +- Does not have any custom program to produce the output, instead + we rely on a well-known C++ program called "tar". + Specifically, we use the BSD variant of tar since it provides a means + of controlling mtimes, uid, symlinks, etc. + +We also provide full control for tar'ring binaries including their runfiles. 
+ +TODO: +- Ensure we are reproducible, see https://reproducible-builds.org/docs/archives/ +- Provide convenience for rules_pkg users to re-use or replace pkg_files trees +""" + +load("@bazel_skylib//lib:types.bzl", "types") +load("@bazel_skylib//rules:write_file.bzl", "write_file") +load("//lib/private:tar.bzl", "tar_lib", _tar = "tar") + +mtree_spec = rule( + doc = "Create an mtree specification to map a directory hierarchy. See https://man.freebsd.org/cgi/man.cgi?mtree(8)", + implementation = tar_lib.mtree_implementation, + attrs = tar_lib.mtree_attrs, +) + +tar_rule = _tar + +def tar(name, mtree = "auto", **kwargs): + """Wrapper macro around [`tar_rule`](#tar_rule). + + Options for mtree + ----------------- + + mtree provides the "specification" or manifest of a tar file. + See https://man.freebsd.org/cgi/man.cgi?mtree(8) + Because BSD tar doesn't have a flag to set modification times to a constant, + we must always supply an mtree input to get reproducible builds. + See https://reproducible-builds.org/docs/archives/ for more explanation. + + 1. By default, mtree is "auto" which causes the macro to create an `mtree` rule. + + 2. `mtree` may also be supplied as an array literal of lines, e.g. + + ``` + mtree =[ + "usr/bin uid=0 gid=0 mode=0755 type=dir", + "usr/bin/ls uid=0 gid=0 mode=0755 time=0 type=file content={}/a".format(package_name()), + ], + ``` + + For the format of a line, see "There are four types of lines in a specification" on the man page for BSD mtree, + https://man.freebsd.org/cgi/man.cgi?mtree(8) + + 3. `mtree` may be a label of a file containing the specification lines. + + Args: + name: name of resulting `tar_rule` + mtree: "auto", or an array of specification lines, or a label of a file that contains the lines. 
+ **kwargs: additional named parameters to pass to `tar_rule` + """ + mtree_target = "_{}.mtree".format(name) + if mtree == "auto": + mtree_spec( + name = mtree_target, + srcs = kwargs["srcs"], + out = "{}.txt".format(mtree_target), + ) + elif types.is_list(mtree): + write_file( + name = mtree_target, + out = "{}.txt".format(mtree_target), + # Ensure there's a trailing newline, as bsdtar will ignore a last line without one + content = mtree + [""], + ) + else: + mtree_target = mtree + + tar_rule( + name = name, + mtree = mtree_target, + **kwargs + ) diff --git a/lib/tests/tar/BUILD.bazel b/lib/tests/tar/BUILD.bazel new file mode 100644 index 000000000..3dd3da855 --- /dev/null +++ b/lib/tests/tar/BUILD.bazel @@ -0,0 +1,143 @@ +load("@aspect_bazel_lib//lib:tar.bzl", "mtree_spec", "tar") +load("@aspect_bazel_lib//lib:testing.bzl", "assert_archive_contains") +load("@bazel_skylib//rules:write_file.bzl", "write_file") +load(":asserts.bzl", "assert_tar_listing") + +write_file( + name = "fixture1", + out = "a", + content = ["hello a"], +) + +# Case 1: Show that you can run any `tar` command you like, using a genrule. +genrule( + name = "tar_genrule", + srcs = [ + ":fixture1", + "src_file", + ], + outs = ["1.tar"], + cmd = "$(BSDTAR_BIN) --create --dereference --file $@ -s '#$(BINDIR)##' $(execpath :fixture1) $(execpath src_file)", + toolchains = ["@bsd_tar_toolchains//:resolved_toolchain"], +) + +assert_archive_contains( + name = "test_genrule", + archive = "1.tar", + expected = [ + "lib/tests/tar/a", + "lib/tests/tar/src_file", + ], +) + +# Case 2: demonstrate using a custom mtree formatted specification. 
+# Copied from the output of `man tar`: +# An input file in mtree(5) format can be used to create an output +# archive with arbitrary ownership, permissions, or names that differ +# from existing data on disk: +# $ cat input.mtree +# #mtree +# usr/bin uid=0 gid=0 mode=0755 type=dir +# usr/bin/ls uid=0 gid=0 mode=0755 type=file content=myls +# $ tar -cvf output.tar @input.mtree +tar( + name = "tar_custom_mtree", + srcs = ["src_file"], + mtree = [ + "usr/bin uid=0 gid=0 mode=0755 time=1672560000 type=dir", + "usr/bin/ls uid=0 gid=0 mode=0755 time=1672560000 type=file content={}/src_file".format(package_name()), + ], +) + +assert_tar_listing( + name = "test_custom_mtree", + actual = "tar_custom_mtree", + expected = [ + "drwxr-xr-x 0 0 0 0 Jan 1 2023 usr/bin/", + "-rwxr-xr-x 0 0 0 21 Jan 1 2023 usr/bin/ls", + ], +) + +# Case 3: gzip compression, selected with the `compress` attribute +tar( + name = "tar_compress", + srcs = ["a"], + out = "3.tgz", + compress = "gzip", +) + +assert_archive_contains( + name = "test_compress", + archive = "3.tgz", + expected = ["lib/tests/tar/a"], + type = "tar", +) + +# Case 4: permit arbitrary flags, passed through the `args` attribute +write_file( + name = "fixture4", + out = ".git", + content = ["it's a folder"], +) + +tar( + name = "tar_flags", + srcs = [ + ".git", + "a", + "src_file", + ], + out = "4.tar", + # Due to this argument, .git should not appear in the resulting tar + args = ["--exclude-vcs"], +) + +assert_tar_listing( + name = "test_flags", + actual = "tar_flags", + expected = [ + "-rwxr-xr-x 0 0 0 7 Jan 1 2023 lib/tests/tar/a", + "-rwxr-xr-x 0 0 0 21 Jan 1 2023 lib/tests/tar/src_file", + ], +) + +# Case 5: strip_prefix, by rewriting a generated mtree specification +_SRCS5 = [ + ":fixture1", + "src_file", +] + +mtree_spec( + name = "mtree5", + srcs = _SRCS5, +) + +# This is a very simple way to mutate the mtree specification, just using a regex. +# In theory, this can be used for arbitrary replacements such as setting mode=0644, +# but we'll probably have to add a richer API to the mtree_spec rule to make this more ergonomic. 
+genrule( + name = "strip_prefix", + srcs = ["mtree5"], + outs = ["mtree5.stripped"], + # Strip the leading package path from lines that start with it, e.g. + # lib/tests/tar/a uid=0 gid=0 time=1672560000 mode=0755 type=file content=bazel-out/darwin_arm64-opt/bin/lib/tests/tar/a + # -> + # a uid=0 gid=0 time=1672560000 mode=0755 type=file content=bazel-out/darwin_arm64-opt/bin/lib/tests/tar/a + cmd = "sed s#^{}/## <$< >$@".format(package_name()), +) + +tar( + name = "tar_strip_prefix", + srcs = _SRCS5, + out = "5.tar", + mtree = "strip_prefix", +) + +assert_tar_listing( + name = "test_strip_prefix", + actual = "tar_strip_prefix", + expected = [ + "-rwxr-xr-x 0 0 0 7 Jan 1 2023 a", + "-rwxr-xr-x 0 0 0 21 Jan 1 2023 src_file", + ], +) diff --git a/lib/tests/tar/asserts.bzl b/lib/tests/tar/asserts.bzl new file mode 100644 index 000000000..c0d9e6fff --- /dev/null +++ b/lib/tests/tar/asserts.bzl @@ -0,0 +1,29 @@ +"Make shorter assertions: `assert_tar_listing` diffs the `bsdtar -tvf` listing of a tar against expected lines" + +load("@bazel_skylib//rules:diff_test.bzl", "diff_test") +load("@bazel_skylib//rules:write_file.bzl", "write_file") + +# buildifier: disable=function-docstring +def assert_tar_listing(name, actual, expected): + actual_listing = "_{}_listing".format(name) + expected_listing = "_{}_expected".format(name) + + native.genrule( + name = actual_listing, + srcs = [actual], + outs = ["_{}.listing".format(name)], + cmd = "$(BSDTAR_BIN) -tvf $(execpath {}) >$@".format(actual), + toolchains = ["@bsd_tar_toolchains//:resolved_toolchain"], + ) + + write_file( + name = expected_listing, + out = "_{}.expected".format(name), + content = expected + [""], + ) + + diff_test( + name = name, + file1 = actual_listing, + file2 = expected_listing, + ) diff --git a/lib/tests/tar/src_file b/lib/tests/tar/src_file new file mode 100644 index 000000000..9f0bde3ad --- /dev/null +++ b/lib/tests/tar/src_file @@ -0,0 +1 @@ +contents of src_file