From 0ec7a314ed1dfd91cc0b01d0424d2d5838cf524e Mon Sep 17 00:00:00 2001 From: Conor Schaefer Date: Mon, 27 Jul 2020 11:25:03 -0700 Subject: [PATCH 1/4] Infers PKG_VERSION from GitHub releases If no PKG_VERSION is set, then assume we're trying to build from the very latest. --- scripts/build-debianpackage | 22 ++++++++++++++++++---- 1 file changed, 18 insertions(+), 4 deletions(-) diff --git a/scripts/build-debianpackage b/scripts/build-debianpackage index 333e5412..b1a5fef3 100755 --- a/scripts/build-debianpackage +++ b/scripts/build-debianpackage @@ -27,6 +27,8 @@ TOP_BUILDDIR="$HOME/debbuild/packaging" mkdir -p "$TOP_BUILDDIR" rm -rf "${TOP_BUILDDIR:?}/${PKG_NAME}" mkdir -p "${TOP_BUILDDIR}/${PKG_NAME}" +# Move changelog into place (we have separate changelogs for each platform) +PLATFORM="$(lsb_release -sc)" # Validate required args. if [[ -z "${PKG_NAME:-}" ]]; then @@ -34,9 +36,23 @@ if [[ -z "${PKG_NAME:-}" ]]; then exit 1 fi + +function find_latest_version() { + repo_url="https://github.com/freedomofpress/${PKG_NAME}/releases" + curl -s "$repo_url" \ + | perl -nE '$_ =~ m#/releases/tag/(v?[\d\.]+)\"# and say $1' \ + | head -n 1 +} + if [[ -z "${PKG_VERSION:-}" ]]; then - echo "Set PKG_VERSION of the build"; - exit 1 + echo "PKG_VERSION not set, inferring from recent releases..." 
+ PKG_VERSION="$(find_latest_version)" + if [[ -z "$PKG_VERSION" ]]; then + echo "Failed to infer version" + exit 1 + else + echo "Using PKG_VERSION: $PKG_VERSION" + fi fi # Copy over the debian directory (including new changelog) from repo @@ -80,8 +96,6 @@ fi printf "Building package '%s' from version '%s'...\\n" "$PKG_NAME" "$PKG_VERSION" -# Move changelog into place (we have separate changelogs for each platform) -PLATFORM="$(lsb_release -sc)" echo "$TOP_BUILDDIR/$PKG_NAME/" mv "$TOP_BUILDDIR/$PKG_NAME/debian/changelog-$PLATFORM" "$TOP_BUILDDIR/$PKG_NAME/debian/changelog" From c26cde770bef5f57716ce3c9b62badba6799f58c Mon Sep 17 00:00:00 2001 From: Conor Schaefer Date: Mon, 27 Jul 2020 15:38:54 -0700 Subject: [PATCH 2/4] Infers PKG_PATH by building tarball from source If no PKG_PATH is set, then let's build it from source, using the upstream package repository. For Python projects, this amounts to: 1. Cloning the repo 2. Verifying the tag for a specific version 3. Checking out that tag 4. Running 'python setup.py sdist' to build tarball Once that's done, we can pass that tarball to the Debian package build logic. The tarball is not byte-for-byte identical after multiple builds, due to metadata discrepancies such as timestamps, but Debian package build logic *is* reproducible, given its support for SOURCE_DATE_EPOCH. So, even when using a newly built tarball with slightly different timestamps, rebuilding the same package will yield an identical checksum. 
--- scripts/build-debianpackage | 16 ++++++++++++++-- 1 file changed, 14 insertions(+), 2 deletions(-) diff --git a/scripts/build-debianpackage b/scripts/build-debianpackage index b1a5fef3..c5723f64 100755 --- a/scripts/build-debianpackage +++ b/scripts/build-debianpackage @@ -58,14 +58,26 @@ fi # Copy over the debian directory (including new changelog) from repo cp -r "$CUR_DIR/$PKG_NAME/" "$TOP_BUILDDIR/" +function build_source_tarball() { + repo_url="https://github.com/freedomofpress/${PKG_NAME}" + build_dir="/tmp/${PKG_NAME}" + rm -rf "$build_dir" + git clone "$repo_url" "$build_dir" + git -C "$build_dir" tag --verify "$PKG_VERSION" 1>&2 + git -C "$build_dir" checkout "$PKG_VERSION" 1>&2 + (cd "$build_dir" && python setup.py sdist 1>&2) + find "${build_dir}/dist/" | grep -P '\.tar.gz$' | head -n1 +} + # If the package is contained in the list, it should be a python package. In # that case, we should extract tarball, and validate wheel hashes. if [[ "${PKG_NAME}" =~ ^(securedrop-client|securedrop-proxy|securedrop-export|securedrop-log)$ ]]; then echo "${PKG_NAME} is a Python package" if [[ -z "${PKG_PATH:-}" ]]; then - # Try to find tarball in a reasonable location - candidate_pkg_path="$(realpath "${CUR_DIR}/../${PKG_NAME}/dist/${PKG_NAME}-${PKG_VERSION}.tar.gz")" + # Build from source + echo "PKG_PATH not set, building from source (version $PKG_VERSION)..." + candidate_pkg_path="$(build_source_tarball)" if [[ -f "$candidate_pkg_path" ]]; then PKG_PATH="$candidate_pkg_path" echo "Found tarball at $PKG_PATH, override with PKG_PATH..." From 8e9616a5cfd2aa9d8443d5fde4a29c5dc00c9c6a Mon Sep 17 00:00:00 2001 From: Conor Schaefer Date: Tue, 4 Aug 2020 16:07:43 -0700 Subject: [PATCH 3/4] Ensures source tarballs are reproducible When building tarballs dynamically, let's take the time to ensure that they're fully reproducible. 
We still run 'python setup.py sdist', but since that tool doesn't (yet) support SOURCE_DATE_EPOCH, we'll manually repack the archive with native tar & gzip, forcing predictable timestamps from the git info, resulting in a deterministic build. --- scripts/build-debianpackage | 43 ++++++++++++++++++++++++++++++++----- 1 file changed, 38 insertions(+), 5 deletions(-) diff --git a/scripts/build-debianpackage b/scripts/build-debianpackage index c5723f64..b9f4bcd7 100755 --- a/scripts/build-debianpackage +++ b/scripts/build-debianpackage @@ -37,6 +37,7 @@ if [[ -z "${PKG_NAME:-}" ]]; then fi +# Look up most recent release from GitHub repo function find_latest_version() { repo_url="https://github.com/freedomofpress/${PKG_NAME}/releases" curl -s "$repo_url" \ @@ -58,15 +59,46 @@ fi # Copy over the debian directory (including new changelog) from repo cp -r "$CUR_DIR/$PKG_NAME/" "$TOP_BUILDDIR/" +# Ensures that a given git tag is signed with the prod release key +# If "rc" is in the tag name, this will fail. +function verify_git_tag() { + local d + local t + d="$1" + t="$2" + prod_fingerprint="22245C81E3BAEB4138B36061310F561200F4AD77" + git -C "$build_dir" tag --verify "$PKG_VERSION" 2>&1 \ + | grep -q -F "using RSA key $prod_fingerprint" +} + +# Dynamically generate a tarball, from the Python source code, +# that is byte-for-byte reproducible. Infers timestamp +# from the changelog, same as for the deb package. 
function build_source_tarball() { repo_url="https://github.com/freedomofpress/${PKG_NAME}" build_dir="/tmp/${PKG_NAME}" rm -rf "$build_dir" git clone "$repo_url" "$build_dir" - git -C "$build_dir" tag --verify "$PKG_VERSION" 1>&2 - git -C "$build_dir" checkout "$PKG_VERSION" 1>&2 - (cd "$build_dir" && python setup.py sdist 1>&2) - find "${build_dir}/dist/" | grep -P '\.tar.gz$' | head -n1 + + # Verify tag, using only the prod key + verify_git_tag "$build_dir" "$PKG_VERSION" + + # Tag is verified, proceed with checkout + git -C "$build_dir" checkout "$PKG_VERSION" + (cd "$build_dir" && LC_ALL="C.UTF-8" python setup.py sdist) + + # Initial tarball will contain timestamps from NOW, let's repack + # with timestamps from the changelog, which is static. + raw_tarball="$(find "${build_dir}/dist/" | grep -P '\.tar.gz$' | head -n1)" + dch_time="$(date "+%Y-%m-%d %H:%M:%S %z" -d@$(dpkg-parsechangelog --file $PKG_NAME/debian/changelog-$PLATFORM -STimestamp)) " + (cd "$build_dir" && tar -xzf "dist/$(basename $raw_tarball)") + tarball_basename="$(basename "$raw_tarball")" + # Repack with tar only, so env vars are respected + (cd "$build_dir" && tar -cf "${tarball_basename%.gz}" --mode=go=rX,u+rw,a-s --mtime="$dch_time" --sort=name --owner=root:0 --group=root:0 "${tarball_basename%.tar.gz}" 1>&2) + # Then gzip it separately, so we can pass args + (cd "$build_dir" && gzip --no-name "${tarball_basename%.gz}") + (cd "$build_dir" && mv "$tarball_basename" dist/) + echo "$raw_tarball" } # If the package is contained in the list, it should be a python package. In @@ -77,7 +109,8 @@ if [[ "${PKG_NAME}" =~ ^(securedrop-client|securedrop-proxy|securedrop-export|se if [[ -z "${PKG_PATH:-}" ]]; then # Build from source echo "PKG_PATH not set, building from source (version $PKG_VERSION)..." 
- candidate_pkg_path="$(build_source_tarball)" + build_source_tarball + candidate_pkg_path="$(find /tmp/$PKG_NAME/dist -type f -iname '*.tar.gz')" if [[ -f "$candidate_pkg_path" ]]; then PKG_PATH="$candidate_pkg_path" echo "Found tarball at $PKG_PATH, override with PKG_PATH..." From bca2d57346a6e778513879dfa343406ef01ef250 Mon Sep 17 00:00:00 2001 From: Conor Schaefer Date: Mon, 17 Aug 2020 08:42:34 -0700 Subject: [PATCH 4/4] Adds error message if tag doesn't verify Requested by @kushaldas during review. We can't easily pass through the stderr from the git verify process, since we redirect that stderr to stdout so that grep can inspect it. So let's just detect the failure and report that the tag failed to verify, which is accurate, albeit neither explicit nor verbose. --- scripts/build-debianpackage | 7 +++++-- 1 file changed, 5 insertions(+), 2 deletions(-) diff --git a/scripts/build-debianpackage b/scripts/build-debianpackage index b9f4bcd7..befc8498 100755 --- a/scripts/build-debianpackage +++ b/scripts/build-debianpackage @@ -67,8 +67,11 @@ function verify_git_tag() { d="$1" t="$2" prod_fingerprint="22245C81E3BAEB4138B36061310F561200F4AD77" - git -C "$build_dir" tag --verify "$PKG_VERSION" 2>&1 \ - | grep -q -F "using RSA key $prod_fingerprint" + if ! git -C "$build_dir" tag --verify "$PKG_VERSION" 2>&1 \ + | grep -q -F "using RSA key $prod_fingerprint" ; then + echo "Failed to verify $PKG_VERSION, not signed with $prod_fingerprint" >&2 + exit 2 + fi } # Dynamically generate a tarball, from the Python source code,