feat[tool]: archive format (#3891)
this commit adds several output formats to aid with build
reproducibility and source code verification:
- `-f archive`
- `-f solc_json`
- `-f integrity`
- `--base64`

`-f archive` creates a "vyper archive" using the zipfile format. it
emits the metadata associated with the build (settings, search path,
compiler version, integrity hash) in the `MANIFEST/` folder inside the
archive. `--base64` is only usable with `-f archive` and produces a
base64-encoded archive (which is easier to copy-paste).

both the base64 and binary versions of the archive round-trip. that is,
if you provide an archive directly to the vyper compiler (e.g.
`vyper contract.zip` or `vyper contract.zip.b64`), it should produce
exactly the same output as running `vyper contract.vy` on the local
machine with the same settings+environment that produced the archive.
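
for illustration, a minimal sketch of the round-trip through the python
api (mirroring the new tests in this commit; `contract.vy` is a
placeholder):

    from pathlib import Path

    from vyper.cli.vyper_compile import compile_files

    contract = Path("contract.vy")  # placeholder source file
    out = compile_files([contract], ["archive", "bytecode"])

    # write the archive out, using the tentative canonical suffix
    archive = Path("contract.vyz")
    archive.write_bytes(out[contract]["archive"])

    # compiling the archive reproduces the original output
    out2 = compile_files([archive], ["bytecode"])
    assert out2[archive]["bytecode"] == out[contract]["bytecode"]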

`-f solc_json` is for compatibility with the standard-json format that
many verifiers and tools currently consume. it uses the same "output
bundle" machinery as `-f archive`, but emits the result in
"standard-json" format (consumable by `--standard-json`).
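
a sketch of the standard-json round-trip (following the new
`test_solc_json_output` test; `contract.vy` is a placeholder):

    from pathlib import Path

    from vyper.cli.vyper_compile import compile_files
    from vyper.cli.vyper_json import compile_json

    contract = Path("contract.vy")  # placeholder source file
    json_input = compile_files([contract], ["solc_json"])[contract]["solc_json"]

    # the emitted bundle is consumable by the standard-json entry point
    contracts = compile_json(json_input)["contracts"]
    bytecode = contracts["contract.vy"]["contract"]["evm"]["bytecode"]["object"]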

both of these are built on an `OutputBundle` abstraction, which handles
collecting the inputs to the build (see the sketch after this list).
these include:
- settings (whatever is on the Settings object)
- search path
- compiler version
- integrity hash
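
a sketch of driving `OutputBundle` directly, following the new unit
tests (file names are illustrative):

    from pathlib import Path

    from vyper.compiler.input_bundle import FilesystemInputBundle
    from vyper.compiler.output_bundle import OutputBundle
    from vyper.compiler.phases import CompilerData

    input_bundle = FilesystemInputBundle([Path(".")])  # the build's search paths
    file_input = input_bundle.load_file("main.vy")  # placeholder entry point

    # construct CompilerData manually, then collect the build inputs from it
    compiler_data = CompilerData(file_input, input_bundle)
    output_bundle = OutputBundle(compiler_data)
    print(output_bundle.used_search_paths)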

importantly, `OutputBundle` recovers and anonymizes the search paths
used during compilation. this is done to minimize leakage of user
information in archives. however, it comes with a tradeoff -- because
of how the anonymization works, it is possible to have a build where
the search paths are not recoverable (specifically, if a module
"escapes" its package with too many `..`, the resulting anonymized path
will be bogus). several approaches to preventing this were tried; in
the end, this one was chosen because it prioritizes minimizing leakage
over handling edge cases.
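
the new `test_archive_search_path` unit test pins down the anonymized
form: continuing the sketch above, if `lib.vy` is resolved from a
directory named `dir1` outside the working directory, the absolute
prefix is replaced with an opaque counter:

    # the absolute path to dir1 is not leaked into the archive
    assert output_bundle.used_search_paths == [".", "0/dir1"]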

`-f integrity` produces an "integrity hash", which is basically a
checksum over the source file inputs. it is intended to let consumers
of the compiler know when any input in the dependency tree has changed
and recompilation is necessary. it is conservative by design; it works
by recursively hashing source text, as opposed to the AST or any other
semantic representation of the source code. it can also be used by
tooling as a check to determine whether the source tree in an archive
is the same as expected. this would likely be a check performed in
addition to bytecode comparison, since there could be differences in
the source code (e.g. comments) which affect the integrity hash but not
the bytecode.
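
the new `test_integrity_sum` test shows the shape of the computation
for a contract which imports `lib.vy` and `jsonabi.json` (a sketch; the
source strings are placeholders):

    from vyper.utils import sha256sum

    contract_source = "..."  # source text of contract.vy (placeholder)
    library_source = "..."   # source text of lib.vy (placeholder)
    jsonabi_source = "..."   # contents of jsonabi.json (placeholder)

    contract_hash = sha256sum(contract_source)
    library_hash = sha256sum(library_source)
    jsonabi_hash = sha256sum(jsonabi_source)

    # the root's hash is combined with the recursively computed
    # hashes of its dependency tree
    expected = sha256sum(contract_hash + sha256sum(library_hash) + jsonabi_hash)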

the integrity hash computation currently depends on all frontend
analysis completing. in theory, since it only depends on source code,
it could be refactored into a separate preliminary pass in the
compiler, whose sole job is to resolve (and hash) imports. however,
that would be additional maintenance work. we could revisit this if the
performance of the current method is reported as an issue (note:
current measurements put it at roughly 2500 lloc per second).

currently, there are two places where build reproducibility might fail:
when checking the integrity hash of an archive, or during construction
of the archive itself if there is a compile-time failure (which could
happen, for example, if the user is trying to send a reproduction of an
error). it was decided that the most user-friendly behavior in these
cases is to emit a warning, rather than to add extra compilation flags
controlling whether to bail out.

the tentative canonical suffix for vyper archive (the zipfile version)
is `.vyz`, although this is subject to change (several alternatives were
also considered, including `.den` - as in "a den of vypers"!).
charles-cooper authored May 8, 2024
1 parent 3af5390 commit 75c75c5
Showing 20 changed files with 869 additions and 137 deletions.
9 changes: 5 additions & 4 deletions tests/conftest.py
@@ -56,10 +56,11 @@ def pytest_addoption(parser):
@pytest.fixture(scope="module")
def output_formats():
    output_formats = compiler.OUTPUT_FORMATS.copy()
-    del output_formats["bb"]
-    del output_formats["bb_runtime"]
-    del output_formats["cfg"]
-    del output_formats["cfg_runtime"]
+
+    to_drop = ("bb", "bb_runtime", "cfg", "cfg_runtime", "archive", "archive_b64", "solc_json")
+    for s in to_drop:
+        del output_formats[s]
+
    return output_formats


182 changes: 158 additions & 24 deletions tests/unit/cli/vyper_compile/test_compile_files.py
@@ -1,14 +1,19 @@
import contextlib
import sys
+import zipfile
from pathlib import Path

import pytest

from tests.utils import working_directory
from vyper.cli.vyper_compile import compile_files
+from vyper.cli.vyper_json import compile_json
from vyper.compiler.input_bundle import FilesystemInputBundle
+from vyper.compiler.output_bundle import OutputBundle
+from vyper.compiler.phases import CompilerData
+from vyper.utils import sha256sum


-def test_combined_json_keys(tmp_path, make_file):
+def test_combined_json_keys(chdir_tmp_path, make_file):
make_file("bar.vy", "")

combined_keys = {
@@ -22,7 +27,7 @@ def test_combined_json_keys(tmp_path, make_file):
"userdoc",
"devdoc",
}
-    compile_data = compile_files(["bar.vy"], ["combined_json"], paths=[tmp_path])
+    compile_data = compile_files(["bar.vy"], ["combined_json"])

assert set(compile_data.keys()) == {Path("bar.vy"), "version"}
assert set(compile_data[Path("bar.vy")].keys()) == combined_keys
@@ -72,12 +77,12 @@ def bar() -> FooStruct:


@pytest.mark.parametrize("import_stmt,alias", SAME_FOLDER_IMPORT_STMT)
-def test_import_same_folder(import_stmt, alias, tmp_path, make_file):
+def test_import_same_folder(import_stmt, alias, chdir_tmp_path, make_file):
foo = "contracts/foo.vy"
make_file("contracts/foo.vy", CONTRACT_CODE.format(import_stmt=import_stmt, alias=alias))
make_file("contracts/IFoo.vyi", INTERFACE_CODE)

-    assert compile_files([foo], ["combined_json"], paths=[tmp_path])
+    assert compile_files([foo], ["combined_json"]) is not None


SUBFOLDER_IMPORT_STMT = [
@@ -95,13 +100,13 @@ def test_import_same_folder(import_stmt, alias, tmp_path, make_file):


@pytest.mark.parametrize("import_stmt, alias", SUBFOLDER_IMPORT_STMT)
-def test_import_subfolder(import_stmt, alias, tmp_path, make_file):
+def test_import_subfolder(import_stmt, alias, chdir_tmp_path, make_file):
foo = make_file(
"contracts/foo.vy", (CONTRACT_CODE.format(import_stmt=import_stmt, alias=alias))
)
make_file("contracts/other/IFoo.vyi", INTERFACE_CODE)

-    assert compile_files([foo], ["combined_json"], paths=[tmp_path])
+    assert compile_files([foo], ["combined_json"]) is not None


OTHER_FOLDER_IMPORT_STMT = [
@@ -118,7 +123,7 @@ def test_import_other_folder(import_stmt, alias, tmp_path, make_file):
foo = make_file("contracts/foo.vy", CONTRACT_CODE.format(import_stmt=import_stmt, alias=alias))
make_file("interfaces/IFoo.vyi", INTERFACE_CODE)

-    assert compile_files([foo], ["combined_json"], paths=[tmp_path])
+    assert compile_files([foo], ["combined_json"], paths=[tmp_path]) is not None


def test_import_parent_folder(tmp_path, make_file):
@@ -128,21 +133,20 @@
)
make_file("IFoo.vyi", INTERFACE_CODE)

-    assert compile_files([foo], ["combined_json"], paths=[tmp_path])
+    assert compile_files([foo], ["combined_json"], paths=[tmp_path]) is not None

# perform relative import outside of base folder
compile_files([foo], ["combined_json"], paths=[tmp_path / "contracts"])


-def test_import_search_paths(tmp_path, make_file):
-    with working_directory(tmp_path):
-        contract_code = CONTRACT_CODE.format(import_stmt="from utils import IFoo", alias="IFoo")
-        contract_filename = "dir1/baz/foo.vy"
-        interface_filename = "dir2/utils/IFoo.vyi"
-        make_file(interface_filename, INTERFACE_CODE)
-        make_file(contract_filename, contract_code)
-
-        assert compile_files([contract_filename], ["combined_json"], paths=["dir2"])
+def test_import_search_paths(chdir_tmp_path, make_file):
+    contract_code = CONTRACT_CODE.format(import_stmt="from utils import IFoo", alias="IFoo")
+    contract_filename = "dir1/baz/foo.vy"
+    interface_filename = "dir2/utils/IFoo.vyi"
+    make_file(interface_filename, INTERFACE_CODE)
+    make_file(contract_filename, contract_code)
+
+    assert compile_files([contract_filename], ["combined_json"], paths=["dir2"]) is not None


META_IMPORT_STMT = [
@@ -181,7 +185,7 @@ def be_known() -> ISelf.FooStruct:
make_file("contracts/ISelf.vyi", interface_code)
meta = make_file("contracts/Self.vy", code)

-    assert compile_files([meta], ["combined_json"], paths=[tmp_path])
+    assert compile_files([meta], ["combined_json"], paths=[tmp_path]) is not None


# implement IFoo in another contract for fun
@@ -201,10 +205,10 @@ def bar(_foo: address) -> {alias}.FooStruct:
make_file("contracts/IFoo.vyi", INTERFACE_CODE)
baz = make_file("contracts/Baz.vy", baz_code)

-    assert compile_files([baz], ["combined_json"], paths=[tmp_path])
+    assert compile_files([baz], ["combined_json"], paths=[tmp_path]) is not None


-def test_local_namespace(make_file, tmp_path):
+def test_local_namespace(make_file, chdir_tmp_path):
# interface code namespaces should be isolated
# all of these contract should be able to compile together
codes = [
@@ -229,15 +233,15 @@ def test_local_namespace(make_file, tmp_path):
for file_name in ("foo.vyi", "bar.vyi"):
make_file(file_name, INTERFACE_CODE)

-    assert compile_files(paths, ["combined_json"], paths=[tmp_path])
+    assert compile_files(paths, ["combined_json"]) is not None


def test_compile_outside_root_path(tmp_path, make_file):
# absolute paths relative to "."
make_file("ifoo.vyi", INTERFACE_CODE)
foo = make_file("foo.vy", CONTRACT_CODE.format(import_stmt="import ifoo as IFoo", alias="IFoo"))

-    assert compile_files([foo], ["combined_json"], paths=None)
+    assert compile_files([foo], ["combined_json"], paths=None) is not None


def test_import_library(tmp_path, make_file):
@@ -270,23 +274,153 @@ def mock_sys_path(path):
sys.path.pop()


-def test_import_sys_path(tmp_path_factory, make_file):
+@pytest.fixture
+def input_files(tmp_path_factory, make_file, chdir_tmp_path):
library_source = """
@internal
def foo() -> uint256:
return block.number + 1
"""
json_source = """
[
{
"stateMutability": "nonpayable",
"type": "function",
"name": "test_json",
"inputs": [ { "name": "", "type": "uint256" } ],
"outputs": [ { "name": "", "type": "uint256" } ]
}
]
"""
contract_source = """
import lib
import jsonabi
@external
def foo() -> uint256:
return lib.foo()
@external
def bar(x: uint256) -> uint256:
return extcall jsonabi(msg.sender).test_json(x)
"""
tmpdir = tmp_path_factory.mktemp("test-sys-path")
tmpdir = tmp_path_factory.mktemp("fake-package")
with open(tmpdir / "lib.vy", "w") as f:
f.write(library_source)
with open(tmpdir / "jsonabi.json", "w") as f:
f.write(json_source)

contract_file = make_file("contract.vy", contract_source)

return (tmpdir, tmpdir / "lib.vy", tmpdir / "jsonabi.json", contract_file)


def test_import_sys_path(input_files):
    tmpdir, _, _, contract_file = input_files
    with mock_sys_path(tmpdir):
        assert compile_files([contract_file], ["combined_json"]) is not None


def test_archive_output(input_files):
    tmpdir, _, _, contract_file = input_files
    search_paths = [".", tmpdir]

    s = compile_files([contract_file], ["archive"], paths=search_paths)
    archive_bytes = s[contract_file]["archive"]

    archive_path = Path("foo.zip")
    with archive_path.open("wb") as f:
        f.write(archive_bytes)

    assert zipfile.is_zipfile(archive_path)

    # compare compiling the two input bundles
    out = compile_files([contract_file], ["integrity", "bytecode"], paths=search_paths)
    out2 = compile_files([archive_path], ["integrity", "bytecode"])
    assert out[contract_file] == out2[archive_path]


def test_archive_b64_output(input_files):
    tmpdir, _, _, contract_file = input_files
    search_paths = [".", tmpdir]

    out = compile_files(
        [contract_file], ["archive_b64", "integrity", "bytecode"], paths=search_paths
    )

    archive_b64 = out[contract_file].pop("archive_b64")

    archive_path = Path("foo.zip.b64")
    with archive_path.open("w") as f:
        f.write(archive_b64)

    # compare compiling the two input bundles
    out2 = compile_files([archive_path], ["integrity", "bytecode"])
    assert out[contract_file] == out2[archive_path]


def test_solc_json_output(input_files):
    tmpdir, _, _, contract_file = input_files
    search_paths = [".", tmpdir]

    out = compile_files([contract_file], ["solc_json"], paths=search_paths)

    json_input = out[contract_file]["solc_json"]

    # check that round-tripping solc_json thru standard json produces
    # the same as compiling directly
    json_out = compile_json(json_input)["contracts"]["contract.vy"]
    json_out_bytecode = json_out["contract"]["evm"]["bytecode"]["object"]

    out2 = compile_files([contract_file], ["integrity", "bytecode"], paths=search_paths)

    assert out2[contract_file]["bytecode"] == json_out_bytecode


# maybe this belongs in tests/unit/compiler?
def test_integrity_sum(input_files):
    tmpdir, library_file, jsonabi_file, contract_file = input_files
    search_paths = [".", tmpdir]

    out = compile_files([contract_file], ["integrity"], paths=search_paths)

    with library_file.open() as f, contract_file.open() as g, jsonabi_file.open() as h:
        library_contents = f.read()
        contract_contents = g.read()
        jsonabi_contents = h.read()

    contract_hash = sha256sum(contract_contents)
    library_hash = sha256sum(library_contents)
    jsonabi_hash = sha256sum(jsonabi_contents)
    expected = sha256sum(contract_hash + sha256sum(library_hash) + jsonabi_hash)
    assert out[contract_file]["integrity"] == expected


# does this belong in tests/unit/compiler?
def test_archive_search_path(tmp_path_factory, make_file, chdir_tmp_path):
    lib1 = """
x: uint256
"""
    lib2 = """
y: uint256
"""
    dir1 = tmp_path_factory.mktemp("dir1")
    dir2 = tmp_path_factory.mktemp("dir2")
    make_file(dir1 / "lib.vy", lib1)
    make_file(dir2 / "lib.vy", lib2)

    main = """
import lib
"""
    pwd = Path(".")
    make_file(pwd / "main.vy", main)
    for search_paths in ([pwd, dir1, dir2], [pwd, dir2, dir1]):
        input_bundle = FilesystemInputBundle(search_paths)
        file_input = input_bundle.load_file("main.vy")

        # construct CompilerData manually
        compiler_data = CompilerData(file_input, input_bundle)
        output_bundle = OutputBundle(compiler_data)

        used_dir = search_paths[-1].stem  # either dir1 or dir2
        assert output_bundle.used_search_paths == [".", "0/" + used_dir]
5 changes: 0 additions & 5 deletions tests/unit/cli/vyper_json/test_compile_json.py
@@ -227,11 +227,6 @@ def test_different_outputs(input_bundle, input_json):
assert foo["evm"]["methodIdentifiers"] == method_identifiers


-def test_root_folder_not_exists(input_json):
-    with pytest.raises(FileNotFoundError):
-        compile_json(input_json, root_folder="/path/that/does/not/exist")


def test_wrong_language():
with pytest.raises(JSONError):
compile_json({"language": "Solidity"})
21 changes: 12 additions & 9 deletions tests/unit/compiler/test_input_bundle.py
@@ -73,13 +73,13 @@ def test_load_abi(make_file, input_bundle, tmp_path):

file = input_bundle.load_file("foo.json")
assert isinstance(file, ABIInput)
-    assert file == ABIInput(0, "foo.json", path, "some string")
+    assert file == ABIInput(0, "foo.json", path, contents, "some string")

# suffix doesn't matter
path = make_file("foo.txt", contents)
file = input_bundle.load_file("foo.txt")
assert isinstance(file, ABIInput)
-    assert file == ABIInput(1, "foo.txt", path, "some string")
+    assert file == ABIInput(1, "foo.txt", path, contents, "some string")


# check that unique paths give unique source ids
@@ -126,29 +126,31 @@ def test_source_id_json_input(make_file, input_bundle, tmp_path):

file = input_bundle.load_file("foo.json")
assert isinstance(file, ABIInput)
-    assert file == ABIInput(0, "foo.json", foopath, "some string")
+    assert file == ABIInput(0, "foo.json", foopath, contents, "some string")

file2 = input_bundle.load_file("bar.json")
assert isinstance(file2, ABIInput)
-    assert file2 == ABIInput(1, "bar.json", barpath, ["some list"])
+    assert file2 == ABIInput(1, "bar.json", barpath, contents2, ["some list"])

file3 = input_bundle.load_file("foo.json")
assert file3.source_id == 0
-    assert file3 == ABIInput(0, "foo.json", foopath, "some string")
+    assert file3 == ABIInput(0, "foo.json", foopath, contents, "some string")

# test source id is stable across different search paths
with working_directory(tmp_path):
with input_bundle.search_path(Path(".")):
file4 = input_bundle.load_file("foo.json")
assert file4.source_id == 0
-            assert file4 == ABIInput(0, "foo.json", foopath, "some string")
+            assert file4 == ABIInput(0, "foo.json", foopath, contents, "some string")

# test source id is stable even when requested filename is different
with working_directory(tmp_path.parent):
with input_bundle.search_path(Path(".")):
file5 = input_bundle.load_file(Path(tmp_path.stem) / "foo.json")
assert file5.source_id == 0
-            assert file5 == ABIInput(0, Path(tmp_path.stem) / "foo.json", foopath, "some string")
+            assert file5 == ABIInput(
+                0, Path(tmp_path.stem) / "foo.json", foopath, contents, "some string"
+            )


# test some pathological case where the file changes underneath
@@ -238,7 +240,8 @@ def test_json_input_abi():
input_bundle = JSONInputBundle(files, [PurePath(".")])

file = input_bundle.load_file(foopath)
-    assert file == ABIInput(0, foopath, foopath, some_abi)
+    abi_contents = json.dumps({"abi": some_abi})
+    assert file == ABIInput(0, foopath, foopath, abi_contents, some_abi)

file = input_bundle.load_file(barpath)
-    assert file == ABIInput(1, barpath, barpath, some_abi)
+    assert file == ABIInput(1, barpath, barpath, some_abi_str, some_abi)
