Skip to content

Commit

Permalink
Add in-toto format as hash of shard hashes
Browse files Browse the repository at this point in the history
This converts model serialization manifests that record every model file
shard hash into an in-toto payload that can then be passed to Sigstore's
`sign_intoto` for signing to generate a Sigstore `Bundle` (if using
Sigstore).

To identify the models, we compute a hash over all of the file shard
hashes and use that as the subject. The individual shard hashes are
recorded in the predicate, and the verifier is expected to check them as
part of the verification process.

Signed-off-by: Mihai Maruseac <mihaimaruseac@google.com>
  • Loading branch information
mihaimaruseac committed Jul 27, 2024
1 parent 0dd7eb4 commit 51463cd
Show file tree
Hide file tree
Showing 9 changed files with 345 additions and 0 deletions.
98 changes: 98 additions & 0 deletions model_signing/signing/in_toto.py
Original file line number Diff line number Diff line change
Expand Up @@ -253,3 +253,101 @@ def from_manifest(cls, manifest: manifest_module.Manifest) -> Self:
predicate_top_level_name="files",
)
return cls(statement)


class DigestOfShardDigestsIntotoPayload(IntotoPayload):
    """In-toto payload where the subject is a digest of digests of file shards.

    This payload is supposed to be used for manifests where every file shard in
    the model is matched with a digest. Because existing tooling only supports
    established hashing algorithms, we record every such digest in the predicate
    part and compute a hash for the subject by using sha256 on the concatenation
    of the shard hashes. To ensure determinism, the hashes are sorted
    by file shard (alphabetically by name, then ordered by start offset).

    Example:
    ```json
    {
      "_type": "https://in-toto.io/Statement/v1",
      "subject": [
        {
          "digest": {
            "sha256": "18b5a4..."
          }
        }
      ],
      "predicateType": "https://model_signing/DigestOfShardDigests/v0.1",
      "predicate": {
        "shards": [
          {
            "digest": "6efa14...",
            "algorithm": "file-sha256-1000000",
            "name": "d0/d1/d2/d3/d4/f0:0:16"
          },
          {
            "digest": "a9bc14...",
            "algorithm": "file-sha256-1000000",
            "name": "d0/d1/d2/d3/d4/f1:0:16"
          },
          {
            "digest": "5f597e...",
            "algorithm": "file-sha256-1000000",
            "name": "d0/d1/d2/d3/d4/f2:0:16"
          },
          {
            "digest": "eaf677...",
            "algorithm": "file-sha256-1000000",
            "name": "d0/d1/d2/d3/d4/f3:0:16"
          }
        ]
      }
    }
    ```

    A missing predicate, or a predicate for which an entry does not have valid
    name, digest, or algorithm should be considered invalid and fail integrity
    verification.

    See also https://github.com/sigstore/sigstore-python/issues/1018.
    """

    # Value emitted as the `predicateType` field of the generated statement.
    predicate_type: Final[str] = (
        "https://model_signing/DigestOfShardDigests/v0.1"
    )

    def __init__(self, statement: statement.Statement):
        """Builds an instance of this in-toto payload.

        Don't call this directly in production. Use `from_manifest()` instead.

        Args:
            statement: The DSSE statement representing this in-toto payload.
        """
        self.statement = statement

    @classmethod
    @override
    def from_manifest(cls, manifest: manifest_module.Manifest) -> Self:
        """Converts a manifest to the signing payload used for signing.

        The manifest must be one where every model shard is paired with its own
        digest. Currently, this is only `ShardLevelManifest`.

        Args:
            manifest: the manifest to convert to signing payload.

        Returns:
            An instance of `DigestOfShardDigestsIntotoPayload`.

        Raises:
            TypeError: If the manifest is not `ShardLevelManifest`.
        """
        # Reject unsupported manifest kinds up front, before any statement is
        # built from them.
        if not isinstance(manifest, manifest_module.ShardLevelManifest):
            raise TypeError("Only ShardLevelManifest is supported")

        # The per-shard entries are stored under the "shards" key of the
        # predicate, as in the example in the class docstring.
        statement = _convert_descriptors_to_hashed_statement(
            manifest,
            predicate_type=cls.predicate_type,
            predicate_top_level_name="shards",
        )
        return cls(statement)
65 changes: 65 additions & 0 deletions model_signing/signing/in_toto_test.py
Original file line number Diff line number Diff line change
Expand Up @@ -31,6 +31,7 @@
from model_signing.hashing import hashing
from model_signing.manifest import manifest as manifest_module
from model_signing.serialization import serialize_by_file
from model_signing.serialization import serialize_by_file_shard
from model_signing.signing import in_toto


Expand Down Expand Up @@ -145,3 +146,67 @@ def test_only_runs_on_expected_manifest_types(self):
match="Only FileLevelManifest is supported",
):
in_toto.DigestOfDigestsIntotoPayload.from_manifest(manifest)


class TestDigestOfShardDigestsIntotoPayload:
    """Tests for `in_toto.DigestOfShardDigestsIntotoPayload`."""

    def _hasher_factory(
        self, path: pathlib.Path, start: int, end: int
    ) -> file.ShardedFileHasher:
        """Returns a SHA256 sharded hasher for `path` between `start` and `end`."""
        return file.ShardedFileHasher(
            path, memory.SHA256(), start=start, end=end
        )

    def _serialize(self, model):
        """Serializes `model` into a manifest, one entry per file shard."""
        shard_serializer = serialize_by_file_shard.ManifestSerializer(
            self._hasher_factory, allow_symlinks=True
        )
        return shard_serializer.serialize(model)

    @pytest.mark.parametrize("model_fixture_name", test_support.all_test_models)
    def test_known_models(self, request, model_fixture_name):
        """Compares the payload for each known model against its golden file.

        When the `update_goldens` option is set, the golden file is rewritten
        from the computed payload instead of being compared against.
        """
        # Arrange: goldens live next to this test module, grouped per class.
        golden_path = (
            request.path.parent
            / "testdata"
            / "in_toto"
            / "TestDigestOfShardDigestsIntotoPayload"
            / model_fixture_name
        )
        should_update = request.config.getoption("update_goldens")
        model = request.getfixturevalue(model_fixture_name)

        # Act: build the in-toto payload from the shard-level manifest.
        payload = in_toto.DigestOfShardDigestsIntotoPayload.from_manifest(
            self._serialize(model)
        )

        # Update mode: overwrite the golden and stop, there is nothing to
        # compare against.
        if should_update:
            with golden_path.open("w", encoding="utf-8") as golden:
                rendered = json_format.MessageToJson(payload.statement.pb)
                golden.write(f"{rendered}\n")
            return

        # Assert: the computed statement must equal the stored golden.
        with golden_path.open("r", encoding="utf-8") as golden:
            golden_json = golden.read()
        expected_proto = json_format.Parse(
            golden_json, statement_pb2.Statement()
        )
        assert payload.statement.pb == expected_proto

    def test_produces_valid_statements(self, sample_model_folder):
        """Checks that the generated statement passes its own validation."""
        payload = in_toto.DigestOfShardDigestsIntotoPayload.from_manifest(
            self._serialize(sample_model_folder)
        )

        payload.statement.validate()

    def test_only_runs_on_expected_manifest_types(self):
        """Checks that a non shard-level manifest is rejected with TypeError."""
        unsupported_manifest = manifest_module.DigestManifest(
            hashing.Digest("test", b"test_digest")
        )

        with pytest.raises(
            TypeError,
            match="Only ShardLevelManifest is supported",
        ):
            in_toto.DigestOfShardDigestsIntotoPayload.from_manifest(
                unsupported_manifest
            )
Original file line number Diff line number Diff line change
@@ -0,0 +1,35 @@
{
"_type": "https://in-toto.io/Statement/v1",
"subject": [
{
"digest": {
"sha256": "18b5a45fe7983f7194e8ffd96c80f5f0ec53191bf4a32b6aff293f043e816d7a"
}
}
],
"predicateType": "https://model_signing/DigestOfShardDigests/v0.1",
"predicate": {
"shards": [
{
"digest": "6efa14bb03544fcb76045c55f25b9315b6eb5be2d8a85f703193a76b7874c6ff",
"algorithm": "file-sha256-1000000",
"name": "d0/d1/d2/d3/d4/f0:0:16"
},
{
"digest": "a9bc149b70b9d325cd68d275d582cfdb98c0347d3ce54590aa6533368daed3d2",
"algorithm": "file-sha256-1000000",
"name": "d0/d1/d2/d3/d4/f1:0:16"
},
{
"digest": "5f597e6a92d1324d9adbed43d527926d11d0131487baf315e65ae1ef3b1ca3c0",
"algorithm": "file-sha256-1000000",
"name": "d0/d1/d2/d3/d4/f2:0:16"
},
{
"digest": "eaf677c35fec6b87889d9e4563d8bb65dcb9869ca0225697c9cc44cf49dca008",
"algorithm": "file-sha256-1000000",
"name": "d0/d1/d2/d3/d4/f3:0:16"
}
]
}
}
Original file line number Diff line number Diff line change
@@ -0,0 +1,14 @@
{
"_type": "https://in-toto.io/Statement/v1",
"subject": [
{
"digest": {
"sha256": "e3b0c44298fc1c149afbf4c8996fb92427ae41e4649b934ca495991b7852b855"
}
}
],
"predicateType": "https://model_signing/DigestOfShardDigests/v0.1",
"predicate": {
"shards": []
}
}
Original file line number Diff line number Diff line change
@@ -0,0 +1,14 @@
{
"_type": "https://in-toto.io/Statement/v1",
"subject": [
{
"digest": {
"sha256": "e3b0c44298fc1c149afbf4c8996fb92427ae41e4649b934ca495991b7852b855"
}
}
],
"predicateType": "https://model_signing/DigestOfShardDigests/v0.1",
"predicate": {
"shards": []
}
}
Original file line number Diff line number Diff line change
@@ -0,0 +1,14 @@
{
"_type": "https://in-toto.io/Statement/v1",
"subject": [
{
"digest": {
"sha256": "e3b0c44298fc1c149afbf4c8996fb92427ae41e4649b934ca495991b7852b855"
}
}
],
"predicateType": "https://model_signing/DigestOfShardDigests/v0.1",
"predicate": {
"shards": []
}
}
Original file line number Diff line number Diff line change
@@ -0,0 +1,20 @@
{
"_type": "https://in-toto.io/Statement/v1",
"subject": [
{
"digest": {
"sha256": "002d162f867c5eee944e5080d25829b6625be0e3f081f6fbafc7dd655ca2e178"
}
}
],
"predicateType": "https://model_signing/DigestOfShardDigests/v0.1",
"predicate": {
"shards": [
{
"digest": "3aab065c7181a173b5dd9e9d32a9f79923440b413be1e1ffcdba26a7365f719b",
"algorithm": "file-sha256-1000000",
"name": ".:0:22"
}
]
}
}
Original file line number Diff line number Diff line change
@@ -0,0 +1,65 @@
{
"_type": "https://in-toto.io/Statement/v1",
"subject": [
{
"digest": {
"sha256": "143cc682e555951649f18e2761c3d526d2502996f5e32dc187ef7f8a614f8df7"
}
}
],
"predicateType": "https://model_signing/DigestOfShardDigests/v0.1",
"predicate": {
"shards": [
{
"digest": "fdd8925354242a7fd1515e79534317b800015607a609cd306e0b4dcfe6c92249",
"algorithm": "file-sha256-1000000",
"name": "d0/f00:0:23"
},
{
"digest": "e16940b5e44ce981150bda37c4ba95881a749a521b4a297c5cdf97bdcfe965e6",
"algorithm": "file-sha256-1000000",
"name": "d0/f01:0:23"
},
{
"digest": "407822246ea8f9e26380842c3f4cd10d7b23e78f1fe7c74c293608682886a426",
"algorithm": "file-sha256-1000000",
"name": "d0/f02:0:23"
},
{
"digest": "6a3b08b5df77c4d418ceee1ac136a9ad49fc7c41358b5e82c1176daccb21ff3f",
"algorithm": "file-sha256-1000000",
"name": "d1/f10:0:23"
},
{
"digest": "a484b3d8ea5e99b75f9f123f9a42c882388693edc7d85d82ccba54834712cadf",
"algorithm": "file-sha256-1000000",
"name": "d1/f11:0:23"
},
{
"digest": "8f577930f5f40c2c2133cb299d36f9527fde98c1608569017cae6b5bcd01abb3",
"algorithm": "file-sha256-1000000",
"name": "d1/f12:0:23"
},
{
"digest": "997b37cc51f1ca1c7a270466607e26847429cd7264c30148c1b9352e224083fc",
"algorithm": "file-sha256-1000000",
"name": "f0:0:24"
},
{
"digest": "c88a04d48353133fb065ba2c8ab369abab21395b9526aa20373ad828915fa7ae",
"algorithm": "file-sha256-1000000",
"name": "f1:0:24"
},
{
"digest": "700e3ba5065d8dd47e41fd928ea086670d628f891ba363be0ca3c31d20d7d719",
"algorithm": "file-sha256-1000000",
"name": "f2:0:24"
},
{
"digest": "912bcf5ebdf44dc7b4085b07940e0a81d157fba24b276e73fd911121d4544c4a",
"algorithm": "file-sha256-1000000",
"name": "f3:0:24"
}
]
}
}
Original file line number Diff line number Diff line change
@@ -0,0 +1,20 @@
{
"_type": "https://in-toto.io/Statement/v1",
"subject": [
{
"digest": {
"sha256": "002d162f867c5eee944e5080d25829b6625be0e3f081f6fbafc7dd655ca2e178"
}
}
],
"predicateType": "https://model_signing/DigestOfShardDigests/v0.1",
"predicate": {
"shards": [
{
"digest": "3aab065c7181a173b5dd9e9d32a9f79923440b413be1e1ffcdba26a7365f719b",
"algorithm": "file-sha256-1000000",
"name": "symlink_file:0:22"
}
]
}
}

0 comments on commit 51463cd

Please sign in to comment.