Skip to content

Commit

Permalink
Merge pull request #34 from infosiftr/cache-data
Browse files Browse the repository at this point in the history
Update registry cache to (ab)use `Data` field of `Descriptor` objects
  • Loading branch information
yosifkit authored Mar 27, 2024
2 parents d54fee8 + 46e854a commit 530a108
Show file tree
Hide file tree
Showing 11 changed files with 728 additions and 169 deletions.
423 changes: 359 additions & 64 deletions .test/lookup-test.json

Large diffs are not rendered by default.

44 changes: 43 additions & 1 deletion .test/test.sh
Original file line number Diff line number Diff line change
Expand Up @@ -48,8 +48,50 @@ lookup=(
# tianon/test:index-no-platform-smaller - a "broken" index with *zero* platform objects in it (so every manifest requires a platform lookup)
'tianon/test@sha256:347290ddd775c1b85a3e381b09edde95242478eb65153e9b17225356f4c072ac'
# (doing these in the same run means the manifest from above should be cached and exercise more codepaths for better coverage)

--type manifest 'tianon/test@sha256:347290ddd775c1b85a3e381b09edde95242478eb65153e9b17225356f4c072ac' # same manifest again, but without SynthesizeIndex
--type blob 'tianon/test@sha256:d2c94e258dcb3c5ac2798d32e1249e42ef01cba4841c2234249495f87264ac5a' # first config blob from the above
# and again, but this time HEADs
--head --type manifest 'tianon/test@sha256:347290ddd775c1b85a3e381b09edde95242478eb65153e9b17225356f4c072ac'
--head --type blob 'tianon/test@sha256:d2c94e258dcb3c5ac2798d32e1249e42ef01cba4841c2234249495f87264ac5a'

# again with things that aren't cached yet (tianon/true:oci, specifically)
--head --type blob 'tianon/true@sha256:25be82253336f0b8c4347bc4ecbbcdc85d0e0f118ccf8dc2e119c0a47a0a486e' # config blob
--head --type manifest 'tianon/true:oci@sha256:9ef42f1d602fb423fad935aac1caa0cfdbce1ad7edce64d080a4eb7b13f7cd9d'
--type blob 'tianon/true@sha256:25be82253336f0b8c4347bc4ecbbcdc85d0e0f118ccf8dc2e119c0a47a0a486e' # config blob
--type manifest 'tianon/true:oci@sha256:9ef42f1d602fb423fad935aac1caa0cfdbce1ad7edce64d080a4eb7b13f7cd9d'
'tianon/true:oci@sha256:9ef42f1d602fb423fad935aac1caa0cfdbce1ad7edce64d080a4eb7b13f7cd9d'

# tag lookup! (but with a hopefully stable example tag -- a build of notary:server)
--head 'oisupport/staging-amd64:71756dd75e41c4bc5144b64d36b4834a5a960c495470915eb69f96e9f2cb6694'
--head 'oisupport/staging-amd64:71756dd75e41c4bc5144b64d36b4834a5a960c495470915eb69f96e9f2cb6694' # twice, to exercise "tag is cached" case
--type manifest 'oisupport/staging-amd64:71756dd75e41c4bc5144b64d36b4834a5a960c495470915eb69f96e9f2cb6694'
'oisupport/staging-amd64:71756dd75e41c4bc5144b64d36b4834a5a960c495470915eb69f96e9f2cb6694'

# exercise 404 codepaths
"tianon/this-is-a-repository-that-will-never-ever-exist-$RANDOM-$RANDOM:$RANDOM-$RANDOM"
--head "tianon/this-is-a-repository-that-will-never-ever-exist-$RANDOM-$RANDOM:$RANDOM-$RANDOM"
'tianon/test@sha256:0000000000000000000000000000000000000000000000000000000000000000'
)
"$dir/../bin/lookup" "${lookup[@]}" | jq -s > "$dir/lookup-test.json"
"$dir/../bin/lookup" "${lookup[@]}" | jq -s '
[
reduce (
$ARGS.positional[]
| if startswith("tianon/this-is-a-repository-that-will-never-ever-exist-") then
gsub("[0-9]+"; "$RANDOM")
else . end
) as $a ([];
if .[-1][-1] == "--type" then
.[-1][-1] += " " + $a
elif length > 0 and (.[-1][-1] | startswith("--")) then
.[-1] += [$a]
else
. += [[$a]]
end
),
.
] | transpose
' --args -- "${lookup[@]}" > "$dir/lookup-test.json"

# don't leave around the "-cover" versions of these binaries
rm -f "$dir/../bin/builds" "$dir/../bin/lookup"
Expand Down
69 changes: 64 additions & 5 deletions cmd/lookup/main.go
Original file line number Diff line number Diff line change
Expand Up @@ -5,6 +5,7 @@ package main
import (
"context"
"encoding/json"
"io"
"os"
"os/signal"

Expand All @@ -15,21 +16,79 @@ func main() {
ctx, stop := signal.NotifyContext(context.Background(), os.Interrupt)
defer stop()

for _, img := range os.Args[1:] {
var (
zeroOpts registry.LookupOptions
opts = zeroOpts
)

args := os.Args[1:]
for len(args) > 0 {
img := args[0]
args = args[1:]
switch img {
case "--type":
opts.Type = registry.LookupType(args[0])
args = args[1:]
continue
case "--head":
opts.Head = true
continue
}

ref, err := registry.ParseRef(img)
if err != nil {
panic(err)
}

index, err := registry.SynthesizeIndex(ctx, ref)
if err != nil {
panic(err)
var obj any
if opts == zeroOpts {
// if we have no explicit type and didn't request a HEAD, invoke SynthesizeIndex instead of Lookup
obj, err = registry.SynthesizeIndex(ctx, ref)
if err != nil {
panic(err)
}
} else {
r, err := registry.Lookup(ctx, ref, &opts)
if err != nil {
panic(err)
}
if r != nil {
desc := r.Descriptor()
if opts.Head {
obj = desc
} else {
b, err := io.ReadAll(r)
if err != nil {
r.Close()
panic(err)
}
if opts.Type == registry.LookupTypeManifest {
// if it was a manifest lookup, cast the byte slice to json.RawMessage so we get the actual JSON (not base64)
obj = json.RawMessage(b)
} else {
obj = b
}
}
err = r.Close()
if err != nil {
panic(err)
}
} else {
obj = nil
}
}

e := json.NewEncoder(os.Stdout)
e.SetIndent("", "\t")
if err := e.Encode(index); err != nil {
if err := e.Encode(obj); err != nil {
panic(err)
}

// reset state
opts = zeroOpts
}

if opts != zeroOpts {
panic("dangling --type, --head, etc (without a following reference for it to apply to)")
}
}
4 changes: 2 additions & 2 deletions go.mod
Original file line number Diff line number Diff line change
Expand Up @@ -20,5 +20,5 @@ require (
google.golang.org/protobuf v1.28.1 // indirect
)

// https://github.com/cue-labs/oci/pull/27
replace cuelabs.dev/go/oci/ociregistry => github.com/tianon/cuelabs-oci/ociregistry v0.0.0-20240216044210-8aa0c990bd77
// https://github.com/cue-labs/oci/pull/29
replace cuelabs.dev/go/oci/ociregistry => github.com/tianon/cuelabs-oci/ociregistry v0.0.0-20240322151419-7d3242933116
4 changes: 2 additions & 2 deletions go.sum
Original file line number Diff line number Diff line change
Expand Up @@ -32,8 +32,8 @@ github.com/sirupsen/logrus v1.9.0/go.mod h1:naHLuLoDiP4jHNo9R0sCBMtWGeIprob74mVs
github.com/stretchr/objx v0.1.0/go.mod h1:HFkY916IF+rwdDfMAkV7OtwuqBVzrE8GR6GFx+wExME=
github.com/stretchr/testify v1.7.0 h1:nwc3DEeHmmLAfoZucVR881uASk0Mfjw8xYJ99tb5CcY=
github.com/stretchr/testify v1.7.0/go.mod h1:6Fq8oRcR53rry900zMqJjRRixrwX3KX962/h/Wwjteg=
github.com/tianon/cuelabs-oci/ociregistry v0.0.0-20240216044210-8aa0c990bd77 h1:9EPZm+sGlYHo6LleMXWR6s3P8SJEYA7/aovpJ76JSpw=
github.com/tianon/cuelabs-oci/ociregistry v0.0.0-20240216044210-8aa0c990bd77/go.mod h1:ApHceQLLwcOkCEXM1+DyCXTHEJhNGDpJ2kmV6axsx24=
github.com/tianon/cuelabs-oci/ociregistry v0.0.0-20240322151419-7d3242933116 h1:ZDy4uRAhzODJXRo4EoNpJTCiSeOs8wwrkfMJy3JyDps=
github.com/tianon/cuelabs-oci/ociregistry v0.0.0-20240322151419-7d3242933116/go.mod h1:pK23AUVXuNzzTpfMCA06sxZGeVQ/75FdVtW249de9Uo=
golang.org/x/net v0.7.0 h1:rJrUqqhjsgNp7KqAIc25s9pZnjU7TUcSY7HcVZjdn1g=
golang.org/x/net v0.7.0/go.mod h1:2Tu9+aMcznHK/AK1HMvgo6xiTLG5rD5rZLDS+rp2Bjs=
golang.org/x/sys v0.0.0-20220715151400-c0bba94af5f8/go.mod h1:oPkhp1MJrh7nUepCBck5+mAzfO9JrbApNNgaTdGDITg=
Expand Down
138 changes: 99 additions & 39 deletions registry/cache.go
Original file line number Diff line number Diff line change
Expand Up @@ -21,8 +21,7 @@ func RegistryCache(r ociregistry.Interface) ociregistry.Interface {
registry: r, // TODO support "nil" here so this can be a poor-man's ocimem implementation? 👀 see also https://github.com/cue-labs/oci/issues/24
has: map[string]bool{},
tags: map[string]ociregistry.Digest{},
types: map[ociregistry.Digest]string{},
data: map[ociregistry.Digest][]byte{},
data: map[ociregistry.Digest]ociregistry.Descriptor{},
}
}

Expand All @@ -32,11 +31,10 @@ type registryCache struct {
registry ociregistry.Interface

// https://github.com/cue-labs/oci/issues/24
mu sync.Mutex // TODO some kind of per-object/name/digest mutex so we don't request the same object from the upstream registry concurrently (on *top* of our maps mutex)?
has map[string]bool // "repo/name@digest" => true (whether a given repo has the given digest)
tags map[string]ociregistry.Digest // "repo/name:tag" => digest
types map[ociregistry.Digest]string // digest => "mediaType" (most recent *storing* / "cache-miss" lookup wins, in the case of upstream/cross-repo ambiguity)
data map[ociregistry.Digest][]byte // digest => data
mu sync.Mutex // TODO some kind of per-object/name/digest mutex so we don't request the same object from the upstream registry concurrently (on *top* of our maps mutex)?
has map[string]bool // "repo/name@digest" => true (whether a given repo has the given digest)
tags map[string]ociregistry.Digest // "repo/name:tag" => digest
data map[ociregistry.Digest]ociregistry.Descriptor // digest => mediaType+size(+data) (most recent *storing* / "cache-miss" lookup wins, in the case of upstream/cross-repo ambiguity)
}

func cacheKeyDigest(repo string, digest ociregistry.Digest) string {
Expand All @@ -52,41 +50,38 @@ func (rc *registryCache) getBlob(ctx context.Context, repo string, digest ocireg
rc.mu.Lock()
defer rc.mu.Unlock()

if b, ok := rc.data[digest]; ok && rc.has[cacheKeyDigest(repo, digest)] {
return ocimem.NewBytesReader(b, ociregistry.Descriptor{
MediaType: rc.types[digest],
Digest: digest,
Size: int64(len(b)),
}), nil
if desc, ok := rc.data[digest]; ok && desc.Data != nil && rc.has[cacheKeyDigest(repo, digest)] {
return ocimem.NewBytesReader(desc.Data, desc), nil
}

r, err := f(ctx, repo, digest)
if err != nil {
return nil, err
}
//defer r.Close()
// defer r.Close() happens later when we know we aren't making Close the caller's responsibility

desc := r.Descriptor()
digest = desc.Digest // if this isn't a no-op, we've got a naughty registry

rc.has[cacheKeyDigest(repo, desc.Digest)] = true
rc.types[desc.Digest] = desc.MediaType
rc.has[cacheKeyDigest(repo, digest)] = true

if desc.Size > manifestSizeLimit {
rc.data[digest] = desc
return r, nil
}
defer r.Close()

b, err := io.ReadAll(r)
desc.Data, err = io.ReadAll(r)
if err != nil {
r.Close()
return nil, err
}
if err := r.Close(); err != nil {
return nil, err
}

if len(b) <= manifestSizeLimit {
rc.data[desc.Digest] = b
} else {
delete(rc.data, desc.Digest)
}
rc.data[digest] = desc

return ocimem.NewBytesReader(b, desc), nil
return ocimem.NewBytesReader(desc.Data, desc), nil
}

func (rc *registryCache) GetBlob(ctx context.Context, repo string, digest ociregistry.Digest) (ociregistry.BlobReader, error) {
Expand All @@ -104,43 +99,108 @@ func (rc *registryCache) GetTag(ctx context.Context, repo string, tag string) (o
tagKey := cacheKeyTag(repo, tag)

if digest, ok := rc.tags[tagKey]; ok {
if b, ok := rc.data[digest]; ok {
return ocimem.NewBytesReader(b, ociregistry.Descriptor{
MediaType: rc.types[digest],
Digest: digest,
Size: int64(len(b)),
}), nil
if desc, ok := rc.data[digest]; ok && desc.Data != nil {
return ocimem.NewBytesReader(desc.Data, desc), nil
}
}

r, err := rc.registry.GetTag(ctx, repo, tag)
if err != nil {
return nil, err
}
//defer r.Close()
// defer r.Close() happens later when we know we aren't making Close the caller's responsibility

desc := r.Descriptor()

rc.has[cacheKeyDigest(repo, desc.Digest)] = true
rc.tags[tagKey] = desc.Digest
rc.types[desc.Digest] = desc.MediaType

b, err := io.ReadAll(r)
if desc.Size > manifestSizeLimit {
rc.data[desc.Digest] = desc
return r, nil
}
defer r.Close()

desc.Data, err = io.ReadAll(r)
if err != nil {
r.Close()
return nil, err
}
if err := r.Close(); err != nil {
return nil, err
}

if len(b) <= manifestSizeLimit {
rc.data[desc.Digest] = b
} else {
delete(rc.data, desc.Digest)
rc.data[desc.Digest] = desc

return ocimem.NewBytesReader(desc.Data, desc), nil
}

func (rc *registryCache) resolveBlob(ctx context.Context, repo string, digest ociregistry.Digest, f func(ctx context.Context, repo string, digest ociregistry.Digest) (ociregistry.Descriptor, error)) (ociregistry.Descriptor, error) {
rc.mu.Lock()
defer rc.mu.Unlock()

if desc, ok := rc.data[digest]; ok && rc.has[cacheKeyDigest(repo, digest)] {
return desc, nil
}

desc, err := f(ctx, repo, digest)
if err != nil {
return desc, err
}

digest = desc.Digest // if this isn't a no-op, we've got a naughty registry

rc.has[cacheKeyDigest(repo, digest)] = true

// carefully copy only valid Resolve* fields such that any other existing fields are kept (this matters more if we ever make our mutexes better/less aggressive 👀)
if d, ok := rc.data[digest]; ok {
d.MediaType = desc.MediaType
d.Digest = desc.Digest
d.Size = desc.Size
desc = d
}
rc.data[digest] = desc

return desc, nil
}

func (rc *registryCache) ResolveManifest(ctx context.Context, repo string, digest ociregistry.Digest) (ociregistry.Descriptor, error) {
return rc.resolveBlob(ctx, repo, digest, rc.registry.ResolveManifest)
}

func (rc *registryCache) ResolveBlob(ctx context.Context, repo string, digest ociregistry.Digest) (ociregistry.Descriptor, error) {
return rc.resolveBlob(ctx, repo, digest, rc.registry.ResolveBlob)
}

func (rc *registryCache) ResolveTag(ctx context.Context, repo string, tag string) (ociregistry.Descriptor, error) {
rc.mu.Lock()
defer rc.mu.Unlock()

tagKey := cacheKeyTag(repo, tag)

if digest, ok := rc.tags[tagKey]; ok {
if desc, ok := rc.data[digest]; ok {
return desc, nil
}
}

desc, err := rc.registry.ResolveTag(ctx, repo, tag)
if err != nil {
return desc, err
}

rc.has[cacheKeyDigest(repo, desc.Digest)] = true
rc.tags[tagKey] = desc.Digest

// carefully copy only valid Resolve* fields such that any other existing fields are kept (this matters more if we ever make our mutexes better/less aggressive 👀)
if d, ok := rc.data[desc.Digest]; ok {
d.MediaType = desc.MediaType
d.Digest = desc.Digest
d.Size = desc.Size
desc = d
}
rc.data[desc.Digest] = desc

return ocimem.NewBytesReader(b, desc), nil
return desc, nil
}

// TODO more methods (currently only implements what's actually necessary for SynthesizeIndex)
Loading

0 comments on commit 530a108

Please sign in to comment.