From c2f815c6870ca9ff00132598c533ef762b07ee0a Mon Sep 17 00:00:00 2001 From: "W. Trevor King" Date: Wed, 19 Oct 2016 23:26:32 -0700 Subject: [PATCH] schema: Use Validators map and prepare to extend beyond JSON Schema With image-tools split off into its own repository, the plan seems to be to keep all intra-blob JSON validation in this repository and to move all other validation (e.g. for layers or for walking Merkle trees) in image-tools [1]. All the non-validation logic currently in image/ is moving into image-tools as well [2]. Some requirements (e.g. multi-parameter checks like allowed OS/arch pairs [3]) are difficult to handle in JSON Schema but easy to handle in Go. And callers won't care if we're using JSON Schema or not; they just want to know if their blob is valid. This commit restructures intra-blob validation to ease the path going forward (although it doesn't actually change the current validation significantly). The old method: func (v Validator) Validate(src io.Reader) error is now a new Validator type: type Validator(blob io.Reader, descriptor *v1.Descriptor, strict bool) (err error) and instead of instantiating an old Validator instance: schema.MediaTypeImageConfig.Validate(reader) there's a Validators registry mapping from the media type strings to the appropriate Validator instance (which may or may not use JSON Schema under the hood). And there's a Validate function (with the same Validator interface) that looks up the appropriate entry in Validators for you so you have: schema.Validate(reader, descriptor, true) By using a Validators map, we make it easy for library consumers to register (or override) intra-blob validators for a particular type. Locations that call Validate(...) will automatically pick up the new validators without needing local changes. All of the old validation was based on JSON Schema, so currently all Validators values are ValidateJSONSchema. As the schema package grows non-JSON-Schema validation, entries will start to look like: var Validators = map[string]Validator{ v1.MediaTypeImageConfig: ValidateConfig, ... } although ValidateConfig will probably use ValidateJSONSchema internally. By passing through a descriptor, we get a chance to validate the digest and size (which we were not doing before). Digest and size validation for a byte array are also exposed directly (as ValidateByteDigest and ValidateByteSize) for use in validators that are not based on ValidateJSONSchema. Access to the digest also gives us a way to print specific error messages on failures. In situations where you don't know the blob digest, the new DigestByte will help you calculate it (for a byte array). There is also a new 'strict' parameter to distinguish between compliant images (which should always pass when strict is false) and images that only use features which the spec requires implementations to support (which should only pass if strict is true). The current JSON Schemas are not strict, but the config/layer media type checks in ValidateManifest exercise this distinction. [1]: http://ircbot.wl.linuxfoundation.org/meetings/opencontainers/2016/opencontainers.2016-10-12-21.01.log.html#l-71 [2]: https://github.com/opencontainers/image-spec/pull/337 [3]: https://tools.ietf.org/html/draft-fge-json-schema-validation-00#section-5.5.5 [4]: https://github.com/opencontainers/image-spec/pull/341 Signed-off-by: W. Trevor King --- schema/config_test.go | 15 ++- schema/descriptor.go | 43 ++++++ schema/manifest.go | 74 ++++++++++ .../manifest_backwards_compatibility_test.go | 55 ++++---- schema/manifest_test.go | 76 ++++++++++- schema/schema.go | 21 +-- schema/spec_test.go | 15 ++- schema/validator.go | 126 ++++++++++-------- 8 files changed, 317 insertions(+), 108 deletions(-) create mode 100644 schema/descriptor.go create mode 100644 schema/manifest.go diff --git a/schema/config_test.go b/schema/config_test.go index 27c14875c..c83080175 100644 --- a/schema/config_test.go +++ b/schema/config_test.go @@ -19,6 +19,7 @@ import ( "testing" "github.com/opencontainers/image-spec/schema" + "github.com/opencontainers/image-spec/specs-go/v1" ) func TestConfig(t *testing.T) { @@ -155,9 +156,19 @@ func TestConfig(t *testing.T) { fail: false, }, } { - r := strings.NewReader(tt.config) - err := schema.MediaTypeImageConfig.Validate(r) + configBytes := []byte(tt.config) + digest, err := schema.DigestByte(configBytes, "sha256") + if err != nil { + t.Fatal(err) + } + reader := strings.NewReader(tt.config) + descriptor := v1.Descriptor{ + MediaType: v1.MediaTypeImageConfig, + Digest: digest, + Size: int64(len(configBytes)), + } + err = schema.Validate(reader, &descriptor, true) if got := err != nil; tt.fail != got { t.Errorf("test %d: expected validation failure %t but got %t, err %v", i, tt.fail, got, err) } diff --git a/schema/descriptor.go b/schema/descriptor.go new file mode 100644 index 000000000..9648a5318 --- /dev/null +++ b/schema/descriptor.go @@ -0,0 +1,43 @@ +// Copyright 2016 The Linux Foundation +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. + +package schema + +import ( + "crypto/sha256" + "encoding/hex" + "fmt" + "hash" +) + +// DigestByte computes the digest of a blob using the requested +// algorithm. +func DigestByte(data []byte, algorithm string) (digest string, err error) { + var hasher hash.Hash + switch algorithm { + case "sha256": + hasher = sha256.New() + default: + return "", fmt.Errorf("unrecognized algorithm: %q", algorithm) + } + + _, err = hasher.Write(data) + if err != nil { + return "", err + } + + hashBytes := hasher.Sum(nil) + hashHex := hex.EncodeToString(hashBytes[:]) + return fmt.Sprintf("%s:%s", algorithm, hashHex), nil +} diff --git a/schema/manifest.go b/schema/manifest.go new file mode 100644 index 000000000..51299f8c6 --- /dev/null +++ b/schema/manifest.go @@ -0,0 +1,74 @@ +// Copyright 2016 The Linux Foundation +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. + +package schema + +import ( + "bytes" + "encoding/json" + "fmt" + "io" + "io/ioutil" + + "github.com/opencontainers/image-spec/specs-go/v1" + "github.com/pkg/errors" +) + +// ValidateManifest validates the given CAS blob as +// application/vnd.oci.image.manifest.v1+json. Calls +// ValidateJSONSchema as well. +func ValidateManifest(blob io.Reader, descriptor *v1.Descriptor, strict bool) (err error) { + if descriptor.MediaType != v1.MediaTypeImageManifest { + return fmt.Errorf("unexpected descriptor media type: %q", descriptor.MediaType) + } + + buffer, err := ioutil.ReadAll(blob) + if err != nil { + return errors.Wrapf(err, "unable to read %s", descriptor.Digest) + } + + err = ValidateJSONSchema(bytes.NewReader(buffer), descriptor, strict) + if err != nil { + return err + } + + header := v1.Manifest{} + err = json.Unmarshal(buffer, &header) + if err != nil { + return errors.Wrap(err, "manifest format mismatch") + } + + if header.Config.MediaType != v1.MediaTypeImageConfig { + error := fmt.Errorf("warning: config %s has an unknown media type: %s\n", header.Config.Digest, header.Config.MediaType) + if strict { + return error + } else { + fmt.Println(error) + } + } + + for _, layer := range header.Layers { + if layer.MediaType != v1.MediaTypeImageLayer && + layer.MediaType != v1.MediaTypeImageLayerNonDistributable { + error := fmt.Errorf("warning: layer %s has an unknown media type: %s\n", layer.Digest, layer.MediaType) + if strict { + return error + } else { + fmt.Println(error) + } + } + } + + return nil +} diff --git a/schema/manifest_backwards_compatibility_test.go b/schema/manifest_backwards_compatibility_test.go index 8219800b7..efb9cd961 100644 --- a/schema/manifest_backwards_compatibility_test.go +++ b/schema/manifest_backwards_compatibility_test.go @@ -15,9 +15,6 @@ package schema_test import ( - "crypto/sha256" - "encoding/hex" - "fmt" "strings" "testing" @@ -110,16 +107,14 @@ func TestBackwardsCompatibilityManifestList(t *testing.T) { fail: false, }, } { - sum := sha256.Sum256([]byte(tt.manifest)) - got := fmt.Sprintf("sha256:%s", hex.EncodeToString(sum[:])) - if tt.digest != got { - t.Errorf("test %d: expected digest %s but got %s", i, tt.digest, got) - } - manifest := convertFormats(tt.manifest) - r := strings.NewReader(manifest) - err := schema.MediaTypeManifestList.Validate(r) - + reader := strings.NewReader(manifest) + descriptor := v1.Descriptor{ + MediaType: v1.MediaTypeImageManifestList, + Digest: tt.digest, + Size: int64(len(manifest)), + } + err := schema.Validate(reader, &descriptor, true) if got := err != nil; tt.fail != got { t.Errorf("test %d: expected validation failure %t but got %t, err %v", i, tt.fail, got, err) } @@ -130,6 +125,7 @@ func TestBackwardsCompatibilityManifest(t *testing.T) { for i, tt := range []struct { manifest string digest string + strict bool fail bool }{ // manifest pulled from docker hub using hash value @@ -170,19 +166,18 @@ func TestBackwardsCompatibilityManifest(t *testing.T) { } ] }`, - fail: false, + strict: false, // unrecognized config media type application/octet-stream + fail: false, }, } { - sum := sha256.Sum256([]byte(tt.manifest)) - got := fmt.Sprintf("sha256:%s", hex.EncodeToString(sum[:])) - if tt.digest != got { - t.Errorf("test %d: expected digest %s but got %s", i, tt.digest, got) - } - manifest := convertFormats(tt.manifest) - r := strings.NewReader(manifest) - err := schema.MediaTypeManifest.Validate(r) - + reader := strings.NewReader(manifest) + descriptor := v1.Descriptor{ + MediaType: v1.MediaTypeImageManifest, + Digest: tt.digest, + Size: int64(len(manifest)), + } + err := schema.Validate(reader, &descriptor, tt.strict) if got := err != nil; tt.fail != got { t.Errorf("test %d: expected validation failure %t but got %t, err %v", i, tt.fail, got, err) } @@ -213,16 +208,14 @@ func TestBackwardsCompatibilityConfig(t *testing.T) { fail: false, }, } { - sum := sha256.Sum256([]byte(tt.manifest)) - got := fmt.Sprintf("sha256:%s", hex.EncodeToString(sum[:])) - if tt.digest != got { - t.Errorf("test %d: expected digest %s but got %s", i, tt.digest, got) - } - manifest := convertFormats(tt.manifest) - r := strings.NewReader(manifest) - err := schema.MediaTypeImageConfig.Validate(r) - + reader := strings.NewReader(manifest) + descriptor := v1.Descriptor{ + MediaType: v1.MediaTypeImageConfig, + Digest: tt.digest, + Size: int64(len(manifest)), + } + err := schema.Validate(reader, &descriptor, true) if got := err != nil; tt.fail != got { t.Errorf("test %d: expected validation failure %t but got %t, err %v", i, tt.fail, got, err) } diff --git a/schema/manifest_test.go b/schema/manifest_test.go index f1cb51d02..dc60a753e 100644 --- a/schema/manifest_test.go +++ b/schema/manifest_test.go @@ -19,11 +19,13 @@ import ( "testing" "github.com/opencontainers/image-spec/schema" + "github.com/opencontainers/image-spec/specs-go/v1" ) func TestManifest(t *testing.T) { for i, tt := range []struct { manifest string + strict bool fail bool }{ // expected failure: mediaType does not match pattern @@ -34,7 +36,8 @@ func TestManifest(t *testing.T) { "mediaType": "invalid" } `, - fail: true, + strict: true, + fail: true, }, // expected failure: config.size is a string, expected integer @@ -51,7 +54,8 @@ func TestManifest(t *testing.T) { "layers": [] } `, - fail: true, + strict: true, + fail: true, }, // expected failure: layers.size is string, expected integer @@ -74,7 +78,56 @@ func TestManifest(t *testing.T) { ] } `, - fail: true, + strict: true, + fail: true, + }, + + // expected failure: unrecognized layer media type and strict is true + { + manifest: ` +{ + "schemaVersion": 2, + "mediaType": "application/vnd.oci.image.manifest.v1+json", + "config": { + "mediaType": "application/vnd.oci.image.config.v1+json", + "size": 1470, + "digest": "sha256:c86f7763873b6c0aae22d963bab59b4f5debbed6685761b5951584f6efb0633b" + }, + "layers": [ + { + "mediaType": "application/vnd.other.layer", + "size": "675598", + "digest": "sha256:c86f7763873b6c0aae22d963bab59b4f5debbed6685761b5951584f6efb0633b" + } + ] +} +`, + strict: true, + fail: true, + }, + + // expected success: unrecognized layer media type, but strict is false + { + manifest: ` +{ + "schemaVersion": 2, + "mediaType": "application/vnd.oci.image.manifest.v1+json", + "config": { + "mediaType": "application/vnd.oci.image.config.v1+json", + "size": 1470, + "digest": "sha256:c86f7763873b6c0aae22d963bab59b4f5debbed6685761b5951584f6efb0633b" + }, + "layers": [ + { + "mediaType": "application/vnd.other.layer", + "size": "675598", + "digest": "sha256:c86f7763873b6c0aae22d963bab59b4f5debbed6685761b5951584f6efb0633b" + } + ] +} +`, + strict: false, + fail: true, }, // valid manifest @@ -111,12 +164,23 @@ func TestManifest(t *testing.T) { } } `, - fail: false, + strict: true, + fail: false, }, } { - r := strings.NewReader(tt.manifest) - err := schema.MediaTypeManifest.Validate(r) + manifestBytes := []byte(tt.manifest) + digest, err := schema.DigestByte(manifestBytes, "sha256") + if err != nil { + t.Fatal(err) + } + reader := strings.NewReader(tt.manifest) + descriptor := v1.Descriptor{ + MediaType: v1.MediaTypeImageManifest, + Digest: digest, + Size: int64(len(manifestBytes)), + } + err = schema.Validate(reader, &descriptor, tt.strict) if got := err != nil; tt.fail != got { t.Errorf("test %d: expected validation failure %t but got %t, err %v", i, tt.fail, got, err) } diff --git a/schema/schema.go b/schema/schema.go index 1ca6312c4..d85a41a95 100644 --- a/schema/schema.go +++ b/schema/schema.go @@ -20,26 +20,17 @@ import ( "github.com/opencontainers/image-spec/specs-go/v1" ) -// Media types for the OCI image formats -const ( - MediaTypeDescriptor Validator = v1.MediaTypeDescriptor - MediaTypeManifest Validator = v1.MediaTypeImageManifest - MediaTypeManifestList Validator = v1.MediaTypeImageManifestList - MediaTypeImageConfig Validator = v1.MediaTypeImageConfig - MediaTypeImageLayer unimplemented = v1.MediaTypeImageLayer -) - var ( // fs stores the embedded http.FileSystem // having the OCI JSON schema files in root "/". fs = _escFS(false) - // specs maps OCI schema media types to schema files. - specs = map[Validator]string{ - MediaTypeDescriptor: "content-descriptor.json", - MediaTypeManifest: "image-manifest-schema.json", - MediaTypeManifestList: "manifest-list-schema.json", - MediaTypeImageConfig: "config-schema.json", + // Schemas maps OCI media types to JSON Schema files. + Schemas = map[string]string{ + v1.MediaTypeDescriptor: "content-descriptor.json", + v1.MediaTypeImageManifest: "image-manifest-schema.json", + v1.MediaTypeImageManifestList: "manifest-list-schema.json", + v1.MediaTypeImageConfig: "config-schema.json", } ) diff --git a/schema/spec_test.go b/schema/spec_test.go index 2a6f4a493..d3be1dd3b 100644 --- a/schema/spec_test.go +++ b/schema/spec_test.go @@ -25,6 +25,7 @@ import ( "testing" "github.com/opencontainers/image-spec/schema" + "github.com/opencontainers/image-spec/specs-go/v1" "github.com/pkg/errors" "github.com/russross/blackfriday" ) @@ -73,7 +74,19 @@ func validate(t *testing.T, name string) { continue } - err = schema.Validator(example.Mediatype).Validate(strings.NewReader(example.Body)) + bodyBytes := []byte(example.Body) + digest, err := schema.DigestByte(bodyBytes, "sha256") + if err != nil { + t.Fatal(err) + } + + reader := strings.NewReader(example.Body) + descriptor := v1.Descriptor{ + MediaType: example.Mediatype, + Digest: digest, + Size: int64(len(bodyBytes)), + } + err = schema.Validate(reader, &descriptor, true) if err == nil { printFields(t, "ok", example.Mediatype, example.Title) t.Log(example.Body, "---") diff --git a/schema/validator.go b/schema/validator.go index 432e7b992..623684500 100644 --- a/schema/validator.go +++ b/schema/validator.go @@ -16,60 +16,81 @@ package schema import ( "bytes" - "encoding/json" "fmt" "io" "io/ioutil" + "strings" "github.com/opencontainers/image-spec/specs-go/v1" "github.com/pkg/errors" "github.com/xeipuuv/gojsonschema" ) -// Validator wraps a media type string identifier -// and implements validation against a JSON schema. -type Validator string - -type validateDescendantsFunc func(r io.Reader) error - -var mapValidateDescendants = map[Validator]validateDescendantsFunc{ - MediaTypeManifest: validateManifestDescendants, -} - -// ValidationError contains all the errors that happened during validation. +// ValidationError contains all the errors that happened during +// validation. type ValidationError struct { Errs []error } +// Validator is a template for validating a CAS blob. The 'strict' +// parameter distinguishes between compliant blobs (which should only +// pass when strict is false) and blobs that only use features which +// the spec requires implementations to support (which should pass +// regardless of strict). +type Validator func(blob io.Reader, descriptor *v1.Descriptor, strict bool) (err error) + +// Validators is a map from media types to an appropriate Validator +// function. +var Validators = map[string]Validator{ + v1.MediaTypeDescriptor: ValidateJSONSchema, + v1.MediaTypeImageManifestList: ValidateJSONSchema, + v1.MediaTypeImageManifest: ValidateManifest, + v1.MediaTypeImageConfig: ValidateJSONSchema, +} + func (e ValidationError) Error() string { return fmt.Sprintf("%v", e.Errs) } -// Validate validates the given reader against the schema of the wrapped media type. -func (v Validator) Validate(src io.Reader) error { - buf, err := ioutil.ReadAll(src) +// Validate retrieves the appropriate Validator from Validators and +// uses it to validate the given CAS blob. Validate uses the +// Validator template; see the Validator docs for usage information. +func Validate(blob io.Reader, descriptor *v1.Descriptor, strict bool) (err error) { + validator, ok := Validators[descriptor.MediaType] + if !ok { + return fmt.Errorf("unrecognized media type %q", descriptor.MediaType) + } + return validator(blob, descriptor, strict) +} + +// ValidateJSONSchema validates the given CAS blob against the schema +// for the descriptor's media type. Calls ValidateByteSize and +// ValidateByteDigest as well. +func ValidateJSONSchema(blob io.Reader, descriptor *v1.Descriptor, strict bool) (err error) { + buffer, err := ioutil.ReadAll(blob) if err != nil { - return errors.Wrap(err, "unable to read the document file") + return errors.Wrapf(err, "unable to read %s", descriptor.Digest) } - if f, ok := mapValidateDescendants[v]; ok { - if f == nil { - return fmt.Errorf("internal error: mapValidateDescendents[%q] is nil", v) - } - err = f(bytes.NewReader(buf)) - if err != nil { - return err - } + err = ValidateByteSize(buffer, descriptor) + if err != nil { + return err } - sl := gojsonschema.NewReferenceLoaderFileSystem("file:///"+specs[v], fs) - ml := gojsonschema.NewStringLoader(string(buf)) + err = ValidateByteDigest(buffer, descriptor) + if err != nil { + return err + } - result, err := gojsonschema.Validate(sl, ml) + url := "file:///" + Schemas[descriptor.MediaType] + schemaLoader := gojsonschema.NewReferenceLoaderFileSystem(url, fs) + docLoader := gojsonschema.NewStringLoader(string(buffer)) + + result, err := gojsonschema.Validate(schemaLoader, docLoader) if err != nil { return errors.Wrapf( - WrapSyntaxError(bytes.NewReader(buf), err), - "schema %s: unable to validate", v) + WrapSyntaxError(bytes.NewReader(buffer), err), + "unable to validate JSON Schema for %s", descriptor.Digest) } if result.Valid() { @@ -77,8 +98,8 @@ func (v Validator) Validate(src io.Reader) error { } errs := make([]error, 0, len(result.Errors())) - for _, desc := range result.Errors() { - errs = append(errs, fmt.Errorf("%s", desc)) + for _, description := range result.Errors() { + errs = append(errs, fmt.Errorf("%s", description)) } return ValidationError{ @@ -86,34 +107,33 @@ func (v Validator) Validate(src io.Reader) error { } } -type unimplemented string - -func (v unimplemented) Validate(src io.Reader) error { - return fmt.Errorf("%s: unimplemented", v) -} - -func validateManifestDescendants(r io.Reader) error { - header := v1.Manifest{} - - buf, err := ioutil.ReadAll(r) - if err != nil { - return errors.Wrapf(err, "error reading the io stream") +// ValidateByteDigest checks the digest of blob against the expected +// descriptor.Digest. +func ValidateByteDigest(blob []byte, descriptor *v1.Descriptor) (err error) { + fields := strings.SplitN(descriptor.Digest, ":", 2) + if len(fields) != 2 { + return fmt.Errorf("invalid digest: %q, %v", descriptor.Digest, fields) } - - err = json.Unmarshal(buf, &header) + algorithm := fields[0] + actualDigest, err := DigestByte(blob, algorithm) if err != nil { - return errors.Wrap(err, "manifest format mismatch") + return err } - if header.Config.MediaType != string(v1.MediaTypeImageConfig) { - fmt.Printf("warning: config %s has an unknown media type: %s\n", header.Config.Digest, header.Config.MediaType) + if actualDigest != descriptor.Digest { + return fmt.Errorf("unexpected digest for %s: %s", descriptor.Digest, actualDigest) } - for _, layer := range header.Layers { - if layer.MediaType != string(v1.MediaTypeImageLayer) && - layer.MediaType != string(v1.MediaTypeImageLayerNonDistributable) { - fmt.Printf("warning: layer %s has an unknown media type: %s\n", layer.Digest, layer.MediaType) - } + return nil +} + +// ValidateByteSize checks the size of blob against the expected +// descriptor.Size. This isn't very complicated; the function is +// mostly useful for generating consistent error messages. +func ValidateByteSize(blob []byte, descriptor *v1.Descriptor) (err error) { + if descriptor.Size > 0 && int64(len(blob)) != descriptor.Size { + return fmt.Errorf("unexpected size for %s: %d != %d", descriptor.Digest, len(blob), descriptor.Size) } + return nil }