diff --git a/CHANGELOG.md b/CHANGELOG.md index d0bd782..a935e61 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -16,19 +16,20 @@ and this project adheres to [Semantic Versioning](https://semver.org/spec/v2.0.0 ### Fixed -## [v2.0.0] - 2021-06-23 +## [v2.0.0] - 2024-08-30 ### Added -- `storage:schemes`, `storage:refs` and Storage Scheme Object +- `storage:schemes`, `storage:ref` and Storage Scheme Object - Support the storage extension in Links - Support for the Alternate Assets Extension - Support for other storage providers, including custom S3 hosts ### Changed +- The extension is a framework for storage providers; it doesn't strictly define the individual providers. - The storage providers are grouped in `storage:schemes` and located in the Item Properties, Collections or Catalog metadata -- Assets and Links reference the storage schemes by key in `storage:refs` +- Assets and Links reference the storage schemes by key in `storage:ref` ### Removed diff --git a/CONTRIBUTING.md b/CONTRIBUTING.md new file mode 100644 index 0000000..be7bbc6 --- /dev/null +++ b/CONTRIBUTING.md @@ -0,0 +1,33 @@ +# Contributing + +All contributions are subject to the +[STAC Specification Code of Conduct](https://github.com/radiantearth/stac-spec/blob/master/CODE_OF_CONDUCT.md). +For contributions, please follow the +[STAC specification contributing guide](https://github.com/radiantearth/stac-spec/blob/master/CONTRIBUTING.md). Instructions +for running tests are copied here for convenience. + +## Running tests + +The same checks that run on PRs are part of the repository and can be run locally to verify that changes are valid. +To run tests locally, you'll need `npm`, which is a standard part of any [node.js installation](https://nodejs.org/en/download/). + +First you'll need to install everything with npm once. 
Just navigate to the root of this repository and on +your command line run: + +```bash +npm install +``` + +Then to check markdown formatting and test the examples against the JSON schema, you can run: + +```bash +npm test +``` + +This will spit out the same texts that you see online, and you can then go and fix your markdown or examples. + +If the tests reveal formatting problems with the examples, you can fix them with: + +```bash +npm run format-examples +``` diff --git a/README.md b/README.md index ff5762b..02e4ccd 100644 --- a/README.md +++ b/README.md @@ -12,9 +12,8 @@ It allows adding details related to cloud object storage access and costs to be This extension does not cover NFS solutions provided by PaaS cloud companies. - Examples: - - [NAIP Item](examples/item-naip.json): Shows a mixture of storage providers, including custom S3 hosts. - - [NSL Item](examples/item-nsl.json): Shows the usage of the extension in combination with the - [alternate asset extension](https://github.com/stac-extensions/alternate-assets). + - [NAIP Item with Alternate Assets](examples/item-naip.json): Shows a mixture of storage providers, including custom S3 hosts + and the [alternate assets extension](https://github.com/stac-extensions/alternate-assets). - [Catalog with Link](examples/catalog-link.json): Shows the usage of the extension on a link in a STAC Catalog. - [Collection with Auth](examples/catalog-link.json): Shows the usage of the extension in a STAC Collecion in combination with the [authentication extension](https://github.com/stac-extensions/authentication). 
@@ -46,82 +45,56 @@ The fields in the table below can be used in these parts of STAC documents: - [x] Links - [x] [Alternate Assets Object](https://github.com/stac-extensions/alternate-assets?tab=readme-ov-file#alternate-asset-object) -| Field Name | Type | Description | -| -------------- | ---------- | ----------- | -| `storage:refs` | \[string\] | A property that specifies which schemes in `storage:schemes` may be used to access an Asset or Link. Each value must be one of the keys defined in `storage:schemes`. | +| Field Name | Type | Description | +| ------------- | ------- | ----------- | +| `storage:ref` | string | A property that specifies which scheme in `storage:schemes` is used to access an Asset or Link. The value must be one of the keys defined in `storage:schemes`. | ### Storage Scheme Object | Field Name | Type | Description | | -------------- | ------- | ----------- | -| platform | string | **REQUIRED.** The [cloud provider](#platforms) where data is stored. | -| region | string | The region where the data is stored. Relevant to speed of access and inter region egress costs (as defined by PaaS provider) | -| requester_pays | boolean | Is the data requester pays or is it data manager/cloud provider pays. Defaults to `false` | -| tier | string | The title for the tier type (as defined by PaaS provider) | +| platform | string | **REQUIRED.** The cloud provider where data is stored, as a URI or URI template to the API. | +| region | string | The region where the data is stored. Relevant to speed of access and inter-region egress costs (as defined by PaaS provider). | +| requester_pays | boolean | Is the data "requester pays" (`true`) or is it "data manager/cloud provider pays" (`false`). Defaults to `false`. | +| ... | ... | Additional properties as defined in the URI template or in the platform-specific documents. | -The properties `title` and `description` as defined in Common Metadata can be used as well. 
+The properties `title` and `description` as defined in Common Metadata should be used as well. -#### Platforms +#### platform -The `platform` field identifies the cloud provider where the data is stored. +The `platform` field identifies the cloud provider where the data is stored, as a URI or URI template pointing to the API of the service. -There are a couple of pre-defined values for common providers: +If a URI template is provided, each variable must be defined in the Storage Scheme Object as a property with the same name. +For example, the URI template `https://{bucket}.{region}.example.com` must have at least the properties +`bucket` and `region` defined: -- Alibaba Cloud (Aliyun): `ALIBABA` -- Amazon AWS: `AWS` -- Microsoft Azure: `AZURE` -- Google Cloud Platform: `GCP` -- IBM Cloud: `IBM` -- Oracle Cloud: `ORACLE` - -All other PaaS solutions must use a unique URL to the service. +```json +{ + "platform": "https://{bucket}.{region}.example.com", + "region": "eu-fr", + "bucket": "john-doe-stac", + "requester_pays": true +} +``` In case an `href` contains a non-HTTP URL that is not directly resolvable, the `platform` property must identify the host so that the URL can be resolved without further information. -This is especially useful to provide the endpoint URL for custom S3 providers. -In this case the `platform` is effectively the endpoint URL. 
- -#### Tiers - -Recommended values for the `tier` field: - -| Minimum Duration | [Google Cloud Platform](https://cloud.google.com/storage/docs/storage-classes) | [Amazon AWS](https://aws.amazon.com/s3/storage-classes/) | [Microsoft Azure](https://docs.microsoft.com/en-us/azure/storage/blobs/storage-blob-storage-tiers) | [IBM Cloud](https://cloud.ibm.com/objectstorage/create#pricing) | [Oracle Cloud](https://www.oracle.com/cloud/storage/pricing.html) | [Alibaba Cloud](https://www.alibabacloud.com/product/oss/pricing) | -| ------------- | --------- | ------------------------ | ------- |---------- | ----------------- | ----------------- | -| 0 (Auto-Tier) | | Intelligent-Tiering | | Smart Tier | -| 0 days | STANDARD | Standard | hot | Standard | Standard | Standard | -| 30 days | NEARLINE | Standard-IA, One Zone-IA | cool | Vault | Infrequent Access | Infrequent Access | -| 60 days | | | | | | Archive | -| 90 days | COLDLINE | Glacier | | Cold Vault | Archive | | -| 180 days | | Glacier Deep Archive | archive | | | Cold Archive | -| 365 days | ARCHIVE | | | | | | - -## Contributing - -All contributions are subject to the -[STAC Specification Code of Conduct](https://github.com/radiantearth/stac-spec/blob/master/CODE_OF_CONDUCT.md). -For contributions, please follow the -[STAC specification contributing guide](https://github.com/radiantearth/stac-spec/blob/master/CONTRIBUTING.md) Instructions -for running tests are copied here for convenience. +For example, this is especially useful to provide the endpoint URL for custom S3 providers. +In this case the `platform` could effectively provide the endpoint URL. -### Running tests +We try to collect pre-defined templates and best practices for as many providers as possible +in this repository, but be aware that these are not part of the official extension releases +and are not validated. 
This extension just provides the framework; the provider best practices +may change at any time without a new version of this extension being released. -The same checks that run as checks on PR's are part of the repository and can be run locally to verify that changes are valid. -To run tests locally, you'll need `npm`, which is a standard part of any [node.js installation](https://nodejs.org/en/download/). +The following providers have defined best practices at this point: -First you'll need to install everything with npm once. Just navigate to the root of this repository and on -your command line run: -```bash -npm install -``` +- [AWS S3](platforms/aws-s3.md) +- [Generic S3 (non-AWS)](platforms/s3.md) +- [Microsoft Azure](platforms/ms-azure.md) -Then to check markdown formatting and test the examples against the JSON schema, you can run: -```bash -npm test -``` +Feel encouraged to submit additional platform specifications via Pull Requests. -This will spit out the same texts that you see online, and you can then go and fix your markdown or examples. +## Contributing -If the tests reveal formatting problems with the examples, you can fix them with: -```bash -npm run format-examples -``` +See the [Contributor documentation](CONTRIBUTING.md) for details. 
diff --git a/examples/catalog-link.json b/examples/catalog-link.json index d3a8de9..0bb9141 100644 --- a/examples/catalog-link.json +++ b/examples/catalog-link.json @@ -9,10 +9,10 @@ "description": "An example catalog with a link to documentation on object storage.", "storage:schemes": { "aws": { - "platform": "AWS", + "platform": "https://{bucket}.s3.{region}.amazonaws.com", + "bucket": "mybucket", "region": "us-west-2", - "requester_pays": true, - "tier": "Standard" + "requester_pays": true } }, "links": [ @@ -25,9 +25,7 @@ "href": "s3://mybucket/project/documentation.pdf", "type": "application/pdf", "rel": "about", - "storage:refs": [ - "aws" - ] + "storage:ref": "aws" } ] } \ No newline at end of file diff --git a/examples/collection.json b/examples/collection.json index da32b39..270e660 100644 --- a/examples/collection.json +++ b/examples/collection.json @@ -11,7 +11,8 @@ "license": "CC-0", "storage:schemes": { "aws": { - "platform": "AWS", + "platform": "https://{bucket}.s3.{region}.amazonaws.com", + "bucket": "mybucket", "region": "us-west-2", "requester_pays": true, "tier": "Standard" @@ -27,9 +28,7 @@ "title": "STAC Items as GeoParquet", "href": "s3://mybucket/project/items.parquet", "type": "application/vnd.apache.parquet", - "storage:refs": [ - "aws" - ], + "storage:ref": "aws", "auth:refs": [ "aws" ] @@ -45,9 +44,7 @@ "href": "s3://mybucket/project/documentation.pdf", "type": "application/pdf", "rel": "about", - "storage:refs": [ - "aws" - ], + "storage:ref": "aws", "auth:refs": [ "aws" ] diff --git a/examples/item-naip.json b/examples/item-naip.json index 63eb687..6af3f26 100644 --- a/examples/item-naip.json +++ b/examples/item-naip.json @@ -2,7 +2,8 @@ "stac_version": "1.0.0", "stac_extensions": [ "https://stac-extensions.github.io/storage/v2.0.0/schema.json", - "https://stac-extensions.github.io/version/v1.2.0/schema.json" + "https://stac-extensions.github.io/version/v1.2.0/schema.json", + 
"https://stac-extensions.github.io/alternate-assets/v1.1.0/schema.json" ], "id": "m_3009743_sw_14_1_20160928_20161129", "bbox": [ @@ -45,35 +46,16 @@ "platform": "UNKNOWN_PLATFORM", "gsd": 1, "storage:schemes": { - "az-wus2-arc": { - "platform": "AZURE", - "region": "westus2", - "tier": "archive" - }, - "gs-cld": { - "platform": "GCP", - "region": "us-central1", - "requester_pays": true, - "tier": "COLDLINE" + "az-wus2-ar": { + "platform": "https://{account}.blob.core.windows.net", + "account": "jon-doe-123", + "region": "westus2" }, "aws-std": { - "platform": "AWS", + "platform": "https://{bucket}.s3.{region}.amazonaws.com", + "bucket": "naip-visualization", "region": "us-west-2", - "requester_pays": true, - "tier": "Standard" - }, - "az-weu-hot": { - "platform": "AZURE", - "region": "westeurope", - "requester_pays": false, - "tier": "hot" - }, - "az-eus-hot": { - "platform": "AZURE", - "region": "eastus", - "requester_pays": false, - "tier": "hot", - "deprecated": true + "requester_pays": true }, "minio": { "platform": "https://play.min.io:9000" @@ -82,62 +64,18 @@ }, "assets": { "CO_GEOTIFF_RGB": { - "href": "s3://mybucket/tx/2016/100cm/rgb/30097/m_3009743_sw_14_1_20160928.tif", - "type": "image/tiff; application=geotiff; profile=cloud-optimized", - "storage:refs": [ - "minio" - ] - }, - "CO_GEOTIFF_AWS_RGB": { "href": "s3://naip-visualization/tx/2016/100cm/rgb/30097/m_3009743_sw_14_1_20160928.tif", "type": "image/tiff; application=geotiff; profile=cloud-optimized", - "storage:refs": [ - "aws-std" - ] + "storage:ref": "aws-std", + "alternate": { + "href": "s3://mybucket/tx/2016/100cm/rgb/30097/m_3009743_sw_14_1_20160928.tif", + "storage:ref": "minio" + } }, "GEOTIFF_AZURE_RGBIR": { "href": "https://naip-nsl.blob.core.windows.net/tx/2016/100cm/rgb/30097/m_3009743_sw_14_1_20160928.tif", "type": "image/tiff; application=geotiff", - "storage:refs": [ - "az-wus2-ar" - ] - }, - "CO_GEOTIFF_GCP_RGB": { - "href": 
"gs://naip-data/tx/2016/100cm/rgb/30097/m_3009743_sw_14_1_20160928.tif", - "type": "image/tiff; application=geotiff; profile=cloud-optimized", - "storage:refs": [ - "gs-cld" - ] - }, - "CO_GEOTIFF_AZURE_RGB": { - "href": "https://naipeuwest.blob.core.windows.net/naip/v002/tx/2016/tx_100cm_2016/30097/m_3009743_sw_14_1_20160928.tif", - "type": "image/tiff; application=geotiff; profile=cloud-optimized", - "storage:refs": [ - "az-weu-hot" - ] - }, - "CO_GEOTIFF_AZURE_RGB_DEPRECATED": { - "href": "https://naipblobs.blob.core.windows.net/naip/v002/tx/2016/tx_100cm_2016/30097/m_3009743_sw_14_1_20160928.tif", - "type": "image/tiff; application=geotiff; profile=cloud-optimized", - "storage:refs": [ - "az-eus-hot" - ], - "deprecated": true - }, - "THUMBNAIL": { - "href": "https://naipblobs.blob.core.windows.net/naip/v002/tx/2016/tx_100cm_2016/30097/m_3009743_sw_14_1_20160928.200.jpg", - "type": "image/jpeg", - "storage:refs": [ - "minio" - ] - }, - "THUMBNAIL_AZURE_DEPRECATED": { - "href": "https://naipblobs.blob.core.windows.net/naip/v002/tx/2016/tx_100cm_2016/30097/m_3009743_sw_14_1_20160928.200.jpg", - "type": "image/jpeg", - "storage:refs": [ - "az-eus-hot" - ], - "deprecated": true + "storage:ref": "az-wus2-ar" } }, "links": [ diff --git a/examples/item-nsl.json b/examples/item-nsl.json deleted file mode 100644 index 9b0c996..0000000 --- a/examples/item-nsl.json +++ /dev/null @@ -1,101 +0,0 @@ -{ - "stac_version": "1.0.0", - "stac_extensions": [ - "https://stac-extensions.github.io/storage/v2.0.0/schema.json", - "https://stac-extensions.github.io/alternate-assets/v1.0.0/schema.json" - ], - "id": "20190822T183518Z_746_POM1_ST2_P", - "type": "Feature", - "bbox": [ - -97.7466867683867, - 30.278398961994966, - -97.72990596574927, - 30.288621181865743 - ], - "geometry": { - "type": "Polygon", - "coordinates": [ - [ - [ - -97.7466867683867, - 30.28754662370266 - ], - [ - -97.74555747279238, - 30.278398961994966 - ], - [ - -97.72990596574927, - 30.27972380176124 - ], - [ - 
-97.73085242627444, - 30.288621181865743 - ], - [ - -97.7466867683867, - 30.28754662370266 - ] - ] - ] - }, - "properties": { - "datetime": "2019-08-22T18:35:18+00:00", - "mission": "SWIFT", - "platform": "SWIFT_2", - "instrument": "POM_1", - "gsd": 0.20000000298023224, - "storage:schemes": { - "gcp-std": { - "platform": "GCP", - "region": "us-central1", - "requester_pays": true, - "tier": "STANDARD" - }, - "aws-glc": { - "platform": "AWS", - "region": "us-central-1", - "requester_pays": true, - "tier": "Glacier" - } - } - }, - "assets": { - "GEOTIFF_RGB": { - "href": "gs://swiftera-processed-data/20190822T162258Z_TRAVIS_COUNTY/Published/REGION_0/20190822T183518Z_746_POM1_ST2_P.tif", - "type": "image/tiff; application=geotiff; profile=cloud-optimized", - "storage:refs": [ - "gcp-std" - ], - "alternate": { - "aws": { - "href": "s3://nsl-data-archive/20190822T162258Z_TRAVIS_COUNTY/Published/REGION_0/20190822T183518Z_746_POM1_ST2_P.tif", - "storage:refs": [ - "aws-std" - ] - } - } - }, - "THUMBNAIL_RGB": { - "href": "gs://swiftera-processed-data/20190822T162258Z_TRAVIS_COUNTY/Published/REGION_0/20190822T183518Z_746_POM1_ST2_P.png", - "type": "image/png", - "storage:refs": [ - "gcp-std" - ], - "alternate": { - "aws": { - "href": "s3://nsl-data-archive/20190822T162258Z_TRAVIS_COUNTY/Published/REGION_0/20190822T183518Z_746_POM1_ST2_P.png", - "storage:refs": [ - "aws-std" - ] - } - } - } - }, - "links": [ - { - "href": "https://example.com/examples/item-nsl.json", - "rel": "self" - } - ] -} \ No newline at end of file diff --git a/json-schema/schema.json b/json-schema/schema.json index 5087247..9ab8a25 100644 --- a/json-schema/schema.json +++ b/json-schema/schema.json @@ -101,24 +101,9 @@ "properties": { "platform": { "title": "Platform", - "oneOf": [ - { - "type": "string", - "enum": [ - "AWS", - "GCP", - "AZURE", - "IBM", - "ALIBABA", - "ORACLE" - ] - }, - { - "type": "string", - "format": "iri", - "pattern": "^[\\w\\+.-]+://" - } - ] + "type": "string", + "format": 
"uri-template", + "pattern": "^[\\w\\+.-]+://" }, "region": { "title": "Region", @@ -128,12 +113,9 @@ "type": "boolean", "title": "Requester pays", "default": false - }, - "tier": { - "title": "Tier", - "type": "string" } - } + }, + "additionalProperties": true } }, "additionalProperties": false @@ -147,12 +129,9 @@ "refs_field": { "type": "object", "properties": { - "storage:refs": { - "type": "array", - "items": { - "type": "string", - "minLength": 1 - } + "storage:ref": { + "type": "string", + "minLength": 1 } }, "patternProperties": { diff --git a/platforms/aws-s3.md b/platforms/aws-s3.md new file mode 100644 index 0000000..93fde52 --- /dev/null +++ b/platforms/aws-s3.md @@ -0,0 +1,7 @@ +# AWS S3 + +This defines the Amazon Web Services (AWS) S3 interface. + +- `platform`: `https://{bucket}.s3.{region}.amazonaws.com` +- `bucket`: The bucket name +- `region`: One of the S3 regions (lowercase) diff --git a/platforms/ms-azure.md b/platforms/ms-azure.md new file mode 100644 index 0000000..e7c968d --- /dev/null +++ b/platforms/ms-azure.md @@ -0,0 +1,6 @@ +# Microsoft Azure + +This defines the Microsoft Azure interface. + +- `platform`: `https://{account}.blob.core.windows.net` +- `account`: The Microsoft account identifier diff --git a/platforms/s3.md b/platforms/s3.md new file mode 100644 index 0000000..7809afc --- /dev/null +++ b/platforms/s3.md @@ -0,0 +1,5 @@ +# S3 (non-AWS) + +This defines the S3 interface for providers other than AWS (e.g. minio-based). + +- `platform`: The API URL, must be the endpoint URL that can be used for the AWS CLI for example.