Skip to content

Commit

Permalink
Merge pull request #83 from worldbank/revert-81-feature/ntl_metadata
Browse files Browse the repository at this point in the history
Revert "[WIP] Feature/ntl metadata"
  • Loading branch information
andresfchamorro authored Nov 4, 2024
2 parents 14b1bf6 + b1176be commit 6c539ab
Show file tree
Hide file tree
Showing 13 changed files with 125 additions and 591 deletions.
12 changes: 3 additions & 9 deletions space2stats_api/src/README.md
Original file line number Diff line number Diff line change
@@ -1,16 +1,10 @@
## space2stats

### Generating Preliminary CATALOG, COLLECTION, and ITEM files
### Generating STAC files
- Navigate to the METADATA sub-directory and run the following commands in order:
1. get_types.py
2. create_stac.py
- Note that get_types.py reads a parquet file from the following path: space2stats_api/src/local.parquet
- Here is a workflow diagram of the initial STAC metadata creation:
- Here is a workflow diagram of the STAC metadata creation:

![Create Stac](../../docs/images/create_stac_workflow.png)

### Adding new ITEM files
- In link_new_item.py, update the "Paths and metadata setup" section of the main function to point to the corresponding locally saved parquet file.
- Navigate to the METADATA sub-directory and run the following commands in order:
1. get_types.py
2. link_new_item.py
![Create Stac](../../docs/images/create_stac_workflow.png)
Binary file not shown.
37 changes: 21 additions & 16 deletions space2stats_api/src/space2stats_ingest/METADATA/create_stac.py
Original file line number Diff line number Diff line change
Expand Up @@ -50,7 +50,9 @@ def load_metadata(file: str) -> Dict[str, pd.DataFrame]:


# Function to create STAC catalog
def create_stac_catalog(overview: pd.DataFrame, nada: pd.DataFrame) -> Catalog:
def create_stac_catalog(
overview: pd.DataFrame, nada: pd.DataFrame, catalog_dir: str
) -> Catalog:
catalog = Catalog(
id="space2stats-catalog",
description=overview.loc["Description Resource"].values[0],
Expand All @@ -64,6 +66,8 @@ def create_stac_catalog(overview: pd.DataFrame, nada: pd.DataFrame) -> Catalog:
href="https://worldbank.github.io/DECAT_Space2Stats/stac/catalog.json",
)

# catalog.set_self_href(os.path.relpath("catalog.json", start=catalog_dir))

return catalog


Expand Down Expand Up @@ -108,11 +112,11 @@ def create_stac_collection(overview: pd.DataFrame) -> Collection:


# Function to create STAC Item from GeoDataFrame
def create_stac_item(column_types: dict, metadata: pd.DataFrame) -> Item:
def create_stac_item(
column_types: dict, feature_catalog: pd.DataFrame, item_dir: str
) -> Item:
data_dict = []

feature_catalog = metadata["feature_catalog"]

for column, dtype in column_types.items():
description = feature_catalog.loc[
feature_catalog["variable"] == column, "description"
Expand Down Expand Up @@ -150,35 +154,34 @@ def create_stac_item(column_types: dict, metadata: pd.DataFrame) -> Item:
89.98750455101016,
]

sources = metadata["sources"]
pop_metadata = sources[sources["Name"] == "Population"].iloc[0]
item = Item(
id="space2stats_population_2020",
geometry=geom,
bbox=bbox,
datetime=datetime.now(),
properties={
"name": pop_metadata["Name"],
"description": pop_metadata["Description"],
"methodological_notes": pop_metadata["Methodological Notes"],
"source_data": pop_metadata["Source Data"],
"sci:citation": pop_metadata["Citation source"],
"organization": pop_metadata["Organization"],
"method": pop_metadata["Method"],
"resolution": pop_metadata["Resolution"],
"name": "Population Data",
"description": "Gridded population disaggregated by gender for the year 2020, with data available for different age groups.",
"methodological_notes": "Global raster files are processed for each hexagonal grid using zonal statistics.",
"source_data": "WorldPop gridded population, 2020, Unconstrained, UN-Adjusted",
"sci:citation": "Stevens FR, Gaughan AE, Linard C, Tatem AJ (2015) Disaggregating Census Data for Population Mapping Using Random Forests with Remotely-Sensed and Ancillary Data.",
"organization": "WorldPop, https://www.worldpop.org",
"method": "sum",
"resolution": "100 meters",
"table:primary_geometry": "geometry",
"table:columns": data_dict,
"vector:layers": {
"space2stats": column_types_with_geometry,
},
"themes": pop_metadata["Theme"],
"themes": ["Demographics", "Population"],
},
stac_extensions=[
"https://stac-extensions.github.io/table/v1.2.0/schema.json",
"https://stac-extensions.github.io/scientific/v1.0.0/schema.json",
],
)

# item.set_self_href(os.path.join("items", f"{item.id}.json"))
return item


Expand Down Expand Up @@ -229,6 +232,7 @@ def main():
catalog = create_stac_catalog(
metadata["overview"],
metadata["nada"],
join(git_root, metadata_dir, "stac"),
)

# Create STAC collection
Expand All @@ -237,7 +241,8 @@ def main():
# Create STAC item
item = create_stac_item(
column_types,
metadata,
metadata["feature_catalog"],
join(git_root, metadata_dir, "stac"),
)

# Add assets to item
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -33,7 +33,7 @@ def save_parquet_types_to_json(parquet_file: str, json_file: str):

if __name__ == "__main__":
git_root = get_git_root()
parquet_file = join(git_root, "space2stats_api/src/ntl2012.parquet")
parquet_file = join(git_root, "space2stats_api/src/space2stats.parquet")
json_file = join(
git_root, "space2stats_api/src/space2stats_ingest/METADATA/types.json"
)
Expand Down
151 changes: 0 additions & 151 deletions space2stats_api/src/space2stats_ingest/METADATA/link_new_item.py

This file was deleted.

Original file line number Diff line number Diff line change
Expand Up @@ -21,12 +21,6 @@
"href": "./space2stats_population_2020/space2stats_population_2020.json",
"type": "application/json",
"title": "Space2Stats Population Data Item"
},
{
"rel": "item",
"href": "./space2stats_ntl_2013/space2stats_ntl_2013.json",
"type": "application/json",
"title": "Space2Stats NTL 2013 Data Item"
}
],
"Title": "Space2Stats Database",
Expand All @@ -38,28 +32,12 @@
"hexagons",
"global"
],
"title": "Space2Stats Collection",
"extent": {
"spatial": {
"bbox": [
[
-180.0,
-90.0,
180.0,
90.0
]
]
},
"temporal": {
"interval": [
[
"2020-01-01T00:00:00Z",
null
]
]
"summaries": {
"datetime": {
"min": "2020-01-01T00:00:00Z",
"max": null
}
},
"license": "CC-BY-4.0",
"providers": [
{
"name": "World Bank",
Expand All @@ -70,12 +48,6 @@
"url": "https://www.worldbank.org/"
}
],
"summaries": {
"datetime": {
"min": "2020-01-01T00:00:00Z",
"max": null
}
},
"assets": {
"documentation": {
"href": "https://space2stats.ds.io/docs",
Expand All @@ -85,5 +57,27 @@
"metadata"
]
}
}
},
"title": "Space2Stats Collection",
"extent": {
"spatial": {
"bbox": [
[
-180.0,
-90.0,
180.0,
90.0
]
]
},
"temporal": {
"interval": [
[
"2020-01-01T00:00:00Z",
null
]
]
}
},
"license": "CC-BY-4.0"
}
Loading

0 comments on commit 6c539ab

Please sign in to comment.