Skip to content

Commit

Permalink
Fix gcts (#3)
Browse files Browse the repository at this point in the history
* filename

* dask complete install

* revised workflow

* more flexible dask client manager

* add features to gdf

* only detect antimeridian crosses

* revised gcts workflow and schema

* lints

* prepare run on slurm

* revised launch settings

* revised configs

* .

* .

* .

* avoid scattering

* .

* wip

* wip

* typo

* wip

* more workers

* try adding countries around antimeridian

* comment sampling

* fix: geometry type as wkb and not bytes

* revised stacs

* revised generic buffer func

* add offset rectangle

* fix suffix error in name data

* fix pd type checking

* fix rotation angle

---------

Co-authored-by: floriscalkoen <floris_calkoen@hotmail.com>
Co-authored-by: Floris Calkoen <floriscalkoen@users.noreply.github.com>
  • Loading branch information
3 people authored Sep 2, 2024
1 parent 924e728 commit be62472
Show file tree
Hide file tree
Showing 18 changed files with 844 additions and 403 deletions.
19 changes: 11 additions & 8 deletions analytics/hypsometry.ipynb
Original file line number Diff line number Diff line change
Expand Up @@ -76,7 +76,7 @@
"GCTS_LANDWARD_CONTAINER = \"az://public/coastal-analytics/gcts-2000m-landward.parquet\"\n",
"# NOTE: before we stored the results here, keep for a while as ref\n",
"GCTS_ELEVATION_CONTAINER = \"az://public/coastal-analytics/gcts-2000m-elevation.parquet\"\n",
"# NOTE: Next iteration we will store it here and extract the profiles including the tr_name\n",
"# NOTE: Next iteration we will store it here and extract the profiles including the transect_id\n",
"# GCTS_ELEVATION_CONTAINER = \"az://coastal-transect-repository/deltadtm-elevation.parquet\"\n",
"H3_ELEVATION_CONTAINER = (\n",
" f\"az://public/coastal-analytics/h3-l{H3_LEVEL}-pct-lt-{LOWER_THAN}m.parquet\"\n",
Expand Down Expand Up @@ -131,11 +131,14 @@
"metadata": {},
"outputs": [],
"source": [
"from coastpy.utils.dask_utils import create_dask_client\n",
"from coastpy.utils.dask import DaskClientManager\n",
"\n",
"instance_type = configure_instance()\n",
"client = create_dask_client(instance_type)\n",
"client"
"client = DaskClientManager().create_client(\n",
" instance_type,\n",
" threads_per_worker=1,\n",
" processes=True,\n",
" )\n"
]
},
{
Expand Down Expand Up @@ -168,7 +171,7 @@
" return shapely.LineString([p1, p2])\n",
"\n",
" geoms = df.apply(extract_landward_side, axis=1)\n",
" return gpd.GeoDataFrame(df[\"tr_name\"], geometry=geoms, crs=4326)\n",
" return gpd.GeoDataFrame(df[\"transect_id\"], geometry=geoms, crs=4326)\n",
"\n",
" gcts_collection = coclico_catalog.get_child(\"gcts\")\n",
" gcts_extents = read_items_extent(gcts_collection, columns=[\"geometry\", \"assets\"])\n",
Expand All @@ -177,15 +180,15 @@
" # template GDF that matches what is retunred from map_extract_landward_side\n",
" META = gpd.GeoDataFrame(\n",
" {\n",
" \"tr_name\": gpd.GeoSeries([], dtype=str),\n",
" \"transect_id\": gpd.GeoSeries([], dtype=str),\n",
" \"geometry\": gpd.GeoSeries([], dtype=GeometryDtype()),\n",
" }\n",
" )\n",
"\n",
" transects = dask_geopandas.read_parquet(\n",
" gcts_hrefs,\n",
" storage_options=storage_options,\n",
" columns=[\"tr_name\", \"geometry\", \"lon\", \"lat\"],\n",
" columns=[\"transect_id\", \"geometry\", \"lon\", \"lat\"],\n",
" )\n",
"\n",
" transects = transects.map_partitions(map_extract_landward_side, meta=META)\n",
Expand Down Expand Up @@ -241,7 +244,7 @@
" da = da.where(da != da.rio.nodata, np.nan)\n",
" da = da.rio.write_nodata(np.nan)\n",
"\n",
" # TODO: ensure that tr_name is tracked so that we can use the elevation data later at a transect level\n",
" # TODO: ensure that transect_id is tracked so that we can use the elevation data later at a transect level\n",
" clipped = da.rio.clip(transects.geometry.to_list()).rename(\"band_data\")\n",
"\n",
" df = (\n",
Expand Down
2 changes: 1 addition & 1 deletion ci/envs/312-coastal-full.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -127,7 +127,7 @@ dependencies:
- cartopy
- cfgrib
- contextily
- dask
- dask[complete]
- dask-geopandas
- dask-image
- dask-jobqueue
Expand Down
42 changes: 24 additions & 18 deletions scripts/python/add_gcts_to_stac.py
Original file line number Diff line number Diff line change
Expand Up @@ -26,17 +26,20 @@
storage_account_name = "coclico"
storage_options = {"account_name": storage_account_name, "credential": sas_token}

# NOTE: when True, the GeoParquet STAC items are written to the `items-test` container instead of `items`
TEST_RELEASE = True

# Container and URI configuration
CONTAINER_NAME = "gcts"
PREFIX = "release/2024-03-18"
RELEASE_DATE = "2024-08-02"
PREFIX = f"release/{RELEASE_DATE}"
CONTAINER_URI = f"az://{CONTAINER_NAME}/{PREFIX}"
PARQUET_MEDIA_TYPE = "application/vnd.apache.parquet"
LICENSE = "CC-BY-4.0"

# Collection information
COLLECTION_ID = "gcts"
COLLECTION_TITLE = "Global Coastal Transect System (GCTS)"
DATE_TRANSECTS_CREATED = "2024-03-18"

# Transect and zoom configuration
TRANSECT_LENGTH = 2000
Expand All @@ -56,11 +59,14 @@
ASSET_DESCRIPTION = f"Parquet dataset with coastal transects ({TRANSECT_LENGTH} m) at 100 m alongshore resolution for this region."

# GeoParquet STAC items
GEOPARQUET_STAC_ITEMS_HREF = f"az://items/{COLLECTION_ID}.parquet"
if TEST_RELEASE:
GEOPARQUET_STAC_ITEMS_HREF = f"az://items-test/{COLLECTION_ID}.parquet"
else:
GEOPARQUET_STAC_ITEMS_HREF = f"az://items/{COLLECTION_ID}.parquet"

COLUMN_DESCRIPTIONS = [
{
"name": "tr_name",
"name": "transect_id",
"type": "string",
"description": "A unique identifier for each transect, constructed from three key components: the 'coastline_id', 'segment_id', and 'interpolated_distance'. The 'coastline_id' corresponds to the FID in OpenStreetMap (OSM) and is prefixed with 'cl'. The 'segment_id' indicates the segment of the OSM coastline split by a UTM grid, prefixed with 's'. The 'interpolated_distance' represents the distance from the starting point of the coastline to the transect, interpolated along the segment, and is prefixed with 'tr'. The complete structure is 'cl[coastline_id]s[segment_id]tr[interpolated_distance]', exemplified by 'cl32946s04tr08168547'. This composition ensures each transect name is a distinct and informative representation of its geographical and spatial attributes.",
},
Expand All @@ -85,17 +91,17 @@
"description": "Well-Known Binary (WKB) representation of the transect as a linestring geometry.",
},
{
"name": "coastline_is_closed",
"name": "osm_coastline_is_closed",
"type": "bool",
"description": "Indicates whether the source OpenStreetMap (OSM) coastline, from which the transects were derived, forms a closed loop. A value of 'true' suggests that the coastline represents an enclosed area, such as an island.",
},
{
"name": "coastline_length",
"name": "osm_coastline_length",
"type": "int32",
"description": "Represents the total length of the source OpenStreetMap (OSM) coastline, that is summed across various UTM regions. It reflects the aggregate length of the original coastline from which the transects are derived.",
},
{
"name": "utm_crs",
"name": "utm_epsg",
"type": "int32",
"description": "EPSG code representing the UTM Coordinate Reference System for the transect.",
},
Expand All @@ -110,24 +116,24 @@
"description": "QuadKey corresponding to the transect origin location at zoom 12, following the Bing Maps Tile System for spatial indexing.",
},
{
"name": "isoCountryCodeAlpha2",
"name": "continent",
"type": "string",
"description": "ISO 3166-1 alpha-2 country code for the country in which the transect is located.",
"description": "Name of the continent in which the transect is located.",
},
{
"name": "admin_level_1_name",
"name": "country",
"type": "string",
"description": "Name of the first-level administrative division (e.g., country) in which the transect is located.",
"description": "ISO alpha-2 country code for the country in which the transect is located. The country data are extracted from Overture Maps (divisions).",
},
{
"name": "isoSubCountryCode",
"name": "common_country_name",
"type": "string",
"description": "ISO code for the sub-country or second-level administrative division in which the transect is located.",
"description": "Common country name (EN) in which the transect is located. The country data are extracted from Overture Maps (divisions).",
},
{
"name": "admin_level_2_name",
"name": "common_region_name",
"type": "string",
"description": "Name of the second-level administrative division (e.g., state or province) in which the transect is located.",
"description": "Common region name (EN) in which the transect is located. The regions are extracted from Overture Maps (divisions).",
},
]

Expand Down Expand Up @@ -192,7 +198,7 @@ def create_collection(
),
]

start_datetime = datetime.datetime.strptime(DATE_TRANSECTS_CREATED, "%Y-%m-%d")
start_datetime = datetime.datetime.strptime(RELEASE_DATE, "%Y-%m-%d")

extent = pystac.Extent(
pystac.SpatialExtent([[-180.0, 90.0, 180.0, -90.0]]),
Expand Down Expand Up @@ -276,7 +282,7 @@ def create_collection(
collection.stac_extensions.append(stac_table.SCHEMA_URI)

VersionExtension.add_to(collection)
collection.extra_fields["version"] = "1.0.0"
collection.extra_fields["version"] = RELEASE_DATE

return collection

Expand Down Expand Up @@ -304,7 +310,7 @@ def create_item(
"description": ASSET_DESCRIPTION,
}

dt = datetime.datetime.strptime(DATE_TRANSECTS_CREATED, "%Y-%m-%d")
dt = datetime.datetime.strptime(RELEASE_DATE, "%Y-%m-%d")
# shape = shapely.box(*bbox)
# geometry = shapely.geometry.mapping(shape)
template = pystac.Item(
Expand Down
Loading

0 comments on commit be62472

Please sign in to comment.