Skip to content

Commit

Permalink
refactor: check for metadata before replacing
Browse files Browse the repository at this point in the history
Prior to replacement, verify the presence of a metadata document in
GBIF. This precaution prevents potential errors when attempting to
replace a metadata document that does not currently exist.
  • Loading branch information
clnsmth authored Sep 8, 2023
1 parent c9ab920 commit 01ddd6c
Show file tree
Hide file tree
Showing 3 changed files with 36 additions and 3 deletions.
5 changes: 3 additions & 2 deletions src/gbif_registrar/crawl.py
Original file line number Diff line number Diff line change
Expand Up @@ -3,7 +3,7 @@
import json
from requests import post, get, delete
from gbif_registrar.config import USER_NAME, PASSWORD, GBIF_API, REGISTRY_BASE_URL
from gbif_registrar.utilities import read_local_dataset_metadata
from gbif_registrar.utilities import read_local_dataset_metadata, has_metadata


def initiate_crawl(local_dataset_id, local_dataset_endpoint, gbif_dataset_uuid):
Expand Down Expand Up @@ -53,7 +53,8 @@ def initiate_crawl(local_dataset_id, local_dataset_endpoint, gbif_dataset_uuid):
# Post a new metadata document to update the GBIF landing page. This is
# necessary because GBIF doesn't "re-crawl" the local dataset metadata when
# the new local dataset endpoint is updated.
post_new_metadata_document(local_dataset_id, gbif_dataset_uuid)
if has_metadata(gbif_dataset_uuid):
post_new_metadata_document(local_dataset_id, gbif_dataset_uuid)


def post_new_metadata_document(local_dataset_id, gbif_dataset_uuid):
Expand Down
27 changes: 26 additions & 1 deletion src/gbif_registrar/utilities.py
Original file line number Diff line number Diff line change
@@ -1,8 +1,9 @@
"""Miscellaneous utilities"""
import os.path
from json import loads
import pandas as pd
from requests import get
from gbif_registrar.config import PASTA_ENVIRONMENT
from gbif_registrar.config import PASTA_ENVIRONMENT, GBIF_API


def initialize_registrations(file_path):
Expand Down Expand Up @@ -111,3 +112,27 @@ def read_local_dataset_metadata(local_dataset_id):
resp = get(metadata_url, timeout=60)
resp.raise_for_status()
return resp.text


def has_metadata(gbif_dataset_uuid):
    """Check whether a metadata document exists for a GBIF dataset.

    Parameters
    ----------
    gbif_dataset_uuid : str
        The registration identifier assigned by GBIF to the local dataset.

    Returns
    -------
    bool
        True if the dataset has a metadata document, False otherwise.

    Notes
    -----
    The presence of a dataset title indicates that the dataset has been
    crawled by GBIF and the metadata document has been created.
    """
    # Ask the GBIF registry for the dataset record; any HTTP error
    # (e.g. an unknown UUID) propagates to the caller.
    dataset_url = GBIF_API + "/" + gbif_dataset_uuid
    response = get(url=dataset_url, timeout=60)
    response.raise_for_status()
    dataset_details = loads(response.text)
    # A missing or empty "title" means GBIF has not yet built the
    # metadata document for this dataset.
    return bool(dataset_details.get("title"))
7 changes: 7 additions & 0 deletions tests/test_utilities.py
Original file line number Diff line number Diff line change
Expand Up @@ -6,6 +6,7 @@
from gbif_registrar.utilities import initialize_registrations
from gbif_registrar.utilities import expected_cols
from gbif_registrar.utilities import read_local_dataset_metadata
from gbif_registrar.utilities import has_metadata


def test_initialize_registrations_writes_to_path(tmp_path):
Expand Down Expand Up @@ -52,3 +53,9 @@ def test_read_local_dataset_metadata_returns_str():
"""Test that read_local_dataset_metadata returns a string."""
metadata = read_local_dataset_metadata("edi.941.3")
assert isinstance(metadata, str)


def test_has_metadata_returns_expected_type():
    """has_metadata should return a boolean for a known dataset UUID."""
    # NOTE(review): this hits the live GBIF API — result type, not value,
    # is what is being pinned here.
    assert isinstance(
        has_metadata("cfb3f6d5-ed7d-4fff-9f1b-f032ed1de485"), bool
    )

0 comments on commit 01ddd6c

Please sign in to comment.