From b0bf065a54dd02f8c071ac8ef13c1bb3a94e0f33 Mon Sep 17 00:00:00 2001 From: "John T. Wodder II" Date: Thu, 7 Mar 2024 09:59:13 -0500 Subject: [PATCH] Convert DOI URLs in `related_publications` to related resources --- dandi/metadata/util.py | 25 +++++++++++++++++++ .../tests/data/metadata/metadata2asset_3.json | 7 ++++++ .../data/metadata/metadata2asset_simple1.json | 3 ++- dandi/tests/test_metadata.py | 6 ++++- 4 files changed, 39 insertions(+), 2 deletions(-) diff --git a/dandi/metadata/util.py b/dandi/metadata/util.py index 8849a9164..d3f8f95b4 100644 --- a/dandi/metadata/util.py +++ b/dandi/metadata/util.py @@ -11,6 +11,7 @@ from dandischema import models import requests import tenacity +from yarl import URL from .. import __version__ from ..utils import ensure_datetime @@ -583,6 +584,29 @@ def extract_digest(metadata: dict) -> dict[models.DigestType, str] | None: return None +def extract_related_resource(metadata: dict) -> list[models.Resource] | None: + pubs = metadata.get("related_publications") + if not isinstance(pubs, (list, tuple)): + return None + related = [] + for v in pubs: + if not isinstance(v, str): + continue + try: + u = URL(v) + except ValueError: + continue + if u.scheme not in ("http", "https") or u.host != "doi.org": + continue + related.append( + models.Resource( + identifier=v, + relation=models.RelationType.IsDescribedBy, + ) + ) + return related + + FIELD_EXTRACTORS: dict[str, Callable[[dict], Any]] = { "wasDerivedFrom": extract_wasDerivedFrom, "wasAttributedTo": extract_wasAttributedTo, @@ -595,6 +619,7 @@ def extract_digest(metadata: dict) -> dict[models.DigestType, str] | None: "anatomy": extract_anatomy, "digest": extract_digest, "species": extract_species, + "relatedResource": extract_related_resource, } diff --git a/dandi/tests/data/metadata/metadata2asset_3.json b/dandi/tests/data/metadata/metadata2asset_3.json index f3e844133..44b809a69 100644 --- a/dandi/tests/data/metadata/metadata2asset_3.json +++ b/dandi/tests/data/metadata/metadata2asset_3.json @@ -92,5 +92,12 @@ "name": "Cyperus bulbosus" } } + ], + "relatedResource": [ + { + "schemaKey": "Resource", + "identifier": "https://doi.org/10.48324/dandi.000027/0.210831.2033", + "relation": "dcite:IsDescribedBy" + } ] } diff --git a/dandi/tests/data/metadata/metadata2asset_simple1.json b/dandi/tests/data/metadata/metadata2asset_simple1.json index 04babc1a1..931779db5 100644 --- a/dandi/tests/data/metadata/metadata2asset_simple1.json +++ b/dandi/tests/data/metadata/metadata2asset_simple1.json @@ -42,5 +42,6 @@ "schemaKey": "Participant", "identifier": "sub-01" } - ] + ], + "relatedResource": [] } diff --git a/dandi/tests/test_metadata.py b/dandi/tests/test_metadata.py index 07c19b24a..1cd082358 100644 --- a/dandi/tests/test_metadata.py +++ b/dandi/tests/test_metadata.py @@ -323,7 +323,9 @@ def test_timedelta2duration(td: timedelta, duration: str) -> None: "institution": "University College", "keywords": ["test", "sample", "example", "test-case"], "lab": "Retriever Laboratory", - "related_publications": "A Brief History of Test Cases", + "related_publications": [ + "https://doi.org/10.48324/dandi.000027/0.210831.2033" + ], "session_description": "Some test data", "session_id": "XYZ789", "session_start_time": "2020-08-31T15:58:28-04:00", @@ -860,6 +862,7 @@ def test_nwb2asset(simple2_nwb: Path) -> None: variableMeasured=[], measurementTechnique=[], approach=[], + relatedResource=[], ) @@ -939,4 +942,5 @@ def test_nwb2asset_remote_asset(nwb_dandiset: SampleDandiset) -> None: variableMeasured=[], measurementTechnique=[], approach=[], + relatedResource=[], )