Skip to content

Commit

Permalink
refactor: enhance clarity in the register function
Browse files Browse the repository at this point in the history
- Rename the 'register' function to explicitly indicate that it
registers a dataset.
- Move the 'dataset' parameter to the first position for improved
function call readability.
- Rename the 'file_path' parameter to better convey that it represents
registration information as a file for better understanding.
  • Loading branch information
clnsmth authored Sep 27, 2023
1 parent ee98c9e commit c92b496
Show file tree
Hide file tree
Showing 3 changed files with 39 additions and 42 deletions.
42 changes: 17 additions & 25 deletions src/gbif_registrar/register.py
Original file line number Diff line number Diff line change
Expand Up @@ -65,43 +65,35 @@ def initialize_registrations_file(file_path):
data.to_csv(file_path, index=False, mode="x")


def register(file_path, local_dataset_id=None):
"""Register a local_dataset_id with GBIF. This is required before
requesting a crawl.
def register_dataset(local_dataset_id, registrations_file):
"""Register a local dataset with GBIF and update the registrations file.
Parameters
----------
file_path : str
Path of the registrations file.
local_dataset_id : str
Identifier of the local dataset to be registered with GBIF. Run with
local_dataset_id argument when registering for the first time, or run
with local_dataset_id=None to fix incomplete registrations.
The local dataset identifier to be registered with GBIF.
registrations_file : str
The path of the registrations file.
Returns
-------
None
The registrations file as a .csv.
The registrations file, written back to `registrations_file` as a .csv.
Notes
-----
Running this function will overwrite the existing registrations file at
`file_path`.
This function has subroutines that insert a local_dataset_group_id and
local_dataset_endpoint based on specifications of the local system. These
parts of the source code can be edited to accommodate your local system.
Examples
--------
>>> register_dataset("edi.929.2", "registrations.csv")
"""
# Read the registrations file from the file path parameter
rgstrs = read_registrations(file_path)
registrations = read_registrations(registrations_file)
# If the local_dataset_id already exists in the registrations file, then
# return the registrations file as-is and send to complete_registrations
# function for further processing.
if local_dataset_id not in set(rgstrs["local_dataset_id"]):
if local_dataset_id not in set(registrations["local_dataset_id"]):
# If the local_dataset_id parameter is not None, then append the
# local__dataset_id value to the rgstrs data frame in a new row and in
# the local_dataset_id column. The other columns should be empty at this
# point.
# local__dataset_id value to the registrations data frame in a new row
# and in the local_dataset_id column. The other columns should be
# empty at this point.
if local_dataset_id is not None:
new_row = pd.DataFrame(
{
Expand All @@ -113,11 +105,11 @@ def register(file_path, local_dataset_id=None):
},
index=[0],
)
rgstrs = pd.concat([rgstrs, new_row], ignore_index=True)
registrations = pd.concat([registrations, new_row], ignore_index=True)
# Call the complete_registrations function to complete the registration
rgstrs = complete_registrations(rgstrs)
registrations = complete_registrations(registrations)
# Write the completed registrations file to the file path parameter
rgstrs.to_csv(file_path, index=False, mode="w")
registrations.to_csv(registrations_file, index=False, mode="w")


def complete_registrations(rgstrs):
Expand Down
37 changes: 21 additions & 16 deletions tests/test_register.py
Original file line number Diff line number Diff line change
@@ -1,4 +1,4 @@
"""Test register.py"""
"""Test register_dataset.py"""

import os.path
import hashlib
Expand All @@ -7,7 +7,7 @@
from gbif_registrar.register import get_local_dataset_group_id
from gbif_registrar.register import get_local_dataset_endpoint
from gbif_registrar.register import get_gbif_dataset_uuid
from gbif_registrar.register import register
from gbif_registrar.register import register_dataset
from gbif_registrar.register import request_gbif_dataset_uuid
from gbif_registrar.config import PASTA_ENVIRONMENT
from gbif_registrar.register import initialize_registrations_file
Expand Down Expand Up @@ -121,10 +121,10 @@ def test_request_gbif_dataset_uuid_failure(mocker):
assert res is None


def test_register_success(
def test_register_dataset_success(
local_dataset_id, gbif_dataset_uuid, tmp_path, rgstrs, mocker
):
"""Test that the register function returns a file with a new row containing
"""Test that the register_dataset function returns a file with a new row containing
the local data set ID along with a unique GBIF registration number."""

# Create a copy of the registrations file in the temporary test directory
Expand All @@ -137,22 +137,23 @@ def test_register_success(
"gbif_registrar.register.get_gbif_dataset_uuid", return_value=gbif_dataset_uuid
)

# Run the register function and check that the new row was added to the
# Run the register_dataset function and check that the new row was added to the
# registrations file, and that the new row contains the local data set ID
# and a unique GBIF registration number.
register(
file_path=tmp_path / "registrations.csv", local_dataset_id=local_dataset_id
register_dataset(
local_dataset_id=local_dataset_id,
registrations_file=tmp_path / "registrations.csv",
)
rgstrs_final = read_registrations(tmp_path / "registrations.csv")
assert rgstrs_final.shape[0] == rgstrs.shape[0] + 1
assert rgstrs_final.iloc[-1]["local_dataset_id"] == local_dataset_id
assert rgstrs_final.iloc[-1]["gbif_dataset_uuid"] == gbif_dataset_uuid


def test_register_repairs_failed_registration(
def test_register_dataset_repairs_failed_registration(
local_dataset_id, gbif_dataset_uuid, tmp_path, rgstrs, mocker
):
"""Test that the register function repairs a failed registration attempt."""
"""Test that the register_dataset function repairs a failed registration attempt."""

# Create a copy of the registrations file in the temporary test directory
# so that the test can modify it without affecting the original file.
Expand All @@ -166,14 +167,16 @@ def test_register_repairs_failed_registration(

rgstrs_initial = read_registrations("tests/registrations.csv")
rgstrs_initial.to_csv(tmp_path / "registrations.csv", index=False)
register(
file_path=tmp_path / "registrations.csv", local_dataset_id=local_dataset_id
register_dataset(
local_dataset_id=local_dataset_id,
registrations_file=tmp_path / "registrations.csv",
)
rgstrs_initial = read_registrations(tmp_path / "registrations.csv")
rgstrs_initial.iloc[-1, -4:] = None
rgstrs_initial.to_csv(tmp_path / "registrations.csv", index=False)
register(
file_path=tmp_path / "registrations.csv", local_dataset_id=local_dataset_id
register_dataset(
local_dataset_id=local_dataset_id,
registrations_file=tmp_path / "registrations.csv",
)
rgstrs_final = read_registrations(tmp_path / "registrations.csv")
assert rgstrs_final.shape[0] == rgstrs_initial.shape[0]
Expand All @@ -185,8 +188,8 @@ def test_register_repairs_failed_registration(
assert rgstrs_final.iloc[-1, -4:-1].notnull().all()


def test_register_ignores_complete_registrations(tmp_path, rgstrs):
"""Test that the register function ignores complete registrations.
def test_register_dataset_ignores_complete_registrations(tmp_path, rgstrs):
"""Test that the register_dataset function ignores complete registrations.
A new data set is not being registered, and all existing data sets are
fully registered. The regsitration files should be unchanged."""
Expand All @@ -197,7 +200,9 @@ def test_register_ignores_complete_registrations(tmp_path, rgstrs):

rgstrs_initial = read_registrations("tests/registrations.csv")
rgstrs_initial.to_csv(tmp_path / "registrations.csv", index=False)
register(file_path=tmp_path / "registrations.csv")
register_dataset(
local_dataset_id=None, registrations_file=tmp_path / "registrations.csv"
)
rgstrs_final = read_registrations(tmp_path / "registrations.csv")
assert rgstrs_final.shape[0] == rgstrs_initial.shape[0]
assert rgstrs_final.equals(rgstrs_initial)
2 changes: 1 addition & 1 deletion tests/test_utilities.py
Original file line number Diff line number Diff line change
Expand Up @@ -96,7 +96,7 @@ def test_is_synchronized_success(tmp_path, mocker, eml, gbif_metadata):
registrations = read_registrations("tests/registrations.csv")
# Add new line to registrations with a dataset that is synchronized with
# GBIF so that is_synchronized can access this information via the
# file_path argument.
# registrations_file argument.
local_dataset_id = "edi.941.3"
new_row = registrations.iloc[-1].copy()
new_row["local_dataset_id"] = local_dataset_id
Expand Down

0 comments on commit c92b496

Please sign in to comment.