diff --git a/tests/transform-genbank-location/transform-genbank-location.t b/tests/transform-genbank-location/transform-genbank-location.t new file mode 100644 index 0000000..a835455 --- /dev/null +++ b/tests/transform-genbank-location/transform-genbank-location.t @@ -0,0 +1,30 @@ +Verify behavior of `transform-genbank-location` around presence/absence of +`database` and `location` fields. + +If `location` field is present, transform it. + + $ echo '{"database":"GenBank", "location": "USA:Oregon, Salem" }' \ + > | $TESTDIR/../../transform-genbank-location + {"database":"GenBank","location":"Salem","country":"USA","division":"Oregon"} + +If `database` field is absent, complain. + + $ echo '{"location": "USA:Oregon, Salem" }' \ + > | $TESTDIR/../../transform-genbank-location + Record must contain `database` field to use `transform-genbank-location.` + {"location":"USA:Oregon, Salem"} + +If `database` field has unsupported value, complain. + + $ echo '{"database": "unsupported", "location": "USA:Oregon, Salem" }' \ + > | $TESTDIR/../../transform-genbank-location + Database value of unsupported not supported for `transform-genbank-location`; must be "GenBank" or "RefSeq". + {"database":"unsupported","location":"USA:Oregon, Salem"} + + +If `location` field is absent, complain. + + $ echo '{"database": "GenBank" }' \ + > | $TESTDIR/../../transform-genbank-location + `transform-genbank-location` requires a `location` field; this record does not have one. + {"database":"GenBank"} diff --git a/transform-genbank-location b/transform-genbank-location index 70ba56f..010955a 100755 --- a/transform-genbank-location +++ b/transform-genbank-location @@ -7,14 +7,23 @@ GenBank by verifying that the 'database' field has a value of "GenBank" or "RefS Outputs the modified record to stdout. 
""" import json -from sys import stdin, stdout +from sys import stdin, stderr, stdout def parse_location(record: dict) -> dict: # Expected pattern for the location field is "[:][, ]" # See GenBank docs for their "country" field: # https://www.ncbi.nlm.nih.gov/genbank/collab/country/ - geographic_data = record['location'].split(':') + location_field = record.get("location", "") + if not location_field: + print( + "`transform-genbank-location` requires a `location` field; this record does not have one.", + file=stderr, + ) + # bail early because we're not gonna make any changes + return record + + geographic_data = location_field.split(':') country = geographic_data[0] division = '' @@ -38,6 +47,13 @@ if __name__ == '__main__': database = record.get('database', '') if database in {'GenBank', 'RefSeq'}: parse_location(record) + else: + if database: + error_msg = f"""Database value of {database} not supported for `transform-genbank-location`; must be "GenBank" or "RefSeq".""" + else: + error_msg = "Record must contain `database` field to use `transform-genbank-location.`" + + print(error_msg, file=stderr) json.dump(record, stdout, allow_nan=False, indent=None, separators=',:') print()