diff --git a/ingest/defaults/config.yaml b/ingest/defaults/config.yaml index 073e622..6a48c07 100644 --- a/ingest/defaults/config.yaml +++ b/ingest/defaults/config.yaml @@ -70,6 +70,8 @@ curate: # These date formats should use directives expected by datetime # See https://docs.python.org/3.9/library/datetime.html#strftime-and-strptime-format-codes expected_date_formats: ["%Y", "%Y-%m", "%Y-%m-%d", "%Y-%m-%dT%H:%M:%SZ"] + # The expected field that contains the GenBank geo_loc_name + genbank_location_field: location titlecase: # List of string fields to titlecase fields: ["region", "country", "division", "location"] diff --git a/ingest/rules/curate.smk b/ingest/rules/curate.smk index 2e4a066..bdf3211 100644 --- a/ingest/rules/curate.smk +++ b/ingest/rules/curate.smk @@ -73,6 +73,7 @@ rule curate: strain_backup_fields=config["curate"]["strain_backup_fields"], date_fields=config["curate"]["date_fields"], expected_date_formats=config["curate"]["expected_date_formats"], + genbank_location_field=config["curate"]["genbank_location_field"], articles=config["curate"]["titlecase"]["articles"], abbreviations=config["curate"]["titlecase"]["abbreviations"], titlecase_fields=config["curate"]["titlecase"]["fields"], @@ -85,30 +86,30 @@ rule curate: shell: """ (cat {input.sequences_ndjson} \ - | ./vendored/transform-field-names \ + | augur curate rename \ --field-map {params.field_map} \ | augur curate normalize-strings \ - | ./vendored/transform-strain-names \ + | augur curate transform-strain-name \ --strain-regex {params.strain_regex} \ --backup-fields {params.strain_backup_fields} \ | augur curate format-dates \ --date-fields {params.date_fields} \ --expected-date-formats {params.expected_date_formats} \ - | ./vendored/transform-genbank-location \ + | augur curate parse-genbank-location \ + --location-field {params.genbank_location_field} \ | augur curate titlecase \ --titlecase-fields {params.titlecase_fields} \ --articles {params.articles} \ --abbreviations {params.abbreviations} \ - | ./vendored/transform-authors \ + | augur curate abbreviate-authors \ --authors-field {params.authors_field} \ --default-value {params.authors_default_value} \ --abbr-authors-field {params.abbr_authors_field} \ - | ./vendored/apply-geolocation-rules \ + | augur curate apply-geolocation-rules \ --geolocation-rules {input.all_geolocation_rules} \ - | ./vendored/merge-user-metadata \ + | augur curate apply-record-annotations \ --annotations {input.annotations} \ --id-field {params.annotations_id} \ - | augur curate passthru \ --output-metadata {output.metadata} \ --output-fasta {output.sequences} \ --output-id-field {params.id_field} \