From 72c6cf19fe15a07bf6bd6b7e2e9f3f06da3623d2 Mon Sep 17 00:00:00 2001
From: james hadfield <hadfield.james@gmail.com>
Date: Mon, 1 Jul 2024 13:26:35 +1200
Subject: [PATCH 1/6] Copy transform-field-names from nextstrain/ingest

Copied from <https://github.com/nextstrain/ingest/blob/c94d78d1f38b99e893007a76526f3d3824ecded0/transform-strain-names>
which was itself copied from <https://github.com/nextstrain/mpox/blob/5969604dfe426745b789746427b580c69d484790/ingest/bin/transform-strain-names>

Only change in this commit was to remove the shebang and not make the
file executable.
---
 augur/curate/rename.py | 52 ++++++++++++++++++++++++++++++++++++++++++
 1 file changed, 52 insertions(+)
 create mode 100644 augur/curate/rename.py

diff --git a/augur/curate/rename.py b/augur/curate/rename.py
new file mode 100644
index 000000000..ac6e500dc
--- /dev/null
+++ b/augur/curate/rename.py
@@ -0,0 +1,52 @@
+"""
+Renames fields of the NDJSON record from stdin and outputs modified records
+to stdout.
+"""
+import argparse
+import json
+from sys import stderr, stdin, stdout
+
+
+if __name__ == '__main__':
+    parser = argparse.ArgumentParser(
+        description=__doc__,
+        formatter_class=argparse.ArgumentDefaultsHelpFormatter
+    )
+    parser.add_argument("--field-map", nargs="+",
+        help="Fields names in the NDJSON record mapped to new field names, " +
+             "formatted as '{old_field_name}={new_field_name}'. " +
+             "If the old field does not exist in record, the new field will be added with an empty string value. " +
+             "If the new field already exists in record, then the renaming of the old field will be skipped. " +
+             "Skips the field if the old field name is the same as the new field name (case-sensitive).")
+    parser.add_argument("--force", action="store_true",
+        help="Force renaming of old field even if the new field already exists. " +
+             "Please keep in mind this will overwrite the value of the new field.")
+
+    args = parser.parse_args()
+
+    field_map = {}
+    for field in args.field_map:
+        old_name, new_name = field.split('=')
+
+        if old_name == new_name:
+            continue
+
+        field_map[old_name] = new_name
+
+    for record in stdin:
+        record = json.loads(record)
+
+        for old_field, new_field in field_map.items():
+
+            if record.get(new_field) and not args.force:
+                print(
+                    f"WARNING: skipping rename of {old_field} because record",
+                    f"already has a field named {new_field}.",
+                    file=stderr
+                )
+                continue
+
+            record[new_field] = record.pop(old_field, '')
+
+        json.dump(record, stdout, allow_nan=False, indent=None, separators=',:')
+        print()

From fb54c4c2c5e8bc6dfe03f938960a0e3b4bf11a04 Mon Sep 17 00:00:00 2001
From: james hadfield <hadfield.james@gmail.com>
Date: Mon, 1 Jul 2024 15:07:19 +1200
Subject: [PATCH 2/6] [curate rename] refactor to match curate structure

Includes changes to make the copied script work as a new Augur
subcommand and fit in with the codebase. The functional behaviour of the
actual renaming is unchanged, although the I/O options are now expanded
as we inherit the general `augur curate` machinery.
---
 augur/curate/__init__.py |  3 ++-
 augur/curate/rename.py   | 39 ++++++++++++++++++++-------------------
 2 files changed, 22 insertions(+), 20 deletions(-)

diff --git a/augur/curate/__init__.py b/augur/curate/__init__.py
index c8aacbbc5..c5961a9b1 100644
--- a/augur/curate/__init__.py
+++ b/augur/curate/__init__.py
@@ -13,7 +13,7 @@
 from augur.io.metadata import DEFAULT_DELIMITERS, InvalidDelimiter, read_table_to_dict, read_metadata_with_sequences, write_records_to_tsv
 from augur.io.sequences import write_records_to_fasta
 from augur.types import DataErrorMethod
-from . import format_dates, normalize_strings, passthru, titlecase, apply_geolocation_rules, apply_record_annotations, abbreviate_authors, parse_genbank_location, transform_strain_name
+from . import format_dates, normalize_strings, passthru, titlecase, apply_geolocation_rules, apply_record_annotations, abbreviate_authors, parse_genbank_location, transform_strain_name, rename
 
 
 SUBCOMMAND_ATTRIBUTE = '_curate_subcommand'
@@ -27,6 +27,7 @@
     abbreviate_authors,
     parse_genbank_location,
     transform_strain_name,
+    rename,
 ]
 
 
diff --git a/augur/curate/rename.py b/augur/curate/rename.py
index ac6e500dc..57172e944 100644
--- a/augur/curate/rename.py
+++ b/augur/curate/rename.py
@@ -1,28 +1,31 @@
 """
-Renames fields of the NDJSON record from stdin and outputs modified records
-to stdout.
+Renames fields / columns of the input data
 """
+
+from typing import Iterable
+from augur.io.print import print_err
 import argparse
-import json
-from sys import stderr, stdin, stdout
 
+def register_parser(parent_subparsers):
+    parser = parent_subparsers.add_parser("rename",
+    parents = [parent_subparsers.shared_parser],
+    help = __doc__)
+
+    p = parser.add_argument_group(title="RENAME SPECIFIC OPTIONS")
 
-if __name__ == '__main__':
-    parser = argparse.ArgumentParser(
-        description=__doc__,
-        formatter_class=argparse.ArgumentDefaultsHelpFormatter
-    )
-    parser.add_argument("--field-map", nargs="+",
+    p.add_argument("--field-map", nargs="+", default=[],
         help="Fields names in the NDJSON record mapped to new field names, " +
              "formatted as '{old_field_name}={new_field_name}'. " +
              "If the old field does not exist in record, the new field will be added with an empty string value. " +
              "If the new field already exists in record, then the renaming of the old field will be skipped. " +
              "Skips the field if the old field name is the same as the new field name (case-sensitive).")
-    parser.add_argument("--force", action="store_true",
+    p.add_argument("--force", action="store_true",
         help="Force renaming of old field even if the new field already exists. " +
              "Please keep in mind this will overwrite the value of the new field.")
 
-    args = parser.parse_args()
+    return parser
+
+def run(args: argparse.Namespace, records: Iterable[dict]) -> Iterable[dict]:
 
     field_map = {}
     for field in args.field_map:
@@ -33,20 +36,18 @@
 
         field_map[old_name] = new_name
 
-    for record in stdin:
-        record = json.loads(record)
+    for record in records:
+        record = record.copy()
 
         for old_field, new_field in field_map.items():
 
             if record.get(new_field) and not args.force:
-                print(
+                print_err(
                     f"WARNING: skipping rename of {old_field} because record",
-                    f"already has a field named {new_field}.",
-                    file=stderr
+                    f"already has a field named {new_field}."
                 )
                 continue
 
             record[new_field] = record.pop(old_field, '')
 
-        json.dump(record, stdout, allow_nan=False, indent=None, separators=',:')
-        print()
+        yield(record)

From c2b229726e02c725cc6736d642c62c16e474c92c Mon Sep 17 00:00:00 2001
From: james hadfield <hadfield.james@gmail.com>
Date: Mon, 1 Jul 2024 16:28:22 +1200
Subject: [PATCH 3/6] [curate rename] Add tests

Tests describe desired output, not current output
---
 .../curate/cram/rename/drop-columns.t         | 29 ++++++
 .../curate/cram/rename/force-behaviour.t      | 18 ++++
 tests/functional/curate/cram/rename/general.t | 91 +++++++++++++++++++
 3 files changed, 138 insertions(+)
 create mode 100644 tests/functional/curate/cram/rename/drop-columns.t
 create mode 100644 tests/functional/curate/cram/rename/force-behaviour.t
 create mode 100644 tests/functional/curate/cram/rename/general.t

diff --git a/tests/functional/curate/cram/rename/drop-columns.t b/tests/functional/curate/cram/rename/drop-columns.t
new file mode 100644
index 000000000..d31844ae4
--- /dev/null
+++ b/tests/functional/curate/cram/rename/drop-columns.t
@@ -0,0 +1,29 @@
+
+Setup
+
+  $ export AUGUR="${AUGUR:-$TESTDIR/../../../../../bin/augur}"
+
+  $ cat >records.ndjson <<~~
+  > {"strain": "s_1", "country": "c_1", "accession": "a_1"}
+  > {"strain": "s_2", "country": "c_2", "accession": "a_2"}
+  > {"strain": "s_3", "country": "c_3", "accession": "a_3"}
+  > ~~
+
+Rename the strain column to accession -- we don't move the strain column to where accession was,
+We simply rename it and drop the (old) accession column. The alternate (which I don't like) produces:
+{"country": "c_1", "accession": "s_1"}
+{"country": "c_2", "accession": "s_2"}
+{"country": "c_3", "accession": "s_3"}
+
+  $ $AUGUR curate rename --field-map "strain=accession" --force < <(cat records.ndjson)
+  {"accession": "s_1", "country": "c_1"}
+  {"accession": "s_2", "country": "c_2"}
+  {"accession": "s_3", "country": "c_3"}
+
+Similarly, renaming accession to strain re-names the column "in-place" and drops the (old) strain column
+
+  $ $AUGUR curate rename --field-map "accession=strain" --force < <(cat records.ndjson)
+  {"country": "c_1", "strain": "a_1"}
+  {"country": "c_2", "strain": "a_2"}
+  {"country": "c_3", "strain": "a_3"}
+
diff --git a/tests/functional/curate/cram/rename/force-behaviour.t b/tests/functional/curate/cram/rename/force-behaviour.t
new file mode 100644
index 000000000..5886c599c
--- /dev/null
+++ b/tests/functional/curate/cram/rename/force-behaviour.t
@@ -0,0 +1,18 @@
+Setup
+
+  $ export AUGUR="${AUGUR:-$TESTDIR/../../../../../bin/augur}"
+
+
+  $ cat >records.ndjson <<~~
+  > {"accession": "record_1", "country": ""}
+  > {"accession": "record_2", "country": "country_2"}
+  > ~~
+
+Test that --force is required if a key exists, irregardless of its associated value
+(This was not the behaviour in the precursor command `transform-field-names`)
+
+  $ $AUGUR curate rename --field-map "accession=country" < <(cat records.ndjson) 1> out1.ndjson
+  WARNING: skipping rename of accession because record already has a field named country.
+
+  $ diff records.ndjson out1.ndjson
+
diff --git a/tests/functional/curate/cram/rename/general.t b/tests/functional/curate/cram/rename/general.t
new file mode 100644
index 000000000..f7f4401d7
--- /dev/null
+++ b/tests/functional/curate/cram/rename/general.t
@@ -0,0 +1,91 @@
+Setup
+
+  $ export AUGUR="${AUGUR:-$TESTDIR/../../../../../bin/augur}"
+
+The tests here use NDJSON I/O and don't explicitly test TSV I/O as we rely
+on the general curate infrastructure to enforce that each row has the same
+fields. See <https://github.com/nextstrain/augur/issues/1510> for more
+
+  $ cat >records.ndjson <<~~
+  > {"accession": "record_1", "country": "country_1"}
+  > {"accession": "record_2", "country": "country_2"}
+  > ~~
+
+The --field-map argument is requried (error text not checked as it includes the entire argparse usage text)
+
+  $ $AUGUR curate rename < <(cat records.ndjson) 2>/dev/null
+  [2]
+
+Rename "accession" to "strain" -- the order should be preserved (i.e. strain is first column)
+
+  $ $AUGUR curate rename --field-map "accession=strain" < <(cat records.ndjson)
+  {"strain": "record_1", "country": "country_1"}
+  {"strain": "record_2", "country": "country_2"}
+
+
+Rename "accession" to "country" - single error message is reported as we won't overwrite without --force
+and we don't change the data
+
+  $ $AUGUR curate rename --field-map "accession=country" < <(cat records.ndjson) > out2.ndjson
+  WARNING: skipping rename of accession because record already has a field named country.
+
+  $ diff records.ndjson out2.ndjson
+
+Rename "accession" to "country" using --force
+
+  $ $AUGUR curate rename --field-map "accession=country" --force < <(cat records.ndjson)
+  {"country": "record_1"}
+  {"country": "record_2"}
+
+Asking to rename multiple columns to the same new name is an error!
+
+  $ $AUGUR curate rename --field-map "accession=foo" "country=foo" < <(cat records.ndjson)
+  ERROR: Asked to rename multiple fields to 'foo'.
+  [2]
+
+Asking to rename a non-existant column raises a warning. Using a warning not an error allows the command
+to be idempotent.
+
+  $ $AUGUR curate rename --field-map "strain=foo" < <(cat records.ndjson) 1> out3.ndjson
+  WARNING: Asked to rename field 'strain' (to 'foo') but it doesn't exist in the input data.
+
+  $ diff records.ndjson out3.ndjson
+
+
+Rename will re-order fields to match the first observed record (with any necessary changes applied)
+This produces NDJSON output which more closely matches TSV output.
+
+  $ cat >records.unordered.ndjson <<~~
+  > {"accession": "record_1", "country": "country_1"}
+  > {"country": "country_2", "accession": "record_2"}
+  > ~~
+
+  $ $AUGUR curate rename --field-map "accession=id" --force < <(cat records.unordered.ndjson)
+  {"id": "record_1", "country": "country_1"}
+  {"id": "record_2", "country": "country_2"}
+
+
+Using --field-map without a single '=' char is an error
+  $ $AUGUR curate rename --field-map "accession" < <(cat records.ndjson)
+  ERROR: The field-map 'accession' must contain a single '=' character.
+  [2]
+
+Using --field-map with more than a single '=' char is an error
+  $ $AUGUR curate rename --field-map "accession=id=strain" < <(cat records.ndjson)
+  ERROR: The field-map 'accession=id=strain' must contain a single '=' character.
+  [2]
+
+Using --field-map with spaces surrounding field names is OK (as long as you quote the arg appropriately)
+  $ $AUGUR curate rename --field-map " accession = strain " < <(cat records.ndjson)
+  {"strain": "record_1", "country": "country_1"}
+  {"strain": "record_2", "country": "country_2"}
+
+Using --field-map with an empty "old field" doesn't make sense
+  $ $AUGUR curate rename --field-map "=accession" < <(cat records.ndjson)
+  ERROR: The field-map '=accession' doesn't specify a name for the existing field.
+  [2]
+
+Using --field-map with an empty "new field" doesn't (yet) make sense
+  $ $AUGUR curate rename --field-map "accession=" < <(cat records.ndjson)
+  ERROR: The field-map 'accession=' doesn't specify a name for the new field.
+  [2]

From 84ee0af9b564d04a241e79e916855d6aa5041daf Mon Sep 17 00:00:00 2001
From: james hadfield <hadfield.james@gmail.com>
Date: Tue, 2 Jul 2024 11:04:50 +1200
Subject: [PATCH 4/6] [curate rename] update behaviour

to match expected behaviour in tests.

The main changes functional changes are around the order of fields,
where we now rename "in-place" rather than adding the renamed column
at the end (which for TSV output is the last column).

More sanity checks are performed on arguments and they are
cross-referenced with the provided records.

Note that this relies on each record having the same fields, and this is
not asserted here. See <https://github.com/nextstrain/augur/issues/1510>
---
 augur/curate/rename.py | 79 ++++++++++++++++++++++++++++++++----------
 1 file changed, 61 insertions(+), 18 deletions(-)

diff --git a/augur/curate/rename.py b/augur/curate/rename.py
index 57172e944..f4e3dc048 100644
--- a/augur/curate/rename.py
+++ b/augur/curate/rename.py
@@ -2,52 +2,95 @@
 Renames fields / columns of the input data
 """
 
-from typing import Iterable
-from augur.io.print import print_err
+from typing import Iterable, Literal, Union, Dict, List, Tuple
 import argparse
+from augur.io.print import print_err
+from augur.errors import AugurError
 
 def register_parser(parent_subparsers):
     parser = parent_subparsers.add_parser("rename",
     parents = [parent_subparsers.shared_parser],
     help = __doc__)
 
-    p = parser.add_argument_group(title="RENAME SPECIFIC OPTIONS")
-
-    p.add_argument("--field-map", nargs="+", default=[],
+    required = parser.add_argument_group(title="REQUIRED")
+    required.add_argument("--field-map", nargs="+", required=True,
         help="Fields names in the NDJSON record mapped to new field names, " +
              "formatted as '{old_field_name}={new_field_name}'. " +
-             "If the old field does not exist in record, the new field will be added with an empty string value. " +
-             "If the new field already exists in record, then the renaming of the old field will be skipped. " +
+             "If the new field already exists, then the renaming of the old field will be skipped. " +
              "Skips the field if the old field name is the same as the new field name (case-sensitive).")
-    p.add_argument("--force", action="store_true",
+
+    optional = parser.add_argument_group(title="OPTIONAL")
+    optional.add_argument("--force", action="store_true",
         help="Force renaming of old field even if the new field already exists. " +
              "Please keep in mind this will overwrite the value of the new field.")
 
     return parser
 
-def run(args: argparse.Namespace, records: Iterable[dict]) -> Iterable[dict]:
 
+def parse_field_map(field_map_arg: List[str]) -> Dict[str,str]:
+    seen_old, seen_new = set(), set()
+    
     field_map = {}
-    for field in args.field_map:
-        old_name, new_name = field.split('=')
+    for field in field_map_arg:
+        fields = [n.strip() for n in field.split('=')]
+        if len(fields)!=2:
+            raise AugurError(f"The field-map {field!r} must contain a single '=' character.")
+        old_name, new_name = fields
+
+        # Sanity check the requests to catch typos etc
+        if not old_name:
+            raise AugurError(f"The field-map {field!r} doesn't specify a name for the existing field.")
+        if not new_name:
+            raise AugurError(f"The field-map {field!r} doesn't specify a name for the new field.")
+        if old_name in seen_old:
+            raise AugurError(f"Asked to rename field {old_name!r} multiple times.")
+        if new_name in seen_new:
+            raise AugurError(f"Asked to rename multiple fields to {new_name!r}.")
+        seen_old.add(old_name)
+        seen_new.add(new_name)
 
         if old_name == new_name:
             continue
 
         field_map[old_name] = new_name
+    return field_map
 
-    for record in records:
-        record = record.copy()
 
-        for old_field, new_field in field_map.items():
+def transform_columns(existing_fields: List[str], field_map: Dict[str,str], force: bool) -> List[Tuple[str,str]]:
+    """
+    Calculate the mapping of old column names to new column names
+    """
+    # check that all columns to be renamed exist
+    for name in list(field_map.keys()):
+        if name not in existing_fields:
+            print_err(f"WARNING: Asked to rename field {name!r} (to {field_map[name]!r}) but it doesn't exist in the input data.")
+            del field_map[name]
 
-            if record.get(new_field) and not args.force:
+    # iterate through field_map and remove rename requests if they would drop an existing column
+    # doing this ahead-of-time allows us to preserve the order of fields using a simpler implementation
+    if not force:
+        for old_field, new_field in list(field_map.items()):
+            if new_field in existing_fields:
                 print_err(
                     f"WARNING: skipping rename of {old_field} because record",
                     f"already has a field named {new_field}."
                 )
-                continue
+                del field_map[old_field]
+
+    m = []
+    for field in existing_fields:
+        if field in field_map:
+            m.append((field, field_map[field]))
+        elif field in field_map.values():
+            pass # another column is renamed to this name, so we drop it
+        else:
+            m.append((field, field)) # no change to field name
+    return m
 
-            record[new_field] = record.pop(old_field, '')
 
-        yield(record)
+def run(args: argparse.Namespace, records: Iterable[dict]) -> Iterable[dict]:
+    col_map: Union[Literal[False], List[Tuple[str,str]]] = False
+    for record in records:
+        if not col_map: # initialise using first record
+            col_map = transform_columns(list(record.keys()), parse_field_map(args.field_map), args.force)
+        yield({new_field:record[old_field] for old_field, new_field in col_map})

From 8c3ac16c2013105907e6526c5ba782fa48539d8d Mon Sep 17 00:00:00 2001
From: james hadfield <hadfield.james@gmail.com>
Date: Tue, 2 Jul 2024 12:56:51 +1200
Subject: [PATCH 5/6] [curate rename] allow column duplication

See discussion in PR review for context
<https://github.com/nextstrain/augur/pull/1506#discussion_r1661629271>
---
 augur/curate/rename.py                        | 46 +++++++++----------
 .../functional/curate/cram/rename/duplicate.t | 24 ++++++++++
 2 files changed, 46 insertions(+), 24 deletions(-)
 create mode 100644 tests/functional/curate/cram/rename/duplicate.t

diff --git a/augur/curate/rename.py b/augur/curate/rename.py
index f4e3dc048..68730fa28 100644
--- a/augur/curate/rename.py
+++ b/augur/curate/rename.py
@@ -2,7 +2,7 @@
 Renames fields / columns of the input data
 """
 
-from typing import Iterable, Literal, Union, Dict, List, Tuple
+from typing import Iterable, Literal, Union, List, Tuple
 import argparse
 from augur.io.print import print_err
 from augur.errors import AugurError
@@ -14,9 +14,9 @@ def register_parser(parent_subparsers):
 
     required = parser.add_argument_group(title="REQUIRED")
     required.add_argument("--field-map", nargs="+", required=True,
-        help="Fields names in the NDJSON record mapped to new field names, " +
-             "formatted as '{old_field_name}={new_field_name}'. " +
+        help="Rename fields/columns via '{old_field_name}={new_field_name}'. " +
              "If the new field already exists, then the renaming of the old field will be skipped. " +
+             "Multiple entries with the same '{old_field_name}' will duplicate the field/column. " +
              "Skips the field if the old field name is the same as the new field name (case-sensitive).")
 
     optional = parser.add_argument_group(title="OPTIONAL")
@@ -27,10 +27,10 @@ def register_parser(parent_subparsers):
     return parser
 
 
-def parse_field_map(field_map_arg: List[str]) -> Dict[str,str]:
-    seen_old, seen_new = set(), set()
+def parse_field_map(field_map_arg: List[str]) -> List[Tuple[str,str]]:
+    seen_new = set() # keep track of the new field names
     
-    field_map = {}
+    field_map = []
     for field in field_map_arg:
         fields = [n.strip() for n in field.split('=')]
         if len(fields)!=2:
@@ -42,46 +42,44 @@ def parse_field_map(field_map_arg: List[str]) -> Dict[str,str]:
             raise AugurError(f"The field-map {field!r} doesn't specify a name for the existing field.")
         if not new_name:
             raise AugurError(f"The field-map {field!r} doesn't specify a name for the new field.")
-        if old_name in seen_old:
-            raise AugurError(f"Asked to rename field {old_name!r} multiple times.")
         if new_name in seen_new:
             raise AugurError(f"Asked to rename multiple fields to {new_name!r}.")
-        seen_old.add(old_name)
         seen_new.add(new_name)
 
-        if old_name == new_name:
-            continue
-
-        field_map[old_name] = new_name
+        field_map.append((old_name, new_name))
     return field_map
 
 
-def transform_columns(existing_fields: List[str], field_map: Dict[str,str], force: bool) -> List[Tuple[str,str]]:
+def transform_columns(existing_fields: List[str], field_map: List[Tuple[str,str]], force: bool) -> List[Tuple[str,str]]:
     """
     Calculate the mapping of old column names to new column names
     """
     # check that all columns to be renamed exist
-    for name in list(field_map.keys()):
-        if name not in existing_fields:
-            print_err(f"WARNING: Asked to rename field {name!r} (to {field_map[name]!r}) but it doesn't exist in the input data.")
-            del field_map[name]
+    for idx,names in enumerate(field_map[:]):
+        old_name, new_name = names
+        if old_name not in existing_fields:
+            print_err(f"WARNING: Asked to rename field {old_name!r} (to {new_name!r}) but it doesn't exist in the input data.")
+            field_map.pop(idx)
 
     # iterate through field_map and remove rename requests if they would drop an existing column
     # doing this ahead-of-time allows us to preserve the order of fields using a simpler implementation
     if not force:
-        for old_field, new_field in list(field_map.items()):
-            if new_field in existing_fields:
+        for idx, fields in enumerate(field_map[:]):
+            old_field, new_field = fields
+            if new_field in existing_fields and new_field!=old_field:
                 print_err(
                     f"WARNING: skipping rename of {old_field} because record",
                     f"already has a field named {new_field}."
                 )
-                del field_map[old_field]
+                field_map.pop(idx)
+
+    names_to_change, new_names = set([f[0] for f in field_map]), set([f[1] for f in field_map])
 
     m = []
     for field in existing_fields:
-        if field in field_map:
-            m.append((field, field_map[field]))
-        elif field in field_map.values():
+        if field in names_to_change:
+            m += [(field,new_field) for old_field, new_field in field_map if old_field==field]
+        elif field in new_names:
             pass # another column is renamed to this name, so we drop it
         else:
             m.append((field, field)) # no change to field name
diff --git a/tests/functional/curate/cram/rename/duplicate.t b/tests/functional/curate/cram/rename/duplicate.t
new file mode 100644
index 000000000..40d513e55
--- /dev/null
+++ b/tests/functional/curate/cram/rename/duplicate.t
@@ -0,0 +1,24 @@
+Setup
+
+  $ export AUGUR="${AUGUR:-$TESTDIR/../../../../../bin/augur}"
+
+  $ cat >records.ndjson <<~~
+  > {"accession": "record_1", "country": "country_1"}
+  > {"accession": "record_2", "country": "country_2"}
+  > ~~
+
+
+Asking to rename the same column multiple times results in duplication of the column.
+Additional columns are inserted next to the existing one, and the order of the new columns
+matches the field-map
+
+  $ $AUGUR curate rename --field-map "accession=id" "accession=genbank_accession" < <(cat records.ndjson)
+  {"id": "record_1", "genbank_accession": "record_1", "country": "country_1"}
+  {"id": "record_2", "genbank_accession": "record_2", "country": "country_2"}
+
+
+We can use the same name to keep the original column
+
+  $ $AUGUR curate rename --field-map "accession=id" "accession=accession" < <(cat records.ndjson)
+  {"id": "record_1", "accession": "record_1", "country": "country_1"}
+  {"id": "record_2", "accession": "record_2", "country": "country_2"}

From 0d0401b0f642bb7a5411f09683f26e4df9deea1b Mon Sep 17 00:00:00 2001
From: james hadfield <hadfield.james@gmail.com>
Date: Tue, 2 Jul 2024 11:14:57 +1200
Subject: [PATCH 6/6] changelog

---
 CHANGES.md | 2 ++
 1 file changed, 2 insertions(+)

diff --git a/CHANGES.md b/CHANGES.md
index 4f5ad277f..7b969727b 100644
--- a/CHANGES.md
+++ b/CHANGES.md
@@ -16,6 +16,7 @@
 * Added a new sub-command `augur curate parse-genbank-location` to parse the `geo_loc_name` field from GenBank reconds. Previously, this was available as the `translate-genbank-location` script within the nextstrain/ingest repo. [#1485][] (@genehack)
 * curate format-dates: Added defaults to `--expected-date-formats` so that ISO 8601 dates (`%Y-%m-%d`) and its various masked forms (e.g. `%Y-XX-XX`) are automatically parsed by the command. [#1501][] (@joverlee521)
 * Added a new sub-command `augur curate translate-strain-name` to filter strain names based on matching a regular expression. Previously, this was available as the `translate-strain-names` script within the nextstrain/ingest repo. [#1514][] (@genehack)
+* Added a new sub-command `augur curate rename` to rename field / column names. Previously, a similar version was available as the `translate-field-names` script within the nextstrain/ingest repo however the behaviour is slightly changed here. [#1506][] (@jameshadfield)
 
 ### Bug Fixes
 
@@ -32,6 +33,7 @@
 [#1509]: https://github.com/nextstrain/augur/pull/1509
 [#1514]: https://github.com/nextstrain/augur/pull/1514
 [#1518]: https://github.com/nextstrain/augur/pull/1518
+[#1506]: https://github.com/nextstrain/augur/pull/1506
 
 ## 24.4.0 (15 May 2024)