Skip to content

Commit

Permalink
Merge pull request #43 from monarch-initiative/fix-not-translated
Browse files: browse the repository at this point in the history
Fix NOT_TRANSLATED treatment in prepare table and add default sorting to merge()
  • Loading branch information
matentzn authored Mar 1, 2024
2 parents 739d572 + b787f09 commit a9ae364
Show file tree
Hide file tree
Showing 2 changed files with 22 additions and 11 deletions.
19 changes: 12 additions & 7 deletions src/babelon/cli.py
Original file line number | Diff line number | Diff line change
Expand Up @@ -33,6 +33,12 @@
type=click.File(mode="w"),
default=sys.stdout,
)
sort_table_option = click.option(
"--sort-tables",
type=bool,
default=True,
help="If true, all output tables are sorted before written.",
)
output_format_option = click.option(
"--output-format",
"-t",
Expand Down Expand Up @@ -153,12 +159,7 @@ def translate(input, model, language_code, update_existing, output):
default=True,
help="If true, the translation status is changed to CANDIDATE if a source value has changed.",
)
@click.option(
"--sort-tables",
type=bool,
default=True,
help="If true, all output tables are sorted before written.",
)
@sort_table_option
@output_option
def prepare_translation(
input,
Expand Down Expand Up @@ -228,8 +229,9 @@ def statistics_translation_profile_command(

@click.command("merge")
@multiple_inputs_argument
@sort_table_option
@output_option
def merge(inputs, output):
def merge(inputs, sort_tables, output):
"""Merge multiple babelon files into one."""
df = pd.read_csv(inputs[0], sep="\t")

Expand All @@ -238,6 +240,9 @@ def merge(inputs, output):
df_temp = pd.read_csv(input_file, sep="\t")
df = pd.concat([df, df_temp], axis=0, ignore_index=True)

if sort_tables:
df = sort_babelon_tsv(df)

if output:
df.to_csv(output, sep="\t", index=False)
else:
Expand Down
14 changes: 10 additions & 4 deletions src/babelon/translate.py
Original file line number | Diff line number | Diff line change
Expand Up @@ -205,17 +205,23 @@ def prepare_translation_for_ontology(
source_value = row["source_value"]
translation_status = row["translation_status"]
term_metadata = _get_metadata_for_term(ontology, subject_id)
if translation_status == "NOT_TRANSLATED":
if not include_not_translated:
mark_index_for_removal.append(index)
if predicate_id in term_metadata:
output_not_translated_data.append(row.to_dict())
else:
logging.warning(
f"{predicate_id} value for {subject_id} is marked as NOT_TRANSLATED,"
f"but does not exist at all in the ontology. Omitting row."
)
if predicate_id in term_metadata:
ontology_value = term_metadata[predicate_id][0]
if len(term_metadata[predicate_id]) > 1:
logging.warning(
f"{predicate_id} value for {subject_id} is ambiguous,"
f"picking first one ({term_metadata[predicate_id]})."
)
if translation_status == "NOT_TRANSLATED":
output_not_translated_data.append(row.to_dict())
if not include_not_translated:
mark_index_for_removal.append(index)
if ontology_value != source_value:
translation_value = row["translation_value"]
df_augmented.at[index, "source_value"] = ontology_value
Expand Down

0 comments on commit a9ae364

Please sign in to comment.