Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Test translation feature #32

Merged
merged 2 commits into from
Feb 21, 2024
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
1 change: 1 addition & 0 deletions .gitignore
Original file line number Diff line number Diff line change
Expand Up @@ -93,3 +93,4 @@ tests/data/all_translations/*
src/notebooks/.ipynb_checkpoints/babelon_notebook-checkpoint.ipynb
.vscode
src/notebooks/hpo_notes.xliff
tests/tmp/
2,693 changes: 1,527 additions & 1,166 deletions poetry.lock

Large diffs are not rendered by default.

2 changes: 2 additions & 0 deletions pyproject.toml
Original file line number Diff line number Diff line change
Expand Up @@ -17,6 +17,8 @@ xmltodict = "^0.13.0"
pandas = "^2.0.2"
oaklib = "^0.5.9"
tabulate = "^0.9.0"
llm = "^0.13.1"
python-dotenv = "^1.0.1"

[tool.poetry.group.docs.dependencies]
mkdocs = "^1.4.2"
Expand Down
1 change: 1 addition & 0 deletions src/babelon/__init__.py
Original file line number Diff line number Diff line change
@@ -1,4 +1,5 @@
"""babelon package."""

import importlib_metadata

try:
Expand Down
10 changes: 6 additions & 4 deletions src/babelon/babelon_io.py
Original file line number Diff line number Diff line change
@@ -1,5 +1,6 @@
"""babelon.io."""
from babelon.parsers.xliff import XliffParser

from babelon.parsers.xliff import xliff_path_to_babelon


def parse_file(input_path: str, output_path: str) -> None:
Expand All @@ -14,8 +15,9 @@ def parse_file(input_path: str, output_path: str) -> None:
"""
file_extension = input_path.split(".")[-1]
if file_extension == "xliff":
parser = XliffParser(input_file_path=input_path, output_file_path=output_path)
parser.xml_to_tsv()
parser.synonym_split()
df_babelon, df_synonym = xliff_path_to_babelon(input_file_path=input_path)
output_path_synonym = str(output_path).replace(".babelon.", ".synonyms.")
df_babelon.to_csv(output_path, sep="\t", index=False)
df_synonym.to_csv(output_path_synonym, sep="\t", index=False)
else:
raise ValueError(f"File type: {file_extension} not supported.")
147 changes: 88 additions & 59 deletions src/babelon/cli.py
Original file line number Diff line number Diff line change
Expand Up @@ -6,13 +6,16 @@
from pathlib import Path

import click
import pandas as pd
from oaklib import get_adapter

from babelon.babelon_io import parse_file
from babelon.translation_profile import statistics_translation_profile, update_translation_profile
from babelon.translate import prepare_translation_for_ontology, translate_profile
from babelon.translation_profile import statistics_translation_profile

info_log = logging.getLogger("info")
# Click input options common across commands
input_argument = click.argument("input_path", required=True, type=click.Path())
input_argument = click.argument("input", required=False, type=click.Path())

input_format_option = click.option(
"--input-format",
Expand Down Expand Up @@ -71,80 +74,106 @@ def babelon():
@input_argument
# @input_format_option
@output_option
def parse(input_path, output):
def parse(input, output):
"""Parse a file in one of the supported formats (currently only XLIFF) into a Babelon TSV file."""
parse_file(input_path=input_path, output_path=output)
parse_file(input_path=input, output_path=output)


if __name__ == "__main__":
try:
parse(sys.argv[1:])
except Exception as e:
print(e)
@click.command("translate")
@input_argument
@output_option
def translate(input, output):
    """Translate the values of a tab-separated table and write the result.

    The table at ``input`` is read with pandas, passed through
    ``translate_profile``, and the returned frame is written to ``output``
    as TSV without the index column.

    Args:
        input: Path to the tab-separated table to translate. The shared
            ``input`` argument is optional on the command line, but this
            command cannot do anything without it.
        output: Target handed to ``DataFrame.to_csv`` for the translated table.

    Raises:
        click.UsageError: If no input path was supplied.
    """
    # The shared argument is declared with required=False, so fail with a
    # clear CLI error here instead of letting pandas choke on ``None``.
    if input is None:
        raise click.UsageError("An input table is required for 'translate'.")

    df = pd.read_csv(input, sep="\t")
    translated_df = translate_profile(df)
    translated_df.to_csv(output, sep="\t", index=False)


@click.command("statistics")
@click.command()
@input_argument
@click.option("--oak-adapter", type=str, help="Oak handle string.")
@click.option("--language-code", type=str, help="ISO code for the target translation language.")
@click.option(
"--translation-profile",
"-t",
metavar="PATH",
required=True,
help="Path to translation profile.",
type=Path,
"--term-list",
type=click.Path(exists=True),
help="Path to file containing term ids to be translated.",
)
@click.option("--field", multiple=True, type=str, help="Fields to be translated.")
@output_option
def prepare_translation(input, oak_adapter, language_code, term_list, field, output):
"""Translate ontology fields based on the specified language code."""
ontology = get_adapter(oak_adapter)
if input:
df_babelon = pd.read_csv(input, sep="\t")
else:
df_babelon = None

terms = None
if term_list:
with open(term_list, "r") as file:
lines = file.readlines()
terms = [line.strip() for line in lines]

output_profile = prepare_translation_for_ontology(
ontology=ontology,
language_code=language_code,
df_babelon=df_babelon,
terms=terms,
fields=field,
)
output_profile.to_csv(output, sep="\t", index=False)


@click.command("statistics")
@input_argument
def statistics_translation_profile_command(
translation_profile: Path,
input: Path,
):
"""Takes as an input a babelon profile (TSV) and returns some basic stats:
"""Take as an input a babelon profile (TSV) and returns some basic stats.

number of translations by source_language, target_language
number of translations by source_language, target_language, predicate_id
number of translations by source_language, target_language, translation_status
Args:
translation_profile (Path): translation profile
input (Path): translation profile
"""
statistics_translation_profile(translation_profile)
statistics_translation_profile(input)


@click.command("update-translation-profile")
@click.option(
"--translation-profile",
"-t",
metavar="PATH",
required=True,
help="Path to translation profile.",
type=Path,
)
@click.option(
"--ontology-file",
"-o",
metavar="PATH",
required=True,
help="Path to ontology file.",
type=Path,
)
@click.option(
"--output",
"-o",
metavar="PATH",
required=True,
help="Path where updated profile will be written.",
type=Path,
)
def update_translation_profile_command(
translation_profile: Path,
ontology_file: Path,
output: Path,
):
"""Write update_translation_profile to TSV file.

Args:
translation_profile (Path): Path to the translation profile
ontology_file (Path): Path to the ontology file
output (Path): Path to the output TSV file
"""
update_translation_profile(translation_profile, ontology_file, output)
@click.command("example")
@input_argument
def example(input):
    """Produce a small sample babelon table.

    Two untranslated English-to-German ``rdfs:label`` rows are built in
    memory. When ``input`` is given, the table is written to that path as
    TSV; otherwise it is echoed to the terminal as plain text.
    """
    sample_terms = (("Fever", "HP:0001945"), ("Stroke", "HP:0001297"))
    rows = [
        {
            "source_language": "en",
            "source_value": label,
            "subject_id": term_id,
            "predicate_id": "rdfs:label",
            "translation_language": "de",
            "translation_value": "",
            "translation_status": "NOT_TRANSLATED",
        }
        for label, term_id in sample_terms
    ]
    table = pd.DataFrame(rows)

    # No path supplied: show the example on the terminal and stop.
    if not input:
        click.echo(table.to_string(index=False))
        return

    table.to_csv(input, sep="\t", index=False)


babelon.add_command(parse)
babelon.add_command(update_translation_profile_command)
babelon.add_command(prepare_translation)
babelon.add_command(statistics_translation_profile_command)
babelon.add_command(example)
babelon.add_command(translate)
Loading
Loading