Skip to content

Commit

Permalink
wip: refactor translation detection in survey.py
Browse files Browse the repository at this point in the history
- for the purposes of _generate_static_instances, it seems like what
  matters is whether the choice list being processed is multi-language
  or not. The decision needs to be per choice list because otherwise
  choice lists with only default values would show "-" in every language
  except the default. The alternative taken here is to emit it as a
  non-translated list so that every language would see the same labels
  or media.
- to help differentiate choice from question translations, dict subtypes
  are introduced, rather than relying on matching the list_name (it may
  not make much difference, but that info is available to use).
- todos:
  - failing test related to JSON conversion, where the itext choice
    references in the secondary instances go missing vs. xml output.
  - test assertions for case(s) with translations + media
  • Loading branch information
lindsay-stevens committed Nov 16, 2023
1 parent 19fab39 commit 152527e
Show file tree
Hide file tree
Showing 3 changed files with 134 additions and 46 deletions.
89 changes: 50 additions & 39 deletions pyxform/survey.py
Original file line number Diff line number Diff line change
Expand Up @@ -182,6 +182,22 @@ def is_label_dynamic(label: str) -> bool:
return False


class Translation(dict):
    """
    Base mapping type for one translation entry stored in ``Survey._translations``.

    It behaves exactly like a ``dict``; the subclasses exist only so that the kind
    of entry (question vs. choice) can be detected with ``isinstance``.
    """

    # Empty slots: no per-instance __dict__ is needed on top of the dict storage.
    __slots__ = ()


class QuestionTranslation(Translation):
    """Translation entry that was created for a survey question."""

    # Slots must be re-declared in each subclass, otherwise instances of the
    # subclass get a __dict__ again and the base class declaration has no effect.
    __slots__ = ()


class ChoiceTranslation(Translation):
    """Translation entry that was created for an item of a choice list."""

    # See QuestionTranslation: re-declared so instances stay free of a __dict__.
    __slots__ = ()


def recursive_dict():
    """
    Build an auto-nesting mapping.

    Looking up a missing key creates another mapping of the same kind, so values
    can be assigned at any depth without creating the intermediate levels first.
    """
    nested = defaultdict(recursive_dict)
    return nested


class Survey(Section):
"""
Survey class - represents the full XForm XML.
Expand All @@ -205,7 +221,7 @@ class Survey(Section):
constants.COMPACT_DELIMITER: str,
"file_name": str,
"default_language": str,
"_translations": dict,
"_translations": recursive_dict,
"submission_url": str,
"auto_send": str,
"auto_delete": str,
Expand Down Expand Up @@ -306,18 +322,21 @@ def get_setvalues_for_question_name(self, question_name):

def _generate_static_instances(self, list_name, choice_list) -> InstanceInfo:
"""
Generates <instance> elements for static data
(e.g. choices for select type questions)
Note that per commit message 0578242 and in xls2json.py R539, an
instance is only output for select items defined in the choices sheet
when the item has a choice_filter, and it is that way for backwards
compatibility.
Generate <instance> elements for static data (e.g. choices for selects)
"""
instance_element_list = []
multi_language = self.is_multi_language()
has_media = bool(choice_list[0].get("media"))
has_dyn_label = has_dynamic_label(choice_list)
multi_language = False
if isinstance(self._translations, dict):
choices = tuple(
k
for items in self._translations.values()
for k, v in items.items()
if isinstance(v, ChoiceTranslation) and k.split("-")[0] == list_name
)
if 0 < len(choices):
multi_language = True

for idx, choice in enumerate(choice_list):
choice_element_list = []
Expand Down Expand Up @@ -656,24 +675,16 @@ def xml_instance(self, **kwargs):

def _add_to_nested_dict(self, dicty, path, value):
    """
    Store ``value`` in the nested mapping ``dicty`` under the key sequence ``path``.

    ``path`` is a non-empty sequence of keys, outermost first (an empty path
    raises IndexError). Levels that do not exist yet are created as plain dicts.
    If the final key already holds a dict and ``value`` is a dict too, ``value``
    is merged into the existing dict so that entries added earlier are kept.
    In every other case the final key is assigned ``value``.
    """
    key = path[-1]
    node = dicty
    # Walk down to the parent of the final key, creating missing levels.
    for step in path[:-1]:
        if step not in node:
            node[step] = {}
        node = node[step]
    # Merge rather than overwrite, so several values can share the same path.
    if key in node and isinstance(node[key], dict) and isinstance(value, dict):
        node[key].update(value)
    else:
        node[key] = value

def is_multi_language(self) -> bool:
    """
    Report whether ``_translations`` holds at least one non-default language.

    Returns False when ``_translations`` is missing or is not a dict.
    """
    translations = self.get("_translations")
    if not isinstance(translations, dict):
        return False
    # The default language on its own does not count, so when it is present
    # at least one further language key is required.
    required = 2 if constants.DEFAULT_LANGUAGE_VALUE in translations else 1
    return len(translations) >= required

def _setup_translations(self):
"""
set up the self._translations dict which will be referenced in the
Expand All @@ -682,18 +693,17 @@ def _setup_translations(self):

def _setup_choice_translations(
name, choice_value, itext_id
) -> Generator[Tuple[Tuple[str, str, str], str], None, None]:
) -> Generator[Tuple[List[str], str], None, None]:
for media_or_lang, value in choice_value.items(): # noqa
if isinstance(value, dict):
for language, val in value.items():
yield ((language, itext_id, media_or_lang), val)
yield ([language, itext_id, media_or_lang], val)
else:
if name == constants.MEDIA:
yield ((self.default_language, itext_id, media_or_lang), value)
yield ([self.default_language, itext_id, media_or_lang], value)
else:
yield ((media_or_lang, itext_id, "long"), value)
yield ([media_or_lang, itext_id, "long"], value)

self._translations = defaultdict(dict) # pylint: disable=W0201
itemsets_multi_language = set()
itemsets_has_media = set()
itemsets_has_dyn_label = set()
Expand Down Expand Up @@ -732,7 +742,9 @@ def get_choices():
)

for path, value in get_choices():
self._add_to_nested_dict(self._translations, path, value)
last_path = path.pop()
leaf_value = ChoiceTranslation({last_path: value})
self._add_to_nested_dict(self._translations, path, leaf_value)

select_types = set(aliases.select.keys())
for element in self.iter_descendants():
Expand All @@ -758,33 +770,35 @@ def get_choices():

self._translations[d["lang"]][translation_path] = self._translations[
d["lang"]
].get(translation_path, {})
].get(translation_path, QuestionTranslation())

self._translations[d["lang"]][translation_path].update(
{
form: {
"text": d["text"],
"output_context": d["output_context"],
}
},
}
)

def _add_empty_translations(self):
"""
Adds translations so that every itext element has the same elements \
accross every language.
When translations are not provided "-" will be used.
Adds translations so that every itext element has the same elements across every
language. When translations are not provided "-" will be used.
This disables any of the default_language fallback functionality.
"""
paths = {}
for lang, translation in self._translations.items():
for path, content in translation.items():
paths[path] = paths.get(path, set()).union(content.keys())
existing_path = paths.get(path, set())
if isinstance(existing_path, tuple):
existing_path = existing_path[0]
paths[path] = (existing_path.union(content.keys()), type(content))

for lang, translation in self._translations.items():
for path, content_types in paths.items():
for path, (content_types, translation_type) in paths.items():
if path not in self._translations[lang]:
self._translations[lang][path] = {}
self._translations[lang][path] = translation_type()
for content_type in content_types:
if content_type not in self._translations[lang][path]:
self._translations[lang][path][content_type] = "-"
Expand Down Expand Up @@ -841,9 +855,6 @@ def _set_up_media_translations(media_dict, translation_key):

translations_trans_key[media_type] = media

if not self._translations:
self._translations = defaultdict(dict) # pylint: disable=W0201

for survey_element in self.iter_descendants():
# Skip set up of media for choices in selects. Translations for their media
# content should have been set up in _setup_translations, with one copy of
Expand Down
64 changes: 58 additions & 6 deletions tests/test_translations.py
Original file line number Diff line number Diff line change
Expand Up @@ -416,7 +416,7 @@ def run(name):
):
run(name=f"questions={count}, without check (seconds):")

def test_translation_detection__survey_columns_present(self):
def test_translation_detection__survey_and_choices_columns_present(self):
"""Should identify that the survey is multi-language when first row(s) empty."""
md = """
| survey | | | | |
Expand All @@ -431,12 +431,26 @@ def test_translation_detection__survey_columns_present(self):
"""
self.assertPyxformXform(
md=md,
debug=True,
# TODO: assertions
xml__xpath_match=[
xpq.body_select1_itemset("f"),
xpq.body_label_inline("select1", "f", "f"),
xpq.body_select1_itemset("q1"),
xpq.body_label_itext("select1", "q1"),
xpq.model_itext_label("q1", "Question 1", DEFAULT_LANG),
xpq.model_itext_label("q1", "Question A", "en"),
xpq.model_itext_label("q1", "-", "fr"),
xpc.model_instance_choices_label("c0", (("n", "l"),)),
xpc.model_instance_choices_itext("c1", ("na", "nb")),
xpc.model_itext_choice_text_label_by_pos(
DEFAULT_LANG, "c1", ("la", "lb")
),
xpc.model_itext_choice_text_label_by_pos("en", "c1", ("-", "lb-e")),
xpc.model_itext_choice_text_label_by_pos("fr", "c1", ("-", "lb-f")),
],
)

def test_translation_detection__survey_columns_not_present(self):
"""Should identify that the survey is multi-language when first row(s) empty."""
"""Should identify that the survey is multi-language when only choices translated."""
md = """
| survey | | | |
| | type | name | label |
Expand All @@ -450,8 +464,46 @@ def test_translation_detection__survey_columns_not_present(self):
"""
self.assertPyxformXform(
md=md,
debug=True,
# TODO: assertions
xml__xpath_match=[
xpq.body_select1_itemset("f"),
xpq.body_label_inline("select1", "f", "f"),
xpq.body_select1_itemset("q1"),
xpq.body_label_inline("select1", "q1", "Question 1"),
xpc.model_instance_choices_label("c0", (("n", "l"),)),
xpc.model_instance_choices_itext("c1", ("na", "nb")),
xpc.model_itext_choice_text_label_by_pos(
DEFAULT_LANG, "c1", ("la", "lb")
),
xpc.model_itext_choice_text_label_by_pos("en", "c1", ("-", "lb-e")),
xpc.model_itext_choice_text_label_by_pos("fr", "c1", ("-", "lb-f")),
],
)

def test_translation_detection__only_survey_columns_present(self):
    """Should use itext only for the translated question when choices are untranslated."""
    # Only the survey sheet has a translation column (label::en), and only q1
    # has a value in it. The choices sheet has a single plain label column.
    md = """
| survey | | | | |
| | type | name | label | label::en |
| | select_one c0 | f | f | |
| | select_one c1 | q1 | Question 1 | Question A |
| choices | | | |
| | list name | name | label |
| | c0 | n | l |
| | c1 | na | la |
| | c1 | nb | lb |
"""
    self.assertPyxformXform(
        md=md,
        xml__xpath_match=[
            # f has no label::en value: its label is expected inline in the body.
            xpq.body_select1_itemset("f"),
            xpq.body_label_inline("select1", "f", "f"),
            # q1 is translated: its label is expected as an itext reference, with
            # one itext entry for the default language and one for "en".
            xpq.body_select1_itemset("q1"),
            xpq.body_label_itext("select1", "q1"),
            xpq.model_itext_label("q1", "Question 1", DEFAULT_LANG),
            xpq.model_itext_label("q1", "Question A", "en"),
            # NOTE(review): going by the helper name, these check for plain labels
            # in the choice instances (no itext) - confirm in xpath_helpers/choices.
            xpc.model_instance_choices_label("c0", (("n", "l"),)),
            xpc.model_instance_choices_label("c1", (("na", "la"), ("nb", "lb"))),
        ],
    )

def test_translation_detection__survey_columns_present_with_media(self):
Expand Down
27 changes: 26 additions & 1 deletion tests/xpath_helpers/questions.py
Original file line number Diff line number Diff line change
@@ -1,6 +1,6 @@
class XPathHelper:
"""
XPath expressions for choices assertions.
XPath expressions for questions assertions.
"""

@staticmethod
Expand All @@ -20,6 +20,31 @@ def model_instance_bind(q_name: str, _type: str):
]
"""

@staticmethod
def model_itext_label(q_name: str, q_label: str, lang: str):
    """
    XPath for the question label value in the model itext.

    Matches the form-less ``value`` with text ``q_label`` under the ``text``
    element for question ``q_name`` in the translation for ``lang``.
    """
    expression = f"""
/h:html/h:head/x:model/x:itext/x:translation[@lang='{lang}']
/x:text[@id='/test_name/{q_name}:label']
/x:value[not(@form) and text()='{q_label}']
"""
    return expression

@staticmethod
def body_label_inline(q_type: str, q_name: str, q_label: str):
    """
    XPath for a question label written inline in the body.

    Matches a ``label`` without a ``ref`` attribute and with text ``q_label``
    under the ``q_type`` control bound to question ``q_name``.
    """
    expression = f"""
/h:html/h:body/x:{q_type}[@ref='/test_name/{q_name}']
/x:label[not(@ref) and text()='{q_label}']
"""
    return expression

@staticmethod
def body_label_itext(q_type: str, q_name: str):
    """
    XPath for a question label in the body that points at itext.

    Matches a ``label`` that has no text of its own and whose ``ref`` is the
    ``jr:itext`` call for the label of question ``q_name``.
    """
    expression = f"""
/h:html/h:body/x:{q_type}[@ref='/test_name/{q_name}']
/x:label[@ref="jr:itext('/test_name/{q_name}:label')" and not(text())]
"""
    return expression

@staticmethod
def body_select1_itemset(q_name: str):
"""Body has a select1 with an itemset, and no inline items."""
Expand Down

0 comments on commit 152527e

Please sign in to comment.