wip: refactor translation detection in survey.py

- For the purposes of _generate_static_instances, what seems to matter is whether the choice list being processed is multi-language or not. The decision needs to be made per choice list, because otherwise choice lists with only default values would show "-" in every language except the default. The alternative taken here is to emit such a list as a non-translated list, so that every language sees the same labels or media.
- To help differentiate choice translations from question translations, dict subtypes are introduced, rather than relying on matching the list_name (it may not make much difference, but that information is available to use).
- TODOs:
  - Fix the failing test related to JSON conversion, where the itext choice references in the secondary instances go missing compared to the XML output.
  - Add test assertions for the case(s) with translations + media.
XLSForm · Nov 16, 2023 · 152527e · 152527e
1 parent 19fab39
commit 152527e
Show file tree

Hide file tree

Showing 3 changed files with 134 additions and 46 deletions.
diff --git a/pyxform/survey.py b/pyxform/survey.py
@@ -182,6 +182,22 @@ def is_label_dynamic(label: str) -> bool:
         return False
 
 
+class Translation(dict):
+    __slots__ = ()
+
+
+class QuestionTranslation(Translation):
+    pass
+
+
+class ChoiceTranslation(Translation):
+    pass
+
+
+def recursive_dict():
+    return defaultdict(recursive_dict)
+
+
 class Survey(Section):
     """
     Survey class - represents the full XForm XML.
@@ -205,7 +221,7 @@ class Survey(Section):
             constants.COMPACT_DELIMITER: str,
             "file_name": str,
             "default_language": str,
-            "_translations": dict,
+            "_translations": recursive_dict,
             "submission_url": str,
             "auto_send": str,
             "auto_delete": str,
@@ -306,18 +322,21 @@ def get_setvalues_for_question_name(self, question_name):
 
     def _generate_static_instances(self, list_name, choice_list) -> InstanceInfo:
         """
-        Generates <instance> elements for static data
-        (e.g. choices for select type questions)
-
-        Note that per commit message 0578242 and in xls2json.py R539, an
-        instance is only output for select items defined in the choices sheet
-        when the item has a choice_filter, and it is that way for backwards
-        compatibility.
+        Generate <instance> elements for static data (e.g. choices for selects)
         """
         instance_element_list = []
-        multi_language = self.is_multi_language()
         has_media = bool(choice_list[0].get("media"))
         has_dyn_label = has_dynamic_label(choice_list)
+        multi_language = False
+        if isinstance(self._translations, dict):
+            choices = tuple(
+                k
+                for items in self._translations.values()
+                for k, v in items.items()
+                if isinstance(v, ChoiceTranslation) and k.split("-")[0] == list_name
+            )
+            if 0 < len(choices):
+                multi_language = True
 
         for idx, choice in enumerate(choice_list):
             choice_element_list = []
@@ -656,24 +675,16 @@ def xml_instance(self, **kwargs):
 
     def _add_to_nested_dict(self, dicty, path, value):
         if len(path) == 1:
-            dicty[path[0]] = value
+            key = path[0]
+            if key in dicty and isinstance(dicty[key], dict) and isinstance(value, dict):
+                dicty[key].update(value)
+            else:
+                dicty[key] = value
             return
         if path[0] not in dicty:
             dicty[path[0]] = {}
         self._add_to_nested_dict(dicty[path[0]], path[1:], value)
 
-    def is_multi_language(self) -> bool:
-        translations = self.get("_translations")
-        if translations is not None and isinstance(translations, dict):
-            count = len(translations)
-            if constants.DEFAULT_LANGUAGE_VALUE in translations:
-                if 1 < count:
-                    return True  # Default present and one or more other langs
-            else:
-                if 0 < count:
-                    return True  # Default not present and one or more langs
-        return False
-
     def _setup_translations(self):
         """
         set up the self._translations dict which will be referenced in the
@@ -682,18 +693,17 @@ def _setup_translations(self):
 
         def _setup_choice_translations(
             name, choice_value, itext_id
-        ) -> Generator[Tuple[Tuple[str, str, str], str], None, None]:
+        ) -> Generator[Tuple[List[str], str], None, None]:
             for media_or_lang, value in choice_value.items():  # noqa
                 if isinstance(value, dict):
                     for language, val in value.items():
-                        yield ((language, itext_id, media_or_lang), val)
+                        yield ([language, itext_id, media_or_lang], val)
                 else:
                     if name == constants.MEDIA:
-                        yield ((self.default_language, itext_id, media_or_lang), value)
+                        yield ([self.default_language, itext_id, media_or_lang], value)
                     else:
-                        yield ((media_or_lang, itext_id, "long"), value)
+                        yield ([media_or_lang, itext_id, "long"], value)
 
-        self._translations = defaultdict(dict)  # pylint: disable=W0201
         itemsets_multi_language = set()
         itemsets_has_media = set()
         itemsets_has_dyn_label = set()
@@ -732,7 +742,9 @@ def get_choices():
                         )
 
         for path, value in get_choices():
-            self._add_to_nested_dict(self._translations, path, value)
+            last_path = path.pop()
+            leaf_value = ChoiceTranslation({last_path: value})
+            self._add_to_nested_dict(self._translations, path, leaf_value)
 
         select_types = set(aliases.select.keys())
         for element in self.iter_descendants():
@@ -758,33 +770,35 @@ def get_choices():
 
                     self._translations[d["lang"]][translation_path] = self._translations[
                         d["lang"]
-                    ].get(translation_path, {})
+                    ].get(translation_path, QuestionTranslation())
 
                     self._translations[d["lang"]][translation_path].update(
                         {
                             form: {
                                 "text": d["text"],
                                 "output_context": d["output_context"],
-                            }
+                            },
                         }
                     )
 
     def _add_empty_translations(self):
         """
-        Adds translations so that every itext element has the same elements \
-        accross every language.
-        When translations are not provided "-" will be used.
+        Adds translations so that every itext element has the same elements across every
+        language. When translations are not provided "-" will be used.
         This disables any of the default_language fallback functionality.
         """
         paths = {}
         for lang, translation in self._translations.items():
             for path, content in translation.items():
-                paths[path] = paths.get(path, set()).union(content.keys())
+                existing_path = paths.get(path, set())
+                if isinstance(existing_path, tuple):
+                    existing_path = existing_path[0]
+                paths[path] = (existing_path.union(content.keys()), type(content))
 
         for lang, translation in self._translations.items():
-            for path, content_types in paths.items():
+            for path, (content_types, translation_type) in paths.items():
                 if path not in self._translations[lang]:
-                    self._translations[lang][path] = {}
+                    self._translations[lang][path] = translation_type()
                 for content_type in content_types:
                     if content_type not in self._translations[lang][path]:
                         self._translations[lang][path][content_type] = "-"
@@ -841,9 +855,6 @@ def _set_up_media_translations(media_dict, translation_key):
 
                     translations_trans_key[media_type] = media
 
-        if not self._translations:
-            self._translations = defaultdict(dict)  # pylint: disable=W0201
-
         for survey_element in self.iter_descendants():
             # Skip set up of media for choices in selects. Translations for their media
             # content should have been set up in _setup_translations, with one copy of

diff --git a/tests/test_translations.py b/tests/test_translations.py
@@ -416,7 +416,7 @@ def run(name):
             ):
                 run(name=f"questions={count}, without check (seconds):")
 
-    def test_translation_detection__survey_columns_present(self):
+    def test_translation_detection__survey_and_choices_columns_present(self):
         """Should identify that the survey is multi-language when first row(s) empty."""
         md = """
         | survey  |                |       |            |            |
@@ -431,12 +431,26 @@ def test_translation_detection__survey_columns_present(self):
         """
         self.assertPyxformXform(
             md=md,
-            debug=True,
-            # TODO: assertions
+            xml__xpath_match=[
+                xpq.body_select1_itemset("f"),
+                xpq.body_label_inline("select1", "f", "f"),
+                xpq.body_select1_itemset("q1"),
+                xpq.body_label_itext("select1", "q1"),
+                xpq.model_itext_label("q1", "Question 1", DEFAULT_LANG),
+                xpq.model_itext_label("q1", "Question A", "en"),
+                xpq.model_itext_label("q1", "-", "fr"),
+                xpc.model_instance_choices_label("c0", (("n", "l"),)),
+                xpc.model_instance_choices_itext("c1", ("na", "nb")),
+                xpc.model_itext_choice_text_label_by_pos(
+                    DEFAULT_LANG, "c1", ("la", "lb")
+                ),
+                xpc.model_itext_choice_text_label_by_pos("en", "c1", ("-", "lb-e")),
+                xpc.model_itext_choice_text_label_by_pos("fr", "c1", ("-", "lb-f")),
+            ],
         )
 
     def test_translation_detection__survey_columns_not_present(self):
-        """Should identify that the survey is multi-language when first row(s) empty."""
+        """Should identify that the survey is multi-language when only choices translated."""
         md = """
         | survey  |                |       |            |
         |         | type           | name  | label      |
@@ -450,8 +464,46 @@ def test_translation_detection__survey_columns_not_present(self):
         """
         self.assertPyxformXform(
             md=md,
-            debug=True,
-            # TODO: assertions
+            xml__xpath_match=[
+                xpq.body_select1_itemset("f"),
+                xpq.body_label_inline("select1", "f", "f"),
+                xpq.body_select1_itemset("q1"),
+                xpq.body_label_inline("select1", "q1", "Question 1"),
+                xpc.model_instance_choices_label("c0", (("n", "l"),)),
+                xpc.model_instance_choices_itext("c1", ("na", "nb")),
+                xpc.model_itext_choice_text_label_by_pos(
+                    DEFAULT_LANG, "c1", ("la", "lb")
+                ),
+                xpc.model_itext_choice_text_label_by_pos("en", "c1", ("-", "lb-e")),
+                xpc.model_itext_choice_text_label_by_pos("fr", "c1", ("-", "lb-f")),
+            ],
+        )
+
+    def test_translation_detection__only_survey_columns_present(self):
+        """Should identify that the survey is multi-language when first row(s) empty."""
+        md = """
+        | survey  |                |       |            |            |
+        |         | type           | name  | label      | label::en  |
+        |         | select_one c0  | f     | f          |            |
+        |         | select_one c1  | q1    | Question 1 | Question A |
+        | choices |           |      |        |
+        |         | list name | name | label  |
+        |         | c0        | n    | l      |
+        |         | c1        | na   | la     |
+        |         | c1        | nb   | lb     |
+        """
+        self.assertPyxformXform(
+            md=md,
+            xml__xpath_match=[
+                xpq.body_select1_itemset("f"),
+                xpq.body_label_inline("select1", "f", "f"),
+                xpq.body_select1_itemset("q1"),
+                xpq.body_label_itext("select1", "q1"),
+                xpq.model_itext_label("q1", "Question 1", DEFAULT_LANG),
+                xpq.model_itext_label("q1", "Question A", "en"),
+                xpc.model_instance_choices_label("c0", (("n", "l"),)),
+                xpc.model_instance_choices_label("c1", (("na", "la"), ("nb", "lb"))),
+            ],
         )
 
     def test_translation_detection__survey_columns_present_with_media(self):

diff --git a/tests/xpath_helpers/questions.py b/tests/xpath_helpers/questions.py
@@ -1,6 +1,6 @@
 class XPathHelper:
     """
-    XPath expressions for choices assertions.
+    XPath expressions for questions assertions.
     """
 
     @staticmethod
@@ -20,6 +20,31 @@ def model_instance_bind(q_name: str, _type: str):
           ]
         """
 
+    @staticmethod
+    def model_itext_label(q_name: str, q_label: str, lang: str):
+        """Model itext contains the question label."""
+        return f"""
+        /h:html/h:head/x:model/x:itext/x:translation[@lang='{lang}']
+          /x:text[@id='/test_name/{q_name}:label']
+          /x:value[not(@form) and text()='{q_label}']
+        """
+
+    @staticmethod
+    def body_label_inline(q_type: str, q_name: str, q_label: str):
+        """Body element contains the question label."""
+        return f"""
+        /h:html/h:body/x:{q_type}[@ref='/test_name/{q_name}']
+          /x:label[not(@ref) and text()='{q_label}']
+        """
+
+    @staticmethod
+    def body_label_itext(q_type: str, q_name: str):
+        """Body element references itext for the question label."""
+        return f"""
+        /h:html/h:body/x:{q_type}[@ref='/test_name/{q_name}']
+          /x:label[@ref="jr:itext('/test_name/{q_name}:label')" and not(text())]
+        """
+
     @staticmethod
     def body_select1_itemset(q_name: str):
         """Body has a select1 with an itemset, and no inline items."""