Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

More performance improvements #743

Merged
merged 9 commits into from
Dec 24, 2024
50 changes: 49 additions & 1 deletion pyxform/question.py
Original file line number Diff line number Diff line change
Expand Up @@ -3,7 +3,7 @@
"""

import os.path
from collections.abc import Iterable
from collections.abc import Callable, Generator, Iterable
from itertools import chain
from typing import TYPE_CHECKING

Expand Down Expand Up @@ -391,6 +391,22 @@ def validate(self):
for child in self.children:
child.validate()

def iter_descendants(
Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Maybe this implementation could go in some sort of shared place? Everyone's favorite, a util file? I'm worried about these identical implementations drifting apart...

Copy link
Contributor Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Agree 3x copy/pastas = probably time for a shared func. I didn't do that here because of the points noted in #745. In which case the only copy of this method override for iter_descendants would be MultipleChoice.

self,
condition: Callable[["SurveyElement"], bool] | None = None,
iter_into_section_items: bool = False,
) -> Generator["SurveyElement", None, None]:
if condition is None:
yield self
elif condition(self):
yield self
if iter_into_section_items and self.children:
for e in self.children:
yield from e.iter_descendants(
condition=condition,
iter_into_section_items=iter_into_section_items,
)

def build_xml(self, survey: "Survey"):
if self.bind["type"] not in {"string", "odk:rank"}:
raise PyXFormError("""Invalid value for `self.bind["type"]`.""")
Expand Down Expand Up @@ -514,6 +530,22 @@ def __init__(self, name: str, label: str | dict | None = None, **kwargs):
)
super().__init__(name=name, label=label, **kwargs)

def iter_descendants(
self,
condition: Callable[["SurveyElement"], bool] | None = None,
iter_into_section_items: bool = False,
) -> Generator["SurveyElement", None, None]:
if condition is None:
yield self
elif condition(self):
yield self
if iter_into_section_items and self.children:
for e in self.children:
yield from e.iter_descendants(
condition=condition,
iter_into_section_items=iter_into_section_items,
)

def xml(self, survey: "Survey"):
result = node("tag", key=self.name)
result.appendChild(self.xml_label(survey=survey))
Expand Down Expand Up @@ -548,6 +580,22 @@ def __init__(self, **kwargs):

super().__init__(**kwargs)

def iter_descendants(
self,
condition: Callable[["SurveyElement"], bool] | None = None,
iter_into_section_items: bool = False,
) -> Generator["SurveyElement", None, None]:
if condition is None:
yield self
elif condition(self):
yield self
if iter_into_section_items and self.children:
for e in self.children:
yield from e.iter_descendants(
condition=condition,
iter_into_section_items=iter_into_section_items,
)

def build_xml(self, survey: "Survey"):
control_dict = self.control
control_dict["ref"] = self.get_xpath()
Expand Down
18 changes: 17 additions & 1 deletion pyxform/section.py
Original file line number Diff line number Diff line change
Expand Up @@ -2,7 +2,7 @@
Section survey element module.
"""

from collections.abc import Generator, Iterable
from collections.abc import Callable, Generator, Iterable
from itertools import chain
from typing import TYPE_CHECKING

Expand Down Expand Up @@ -78,6 +78,22 @@ def validate(self):
element.validate()
self._validate_uniqueness_of_element_names()

def iter_descendants(
self,
condition: Callable[["SurveyElement"], bool] | None = None,
iter_into_section_items: bool = False,
) -> Generator["SurveyElement", None, None]:
if condition is None:
yield self
elif condition(self):
yield self
if self.children:
for e in self.children:
yield from e.iter_descendants(
condition=condition,
iter_into_section_items=iter_into_section_items,
)

# there's a stronger test of this when creating the xpath
# dictionary for a survey.
def _validate_uniqueness_of_element_names(self):
Expand Down
22 changes: 10 additions & 12 deletions pyxform/survey.py
Original file line number Diff line number Diff line change
Expand Up @@ -15,7 +15,6 @@

from pyxform import aliases, constants
from pyxform.constants import EXTERNAL_INSTANCE_EXTENSIONS, NSMAP
from pyxform.entities.entity_declaration import EntityDeclaration
from pyxform.errors import PyXFormError, ValidationError
from pyxform.external_instance import ExternalInstance
from pyxform.instance import SurveyInstance
Expand Down Expand Up @@ -239,7 +238,7 @@ def __init__(self, **kwargs):
self._created: datetime.now = datetime.now()
self._search_lists: set = set()
self._translations: recursive_dict = recursive_dict()
self._xpath: dict[str, SurveyElement | None] = {}
self._xpath: dict[str, Section | Question | None] = {}

# Structure
# attribute is for custom instance attrs from settings e.g. attribute::abc:xyz
Expand Down Expand Up @@ -1027,17 +1026,15 @@ def _set_up_media_translations(media_dict, translation_key):

translations_trans_key[media_type] = media

for survey_element in self.iter_descendants(
condition=lambda i: not isinstance(
i, Survey | EntityDeclaration | ExternalInstance | Tag | Option
)
for item in self.iter_descendants(
condition=lambda i: isinstance(i, Section | Question)
):
# Skip set up of media for choices in selects. Translations for their media
# content should have been set up in _setup_translations, with one copy of
# each choice translation per language (after _add_empty_translations).
media_dict = survey_element.get("media")
if isinstance(media_dict, dict) and 0 < len(media_dict):
translation_key = survey_element.get_xpath() + ":label"
media_dict = item.media
if isinstance(media_dict, dict) and media_dict:
translation_key = f"{item.get_xpath()}:label"
_set_up_media_translations(media_dict, translation_key)

def itext(self) -> DetachableElement:
Expand Down Expand Up @@ -1137,10 +1134,11 @@ def __unicode__(self):

def _setup_xpath_dictionary(self):
for element in self.iter_descendants(lambda i: isinstance(i, Question | Section)):
if element.name in self._xpath:
self._xpath[element.name] = None
element_name = element.name
if element_name in self._xpath:
self._xpath[element_name] = None
else:
self._xpath[element.name] = element
self._xpath[element_name] = element

def _var_repl_function(
self, matchobj, context, use_current=False, reference_parent=False
Expand Down
24 changes: 12 additions & 12 deletions pyxform/survey_element.py
Original file line number Diff line number Diff line change
Expand Up @@ -141,24 +141,24 @@ def validate(self):
f"The name '{self.name}' contains an invalid character '{invalid_char.group(0)}'. Names {const.XML_IDENTIFIER_ERROR_MESSAGE}"
)

# TODO: Make sure renaming this doesn't cause any problems
def iter_descendants(
self, condition: Callable[["SurveyElement"], bool] | None = None
self,
condition: Callable[["SurveyElement"], bool] | None = None,
iter_into_section_items: bool = False,
) -> Generator["SurveyElement", None, None]:
"""
Get each of self.children.
Iterate the object, and it's children (if applicable).

:param condition: If this evaluates to True, yield the element.
:param condition: If provided, the element will only be returned if this callable
evaluates to True. Can be used to filter by class/type or other properties.
:param iter_into_section_items: If False, only iterate into the children of
sections (survey or group), e.g. to get Sections, Questions, etc. If True, also
iterate into the children of those children, e.g. to get Options and Tags.
"""
# it really seems like this method should not yield self
if condition is not None:
if condition(self):
yield self
else:
if condition is None:
yield self
elif condition(self):
yield self
if hasattr(self, const.CHILDREN) and self.children is not None:
for e in self.children:
yield from e.iter_descendants(condition=condition)

def iter_ancestors(
self, condition: Callable[["SurveyElement"], bool] | None = None
Expand Down
8 changes: 4 additions & 4 deletions pyxform/xls2json.py
Original file line number Diff line number Diff line change
Expand Up @@ -6,6 +6,7 @@
import os
import re
import sys
from itertools import chain
from typing import IO, Any

from pyxform import aliases, constants
Expand Down Expand Up @@ -55,9 +56,9 @@ def merge_dicts(dict_a, dict_b, default_key="default"):
a recursive call to this function,
otherwise they are just added to the output dict.
"""
if dict_a is None or dict_a == {}:
if not dict_a:
return dict_b
if dict_b is None or dict_b == {}:
if not dict_b:
return dict_a

if not isinstance(dict_a, dict):
Expand All @@ -71,8 +72,7 @@ def merge_dicts(dict_a, dict_b, default_key="default"):

# Union keys but retain order (as opposed to set()), preferencing dict_a then dict_b.
# E.g. {"a": 1, "b": 2} + {"c": 3, "a": 4} -> {"a": None, "b": None, "c": None}
all_keys = {k: None for k in dict_a.keys()}
all_keys.update({k: None for k in dict_b.keys()})
all_keys = {k: None for k in (chain(dict_a.keys(), dict_b.keys()))}

out_dict = {}
for key in all_keys.keys():
Expand Down