Skip to content

Commit

Permalink
fix: sort rules without explicit indices
Browse files Browse the repository at this point in the history
fixes: #299

Co-authored-by: Eric Joanis <eric.joanis@nrc-cnrc.gc.ca>
  • Loading branch information
roedoejet and joanise committed Nov 10, 2023
1 parent f766a66 commit d1aa6dd
Show file tree
Hide file tree
Showing 2 changed files with 12 additions and 2 deletions.
7 changes: 5 additions & 2 deletions g2p/mappings/__init__.py
Original file line number Diff line number Diff line change
Expand Up @@ -37,6 +37,7 @@
)

GEN_DIR = os.path.join(os.path.dirname(LANGS_FILE), "generated")
EXPLICIT_INDEX_PATTERN = re.compile(r"{\d+}")


class Mapping(_MappingModelDefinition):
Expand Down Expand Up @@ -153,7 +154,9 @@ def process_model_specs(self): # noqa: C901
self.rules = sorted(
# Temporarily normalize to NFD for heuristic sorting of NFC-defined rules
self.rules,
key=lambda x: len(normalize(x.rule_input, "NFD"))
key=lambda x: len(
normalize(re.sub(EXPLICIT_INDEX_PATTERN, "", x.rule_input), "NFD")
)
if isinstance(x, Rule)
else len(normalize(x["in"], "NFD")),
reverse=True,
Expand Down Expand Up @@ -241,7 +244,7 @@ def rule_to_regex(self, rule: Union[Rule, dict]) -> Union[Pattern, None]:
"This is disallowed. Please check your mapping file for rules with null inputs."
)
return None
input_match = re.sub(re.compile(r"{\d+}"), "", rule.rule_input)
input_match = re.sub(EXPLICIT_INDEX_PATTERN, "", rule.rule_input)
try:
inp = create_fixed_width_lookbehind(rule.context_before) + input_match
if rule.context_after:
Expand Down
7 changes: 7 additions & 0 deletions g2p/tests/test_mappings.py
Original file line number Diff line number Diff line change
Expand Up @@ -164,6 +164,13 @@ def test_rule_ordering(self):
transducer_default = Transducer(Mapping(rules=rules))
self.assertEqual(transducer_default("aa").output_string, "bb")

def test_rule_ordering_with_indices(self):
    """Explicit index markers should not count toward a rule's length.

    The rule input ``a{1}b{2}`` carries two explicit indices; the test
    expects it to be treated as shorter than ``abc``, so that with
    "apply-longest-first" ordering the input "abc" is converted by the
    ``abc`` rule and yields "y".
    """
    indexed_rule = {"in": "a{1}b{2}", "out": "x{1}x{2}"}
    plain_rule = {"in": "abc", "out": "y"}
    transducer = Transducer(
        Mapping(
            rules=[indexed_rule, plain_rule],
            rule_ordering="apply-longest-first",
        )
    )
    # If the index markers were counted, the indexed rule would sort first
    # and consume "ab" before the three-character rule could match.
    result = transducer("abc")
    self.assertEqual(result.output_string, "y")

def test_rule_ordering_given_invalid_value(self):
"""
It should log an error message if given an invalid value for
Expand Down

0 comments on commit d1aa6dd

Please sign in to comment.