Skip to content

Commit

Permalink
test: add alignment tests and improve coverage for transducers
Browse files Browse the repository at this point in the history
  • Loading branch information
dhdaines committed Mar 28, 2023
1 parent 126aa83 commit 76f85dd
Show file tree
Hide file tree
Showing 5 changed files with 235 additions and 58 deletions.
71 changes: 71 additions & 0 deletions g2p/tests/test_indices.py
Original file line number Diff line number Diff line change
Expand Up @@ -165,6 +165,11 @@ class IndicesTest(TestCase):
((1, 'b'), (0, '')),
((1, 'c'), (0, '')) ]
Test case # 11
# Sort of an insertion test (empty inputs are not allowed)
Test case # 12
# Verify that empty inputs are not allowed
"""

def __init__(self, *args):
Expand Down Expand Up @@ -370,10 +375,14 @@ def test_wacky(self):
self.assertEqual(
transducer_lite.edges, [(0, 4), (1, 0), (2, 1), (2, 2), (3, 3)]
)
self.assertEqual(transducer_lite.alignments(), [("abcc", "ccccc")])
self.assertEqual(
transducer_lite_extra.edges,
[(0, 4), (1, 0), (2, 1), (2, 2), (3, 3), (4, 5)],
)
self.assertEqual(
transducer_lite_extra.alignments(), [("abcc", "ccccc"), ("a", "a")]
)
transducer_no_i = self.trans_wacky("\U0001f600\U0001f603\U0001f604\U0001f604")
self.assertEqual(
transducer_no_i.output_string,
Expand All @@ -385,76 +394,132 @@ def test_wacky(self):
"\U0001f604\U0001f604\U0001f604\U0001f604\U0001f604",
)
self.assertEqual(transducer.edges, [(0, 4), (1, 0), (2, 1), (2, 2), (3, 3)])
self.assertEqual(
transducer.alignments(),
[
(
"\U0001f600\U0001f603\U0001f604\U0001f604",
"\U0001f604\U0001f604\U0001f604\U0001f604\U0001f604",
)
],
)

def test_circum(self):
    """Circumfixing: expect "ac" to become "cac" as one alignment segment."""
    expected_edges = [(0, 1), (1, 0), (1, 2)]
    expected_segments = [("ac", "cac")]
    graph = self.trans_circum("ac")
    self.assertEqual(graph.output_string, "cac")
    self.assertEqual(graph.edges, expected_edges)
    # The crossing edges force the whole input into a single segment.
    self.assertEqual(graph.alignments(), expected_segments)

def test_case_one(self):
    """Case one: expect "test" -> "pest" with strictly one-to-one edges."""
    graph = self.trans_one("test")
    self.assertEqual(graph.output_string, "pest")
    one_to_one = [(0, 0), (1, 1), (2, 2), (3, 3)]
    self.assertEqual(graph.edges, one_to_one)
    # Each input character forms its own segment.
    per_char = [("t", "p"), ("e", "e"), ("s", "s"), ("t", "t")]
    self.assertEqual(graph.alignments(), per_char)

def test_case_two(self):
    """Case two: expect "test" -> "tst", with "te" aligned to a single "t"."""
    graph = self.trans_two("test")
    self.assertEqual(graph.output_string, "tst")
    # Inputs 0 and 1 both point at output 0 (many-to-one).
    many_to_one = [(0, 0), (1, 0), (2, 1), (3, 2)]
    self.assertEqual(graph.edges, many_to_one)
    segments = [("te", "t"), ("s", "s"), ("t", "t")]
    self.assertEqual(graph.alignments(), segments)

def test_case_three(self):
    """Case three: expect "test" -> "chest", with "t" aligned to "ch"."""
    graph = self.trans_three("test")
    self.assertEqual(graph.output_string, "chest")
    # Input 0 fans out to outputs 0 and 1 (one-to-many).
    one_to_many = [(0, 0), (0, 1), (1, 2), (2, 3), (3, 4)]
    self.assertEqual(graph.edges, one_to_many)
    segments = [("t", "ch"), ("e", "e"), ("s", "s"), ("t", "t")]
    self.assertEqual(graph.alignments(), segments)

def test_case_four(self):
    """Case four: expect "test" -> "pst", with "te" aligned to "p"."""
    graph = self.trans_four("test")
    self.assertEqual(graph.output_string, "pst")
    # Inputs 0 and 1 both point at output 0 (many-to-one).
    many_to_one = [(0, 0), (1, 0), (2, 1), (3, 2)]
    self.assertEqual(graph.edges, many_to_one)
    segments = [("te", "p"), ("s", "s"), ("t", "t")]
    self.assertEqual(graph.alignments(), segments)

def test_case_six(self):
    """Case six: expect "test" -> "tset", with "es" and "se" swapped."""
    graph = self.trans_six("test")
    self.assertEqual(graph.output_string, "tset")
    # Edges for inputs 1 and 2 cross each other.
    crossing = [(0, 0), (1, 2), (2, 1), (3, 3)]
    self.assertEqual(graph.edges, crossing)
    # The swapped pair is expected as a single two-character segment.
    segments = [("t", "t"), ("es", "se"), ("t", "t")]
    self.assertEqual(graph.alignments(), segments)

def test_case_long_six(self):
    """Case six on a longer input: expect "esesse" -> "sesese"."""
    graph = self.trans_six("esesse")
    self.assertEqual(graph.output_string, "sesese")
    # Ensure that *minimal* monotonic segments are output
    minimal_segments = [("es", "se"), ("es", "se"), ("s", "s"), ("e", "e")]
    self.assertEqual(graph.alignments(), minimal_segments)

def test_case_seven(self):
    """Case seven: the "as written" variant is an identity; the other gives "tesht"."""
    # First the "as written" transducer: expect the input back unchanged.
    identity_graph = self.test_seven_as_written("test")
    self.assertEqual(identity_graph.output_string, "test")
    identity_edges = [(0, 0), (1, 1), (2, 2), (3, 3)]
    self.assertEqual(identity_graph.edges, identity_edges)
    identity_segments = [("t", "t"), ("e", "e"), ("s", "s"), ("t", "t")]
    self.assertEqual(identity_graph.alignments(), identity_segments)

    # Then the second transducer: expect "s" to expand to "sh".
    graph = self.trans_seven("test")
    self.assertEqual(graph.output_string, "tesht")
    expanded_edges = [(0, 0), (1, 1), (2, 2), (2, 3), (3, 4)]
    self.assertEqual(graph.edges, expanded_edges)
    expanded_segments = [("t", "t"), ("e", "e"), ("s", "sh"), ("t", "t")]
    self.assertEqual(graph.alignments(), expanded_segments)

def test_case_eight(self):
    """Case eight: expect "test" -> "chess", with "e" aligned to "he"."""
    graph = self.trans_eight("test")
    self.assertEqual(graph.output_string, "chess")
    # Input 1 fans out to outputs 1 and 2.
    expected_edges = [(0, 0), (1, 1), (1, 2), (2, 3), (3, 4)]
    self.assertEqual(graph.edges, expected_edges)
    segments = [("t", "c"), ("e", "he"), ("s", "s"), ("t", "s")]
    self.assertEqual(graph.alignments(), segments)

def test_case_nine(self):
    """Case nine: expect "aa" to be deleted entirely (empty output)."""
    graph = self.trans_nine("aa")
    self.assertEqual(graph.output_string, "")
    # Deleted inputs are expected to carry None as their output index.
    deleted_edges = [(0, None), (1, None)]
    self.assertEqual(graph.edges, deleted_edges)
    # Support deletions in alignments
    deleted_segment = [("aa", "")]
    self.assertEqual(graph.alignments(), deleted_segment)

def test_case_ten(self):
    """Case ten: expect "abc" to collapse onto the single output "a"."""
    graph = self.trans_ten("abc")
    self.assertEqual(graph.output_string, "a")
    # All three inputs point at output 0.
    collapsed_edges = [(0, 0), (1, 0), (2, 0)]
    self.assertEqual(graph.edges, collapsed_edges)
    collapsed_segment = [("abc", "a")]
    self.assertEqual(graph.alignments(), collapsed_segment)

def test_case_eleven(self):
    """Case eleven: expect the single input "a" to expand to "aaaa"."""
    graph = self.trans_eleven("a")
    self.assertEqual(graph.output_string, "aaaa")
    # Input 0 fans out to all four outputs.
    fan_out_edges = [(0, 0), (0, 1), (0, 2), (0, 3)]
    self.assertEqual(graph.edges, fan_out_edges)
    expanded_segment = [("a", "aaaa")]
    self.assertEqual(graph.alignments(), expanded_segment)

def test_case_twelve(self):
# Case twelve: a rule whose "in" is the empty string.
# Empty inputs are not allowed (should it actually throw an exception?)
# assertLogs() with no arguments captures root-logger records at INFO or
# above and fails the test if nothing is logged at all.
with self.assertLogs() as cm:
self.test_mapping_twelve = Mapping(
[{"in": "", "out": "aa", "context_before": "b"}]
)
# The mapping and transducer are stored on the test instance, not in locals.
self.trans_twelve = Transducer(self.test_mapping_twelve)
transducer = self.trans_twelve("b")
# Only the first captured log message is inspected.
self.assertIn(
"disallowed",
cm.output[0],
"it should warn that empty inputs are disallowed",
)
# The offending rule is expected to have no effect: "b" comes back unchanged.
self.assertEqual(transducer.output_string, "b")

def test_case_acdc(self):
    """Explicit-index rule: expect "acdc" -> "cacdc", with only "ac" rewritten."""
    rule = {"in": "a{1}c{2}", "out": "c{2}a{1}c{2}"}
    transducer = Transducer(Mapping([rule]))
    graph = transducer("acdc")
    self.assertEqual(graph.output_string, "cacdc")
    expected_edges = [(0, 1), (1, 0), (1, 2), (2, 3), (3, 4)]
    self.assertEqual(graph.edges, expected_edges)
    # The trailing "d" and "c" are expected to align one-to-one.
    segments = [("ac", "cac"), ("d", "d"), ("c", "c")]
    self.assertEqual(graph.alignments(), segments)

def test_case_acac(self):
transducer = Transducer(Mapping([{"in": "ab{1}c{2}", "out": "ab{2}"}]))
Expand All @@ -476,6 +541,7 @@ def test_case_acac(self):
(5, 3),
],
)
self.assertEqual(tg.alignments(), [("abcab", "ab"), ("c", "ab")])
tg_default = transducer_default("abcabc")
self.assertEqual(tg_default.output_string, "abab")
self.assertEqual(
Expand All @@ -491,6 +557,7 @@ def test_case_acac(self):
(5, 3),
],
)
self.assertEqual(tg_default.alignments(), [("abcab", "ab"), ("c", "ab")])

def test_arpabet(self):
transducer = Transducer(
Expand Down Expand Up @@ -518,6 +585,7 @@ def test_arpabet(self):
(1, 9),
],
)
self.assertEqual(tg.alignments(), [("ĩ", "IY N "), ("ĩ", "IY N ")])
self.assertEqual(
tg_nfd.edges,
[
Expand All @@ -533,6 +601,9 @@ def test_arpabet(self):
(3, 9),
],
)
self.assertEqual(
tg_nfd.alignments(), [("i", "I"), ("̃", "Y N "), ("i", "I"), ("̃", "Y N ")]
)


if __name__ == "__main__":
Expand Down
2 changes: 1 addition & 1 deletion g2p/tests/test_lexicon_transducer.py
Original file line number Diff line number Diff line change
Expand Up @@ -106,7 +106,7 @@ def test_eng_lexicon(self):
pe = tg.pretty_edges()
self.assertEqual(
pe,
[["x", "ɛ"], ["x", "k"], ["x", "s"], ["t", "t"], ["r", "ɹ"], ["a", "ʌ"]],
[("x", "ɛ"), ("x", "k"), ("x", "s"), ("t", "t"), ("r", "ɹ"), ("a", "ʌ")],
)

def test_eng_transducer(self):
Expand Down
3 changes: 3 additions & 0 deletions g2p/tests/test_tokenize_and_map.py
Original file line number Diff line number Diff line change
Expand Up @@ -38,6 +38,9 @@ def test_tok_and_map_mic(self):
def test_tokenizing_transducer(self):
ref_word_ipa = g2p.make_g2p("mic", "mic-ipa")("sq").output_string
transducer = g2p.make_g2p("mic", "mic-ipa", tok_lang="mic")
self.assertEqual(transducer.transducer.in_lang, transducer.in_lang)
self.assertEqual(transducer.transducer.out_lang, transducer.out_lang)
self.assertEqual(transducer.transducer, transducer.transducers[0])
word_ipa = transducer("sq").output_string
self.assertEqual(word_ipa, ref_word_ipa)
string_ipa = transducer(self.contextualize("sq")).output_string
Expand Down
97 changes: 92 additions & 5 deletions g2p/tests/test_transducer.py
Original file line number Diff line number Diff line change
Expand Up @@ -14,8 +14,12 @@ class TransducerTest(TestCase):
@classmethod
def setUpClass(cls):
cls.test_mapping_moh = Mapping(in_lang="moh-equiv", out_lang="moh-ipa")
cls.test_mapping = Mapping([{"in": "a", "out": "b"}])
cls.test_mapping_rev = Mapping([{"in": "a", "out": "b"}], reverse=True)
cls.test_mapping = Mapping(
[{"in": "a", "out": "b"}], in_lang="spam", out_lang="eggs"
)
cls.test_mapping_rev = Mapping(
[{"in": "a", "out": "b"}], reverse=True, in_lang="eggs", out_lang="parrot"
)
cls.test_mapping_ordered_feed = Mapping(
[{"in": "a", "out": "b"}, {"in": "b", "out": "c"}]
)
Expand Down Expand Up @@ -69,10 +73,91 @@ def setUpClass(cls):
Mapping(os.path.join(PUBLIC_DIR, "mappings", "deletion_config_json.yaml"))
)

def test_properties(self):
    """Check in_lang, out_lang and transducers on simple and composite transducers."""
    # A simple transducer is expected to list itself as its only component.
    simple = self.test_trans
    self.assertEqual("spam", simple.in_lang)
    self.assertEqual("eggs", simple.out_lang)
    self.assertEqual([simple], simple.transducers)

    # A composite lists its parts in order; its languages are expected to be
    # the first part's input and the last part's output.
    expected_parts = [self.test_trans, self.test_trans_rev]
    composite = self.test_trans_composite
    self.assertEqual(expected_parts, composite.transducers)
    self.assertEqual("spam", composite.in_lang)
    self.assertEqual("parrot", composite.out_lang)

def test_graph_properties(self):
"""Test all the basic properties of graphs."""
# --- Read access on a freshly produced graph ---
tg = self.test_trans("abab")
self.assertEqual("abab", tg.input_string)
self.assertEqual("bbbb", tg.output_string)
self.assertEqual(1, len(tg.tiers))
# Nodes are expected as (index, character) pairs.
self.assertEqual([(0, "a"), (1, "b"), (2, "a"), (3, "b")], tg.input_nodes)
self.assertEqual([(0, "b"), (1, "b"), (2, "b"), (3, "b")], tg.output_nodes)
self.assertEqual([(0, 0), (1, 1), (2, 2), (3, 3)], tg.edges)
# pretty_edges() is expected as a list of lists; here there is a single inner list.
self.assertEqual(
[[("a", "b"), ("b", "b"), ("a", "b"), ("b", "b")]], tg.pretty_edges()
)
# NOTE(review): the expected inner length of 2 presumably reflects how many
# rule applications were recorded for "abab" -- confirm against the debugger format.
self.assertEqual(1, len(tg.debugger))
self.assertEqual(2, len(tg.debugger[0]))
# --- Assignable properties: setting the strings is expected to update the nodes ---
tg.input_string = "bbbb"
self.assertEqual([(0, "b"), (1, "b"), (2, "b"), (3, "b")], tg.input_nodes)
tg.output_string = "baba"
self.assertEqual([(0, "b"), (1, "a"), (2, "b"), (3, "a")], tg.output_nodes)
tg.edges = [(0, 1), (1, 0), (2, 3), (3, 2)]
self.assertEqual([(0, 1), (1, 0), (2, 3), (3, 2)], tg.edges)
tg.debugger = [["spam", "spam", "spam", "spam"]]
self.assertEqual(1, len(tg.debugger))
self.assertEqual(4, len(tg.debugger[0]))
# --- Properties that must reject assignment with ValueError ---
with self.assertRaises(ValueError):
tg.input_nodes = ("foo", "bar", "baz")
with self.assertRaises(ValueError):
tg.output_nodes = ("foo", "bar", "baz")
with self.assertRaises(ValueError):
tg.tiers = ["spam", "spam", "eggs", "spam"]
# --- In-place addition: adding a graph to itself is expected to double both strings ---
tg = self.test_trans("abab")
tg += tg
self.assertEqual("abababab", tg.input_string)
self.assertEqual("bbbbbbbb", tg.output_string)

def test_composite_graph_properties(self):
"""Test all the basic properties of composite graphs."""
# --- Read access on a freshly produced composite graph ---
ctg = self.test_trans_composite("aba")
self.assertEqual("aba", ctg.input_string)
self.assertEqual("aaa", ctg.output_string)
# Two tiers are expected for this composite.
self.assertEqual(2, len(ctg.tiers))
self.assertEqual([(0, "a"), (1, "b"), (2, "a")], ctg.input_nodes)
self.assertEqual([(0, "a"), (1, "a"), (2, "a")], ctg.output_nodes)
# Top-level edges link the original input to the final output.
self.assertEqual([(0, 0), (1, 1), (2, 2)], ctg.edges)
# pretty_edges() is expected to hold one inner list per tier, in order.
self.assertEqual(
[
[("a", "b"), ("b", "b"), ("a", "b")],
[("b", "a"), ("b", "a"), ("b", "a")],
],
ctg.pretty_edges(),
)
# The debugger is expected to have one entry per tier.
self.assertEqual(len(ctg.tiers), len(ctg.debugger))
# --- Assignable properties: setting the strings is expected to update the nodes ---
ctg.input_string = "bbbb"
self.assertEqual([(0, "b"), (1, "b"), (2, "b"), (3, "b")], ctg.input_nodes)
ctg.output_string = "baba"
self.assertEqual([(0, "b"), (1, "a"), (2, "b"), (3, "a")], ctg.output_nodes)
ctg.debugger = [["spam", "spam", "spam", "spam"]]
self.assertEqual(1, len(ctg.debugger))
self.assertEqual(4, len(ctg.debugger[0]))
# --- Properties that must reject assignment with ValueError ---
# On a composite graph, assigning edges is also expected to be rejected.
with self.assertRaises(ValueError):
ctg.edges = [(0, 1), (1, 0), (2, 3), (3, 2)]
with self.assertRaises(ValueError):
ctg.input_nodes = ("foo", "bar", "baz")
with self.assertRaises(ValueError):
ctg.output_nodes = ("foo", "bar", "baz")
with self.assertRaises(ValueError):
ctg.tiers = ["spam", "spam", "eggs", "spam"]
# --- In-place addition: adding a graph to itself is expected to double both strings ---
ctg = self.test_trans_composite("aba")
ctg += ctg
self.assertEqual("abaaba", ctg.input_string)
self.assertEqual("aaaaaa", ctg.output_string)

def test_ordered(self):
transducer_i_feed = self.test_trans_ordered_feed("a")
transducer_feed = self.test_trans_ordered_feed("a")
transducer_i_counter_feed = self.test_trans_ordered_counter_feed("a")
transducer_counter_feed = self.test_trans_ordered_counter_feed("a")
# These should feed b -> c
self.assertEqual(transducer_feed.output_string, "c")
Expand Down Expand Up @@ -117,7 +202,9 @@ def test_regex_set(self):
self.assertEqual(self.test_regex_set_transducer("fa").output_string, "fb")

def test_deletion(self):
    """Check that deleting "a" yields an empty output string.

    The main deletion transducer is also checked for its edge rendering.
    The ``_csv`` and ``_json`` variants (presumably the same rule loaded
    from other config formats) are only checked for their output string.
    """
    # Transduce once and reuse the graph; the earlier standalone assertion on
    # the same call was a duplicate of the output_string check below.
    tg = self.test_deletion_transducer("a")
    self.assertEqual(tg.output_string, "")
    # A deleted input character is expected to pair with None.
    self.assertEqual(tg.pretty_edges(), [[("a", None)]])
    self.assertEqual(self.test_deletion_transducer_csv("a").output_string, "")
    self.assertEqual(self.test_deletion_transducer_json("a").output_string, "")

Expand Down
Loading

0 comments on commit 76f85dd

Please sign in to comment.