Skip to content

Commit

Permalink
fix(update): prevent loading all the mappings multiple times
Browse files Browse the repository at this point in the history
  • Loading branch information
roedoejet committed Sep 9, 2023
1 parent 5b259ff commit 5ccd595
Show file tree
Hide file tree
Showing 3 changed files with 38 additions and 49 deletions.
68 changes: 34 additions & 34 deletions g2p/mappings/__init__.py
Original file line number Diff line number Diff line change
Expand Up @@ -42,40 +42,6 @@
class Mapping(_MappingModelDefinition):
"""Class for lookup tables"""

@staticmethod
def find_mapping(
    in_lang: Union[None, str] = None, out_lang: Union[None, str] = None
) -> "Mapping":
    """Return a copy of the available mapping that goes from in_lang to out_lang.

    MAPPINGS_AVAILABLE is scanned in order and the first entry whose
    ``in_lang`` and ``out_lang`` both match is copied and returned.

    Raises:
        exceptions.MappingMissing: if either language is None, or if no
            entry in MAPPINGS_AVAILABLE matches the requested pair.
    """
    if in_lang is not None and out_lang is not None:
        for candidate in MAPPINGS_AVAILABLE:
            if not (candidate.in_lang == in_lang and candidate.out_lang == out_lang):
                continue
            # Lexicon mappings only get a shallow model copy: their alignments
            # are big, so they must *not* be deep copied.
            is_lexicon = candidate.type == "lexicon"
            return candidate.model_copy() if is_lexicon else deepcopy(candidate)
    # Reached when a language is missing or nothing matched.
    raise exceptions.MappingMissing(in_lang, out_lang)

@staticmethod
def find_mapping_by_id(map_id: str) -> "Mapping":
    """Return a deep copy of the first available mapping whose ``id`` equals map_id.

    The ID is the "id" field found in the mapping itself, as in the
    "panphon_preprocessor" mapping.

    Raises:
        exceptions.MappingMissing: if no entry in MAPPINGS_AVAILABLE has that ID.
    """
    found = next(
        (candidate for candidate in MAPPINGS_AVAILABLE if candidate.id == map_id),
        None,
    )
    if found is None:
        raise exceptions.MappingMissing(map_id, None)
    return deepcopy(found)

@staticmethod
def load_mapping_from_path(path_to_mapping_config: Union[str, Path], index=0):
    """Load the mapping configuration at the given path and return one of its mappings.

    When the configuration holds more than one mapping, ``index`` selects
    which entry of its ``mappings`` is returned. The default, 0, returns
    the first one.
    """
    loaded_config = MappingConfig.load_mapping_config_from_path(path_to_mapping_config)
    available = loaded_config.mappings
    return available[index]

def model_post_init(self, *args, **kwargs) -> None:
"""After the model is constructed, we process the model specs by applying all the configuration to the rules (ie prevent feeding, unicode normalization etc..)"""
if self.type == MAPPING_TYPE.mapping or self.type is None:
Expand Down Expand Up @@ -118,6 +84,40 @@ def __getitem__(self, item):
)
)

@staticmethod
def find_mapping(
    in_lang: Union[None, str] = None, out_lang: Union[None, str] = None
) -> "Mapping":
    """Return a copy of the available mapping that goes from in_lang to out_lang.

    MAPPINGS_AVAILABLE is scanned in order and the first entry whose
    ``in_lang`` and ``out_lang`` both match is copied and returned.

    Raises:
        exceptions.MappingMissing: if either language is None, or if no
            entry in MAPPINGS_AVAILABLE matches the requested pair.
    """
    if in_lang is not None and out_lang is not None:
        for candidate in MAPPINGS_AVAILABLE:
            if not (candidate.in_lang == in_lang and candidate.out_lang == out_lang):
                continue
            # Lexicon mappings only get a shallow model copy: their alignments
            # are big, so they must *not* be deep copied.
            is_lexicon = candidate.type == "lexicon"
            return candidate.model_copy() if is_lexicon else deepcopy(candidate)
    # Reached when a language is missing or nothing matched.
    raise exceptions.MappingMissing(in_lang, out_lang)

@staticmethod
def find_mapping_by_id(map_id: str) -> "Mapping":
    """Return a deep copy of the first available mapping whose ``id`` equals map_id.

    The ID is the "id" field found in the mapping itself, as in the
    "panphon_preprocessor" mapping.

    Raises:
        exceptions.MappingMissing: if no entry in MAPPINGS_AVAILABLE has that ID.
    """
    found = next(
        (candidate for candidate in MAPPINGS_AVAILABLE if candidate.id == map_id),
        None,
    )
    if found is None:
        raise exceptions.MappingMissing(map_id, None)
    return deepcopy(found)

@staticmethod
def load_mapping_from_path(path_to_mapping_config: Union[str, Path], index=0):
    """Load the mapping configuration at the given path and return one of its mappings.

    When the configuration holds more than one mapping, ``index`` selects
    which entry of its ``mappings`` is returned. The default, 0, returns
    the first one.
    """
    loaded_config = MappingConfig.load_mapping_config_from_path(path_to_mapping_config)
    available = loaded_config.mappings
    return available[index]

@staticmethod
def _string_to_pua(string: str, offset: int) -> str:
"""Given an string of length n, and an offset m,
Expand Down
Binary file modified g2p/mappings/langs/langs.json.gz
Binary file not shown.
19 changes: 4 additions & 15 deletions g2p/mappings/langs/utils.py
Original file line number Diff line number Diff line change
Expand Up @@ -12,7 +12,6 @@
from networkx import DiGraph, write_gpickle
from networkx.algorithms.dag import ancestors, descendants

from g2p.exceptions import MalformedMapping
from g2p.log import LOGGER
from g2p.mappings import MAPPINGS_AVAILABLE, Mapping, MappingConfig
from g2p.mappings.langs import LANGS_DIR, LANGS_NETWORK, LANGS_NWORK_PATH, LANGS_PKL
Expand Down Expand Up @@ -164,20 +163,10 @@ def cache_langs(
mapping_config = MappingConfig.load_mapping_config_from_path(path)
# TODO: should put in some measure to prioritize non-generated
# mappings and warn when they override
for index, mapping in enumerate(mapping_config.mappings):
in_lang = mapping_config.mappings[index].in_lang
out_lang = mapping_config.mappings[index].out_lang
mappings_legal_pairs.append((in_lang, out_lang))

if not mapping.language_name:
raise MalformedMapping(
f"language_name missing in {path} from mapping "
f"from {in_lang} to {out_lang}"
)
mapping_config.mappings[index] = Mapping.load_mapping_from_path(path, index)
# Exclude the parent directory when caching
mapping_config.mappings[index].parent_dir = None
langs[code] = mapping_config.model_dump(exclude_none=True)
mappings_legal_pairs.extend(
[(mapping.in_lang, mapping.out_lang) for mapping in mapping_config.mappings]
)
langs[code] = mapping_config.export_to_dict()

# Save as a Directional Graph
lang_network = DiGraph()
Expand Down

0 comments on commit 5ccd595

Please sign in to comment.