diff --git a/src/access_nri_intake/catalog/translators.py b/src/access_nri_intake/catalog/translators.py
index c048d0a..e547296 100644
--- a/src/access_nri_intake/catalog/translators.py
+++ b/src/access_nri_intake/catalog/translators.py
@@ -192,6 +192,36 @@ def _unique_values(series):
 
         return df[self.columns]  # Preserve ordering
 
+    def set_dispatch(
+        self, core_colname: str, func: Callable, input_name: Optional[str] = None
+    ):
+        """
+        Set a dispatch function for a column. Typically only required when either:
+            1. `core_colname != input_name`
+            2. A custom translation function (`func`) is required.
+
+        Parameters
+        ----------
+        core_colname: str
+            The core column name to translate to
+        func: callable
+            The function to translate the column
+        input_name: str, optional
+            The name of the column in the source. Defaults to None, which is
+            stored unchanged as the dispatch key for `core_colname`
+
+        Raises
+        ------
+        ValueError
+            If `core_colname` is not one of 'model', 'realm', 'frequency', 'variable'
+        """
+        if core_colname not in ["model", "realm", "frequency", "variable"]:
+            raise ValueError(
+                "'core_colname' must be one of 'model', 'realm', 'frequency', 'variable'"
+            )
+        self._dispatch[core_colname] = func
+        setattr(self._dispatch_keys, core_colname, input_name)
+
     def _realm_translator(self) -> pd.Series:
         """
         Return realm, fixing a few issues
@@ -240,16 +270,21 @@ def __init__(self, source, columns):
         """
 
         super().__init__(source, columns)
-        self._dispatch["model"] = self._model_translator
-        self._dispatch["realm"] = self._realm_translator
-        self._dispatch["frequency"] = self._frequency_translator
-        self._dispatch["variable"] = self._variable_translator
-
-        self._dispatch_keys = _DispatchKeys(
-            model="source_id",
-            realm="realm",
-            frequency="frequency",
-            variable="variable_id",
+        self.set_dispatch(
+            input_name="source_id", core_colname="model", func=super()._model_translator
+        )
+        self.set_dispatch(
+            input_name="realm", core_colname="realm", func=super()._realm_translator
+        )
+        self.set_dispatch(
+            input_name="frequency",
+            core_colname="frequency",
+            func=super()._frequency_translator,
+        )
+        self.set_dispatch(
+            input_name="variable_id",
+            core_colname="variable",
+            func=super()._variable_translator,
         )
 
 
@@ -271,16 +306,21 @@ def __init__(self, source, columns):
         """
 
         super().__init__(source, columns)
-        self._dispatch["model"] = self._model_translator
-        self._dispatch["realm"] = self._realm_translator
-        self._dispatch["frequency"] = self._frequency_translator
-        self._dispatch["variable"] = self._variable_translator
-
-        self._dispatch_keys = _DispatchKeys(
-            model="model",
-            realm="realm",
-            frequency="frequency",
-            variable="variable",
+        self.set_dispatch(
+            input_name="model", core_colname="model", func=super()._model_translator
+        )
+        self.set_dispatch(
+            input_name="realm", core_colname="realm", func=super()._realm_translator
+        )
+        self.set_dispatch(
+            input_name="frequency",
+            core_colname="frequency",
+            func=super()._frequency_translator,
+        )
+        self.set_dispatch(
+            input_name="variable",
+            core_colname="variable",
+            func=super()._variable_translator,
         )
 
 
@@ -302,17 +342,11 @@ def __init__(self, source, columns):
         """
 
         super().__init__(source, columns)
-        self._dispatch["variable"] = self._variable_translator
-        self._dispatch_keys = _DispatchKeys(variable="variable")
-
-    def _realm_translator(self) -> pd.Series:
-        raise AttributeError(
-            f"{self.__class__.__name__}: 'realm' does not require translation"
-        )
 
-    def _frequency_translator(self) -> pd.Series:
-        raise AttributeError(
-            f"{self.__class__.__name__}: 'data' does not require translation"
+        self.set_dispatch(
+            input_name="variable",
+            core_colname="variable",
+            func=super()._variable_translator,
         )
 
 
@@ -334,15 +368,21 @@ def __init__(self, source, columns):
         """
 
         super().__init__(source, columns)
-        self._dispatch["model"] = self._model_translator
-        self._dispatch["realm"] = self._realm_translator
-        self._dispatch["frequency"] = self._frequency_translator
-        self._dispatch["variable"] = self._variable_translator
-        self._dispatch_keys = _DispatchKeys(
-            model="source_id",
-            realm="realm",
-            variable="variable_id",
-            frequency="freq",
+        self.set_dispatch(
+            input_name="source_id", core_colname="model", func=super()._model_translator
+        )
+        self.set_dispatch(
+            input_name="realm", core_colname="realm", func=self._realm_translator
+        )
+        self.set_dispatch(
+            input_name="freq",
+            core_colname="frequency",
+            func=super()._frequency_translator,
+        )
+        self.set_dispatch(
+            input_name="variable_id",
+            core_colname="variable",
+            func=super()._variable_translator,
         )
 
     def _realm_translator(self):
@@ -370,16 +410,16 @@ def __init__(self, source, columns):
         """
 
         super().__init__(source, columns)
-        self._dispatch["model"] = self._model_translator
-        self._dispatch["frequency"] = self._frequency_translator
-        self._dispatch["variable"] = self._variable_translator
-        self._dispatch["realm"] = self._variable_translator
-
-        self._dispatch_keys = _DispatchKeys(
-            model="source_id",
-            frequency="frequency",
-            variable="variable_id",
-            realm="realm",
+        self.set_dispatch(
+            input_name="source_id", core_colname="model", func=super()._model_translator
+        )
+        self.set_dispatch(
+            input_name="variable_id",
+            core_colname="variable",
+            func=super()._variable_translator,
+        )
+        self.set_dispatch(
+            input_name="realm", core_colname="realm", func=self._realm_translator
         )
 
     def _realm_translator(self):
diff --git a/tests/test_translators.py b/tests/test_translators.py
index aba1c88..29b422b 100644
--- a/tests/test_translators.py
+++ b/tests/test_translators.py
@@ -271,11 +271,7 @@ def test_BarpaTranslator(test_data, groupby, n_entries):
 
 @pytest.mark.parametrize(
     "groupby, n_entries",
-    [
-        (None, 5),
-        (["variable"], 4),
-        (["frequency"], 2),
-    ],
+    [(None, 5), (["variable"], 4), (["frequency"], 2), (["realm"], 1)],
 )
 def test_CordexTranslator(test_data, groupby, n_entries):
     """Test CORDEX datastore translator"""