From 6d7e3727c3b71cce0af5fa92511d6a2ec5fda3c1 Mon Sep 17 00:00:00 2001 From: Mohamed Abdel Wedoud Date: Tue, 6 Aug 2024 21:32:32 +0200 Subject: [PATCH] feat(aggregation-api): use columns regex only for `details-XXX`, remove case sensitiveness --- .../study/business/aggregator_management.py | 27 ++++++++++++------- antarest/study/service.py | 12 +++------ antarest/study/web/raw_studies_blueprint.py | 20 +++----------- 3 files changed, 26 insertions(+), 33 deletions(-) diff --git a/antarest/study/business/aggregator_management.py b/antarest/study/business/aggregator_management.py index cafacef0a7..23f2a5fecc 100644 --- a/antarest/study/business/aggregator_management.py +++ b/antarest/study/business/aggregator_management.py @@ -80,7 +80,6 @@ def __init__( frequency: MatrixFrequency, ids_to_consider: t.Sequence[str], columns_names: t.Sequence[str], - columns_regexes: t.Sequence[str], mc_years: t.Optional[t.Sequence[int]] = None, ): self.study_path: Path = study_path @@ -91,7 +90,6 @@ def __init__( self.frequency: MatrixFrequency = frequency self.mc_years: t.Optional[t.Sequence[int]] = mc_years self.columns_names: t.Sequence[str] = columns_names - self.columns_regexes: t.Sequence[str] = columns_regexes self.ids_to_consider: t.Sequence[str] = ids_to_consider self.output_type = ( "areas" @@ -199,24 +197,35 @@ def _gather_all_files_to_consider__all(self) -> t.Sequence[Path]: ] return all_output_files - def columns_filtering(self, df: pd.DataFrame) -> pd.DataFrame: + def columns_filtering(self, df: pd.DataFrame, is_details: bool) -> pd.DataFrame: # columns filtering - if self.columns_names: - filtered_columns = [c for c in df.columns.tolist() if c in self.columns_names] - df = df.loc[:, filtered_columns] - if self.columns_regexes: - filtered_columns = [c for c in df.columns.tolist() if any(regex in c for regex in self.columns_regexes)] + lower_case_columns = [c.lower() for c in self.columns_names] + if lower_case_columns: + if is_details: + filtered_columns = [ + c for c in 
df.columns.tolist() if any(regex in c.lower() for regex in lower_case_columns) + ] + else: + filtered_columns = [c for c in df.columns.tolist() if c.lower() in lower_case_columns] df = df.loc[:, filtered_columns] return df def _build_dataframe(self, files: t.Sequence[Path], horizon: int) -> pd.DataFrame: + is_details = self.query_file in [ + MCIndAreasQueryFile.DETAILS, + MCAllAreasQueryFile.DETAILS, + MCIndAreasQueryFile.DETAILS_ST_STORAGE, + MCAllAreasQueryFile.DETAILS_ST_STORAGE, + MCIndAreasQueryFile.DETAILS_RES, + MCAllAreasQueryFile.DETAILS_RES, + ] final_df = pd.DataFrame() nb_files = len(files) for k, file_path in enumerate(files): df = self._parse_output_file(file_path) # columns filtering - df = self.columns_filtering(df) + df = self.columns_filtering(df, is_details) # if no columns, no need to continue list_of_df_columns = df.columns.tolist() diff --git a/antarest/study/service.py b/antarest/study/service.py index a4301cd9b9..9624eb9228 100644 --- a/antarest/study/service.py +++ b/antarest/study/service.py @@ -341,7 +341,6 @@ def aggregate_output_data( frequency: MatrixFrequency, mc_years: t.Sequence[int], columns_names: t.Sequence[str], - columns_regexes: t.Sequence[str], ids_to_consider: t.Sequence[str], params: RequestParameters, ) -> pd.DataFrame: @@ -353,8 +352,7 @@ def aggregate_output_data( query_file: which types of data to retrieve ("values", "details", "details-st-storage", "details-res") frequency: yearly, monthly, weekly, daily or hourly. 
mc_years: list of monte-carlo years, if empty, all years are selected - columns_names: columns to be selected, if empty, all columns are selected - columns_regexes: columns to be selected using list of regexes, if empty, all columns are selected + columns_names: substrings (if details) or column names, case-insensitive; if empty, all columns are selected ids_to_consider: list of areas or links ids to consider, if empty, all areas are selected params: request parameters @@ -366,7 +364,7 @@ def aggregate_output_data( study_path = self.storage_service.raw_study_service.get_study_path(study) # fmt: off aggregator_manager = AggregatorManager(study_path, output_id, query_file, frequency, ids_to_consider, - columns_names, columns_regexes, mc_years) + columns_names, mc_years) # fmt: on return aggregator_manager.aggregate_output_data() @@ -377,7 +375,6 @@ def aggregate_output_data__all( query_file: t.Union[MCAllAreasQueryFile, MCAllLinksQueryFile], frequency: MatrixFrequency, columns_names: t.Sequence[str], - columns_regexes: t.Sequence[str], ids_to_consider: t.Sequence[str], params: RequestParameters, ) -> pd.DataFrame: @@ -388,8 +385,7 @@ def aggregate_output_data__all( output_id: simulation output ID query_file: which types of data to retrieve ("values", "details", "details-st-storage", "details-res") frequency: yearly, monthly, weekly, daily or hourly. 
- columns_names: columns to be selected, if empty, all columns are selected - columns_regexes: columns to be selected using list of regexes, if empty, all columns are selected + columns_names: substrings (if details) or column names, case-insensitive; if empty, all columns are selected ids_to_consider: list of areas or links ids to consider, if empty, all areas are selected params: request parameters @@ -401,7 +397,7 @@ def aggregate_output_data__all( study_path = self.storage_service.raw_study_service.get_study_path(study) # fmt: off aggregator_manager = AggregatorManager(study_path, output_id, query_file, frequency, ids_to_consider, - columns_names, columns_regexes) + columns_names) # fmt: on return aggregator_manager.aggregate_output_data__all() diff --git a/antarest/study/web/raw_studies_blueprint.py b/antarest/study/web/raw_studies_blueprint.py index f36be17099..43bedebc01 100644 --- a/antarest/study/web/raw_studies_blueprint.py +++ b/antarest/study/web/raw_studies_blueprint.py @@ -192,7 +192,6 @@ def aggregate_areas_raw_data( mc_years: str = "", areas_ids: str = "", columns_names: str = "", - columns_regexes: str = "", export_format: TableExportFormat = DEFAULT_EXPORT_FORMAT, # type: ignore current_user: JWTUser = Depends(auth.get_current_user), ) -> FileResponse: @@ -207,8 +206,7 @@ def aggregate_areas_raw_data( - `frequency`: "hourly", "daily", "weekly", "monthly", "annual" - `mc_years`: which Monte Carlo years to be selected. If empty, all are selected (comma separated) - `areas_ids`: which areas to be selected. If empty, all are selected (comma separated) - - `columns_names`: which columns to be selected. If empty, all are selected (comma separated) - - `columns_regexes`: which columns to be selected using comma separated regexes. If empty, all are selected + - `columns_names`: substrings (if details) or column names, case-insensitive. If empty, all are selected (comma separated) - `export_format`: Returned file format (csv by default). 
Returns: @@ -232,7 +230,6 @@ def aggregate_areas_raw_data( frequency=frequency, mc_years=[int(mc_year) for mc_year in _split_comma_separated_values(mc_years)], columns_names=_split_comma_separated_values(columns_names), - columns_regexes=_split_comma_separated_values(columns_regexes), ids_to_consider=_split_comma_separated_values(areas_ids), params=parameters, ) @@ -264,7 +261,6 @@ def aggregate_links_raw_data( mc_years: str = "", links_ids: str = "", columns_names: str = "", - columns_regexes: str = "", export_format: TableExportFormat = DEFAULT_EXPORT_FORMAT, # type: ignore current_user: JWTUser = Depends(auth.get_current_user), ) -> FileResponse: @@ -278,8 +274,7 @@ def aggregate_links_raw_data( - `frequency`: "hourly", "daily", "weekly", "monthly", "annual" - `mc_years`: which Monte Carlo years to be selected. If empty, all are selected (comma separated) - `links_ids`: which links to be selected (ex: "be - fr"). If empty, all are selected (comma separated) - - `columns_names`: which columns to be selected. If empty, all are selected (comma separated) - - `columns_regexes`: which columns to be selected using comma separated regexes. If empty, all are selected + - `columns_names`: substrings (if details) or column names, case-insensitive. If empty, all are selected (comma separated) - `export_format`: Returned file format (csv by default). 
Returns: @@ -303,7 +298,6 @@ def aggregate_links_raw_data( frequency=frequency, mc_years=[int(mc_year) for mc_year in _split_comma_separated_values(mc_years)], columns_names=_split_comma_separated_values(columns_names), - columns_regexes=_split_comma_separated_values(columns_regexes), ids_to_consider=_split_comma_separated_values(links_ids), params=parameters, ) @@ -334,7 +328,6 @@ def aggregate_areas_raw_data__all( frequency: MatrixFrequency, areas_ids: str = "", columns_names: str = "", - columns_regexes: str = "", export_format: TableExportFormat = DEFAULT_EXPORT_FORMAT, # type: ignore current_user: JWTUser = Depends(auth.get_current_user), ) -> FileResponse: @@ -348,8 +341,7 @@ def aggregate_areas_raw_data__all( - `query_file`: "values", "details", "details-STstorage", "details-res" - `frequency`: "hourly", "daily", "weekly", "monthly", "annual" - `areas_ids`: which areas to be selected. If empty, all are selected (comma separated) - - `columns_names`: which columns to be selected. If empty, all are selected (comma separated) - - `columns_regexes`: which columns to be selected using comma separated regexes. If empty, all are selected + - `columns_names`: substrings (if details) or column names, case-insensitive. If empty, all are selected (comma separated) - `export_format`: Returned file format (csv by default). 
Returns: @@ -372,7 +364,6 @@ def aggregate_areas_raw_data__all( query_file=query_file, frequency=frequency, columns_names=_split_comma_separated_values(columns_names), - columns_regexes=_split_comma_separated_values(columns_regexes), ids_to_consider=_split_comma_separated_values(areas_ids), params=parameters, ) @@ -403,7 +394,6 @@ def aggregate_links_raw_data__all( frequency: MatrixFrequency, links_ids: str = "", columns_names: str = "", - columns_regexes: str = "", export_format: TableExportFormat = DEFAULT_EXPORT_FORMAT, # type: ignore current_user: JWTUser = Depends(auth.get_current_user), ) -> FileResponse: @@ -416,8 +406,7 @@ def aggregate_links_raw_data__all( - `query_file`: "values" (currently the only available option) - `frequency`: "hourly", "daily", "weekly", "monthly", "annual" - `links_ids`: which links to be selected (ex: "be - fr"). If empty, all are selected (comma separated) - - `columns_names`: which columns to be selected. If empty, all are selected (comma separated) - - `columns_regexes`: which columns to be selected using comma separated regexes. If empty, all are selected + - `columns_names`: substrings (if details) or column names, case-insensitive. If empty, all are selected (comma separated) - `export_format`: Returned file format (csv by default). Returns: @@ -440,7 +429,6 @@ def aggregate_links_raw_data__all( query_file=query_file, frequency=frequency, columns_names=_split_comma_separated_values(columns_names), - columns_regexes=_split_comma_separated_values(columns_regexes), ids_to_consider=_split_comma_separated_values(links_ids), params=parameters, )