
Commit

feat(aggregation-api): use columns regex only for details-XXX, remove case sensitiveness
mabw-rte committed Aug 6, 2024
1 parent 5891cbc commit 6d7e372
Showing 3 changed files with 26 additions and 33 deletions.
27 changes: 18 additions & 9 deletions antarest/study/business/aggregator_management.py
@@ -80,7 +80,6 @@ def __init__(
         frequency: MatrixFrequency,
         ids_to_consider: t.Sequence[str],
         columns_names: t.Sequence[str],
-        columns_regexes: t.Sequence[str],
         mc_years: t.Optional[t.Sequence[int]] = None,
     ):
         self.study_path: Path = study_path
@@ -91,7 +90,6 @@ def __init__(
         self.frequency: MatrixFrequency = frequency
         self.mc_years: t.Optional[t.Sequence[int]] = mc_years
         self.columns_names: t.Sequence[str] = columns_names
-        self.columns_regexes: t.Sequence[str] = columns_regexes
         self.ids_to_consider: t.Sequence[str] = ids_to_consider
         self.output_type = (
             "areas"
@@ -199,24 +197,35 @@ def _gather_all_files_to_consider__all(self) -> t.Sequence[Path]:
         ]
         return all_output_files

-    def columns_filtering(self, df: pd.DataFrame) -> pd.DataFrame:
+    def columns_filtering(self, df: pd.DataFrame, is_details: bool) -> pd.DataFrame:
         # columns filtering
-        if self.columns_names:
-            filtered_columns = [c for c in df.columns.tolist() if c in self.columns_names]
-            df = df.loc[:, filtered_columns]
-        if self.columns_regexes:
-            filtered_columns = [c for c in df.columns.tolist() if any(regex in c for regex in self.columns_regexes)]
+        lower_case_columns = [c.lower() for c in self.columns_names]
+        if lower_case_columns:
+            if is_details:
+                filtered_columns = [
+                    c for c in df.columns.tolist() if any(regex in c.lower() for regex in lower_case_columns)
+                ]
+            else:
+                filtered_columns = [c for c in df.columns.tolist() if c.lower() in lower_case_columns]
             df = df.loc[:, filtered_columns]
         return df

     def _build_dataframe(self, files: t.Sequence[Path], horizon: int) -> pd.DataFrame:
+        is_details = self.query_file in [
+            MCIndAreasQueryFile.DETAILS,
+            MCAllAreasQueryFile.DETAILS,
+            MCIndAreasQueryFile.DETAILS_ST_STORAGE,
+            MCAllAreasQueryFile.DETAILS_ST_STORAGE,
+            MCIndAreasQueryFile.DETAILS_RES,
+            MCAllAreasQueryFile.DETAILS_RES,
+        ]
         final_df = pd.DataFrame()
         nb_files = len(files)
         for k, file_path in enumerate(files):
             df = self._parse_output_file(file_path)

             # columns filtering
-            df = self.columns_filtering(df)
+            df = self.columns_filtering(df, is_details)

             # if no columns, no need to continue
             list_of_df_columns = df.columns.tolist()
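
For reference, a minimal, self-contained sketch of the filtering rule introduced above (the sample column names and requested patterns are invented for the example): for details-* outputs the requested names behave as case-insensitive substring patterns, while for other outputs they are matched as case-insensitive exact column names.

```python
import pandas as pd

# Hypothetical output columns, roughly in the shape Antares writes them.
df = pd.DataFrame([[1, 2, 3]], columns=["Battery_MWh", "PSP_open_MWh", "OV. COST"])

columns_names = ["battery", "psp_open"]  # user request, any case
lower_case_columns = [c.lower() for c in columns_names]

# details-* files: case-insensitive substring ("regex-like") match
details_selection = [c for c in df.columns if any(p in c.lower() for p in lower_case_columns)]
print(details_selection)  # ['Battery_MWh', 'PSP_open_MWh']

# other files (e.g. "values"): case-insensitive exact match
exact_selection = [c for c in df.columns if c.lower() in lower_case_columns]
print(exact_selection)  # []
```
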
12 changes: 4 additions & 8 deletions antarest/study/service.py
@@ -341,7 +341,6 @@ def aggregate_output_data(
         frequency: MatrixFrequency,
         mc_years: t.Sequence[int],
         columns_names: t.Sequence[str],
-        columns_regexes: t.Sequence[str],
         ids_to_consider: t.Sequence[str],
         params: RequestParameters,
     ) -> pd.DataFrame:
@@ -353,8 +352,7 @@
             query_file: which types of data to retrieve ("values", "details", "details-st-storage", "details-res")
             frequency: yearly, monthly, weekly, daily or hourly.
             mc_years: list of monte-carlo years, if empty, all years are selected
-            columns_names: columns to be selected, if empty, all columns are selected
-            columns_regexes: columns to be selected using list of regexes, if empty, all columns are selected
+            columns_names: regexes (if details) or columns to be selected, if empty, all columns are selected
             ids_to_consider: list of areas or links ids to consider, if empty, all areas are selected
             params: request parameters
@@ -366,7 +364,7 @@
         study_path = self.storage_service.raw_study_service.get_study_path(study)
         # fmt: off
         aggregator_manager = AggregatorManager(study_path, output_id, query_file, frequency, ids_to_consider,
-                                               columns_names, columns_regexes, mc_years)
+                                               columns_names, mc_years)
         # fmt: on
         return aggregator_manager.aggregate_output_data()

@@ -377,7 +375,6 @@ def aggregate_output_data__all(
         query_file: t.Union[MCAllAreasQueryFile, MCAllLinksQueryFile],
         frequency: MatrixFrequency,
         columns_names: t.Sequence[str],
-        columns_regexes: t.Sequence[str],
         ids_to_consider: t.Sequence[str],
         params: RequestParameters,
     ) -> pd.DataFrame:
@@ -388,8 +385,7 @@
             output_id: simulation output ID
             query_file: which types of data to retrieve ("values", "details", "details-st-storage", "details-res")
             frequency: yearly, monthly, weekly, daily or hourly.
-            columns_names: columns to be selected, if empty, all columns are selected
-            columns_regexes: columns to be selected using list of regexes, if empty, all columns are selected
+            columns_names: regexes (if details) or columns to be selected, if empty, all columns are selected
             ids_to_consider: list of areas or links ids to consider, if empty, all areas are selected
             params: request parameters
@@ -401,7 +397,7 @@
         study_path = self.storage_service.raw_study_service.get_study_path(study)
         # fmt: off
         aggregator_manager = AggregatorManager(study_path, output_id, query_file, frequency, ids_to_consider,
-                                               columns_names, columns_regexes)
+                                               columns_names)
         # fmt: on
         return aggregator_manager.aggregate_output_data__all()

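
As a usage sketch of the new constructor signature, the call below mirrors the positional order used in aggregate_output_data above (study_path, output_id, query_file, frequency, ids_to_consider, columns_names, mc_years). The path, output id and selected values are invented, and MatrixFrequency.HOURLY is an assumed member name; only MCIndAreasQueryFile.DETAILS appears in this diff.

```python
from pathlib import Path

from antarest.study.business.aggregator_management import AggregatorManager
# Imports for MCIndAreasQueryFile and MatrixFrequency are omitted: their module
# paths are not visible in this diff.

manager = AggregatorManager(
    Path("/studies/my_study"),    # study_path (invented)
    "20240806-1230eco",           # output_id (invented)
    MCIndAreasQueryFile.DETAILS,  # query_file: a details-* file
    MatrixFrequency.HOURLY,       # frequency (assumed member name)
    ["fr", "be"],                 # ids_to_consider
    ["battery", "psp_open"],      # columns_names: case-insensitive patterns for details files
    mc_years=[1, 2, 3],
)
df = manager.aggregate_output_data()
```
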
20 changes: 4 additions & 16 deletions antarest/study/web/raw_studies_blueprint.py
@@ -192,7 +192,6 @@ def aggregate_areas_raw_data(
         mc_years: str = "",
         areas_ids: str = "",
         columns_names: str = "",
-        columns_regexes: str = "",
         export_format: TableExportFormat = DEFAULT_EXPORT_FORMAT,  # type: ignore
         current_user: JWTUser = Depends(auth.get_current_user),
     ) -> FileResponse:
@@ -207,8 +206,7 @@
         - `frequency`: "hourly", "daily", "weekly", "monthly", "annual"
         - `mc_years`: which Monte Carlo years to be selected. If empty, all are selected (comma separated)
         - `areas_ids`: which areas to be selected. If empty, all are selected (comma separated)
-        - `columns_names`: which columns to be selected. If empty, all are selected (comma separated)
-        - `columns_regexes`: which columns to be selected using comma separated regexes. If empty, all are selected
+        - `columns_names`: regexes (if details) or columns to be selected, if empty, all columns are selected
         - `export_format`: Returned file format (csv by default).

         Returns:
@@ -232,7 +230,6 @@
             frequency=frequency,
             mc_years=[int(mc_year) for mc_year in _split_comma_separated_values(mc_years)],
             columns_names=_split_comma_separated_values(columns_names),
-            columns_regexes=_split_comma_separated_values(columns_regexes),
             ids_to_consider=_split_comma_separated_values(areas_ids),
             params=parameters,
         )
@@ -264,7 +261,6 @@ def aggregate_links_raw_data(
         mc_years: str = "",
         links_ids: str = "",
         columns_names: str = "",
-        columns_regexes: str = "",
         export_format: TableExportFormat = DEFAULT_EXPORT_FORMAT,  # type: ignore
         current_user: JWTUser = Depends(auth.get_current_user),
     ) -> FileResponse:
@@ -278,8 +274,7 @@
         - `frequency`: "hourly", "daily", "weekly", "monthly", "annual"
         - `mc_years`: which Monte Carlo years to be selected. If empty, all are selected (comma separated)
         - `links_ids`: which links to be selected (ex: "be - fr"). If empty, all are selected (comma separated)
-        - `columns_names`: which columns to be selected. If empty, all are selected (comma separated)
-        - `columns_regexes`: which columns to be selected using comma separated regexes. If empty, all are selected
+        - `columns_names`: regexes (if details) or columns to be selected, if empty, all columns are selected
         - `export_format`: Returned file format (csv by default).

         Returns:
@@ -303,7 +298,6 @@
             frequency=frequency,
             mc_years=[int(mc_year) for mc_year in _split_comma_separated_values(mc_years)],
             columns_names=_split_comma_separated_values(columns_names),
-            columns_regexes=_split_comma_separated_values(columns_regexes),
             ids_to_consider=_split_comma_separated_values(links_ids),
             params=parameters,
         )
@@ -334,7 +328,6 @@ def aggregate_areas_raw_data__all(
         frequency: MatrixFrequency,
         areas_ids: str = "",
         columns_names: str = "",
-        columns_regexes: str = "",
         export_format: TableExportFormat = DEFAULT_EXPORT_FORMAT,  # type: ignore
         current_user: JWTUser = Depends(auth.get_current_user),
     ) -> FileResponse:
@@ -348,8 +341,7 @@
         - `query_file`: "values", "details", "details-STstorage", "details-res"
         - `frequency`: "hourly", "daily", "weekly", "monthly", "annual"
         - `areas_ids`: which areas to be selected. If empty, all are selected (comma separated)
-        - `columns_names`: which columns to be selected. If empty, all are selected (comma separated)
-        - `columns_regexes`: which columns to be selected using comma separated regexes. If empty, all are selected
+        - `columns_names`: regexes (if details) or columns to be selected, if empty, all columns are selected
         - `export_format`: Returned file format (csv by default).

         Returns:
@@ -372,7 +364,6 @@
             query_file=query_file,
             frequency=frequency,
             columns_names=_split_comma_separated_values(columns_names),
-            columns_regexes=_split_comma_separated_values(columns_regexes),
             ids_to_consider=_split_comma_separated_values(areas_ids),
             params=parameters,
         )
@@ -403,7 +394,6 @@ def aggregate_links_raw_data__all(
         frequency: MatrixFrequency,
         links_ids: str = "",
         columns_names: str = "",
-        columns_regexes: str = "",
         export_format: TableExportFormat = DEFAULT_EXPORT_FORMAT,  # type: ignore
         current_user: JWTUser = Depends(auth.get_current_user),
     ) -> FileResponse:
@@ -416,8 +406,7 @@
         - `query_file`: "values" (currently the only available option)
         - `frequency`: "hourly", "daily", "weekly", "monthly", "annual"
         - `links_ids`: which links to be selected (ex: "be - fr"). If empty, all are selected (comma separated)
-        - `columns_names`: which columns to be selected. If empty, all are selected (comma separated)
-        - `columns_regexes`: which columns to be selected using comma separated regexes. If empty, all are selected
+        - `columns_names`: regexes (if details) or columns to be selected, if empty, all columns are selected
         - `export_format`: Returned file format (csv by default).

         Returns:
@@ -440,7 +429,6 @@
             query_file=query_file,
             frequency=frequency,
             columns_names=_split_comma_separated_values(columns_names),
-            columns_regexes=_split_comma_separated_values(columns_regexes),
             ids_to_consider=_split_comma_separated_values(links_ids),
             params=parameters,
         )
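
On the client side, only columns_names is sent now. A hedged sketch with requests follows: the route path is illustrative only (the actual prefix is not part of this diff), the study and output identifiers are invented, and the query-parameter names are the ones visible in the endpoint signatures above.

```python
import requests

# Illustrative only: the URL shape below is a guess, not taken from this diff.
base_url = "http://localhost:8080/v1"
study_id = "my-study-id"        # invented
output_id = "20240806-1230eco"  # invented

response = requests.get(
    f"{base_url}/studies/{study_id}/areas/aggregate/mc-ind/{output_id}",  # hypothetical route
    params={
        "query_file": "details",
        "frequency": "hourly",
        "mc_years": "1,2,3",
        "areas_ids": "fr,be",
        # single parameter now: case-insensitive, regex-like for details-* files
        "columns_names": "battery,psp_open",
        "export_format": "csv",
    },
    headers={"Authorization": "Bearer <token>"},  # placeholder token
)
response.raise_for_status()
with open("aggregate.csv", "wb") as f:
    f.write(response.content)
```
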
