Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

feat(aggregation-api)!: add new endpoint for economy/mc-all aggregation #2092

Merged
merged 19 commits into from
Aug 27, 2024
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
Show all changes
19 commits
Select commit Hold shift + click to select a range
feb729f
feat(aggregation-api): add new endpoints for `economy/mc-all` aggrega…
mabw-rte Jul 15, 2024
5796401
test(aggregation-api): add tests for the new `economy/mc-all` endpoints
mabw-rte Jul 16, 2024
82439d6
feat(aggregation-api): separate `mc-ind\all` query files enums
mabw-rte Jul 31, 2024
7497699
feat(aggregation-api): add extra outputs to the `STA-mini` study
mabw-rte Jul 31, 2024
5db89ae
test(aggregation-api): update the links `mc-all` aggregation testing …
mabw-rte Jul 31, 2024
2d7919c
test(aggregation-api): update unittests following appending extra out…
mabw-rte Aug 1, 2024
3d9683e
feat(aggregation-api): add regex based column-filtering, remove dupli…
mabw-rte Aug 5, 2024
0668d91
test(aggregation-api): add unittests for regex based column-filtering
mabw-rte Aug 5, 2024
18ca98c
feat(aggregation-api): use columns regex only for `details-XXX`, remo…
mabw-rte Aug 6, 2024
ad7386e
test(aggregation-api): use columns regex only for `details-XXX`, remo…
mabw-rte Aug 6, 2024
10a0650
feat(aggregation-api): add `cluster` column for `details-XXX` query f…
mabw-rte Aug 8, 2024
c20b891
docs(aggregation-api): improve `columns_names` description, improve `…
mabw-rte Aug 20, 2024
6190c97
docs(aggregation-api): correct `query_file` description for `.../link…
mabw-rte Aug 20, 2024
252ebf6
feat(aggregation-api): optimize code
mabw-rte Aug 20, 2024
a20c3a5
feat(aggregation-api): raise `InvalidFieldForVersionError` for not ha…
mabw-rte Aug 20, 2024
ca3e923
refactor(aggregation-api): rename `normalize` param, optimize code
mabw-rte Aug 20, 2024
e28697e
refactor(aggregation-api): remove `query_file` type in attrib initial…
mabw-rte Aug 21, 2024
bc5baed
feat(aggregation-api): update following code review
mabw-rte Aug 22, 2024
8416433
test(aggregation-api): check that an Error `OutputSubFolderNotFound` …
mabw-rte Aug 22, 2024
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
19 changes: 19 additions & 0 deletions antarest/core/exceptions.py
Original file line number Diff line number Diff line change
Expand Up @@ -392,6 +392,20 @@ def __str__(self) -> str:
return self.detail


class OutputSubFolderNotFound(HTTPException):
    """
    Exception raised when an expected sub-folder of a simulation output does not exist.

    Maps to HTTP 404 (Not Found).
    """

    def __init__(self, output_id: str, mc_root: str) -> None:
        """
        Args:
            output_id: ID of the simulation output being inspected.
            mc_root: name of the missing sub-folder (e.g. the MC root folder, "mc-ind" or "mc-all").
        """
        message = f"The output '{output_id}' sub-folder '{mc_root}' does not exist"
        super().__init__(HTTPStatus.NOT_FOUND, message)

    def __str__(self) -> str:
        """Return a string representation of the exception."""
        return self.detail


class BadZipBinary(HTTPException):
    """HTTP 415 (Unsupported Media Type) error; presumably raised when an
    uploaded binary is not a valid ZIP archive — TODO confirm against callers."""

    def __init__(self, message: str) -> None:
        super().__init__(HTTPStatus.UNSUPPORTED_MEDIA_TYPE, message)
Expand Down Expand Up @@ -446,6 +460,11 @@ def __init__(self, message: str) -> None:
super().__init__(HTTPStatus.UNPROCESSABLE_ENTITY, message)


class MCRootNotHandled(HTTPException):
    """HTTP 422 (Unprocessable Entity) error; presumably raised when a
    Monte-Carlo root folder kind is not supported — TODO confirm against callers."""

    def __init__(self, message: str) -> None:
        super().__init__(HTTPStatus.UNPROCESSABLE_ENTITY, message)


class MatrixWidthMismatchError(HTTPException):
    """HTTP 422 (Unprocessable Entity) error; presumably raised when matrices
    of inconsistent widths are combined — TODO confirm against callers."""

    def __init__(self, message: str) -> None:
        super().__init__(HTTPStatus.UNPROCESSABLE_ENTITY, message)
Expand Down
347 changes: 275 additions & 72 deletions antarest/study/business/aggregator_management.py

Large diffs are not rendered by default.

29 changes: 14 additions & 15 deletions antarest/study/service.py
Original file line number Diff line number Diff line change
Expand Up @@ -49,7 +49,13 @@
from antarest.matrixstore.matrix_editor import MatrixEditInstruction
from antarest.study.business.adequacy_patch_management import AdequacyPatchManager
from antarest.study.business.advanced_parameters_management import AdvancedParamsManager
from antarest.study.business.aggregator_management import AggregatorManager, AreasQueryFile, LinksQueryFile
from antarest.study.business.aggregator_management import (
AggregatorManager,
MCAllAreasQueryFile,
MCAllLinksQueryFile,
MCIndAreasQueryFile,
MCIndLinksQueryFile,
)
from antarest.study.business.allocation_management import AllocationManager
from antarest.study.business.area_management import AreaCreationDTO, AreaInfoDTO, AreaManager, AreaType, UpdateAreaUi
from antarest.study.business.areas.hydro_management import HydroManager
Expand Down Expand Up @@ -372,42 +378,35 @@ def aggregate_output_data(
self,
uuid: str,
output_id: str,
query_file: t.Union[AreasQueryFile, LinksQueryFile],
query_file: t.Union[MCIndAreasQueryFile, MCAllAreasQueryFile, MCIndLinksQueryFile, MCAllLinksQueryFile],
frequency: MatrixFrequency,
mc_years: t.Sequence[int],
columns_names: t.Sequence[str],
ids_to_consider: t.Sequence[str],
params: RequestParameters,
mc_years: t.Optional[t.Sequence[int]] = None,
) -> pd.DataFrame:
"""
Aggregates output data based on several filtering conditions

Args:
uuid: study uuid
output_id: simulation output ID
query_file: which types of data to retrieve ("values", "details", "details-st-storage", "details-res")
query_file: which types of data to retrieve: "values", "details", "details-st-storage", "details-res", "ids"
frequency: yearly, monthly, weekly, daily or hourly.
mc_years: list of monte-carlo years, if empty, all years are selected
columns_names: columns to be selected, if empty, all columns are selected
columns_names: regexes (if details) or columns to be selected, if empty, all columns are selected
ids_to_consider: list of areas or links ids to consider, if empty, all areas are selected
params: request parameters
mc_years: list of monte-carlo years, if empty, all years are selected (only for mc-ind)

Returns: the aggregated data as a DataFrame

"""
study = self.get_study(uuid)
assert_permission(params.user, study, StudyPermissionType.READ)
study_path = self.storage_service.raw_study_service.get_study_path(study)
# fmt: off
aggregator_manager = AggregatorManager(
study_path,
output_id,
query_file,
frequency,
mc_years,
columns_names,
ids_to_consider
study_path, output_id, query_file, frequency, ids_to_consider, columns_names, mc_years
)
# fmt: on
return aggregator_manager.aggregate_output_data()

def get_logs(
Expand Down
163 changes: 153 additions & 10 deletions antarest/study/web/raw_studies_blueprint.py
Original file line number Diff line number Diff line change
Expand Up @@ -18,7 +18,12 @@
from antarest.core.utils.utils import sanitize_string, sanitize_uuid
from antarest.core.utils.web import APITag
from antarest.login.auth import Auth
from antarest.study.business.aggregator_management import AreasQueryFile, LinksQueryFile
from antarest.study.business.aggregator_management import (
MCAllAreasQueryFile,
MCAllLinksQueryFile,
MCIndAreasQueryFile,
MCIndLinksQueryFile,
)
from antarest.study.service import StudyService
from antarest.study.storage.df_download import TableExportFormat, export_file
from antarest.study.storage.rawstudy.model.filesystem.matrix.matrix import MatrixFrequency
Expand Down Expand Up @@ -175,14 +180,14 @@ def get_study(
return Response(content=json_response, media_type="application/json")

@bp.get(
"/studies/{uuid}/areas/aggregate/{output_id}",
mabw-rte marked this conversation as resolved.
Show resolved Hide resolved
"/studies/{uuid}/areas/aggregate/mc-ind/{output_id}",
tags=[APITag.study_raw_data],
summary="Retrieve Aggregated Areas Raw Data from Study Output",
summary="Retrieve Aggregated Areas Raw Data from Study Economy MCs individual Outputs",
)
def aggregate_areas_raw_data(
uuid: str,
output_id: str,
query_file: AreasQueryFile,
query_file: MCIndAreasQueryFile,
frequency: MatrixFrequency,
mc_years: str = "",
areas_ids: str = "",
Expand All @@ -195,13 +200,14 @@ def aggregate_areas_raw_data(
Create an aggregation of areas raw data

Parameters:

- `uuid`: study ID
- `output_id`: the output ID aka the simulation ID
- `query_file`: "values", "details", "details-STstorage", "details-res"
- `frequency`: "hourly", "daily", "weekly", "monthly", "annual"
- `mc_years`: which Monte Carlo years to be selected. If empty, all are selected (comma separated)
- `areas_ids`: which areas to be selected. If empty, all are selected (comma separated)
- `columns_names`: which columns to be selected. If empty, all are selected (comma separated)
- `columns_names`: names or regexes (if `query_file` is of type `details`) to select columns (comma separated)
- `export_format`: Returned file format (csv by default).

Returns:
Expand All @@ -223,10 +229,10 @@ def aggregate_areas_raw_data(
output_id=output_id,
query_file=query_file,
frequency=frequency,
mc_years=[int(mc_year) for mc_year in _split_comma_separated_values(mc_years)],
columns_names=_split_comma_separated_values(columns_names),
ids_to_consider=_split_comma_separated_values(areas_ids),
params=parameters,
mc_years=[int(mc_year) for mc_year in _split_comma_separated_values(mc_years)],
)

download_name = f"aggregated_output_{uuid}_{output_id}{export_format.suffix}"
Expand All @@ -244,14 +250,14 @@ def aggregate_areas_raw_data(
)

@bp.get(
"/studies/{uuid}/links/aggregate/{output_id}",
"/studies/{uuid}/links/aggregate/mc-ind/{output_id}",
tags=[APITag.study_raw_data],
summary="Retrieve Aggregated Links Raw Data from Study Output",
summary="Retrieve Aggregated Links Raw Data from Study Economy MCs individual Outputs",
)
def aggregate_links_raw_data(
uuid: str,
output_id: str,
query_file: LinksQueryFile,
query_file: MCIndLinksQueryFile,
frequency: MatrixFrequency,
mc_years: str = "",
links_ids: str = "",
Expand All @@ -263,13 +269,14 @@ def aggregate_links_raw_data(
Create an aggregation of links raw data

Parameters:

- `uuid`: study ID
- `output_id`: the output ID aka the simulation ID
- `query_file`: "values" (currently the only available option)
- `frequency`: "hourly", "daily", "weekly", "monthly", "annual"
- `mc_years`: which Monte Carlo years to be selected. If empty, all are selected (comma separated)
- `links_ids`: which links to be selected (ex: "be - fr"). If empty, all are selected (comma separated)
- `columns_names`: which columns to be selected. If empty, all are selected (comma separated)
- `columns_names`: names or regexes (if `query_file` is of type `details`) to select columns (comma separated)
- `export_format`: Returned file format (csv by default).

Returns:
Expand All @@ -291,7 +298,140 @@ def aggregate_links_raw_data(
output_id=output_id,
query_file=query_file,
frequency=frequency,
columns_names=_split_comma_separated_values(columns_names),
ids_to_consider=_split_comma_separated_values(links_ids),
params=parameters,
mc_years=[int(mc_year) for mc_year in _split_comma_separated_values(mc_years)],
)

download_name = f"aggregated_output_{uuid}_{output_id}{export_format.suffix}"
download_log = f"Exporting aggregated output data for study '{uuid}' as {export_format} file"

return export_file(
df_matrix,
study_service.file_transfer_manager,
export_format,
True,
True,
download_name,
download_log,
current_user,
)

@bp.get(
    "/studies/{uuid}/areas/aggregate/mc-all/{output_id}",
    tags=[APITag.study_raw_data],
    summary="Retrieve Aggregated Areas Raw Data from Study Economy MCs All Outputs",
)
def aggregate_areas_raw_data__all(
    uuid: str,
    output_id: str,
    query_file: MCAllAreasQueryFile,
    frequency: MatrixFrequency,
    areas_ids: str = "",
    columns_names: str = "",
    export_format: TableExportFormat = DEFAULT_EXPORT_FORMAT,  # type: ignore
    current_user: JWTUser = Depends(auth.get_current_user),
) -> FileResponse:
    # noinspection SpellCheckingInspection
    """
    Create an aggregation of areas raw data in mc-all

    Parameters:

    - `uuid`: study ID
    - `output_id`: the output ID aka the simulation ID
    - `query_file`: "values", "details", "details-STstorage", "details-res", "id"
    - `frequency`: "hourly", "daily", "weekly", "monthly", "annual"
    - `areas_ids`: which areas to be selected. If empty, all are selected (comma separated)
    - `columns_names`: names or regexes (if `query_file` is of type `details`) to select columns (comma separated)
    - `export_format`: Returned file format (csv by default).

    Returns:
        FileResponse that corresponds to a dataframe with the aggregated areas raw data
    """
    logger.info(
        f"Aggregating areas output data for study {uuid}, output {output_id},"
        f"from files '{query_file}-{frequency}.txt'",
        extra={"user": current_user.id},
    )

    # Avoid vulnerabilities by sanitizing the `uuid` and `output_id` parameters
    uuid = sanitize_uuid(uuid)
    output_id = sanitize_string(output_id)

    parameters = RequestParameters(user=current_user)
    # NOTE: unlike the mc-ind endpoint, no `mc_years` argument is passed here —
    # mc-all outputs are already aggregated over all Monte-Carlo years.
    df_matrix = study_service.aggregate_output_data(
        uuid,
        output_id=output_id,
        query_file=query_file,
        frequency=frequency,
        columns_names=_split_comma_separated_values(columns_names),
        ids_to_consider=_split_comma_separated_values(areas_ids),
        params=parameters,
    )

    # File name / log line used by the export helper for the download artifact.
    download_name = f"aggregated_output_{uuid}_{output_id}{export_format.suffix}"
    download_log = f"Exporting aggregated output data for study '{uuid}' as {export_format} file"

    return export_file(
        df_matrix,
        study_service.file_transfer_manager,
        export_format,
        True,
        True,
        download_name,
        download_log,
        current_user,
    )

@bp.get(
"/studies/{uuid}/links/aggregate/mc-all/{output_id}",
tags=[APITag.study_raw_data],
summary="Retrieve Aggregated Links Raw Data from Study Economy MC-All Outputs",
)
def aggregate_links_raw_data__all(
uuid: str,
output_id: str,
query_file: MCAllLinksQueryFile,
frequency: MatrixFrequency,
links_ids: str = "",
columns_names: str = "",
export_format: TableExportFormat = DEFAULT_EXPORT_FORMAT, # type: ignore
current_user: JWTUser = Depends(auth.get_current_user),
) -> FileResponse:
"""
Create an aggregation of links in mc-all

Parameters:

- `uuid`: study ID
- `output_id`: the output ID aka the simulation ID
- `query_file`: "values", "id"
- `frequency`: "hourly", "daily", "weekly", "monthly", "annual"
- `links_ids`: which links to be selected (ex: "be - fr"). If empty, all are selected (comma separated)
- `columns_names`: names or regexes (if `query_file` is of type `details`) to select columns (comma separated)
- `export_format`: Returned file format (csv by default).

Returns:
FileResponse that corresponds to a dataframe with the aggregated links raw data
"""
logger.info(
f"Aggregating links mc-all data for study {uuid}, output {output_id},"
f"from files '{query_file}-{frequency}.txt'",
extra={"user": current_user.id},
)

# Avoid vulnerabilities by sanitizing the `uuid` and `output_id` parameters
uuid = sanitize_uuid(uuid)
output_id = sanitize_string(output_id)

parameters = RequestParameters(user=current_user)
df_matrix = study_service.aggregate_output_data(
uuid,
output_id=output_id,
query_file=query_file,
frequency=frequency,
columns_names=_split_comma_separated_values(columns_names),
ids_to_consider=_split_comma_separated_values(links_ids),
params=parameters,
Expand Down Expand Up @@ -329,6 +469,7 @@ def edit_study(
> NOTE: use the PUT endpoint to upload a file.

Parameters:

- `uuid`: The UUID of the study.
- `path`: The path to the data to update. Defaults to "/".
- `data`: The formatted data to be posted. Defaults to an empty string.
Expand Down Expand Up @@ -362,6 +503,7 @@ def replace_study_file(
Update raw data for a study by posting a raw file.

Parameters:

- `uuid`: The UUID of the study.
- `path`: The path to the data to update. Defaults to "/".
- `file`: The raw file to be posted (e.g. a CSV file opened in binary mode).
Expand Down Expand Up @@ -425,6 +567,7 @@ def get_matrix(
Download a matrix in a given format.

Parameters:

- `uuid`: study ID
- `matrix_path`: Relative path of the matrix to download.
- `export_format`: Returned file format (csv by default).
Expand Down
Binary file modified tests/integration/assets/STA-mini.7z
Binary file not shown.
Binary file modified tests/integration/assets/STA-mini.zip
Binary file not shown.
Original file line number Diff line number Diff line change
Expand Up @@ -256,8 +256,8 @@ def test_lifecycle(
{
"path": str(ext_workspace_path / "STA-mini"),
"file_type": "directory",
"file_count": IntegerRange(900, 1000), # 918
"size_bytes": IntegerRange(7_000_000, 9_000_000), # nt: 7_741_619, posix: 8_597_683
"file_count": IntegerRange(1000, 1100), # 1043
"size_bytes": IntegerRange(9_000_000, 11_000_000), # 10_428_620
"created": AnyIsoDateTime(),
"accessed": AnyIsoDateTime(),
"modified": AnyIsoDateTime(),
Expand Down Expand Up @@ -415,7 +415,7 @@ def test_size_of_studies(
sizes.append(actual[0]["size_bytes"])

# Check the sizes
# The size of the new study should be between 140 and 300 KB.
# The size of 'STA-mini' should be between 7 and 9 MB.
# The size of the new study should be between 140 and 350 KB.
# The size of 'STA-mini' should be between 9 and 11 MB.
sizes.sort()
assert sizes == [IntegerRange(140_000, 300_000), IntegerRange(7_000_000, 9_000_000)]
assert sizes == [IntegerRange(140_000, 350_000), IntegerRange(9_000_000, 11_000_000)]
Loading
Loading