Skip to content

Commit

Permalink
Réparation du flow recompute_controls_segments (#2535)
Browse files: browse the repository at this point in the history
## Linked issues

- Resolve #2534
  • Loading branch information
VincentAntoine authored Sep 21, 2023
2 parents bb830fb + 2d98a07 commit 5d9e8f0
Show file tree
Hide file tree
Showing 2 changed files with 22 additions and 11 deletions.
10 changes: 5 additions & 5 deletions datascience/src/pipeline/flows/recompute_controls_segments.py
Original file line number Diff line number Diff line change
@@ -1,5 +1,5 @@
from pathlib import Path
from typing import Tuple
from typing import List

import pandas as pd
import prefect
Expand All @@ -18,7 +18,7 @@


@task(checkpoint=False)
def extract_controls_catches(year: int, control_types: Tuple[str]) -> pd.DataFrame:
def extract_controls_catches(year: int, control_types: List[str]) -> pd.DataFrame:
"""
Extracts controls data from the specified year.
Expand All @@ -37,10 +37,10 @@ def extract_controls_catches(year: int, control_types: Tuple[str]) -> pd.DataFra
raise ValueError(f"year must be of type int, got {type(year)}")

try:
assert isinstance(control_types, tuple)
assert isinstance(control_types, List)
except AssertionError:
raise ValueError(
f"control_types must be of type tuple, got {type(control_types)}"
f"control_types must be of type list, got {type(control_types)}"
)

for control_type in control_types:
Expand All @@ -54,7 +54,7 @@ def extract_controls_catches(year: int, control_types: Tuple[str]) -> pd.DataFra
query_filepath="monitorfish/controls_catches.sql",
parse_dates=parse_dates,
dtypes=dtypes,
params={"year": year, "control_types": control_types},
params={"year": year, "control_types": tuple(control_types)},
)


Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -86,21 +86,32 @@ def test_extract_controls_catches(reset_test_data):
)
)
controls_catches = extract_controls_catches.run(
year=2022, control_types=("LAND_CONTROL", "SEA_CONTROL")
year=2022, control_types=["LAND_CONTROL", "SEA_CONTROL"]
)
assert len(controls_catches) == 73
assert controls_catches.id.nunique() == 25
controls_catches = extract_controls_catches.run(
year=2022, control_types=("SEA_CONTROL",)
year=2022, control_types=["SEA_CONTROL"]
)
assert len(controls_catches) == 18
assert controls_catches.id.nunique() == 12
controls_catches = extract_controls_catches.run(
year=2022, control_types=("LAND_CONTROL",)
year=2022, control_types=["LAND_CONTROL"]
)
assert len(controls_catches) == 55
assert controls_catches.id.nunique() == 13

with pytest.raises(ValueError):
extract_controls_catches.run(
year=2022, control_types=["UNKNWOWN_CONTROL_TYPEZZZ"]
)

with pytest.raises(ValueError):
extract_controls_catches.run(year="2022", control_types=["LAND_CONTROL"])

with pytest.raises(ValueError):
extract_controls_catches.run(year=2022, control_types="THIS_SHOULD_BE_A_LIST")


def test_compute_controls_segments():
segments = pd.DataFrame(
Expand Down Expand Up @@ -179,13 +190,13 @@ def test_recompute_controls_segments_flow(reset_test_data, updated_controls_segm
initial_controls_segments = read_query(query, db="monitorfish_remote")

# Running the flow on a year without data should not update any row
state = flow.run(year=1950, control_types=("LAND_CONTROL", "SEA_CONTROL"))
state = flow.run(year=1950, control_types=["LAND_CONTROL", "SEA_CONTROL"])
assert state.is_successful()
controls_segments = read_query(query, db="monitorfish_remote")
pd.testing.assert_frame_equal(controls_segments, initial_controls_segments)

# Running the flow on land controls should update only land controls
state = flow.run(year=2022, control_types=("LAND_CONTROL",))
state = flow.run(year=2022, control_types=["LAND_CONTROL"])
assert state.is_successful()
controls_segments = read_query(query, db="monitorfish_remote")
pd.testing.assert_frame_equal(
Expand All @@ -203,7 +214,7 @@ def test_recompute_controls_segments_flow(reset_test_data, updated_controls_segm
)

# Running the flow on land and sea controls should update land and sea controls
state = flow.run(year=2022, control_types=("LAND_CONTROL", "SEA_CONTROL"))
state = flow.run(year=2022, control_types=["LAND_CONTROL", "SEA_CONTROL"])
assert state.is_successful()
controls_segments = read_query(query, db="monitorfish_remote")
pd.testing.assert_frame_equal(controls_segments, updated_controls_segments)

0 comments on commit 5d9e8f0

Please sign in to comment.