Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Réparation du flow recompute_controls_segments #2535

Merged
merged 1 commit into from
Sep 21, 2023
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
10 changes: 5 additions & 5 deletions datascience/src/pipeline/flows/recompute_controls_segments.py
Original file line number Diff line number Diff line change
@@ -1,5 +1,5 @@
from pathlib import Path
from typing import Tuple
from typing import List

import pandas as pd
import prefect
Expand All @@ -18,7 +18,7 @@


@task(checkpoint=False)
def extract_controls_catches(year: int, control_types: Tuple[str]) -> pd.DataFrame:
def extract_controls_catches(year: int, control_types: List[str]) -> pd.DataFrame:
"""
Extracts controls data from the specified year.
Expand All @@ -37,10 +37,10 @@ def extract_controls_catches(year: int, control_types: Tuple[str]) -> pd.DataFrame:
raise ValueError(f"year must be of type int, got {type(year)}")

try:
assert isinstance(control_types, tuple)
assert isinstance(control_types, List)
except AssertionError:
raise ValueError(
f"control_types must be of type tuple, got {type(control_types)}"
f"control_types must be of type list, got {type(control_types)}"
)

for control_type in control_types:
Expand All @@ -54,7 +54,7 @@ def extract_controls_catches(year: int, control_types: Tuple[str]) -> pd.DataFrame:
query_filepath="monitorfish/controls_catches.sql",
parse_dates=parse_dates,
dtypes=dtypes,
params={"year": year, "control_types": control_types},
params={"year": year, "control_types": tuple(control_types)},
)


Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -86,21 +86,32 @@ def test_extract_controls_catches(reset_test_data):
)
)
controls_catches = extract_controls_catches.run(
year=2022, control_types=("LAND_CONTROL", "SEA_CONTROL")
year=2022, control_types=["LAND_CONTROL", "SEA_CONTROL"]
)
assert len(controls_catches) == 73
assert controls_catches.id.nunique() == 25
controls_catches = extract_controls_catches.run(
year=2022, control_types=("SEA_CONTROL",)
year=2022, control_types=["SEA_CONTROL"]
)
assert len(controls_catches) == 18
assert controls_catches.id.nunique() == 12
controls_catches = extract_controls_catches.run(
year=2022, control_types=("LAND_CONTROL",)
year=2022, control_types=["LAND_CONTROL"]
)
assert len(controls_catches) == 55
assert controls_catches.id.nunique() == 13

with pytest.raises(ValueError):
extract_controls_catches.run(
year=2022, control_types=["UNKNWOWN_CONTROL_TYPEZZZ"]
)

with pytest.raises(ValueError):
extract_controls_catches.run(year="2022", control_types=["LAND_CONTROL"])

with pytest.raises(ValueError):
extract_controls_catches.run(year=2022, control_types="THIS_SHOULD_BE_A_LIST")


def test_compute_controls_segments():
segments = pd.DataFrame(
Expand Down Expand Up @@ -179,13 +190,13 @@ def test_recompute_controls_segments_flow(reset_test_data, updated_controls_segments…
initial_controls_segments = read_query(query, db="monitorfish_remote")

# Running the flow on a year without data should not update any row
state = flow.run(year=1950, control_types=("LAND_CONTROL", "SEA_CONTROL"))
state = flow.run(year=1950, control_types=["LAND_CONTROL", "SEA_CONTROL"])
assert state.is_successful()
controls_segments = read_query(query, db="monitorfish_remote")
pd.testing.assert_frame_equal(controls_segments, initial_controls_segments)

# Running the flow on land controls should update only land controls
state = flow.run(year=2022, control_types=("LAND_CONTROL",))
state = flow.run(year=2022, control_types=["LAND_CONTROL"])
assert state.is_successful()
controls_segments = read_query(query, db="monitorfish_remote")
pd.testing.assert_frame_equal(
Expand All @@ -203,7 +214,7 @@ def test_recompute_controls_segments_flow(reset_test_data, updated_controls_segments…
)

# Running the flow on land and sea controls should update land and sea controls
state = flow.run(year=2022, control_types=("LAND_CONTROL", "SEA_CONTROL"))
state = flow.run(year=2022, control_types=["LAND_CONTROL", "SEA_CONTROL"])
assert state.is_successful()
controls_segments = read_query(query, db="monitorfish_remote")
pd.testing.assert_frame_equal(controls_segments, updated_controls_segments)
Loading