Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Refactor of synchronisation #211

Merged
merged 8 commits into from
Apr 11, 2024
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
6 changes: 3 additions & 3 deletions .github/workflows/cicd_main.yml
Original file line number Diff line number Diff line change
Expand Up @@ -33,8 +33,8 @@ jobs:
run: make test

- name: Upload coverage reports to Codecov
uses: codecov/codecov-action@v3
env:
CODECOV_TOKEN: ${{ secrets.CODECOV_TOKEN }}
uses: codecov/codecov-action@v4.2.0
with:
token: ${{ secrets.CODECOV_TOKEN }}


87 changes: 63 additions & 24 deletions databallpy/match.py
Original file line number Diff line number Diff line change
Expand Up @@ -11,7 +11,13 @@
from databallpy.events import DribbleEvent, PassEvent, ShotEvent
from databallpy.utils.errors import DataBallPyError
from databallpy.utils.logging import create_logger
from databallpy.utils.match_utils import (
player_column_id_to_full_name,
player_id_to_column_id,
)
from databallpy.utils.synchronise_tracking_and_event_data import (
align_event_data_datetime,
pre_compute_synchronisation_variables,
synchronise_tracking_and_event_data,
)
from databallpy.utils.utils import MISSING_INT, get_next_possession_frame
Expand Down Expand Up @@ -196,15 +202,9 @@ def player_column_id_to_full_name(self, column_id: str) -> str:
Returns:
str: full name of the player
"""
shirt_num = int(column_id.split("_")[1])
if column_id[:4] == "home":
return self.home_players.loc[
self.home_players["shirt_num"] == shirt_num, "full_name"
].iloc[0]
else:
return self.away_players.loc[
self.away_players["shirt_num"] == shirt_num, "full_name"
].iloc[0]
return player_column_id_to_full_name(
self.home_players, self.away_players, column_id
)

@property
def preprocessing_status(self):
Expand All @@ -219,17 +219,11 @@ def player_id_to_column_id(self, player_id: int) -> str:
Returns:
str: column id of the player, for instance "home_1"
"""
if (self.home_players["id"].eq(player_id)).any():
num = self.home_players[self.home_players["id"] == player_id][
"shirt_num"
].iloc[0]
return f"home_{num}"
elif (self.away_players["id"].eq(player_id)).any():
num = self.away_players[self.away_players["id"] == player_id][
"shirt_num"
].iloc[0]
return f"away_{num}"
else:
try:
return player_id_to_column_id(
self.home_players, self.away_players, player_id
)
except ValueError:
LOGGER.error(
f"Player_id {player_id} is not in either one of the teams, could not "
"obtain column id of player in match.player_id_to_column_id()."
Expand Down Expand Up @@ -725,20 +719,65 @@ def synchronise_tracking_and_event_data(
'pass', 'shot', and 'dribble'

"""
LOGGER.info("Trying to synchronise tracking and event data.")
LOGGER.info(f"Trying to synchronise tracking and event data of {self.name}.")
if not self.allow_synchronise_tracking_and_event_data:
message = (
"Synchronising tracking and event data is not allowed. The quality "
"checks of the tracking data showed that the quality was poor."
)
LOGGER.error(message)
raise DataBallPyError(message)
synchronise_tracking_and_event_data(
self,

self.tracking_data = pre_compute_synchronisation_variables(
self.tracking_data, self.frame_rate, self.pitch_dimensions
)
# reduce standard error by aligning tracking and event data on first event
changed_event_data = align_event_data_datetime(
self.event_data.copy(), self.tracking_data, offset=offset
)

tracking_info, event_info = synchronise_tracking_and_event_data(
tracking_data=self.tracking_data,
event_data=changed_event_data,
home_players=self.home_players,
away_players=self.away_players,
home_team_id=self.home_team_id,
n_batches=n_batches,
verbose=verbose,
offset=offset,
)
# update tracking and event data
self.tracking_data = pd.concat([self.tracking_data, tracking_info], axis=1)
self.event_data = pd.concat([self.event_data, event_info], axis=1)
self.tracking_data["databallpy_event"] = self.tracking_data[
"databallpy_event"
].replace({np.nan: None})
self.tracking_data["event_id"] = (
self.tracking_data["event_id"]
.infer_objects(copy=False)
.fillna(MISSING_INT)
.astype(np.int64)
)
self.tracking_data["sync_certainty"] = self.tracking_data[
"sync_certainty"
].infer_objects()
self.event_data["tracking_frame"] = (
self.event_data["tracking_frame"]
.infer_objects(copy=False)
.fillna(MISSING_INT)
.astype(np.int64)
)
self.event_data["sync_certainty"] = self.event_data[
"sync_certainty"
].infer_objects()

# remove columns that are not needed anymore (added for synchronisation)
self.tracking_data.drop(
["goal_angle_home_team", "goal_angle_away_team"],
axis=1,
inplace=True,
)

self._is_synchronised = True

def __eq__(self, other):
if isinstance(other, Match):
Expand Down
5 changes: 3 additions & 2 deletions databallpy/utils/anonymise_match.py
Original file line number Diff line number Diff line change
Expand Up @@ -14,7 +14,7 @@ def anonymise_match(
match: Match,
keys_df: pd.DataFrame,
base_time: Timestamp = pd.to_datetime("1980-1-1 15:00:00", utc=True),
) -> Match:
) -> tuple[Match, pd.DataFrame]:
"""Function to anonymise a match. The function will replace all player names with a
unique identifier as well as all teams. Furthermore, it will replace all player
jersey numbers with a counter from 1 to n_players in that team. Finally, it will
Expand All @@ -38,7 +38,8 @@ def anonymise_match(
Defaults to pd.to_datetime("1980-1-1 15:00:00", utc=True).

Returns:
Match: anonymised match, potentially updated keys dataframe
tuple[Match, pd.DataFrame]: tuple containing the match with anonymised players
and teams and the potentially updated keys dataframe.

Raises:
ValueError: if base_time is not a timezone aware timestamp
Expand Down
48 changes: 48 additions & 0 deletions databallpy/utils/match_utils.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,48 @@
import pandas as pd


def player_column_id_to_full_name(
    home_players: pd.DataFrame, away_players: pd.DataFrame, column_id: str
) -> str:
    """Simple function to get the full name of a player from the column id

    Args:
        home_players (pd.DataFrame): DataFrame containing all the home players,
            must contain the columns "shirt_num" and "full_name"
        away_players (pd.DataFrame): DataFrame containing all the away players,
            must contain the columns "shirt_num" and "full_name"
        column_id (str): the column id of a player, for instance "home_1"

    Returns:
        str: full name of the player

    Raises:
        ValueError: if the part after the first underscore is not an integer
        IndexError: if column_id contains no underscore, or if no player with
            that shirt number exists in the selected team
    """
    # Parse the shirt number first so a malformed column id fails before any
    # DataFrame lookup is attempted.
    shirt_num = int(column_id.split("_")[1])

    # Every column id that does not start with "home" is looked up in the
    # away team, mirroring the original if/else.
    players = home_players if column_id.startswith("home") else away_players

    full_names = players.loc[players["shirt_num"] == shirt_num, "full_name"]
    return full_names.iloc[0]


def player_id_to_column_id(
    home_players: pd.DataFrame, away_players: pd.DataFrame, player_id: int
) -> str:
    """Simple function to get the column id based on player id

    The home team is searched first, then the away team. When the id occurs
    more than once within a team, the first matching row is used.

    Args:
        home_players (pd.DataFrame): DataFrame containing all the home players,
            must contain the columns "id" and "shirt_num"
        away_players (pd.DataFrame): DataFrame containing all the away players,
            must contain the columns "id" and "shirt_num"
        player_id (int): id of the player

    Returns:
        str: column id of the player, for instance "home_1"

    Raises:
        ValueError: if player_id is not found in either one of the teams
    """
    for side, players in (("home", home_players), ("away", away_players)):
        # One filtered lookup per team replaces the separate existence check
        # and second filter of the same column.
        shirt_nums = players.loc[players["id"] == player_id, "shirt_num"]
        if not shirt_nums.empty:
            return f"{side}_{shirt_nums.iloc[0]}"

    raise ValueError(f"{player_id} is not in either one of the teams")
Loading
Loading