Skip to content

Commit

Permalink
Added to all event data providers (#115)
Browse files Browse the repository at this point in the history
  • Loading branch information
Alek050 authored Aug 17, 2023
1 parent aadda0b commit 084325d
Show file tree
Hide file tree
Showing 16 changed files with 473 additions and 138 deletions.
Original file line number Diff line number Diff line change
Expand Up @@ -4,7 +4,7 @@
def _normalize_playing_direction_events(
event_data: pd.DataFrame, home_team_id: int, away_team_id: int
) -> pd.DataFrame:
"""Function to normalize the playing direction based on shots, goals and misses so
"""Function to normalize the playing direction based on databallpy shots so
that the home team is always represented as playing from left to right and the
away team from right to left.
Expand All @@ -16,14 +16,15 @@ def _normalize_playing_direction_events(
Returns:
pd.DataFrame: normalized event data.
"""
events = ["shot", "goal", "miss"]

to_changes_cols = [xy for xy in event_data.columns if "_x" in xy or "_y" in xy]
for period in event_data["period_id"].unique():
temp_ed = event_data[event_data["period_id"] == period]

# home team
home = temp_ed.loc[
(temp_ed["team_id"] == home_team_id) & (temp_ed["event"].isin(events))
(temp_ed["team_id"] == home_team_id)
& (temp_ed["databallpy_event"] == "shot")
]
if home["start_x"].mean() < 0: # home players shoot on the left goal
event_data.loc[
Expand All @@ -32,7 +33,8 @@ def _normalize_playing_direction_events(

# away team
away = temp_ed.loc[
(temp_ed["team_id"] == away_team_id) & (temp_ed["event"].isin(events))
(temp_ed["team_id"] == away_team_id)
& (temp_ed["databallpy_event"] == "shot")
]
if away["start_x"].mean() > 0: # away players shoot on the right goal
event_data.loc[
Expand Down
101 changes: 66 additions & 35 deletions databallpy/load_data/event_data/instat.py
Original file line number Diff line number Diff line change
Expand Up @@ -9,6 +9,30 @@
from databallpy.utils.tz_modification import utc_to_local_datetime
from databallpy.utils.utils import MISSING_INT

# Lookup table from Instat action names (stored in the "instat_event" column)
# to the shared databallpy event names "pass", "dribble" and "shot".
# It is applied with Series.map when the event data frame is built, so any
# Instat action name that is not listed here ends up as NaN in the
# "databallpy_event" column.
# Crosses and key passes/assists are folded into "pass"; goals and every
# blocked, wide or on-target shot variant are folded into "shot".
instat_databallpy_map = {
"Attacking pass accurate": "pass",
"Attacking pass inaccurate": "pass",
"Unsuccessful dribbling": "dribble",
"Successful dribbling": "dribble",
"Dribbling": "dribble",
"Inaccurate key pass": "pass",
"Crosses inaccurate": "pass",
"Blocked shot": "shot",
"Shots blocked": "shot",
"Wide shot": "shot",
"Accurate crossing from set piece with a shot": "pass",
"Shot on target": "shot",
"Crosses accurate": "pass",
"Pass into offside": "pass",
"Accurate key pass": "pass",
"Shot blocked by field player": "shot",
"Inaccurate set-piece cross": "pass",
"Accurate crossing from set piece": "pass",
"Key assist": "pass",
"Goal": "shot",
"Accurate crossing from set piece with a goal": "pass",
}


def load_instat_event_data(
event_data_loc: str, metadata_loc: str
Expand Down Expand Up @@ -186,27 +210,27 @@ def _load_event_data(event_data_loc: str, metadata: Metadata) -> pd.DataFrame:
Returns:
pd.DataFrame: event data of the match
"""
EVENT_AND_OUTCOME_INSTAT_EVENTS = {
"Attacking pass accurate": ["pass", 1],
"Accurate key pass": ["pass", 1],
"Attacking pass inaccurate": ["pass", 0],
"Inaccurate key pass": ["pass", 0],
"Pass into offside": ["pass", 0],
"Successful dribbling": ["dribbling", 1],
"Unsuccessful dribbling": ["dribbling", 0],
"Dribbling": ["dribbling", MISSING_INT],
"Crosses accurate": ["cross", 1],
"Accurate crossing from set piece with a shot": ["cross", 1],
"Accurate crossing from set piece": ["cross", 1],
"Accurate crossing from set piece with a goal": ["cross", 1],
"Crosses inaccurate": ["cross", 0],
"Cross interception": ["cross", 0],
"Inaccurate blocked cross": ["cross", 0],
"Inaccurate set-piece cross": ["cross", 0],
"Shot on target": ["shot", 1],
"Blocked shot": ["shot", 0],
"Wide shot": ["shot", 0],
}
# EVENT_AND_OUTCOME_INSTAT_EVENTS = {
# "Attacking pass accurate": ["pass", 1],
# "Accurate key pass": ["pass", 1],
# "Attacking pass inaccurate": ["pass", 0],
# "Inaccurate key pass": ["pass", 0],
# "Pass into offside": ["pass", 0],
# "Successful dribbling": ["dribbling", 1],
# "Unsuccessful dribbling": ["dribbling", 0],
# "Dribbling": ["dribbling", MISSING_INT],
# "Crosses accurate": ["cross", 1],
# "Accurate crossing from set piece with a shot": ["cross", 1],
# "Accurate crossing from set piece": ["cross", 1],
# "Accurate crossing from set piece with a goal": ["cross", 1],
# "Crosses inaccurate": ["cross", 0],
# "Cross interception": ["cross", 0],
# "Inaccurate blocked cross": ["cross", 0],
# "Inaccurate set-piece cross": ["cross", 0],
# "Shot on target": ["shot", 1],
# "Blocked shot": ["shot", 0],
# "Wide shot": ["shot", 0],
# }

with open(event_data_loc, "r") as f:
data = f.read()
Expand All @@ -216,7 +240,8 @@ def _load_event_data(event_data_loc: str, metadata: Metadata) -> pd.DataFrame:
result_dict = {
"event_id": [],
"type_id": [],
"event": [],
"instat_event": [],
"databallpy_event": [],
"period_id": [],
"minutes": [],
"seconds": [],
Expand All @@ -228,7 +253,6 @@ def _load_event_data(event_data_loc: str, metadata: Metadata) -> pd.DataFrame:
"end_x": [],
"end_y": [],
"datetime": [],
"instat_event": [],
}

start_time_period = {
Expand All @@ -247,15 +271,17 @@ def _load_event_data(event_data_loc: str, metadata: Metadata) -> pd.DataFrame:
if not event["action_id"].startswith(("16", "15")):
result_dict["event_id"].append(int(event["id"]))
result_dict["type_id"].append(int(event["action_id"]))
if str(event["action_name"]) in EVENT_AND_OUTCOME_INSTAT_EVENTS.keys():
event_name, outcome = EVENT_AND_OUTCOME_INSTAT_EVENTS[
str(event["action_name"])
]
result_dict["event"].append(event_name)
result_dict["outcome"].append(outcome)
else:
result_dict["event"].append(str(event["action_name"]))
result_dict["outcome"].append(np.nan)

# if str(event["action_name"]) in EVENT_AND_OUTCOME_INSTAT_EVENTS.keys():
# event_name, outcome = EVENT_AND_OUTCOME_INSTAT_EVENTS[
# str(event["action_name"])
# ]
# result_dict["event"].append(event_name)
# result_dict["outcome"].append(outcome)
# else:
result_dict["instat_event"].append(str(event["action_name"]))
result_dict["outcome"].append(np.nan)

result_dict["period_id"].append(int(event["half"]))
result_dict["minutes"].append(float(event["second"]) // 60)
result_dict["seconds"].append(float(event["second"]) % 60)
Expand Down Expand Up @@ -283,12 +309,17 @@ def _load_event_data(event_data_loc: str, metadata: Metadata) -> pd.DataFrame:
start_time_period[int(event["half"])]
+ dt.timedelta(milliseconds=float(event["second"]) * 1000)
)
result_dict["instat_event"].append(str(event["action_name"]))

result_dict["databallpy_event"] = [np.nan] * len(result_dict["event_id"])

event_data = pd.DataFrame(result_dict)
start_events = ["pass", "shot", "Goal"]
event_data["databallpy_event"] = event_data["instat_event"].map(
instat_databallpy_map
)

start_events = ["pass", "shot"]
x_start, y_start = (
event_data[event_data["event"].isin(start_events)]
event_data[event_data["databallpy_event"].isin(start_events)]
.reset_index()
.loc[0, ["start_x", "start_y"]]
)
Expand Down
55 changes: 52 additions & 3 deletions databallpy/load_data/event_data/metrica_event_data.py
Original file line number Diff line number Diff line change
Expand Up @@ -20,6 +20,12 @@
)
from databallpy.utils.utils import _to_float, _to_int

# Lookup table from lower-cased Metrica event type names (stored in the
# "metrica_event" column) to the shared databallpy event names.
# It is applied with Series.map after the events are parsed, so Metrica
# events that are not listed here become NaN in the "databallpy_event" column.
# A Metrica "carry" is reported as a databallpy "dribble".
metrica_databallpy_map = {
"pass": "pass",
"carry": "dribble",
"shot": "shot",
}


def load_metrica_event_data(
event_data_loc: str, metadata_loc: str
Expand Down Expand Up @@ -78,6 +84,7 @@ def load_metrica_event_data(
dt.timedelta(milliseconds=(x - first_frame) / frame_rate * 1000)
for x in event_data["td_frame"]
]

# The time zone is unknown because no real match data is available, so assume UTC
event_data["datetime"] = [
pd.to_datetime(start_time, utc=True) + x for x in rel_timedelta
Expand Down Expand Up @@ -124,7 +131,8 @@ def _get_event_data(event_data_loc: Union[str, io.StringIO]) -> pd.DataFrame:
result_dict = {
"event_id": [],
"type_id": [],
"event": [],
"metrica_event": [],
"databallpy_event": [],
"period_id": [],
"minutes": [],
"seconds": [],
Expand All @@ -141,18 +149,57 @@ def _get_event_data(event_data_loc: Union[str, io.StringIO]) -> pd.DataFrame:
"td_frame": [],
}

check_outcome_last_event = False

in_posession_events = ["pass", "carry", "recovery", "shot"]
out_of_posession_events = ["fault received", "ball out", "ball lost"]

for event in events_dict["data"]:
result_dict["event_id"].append(event["index"])
result_dict["type_id"].append(event["type"]["id"])
event_name = event["type"]["name"].lower()
result_dict["event"].append(event_name)
result_dict["metrica_event"].append(event_name)
result_dict["period_id"].append(event["period"])
result_dict["minutes"].append(_to_int((event["start"]["time"] // 60)))
result_dict["seconds"].append(_to_float(event["start"]["time"] % 60))
result_dict["player_id"].append(_to_int(event["from"]["id"][1:]))
result_dict["player_name"].append(event["from"]["name"])

# set outcome for pass or carry events
if check_outcome_last_event:
if (
event_name in out_of_posession_events
and result_dict["team_id"][-1] == event["team"]["id"]
) or (
event_name in in_posession_events
and result_dict["team_id"][-1] != event["team"]["id"]
):

result_dict["outcome"][-1] = 0
else:
result_dict["outcome"][-1] = 1
check_outcome_last_event = False

# set outcome for shot events
if event_name == "shot":
if isinstance(event["subtypes"], list):
outcome = 0
for sub in event["subtypes"]:
if sub["name"] == "GOAL":
outcome = 1
break
else:
subtypes = event["subtypes"]
outcome = 1 if subtypes["name"] == "GOAL" else 0
result_dict["outcome"].append(outcome)
else:
result_dict["outcome"].append(np.nan)

# Check if outcome needs to be set based on next event
if event_name in ["pass", "carry"]:
check_outcome_last_event = True

result_dict["team_id"].append(event["team"]["id"])
result_dict["outcome"].append(np.nan)
result_dict["start_x"].append(_to_float(event["start"]["x"]))
result_dict["start_y"].append(_to_float(event["start"]["y"]))
if event["to"] is not None:
Expand All @@ -165,5 +212,7 @@ def _get_event_data(event_data_loc: Union[str, io.StringIO]) -> pd.DataFrame:
result_dict["end_y"].append(_to_float(event["end"]["y"]))
result_dict["td_frame"].append(event["start"]["frame"])

result_dict["databallpy_event"] = [np.nan] * len(result_dict["event_id"])
events = pd.DataFrame(result_dict)
events["databallpy_event"] = events["metrica_event"].map(metrica_databallpy_map)
return events
24 changes: 19 additions & 5 deletions databallpy/load_data/event_data/opta.py
Original file line number Diff line number Diff line change
Expand Up @@ -86,6 +86,16 @@
77: "player off pitch",
}

# Lookup table from Opta event names (stored in the "opta_event" column) to
# the shared databallpy event names.
# It is applied with Series.map after the events are parsed, so Opta events
# that are not listed here (including unknown events stored as None) become
# NaN in the "databallpy_event" column.
# Every shot outcome ("miss", "post", "attempt saved", "goal") collapses into
# "shot", and an "offside pass" is still counted as a "pass".
opta_to_databallpy_map = {
"pass": "pass",
"take on": "dribble",
"offside pass": "pass",
"miss": "shot",
"post": "shot",
"attempt saved": "shot",
"goal": "shot",
}


def load_opta_event_data(
f7_loc: str, f24_loc: str, pitch_dimensions: list = [106.0, 68.0]
Expand Down Expand Up @@ -323,14 +333,15 @@ def _load_event_data(f24_loc: str, country: str) -> pd.DataFrame:
pd.DataFrame: all events of the match in a pd dataframe
"""

file = open(f24_loc, "r")
lines = file.read()
with open(f24_loc, "r") as file:
lines = file.read()
soup = BeautifulSoup(lines, "xml")

result_dict = {
"event_id": [],
"type_id": [],
"event": [],
"opta_event": [],
"databallpy_event": [],
"period_id": [],
"minutes": [],
"seconds": [],
Expand Down Expand Up @@ -363,7 +374,7 @@ def _load_event_data(f24_loc: str, country: str) -> pd.DataFrame:
# Unknown event
event_name = None

result_dict["event"].append(event_name)
result_dict["opta_event"].append(event_name)
result_dict["period_id"].append(int(event.attrs["period_id"]))
result_dict["minutes"].append(int(event.attrs["min"]))
result_dict["seconds"].append(int(event.attrs["sec"]))
Expand All @@ -381,7 +392,10 @@ def _load_event_data(f24_loc: str, country: str) -> pd.DataFrame:
pd.to_datetime(event.attrs["timestamp"], utc=True)
)

file.close()
result_dict["databallpy_event"] = [np.nan] * len(result_dict["event_id"])
event_data = pd.DataFrame(result_dict)
event_data["databallpy_event"] = event_data["opta_event"].map(
opta_to_databallpy_map
)
event_data["datetime"] = utc_to_local_datetime(event_data["datetime"], country)
return event_data
2 changes: 1 addition & 1 deletion databallpy/match.py
Original file line number Diff line number Diff line change
Expand Up @@ -318,7 +318,7 @@ def check_inputs_match_object(match: Match):
if len(match.event_data) > 0:
for col in [
"event_id",
"event",
"databallpy_event",
"period_id",
"team_id",
"player_id",
Expand Down
Loading

0 comments on commit 084325d

Please sign in to comment.