Skip to content

Commit

Permalink
Added DataBallPy events to opta/tracab and metrica (#136)
Browse files Browse the repository at this point in the history
* Added databallpy_event to all event data providers (#115)

* Changed outcome to represent databallpy_events (#121)

* Created base event class (#122)

* Created initial ShotEvent class with information from opta (#123)

* Added first shot class

* Added ShotEvent class with initial event information of shots from Opta

* with added new files

* Added requested changes

* linters

* Added tracking data features to shot events, dribble class, individual player possessions, and changed the synchronise cost function (#124)

* Added first shot class

* Added ShotEvent class with initial event information of shots from Opta

* with added new files

* Added requested changes

* linters

* Added tracking data features to shots

* Added calculations to obtain individual player possession

* Added dribble event and changed synchronise cost function

* Added docstrings

* refactored angle

* linter

* Created pass class (#126)

* Changed base event class

* Added first shot class

* Added PassEvent class

* added tests

* fixed some bugs and add passes to match

---------

Co-authored-by: Daan <daaniogrob@gmail.com>

* I'm sorry this PR is too big (#133)

* I'm sorry this pr is too big

* add forgotten docstring

* linters

* Added databallpy_events to metrica (#134)

* Added databallpy_events to metrica

* added extra tests for metrica databallpy events

---------

Co-authored-by: Daan Grob <91313814+DaanGro@users.noreply.github.com>
Co-authored-by: Daan <daaniogrob@gmail.com>
  • Loading branch information
3 people authored Sep 20, 2023
1 parent fcc5899 commit 1384b41
Show file tree
Hide file tree
Showing 64 changed files with 8,246 additions and 936 deletions.
57 changes: 57 additions & 0 deletions databallpy/features/angle.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,57 @@
import numpy as np


def get_smallest_angle(a_vec: np.ndarray, b_vec: np.ndarray, angle_format="radian"):
    """
    Function to calculate the smallest angle between 2 2D vectors.

    :param a_vec: numpy array or list, first vector(s) of shape (2,) or (n, 2)
    :param b_vec: numpy array or list, second vector(s), same shape as a_vec
    :param angle_format: str, how to return the angle {"degree", "radian"}
    :returns: the smallest angle between every pair of vectors, between 0 and pi
        radians (0 and 180 degrees). A numpy array of shape (n,) is returned,
        or a single float when only one pair of vectors was passed.
    :raises TypeError: if a_vec or b_vec is neither a list nor a numpy array
    :raises ValueError: if the shapes of a_vec and b_vec differ, if the vectors
        are not 2D vectors, or if angle_format is not "degree" or "radian"
    """

    for input_list in (a_vec, b_vec):
        if not isinstance(input_list, (list, np.ndarray)):
            raise TypeError(f"Input must be a numpy array, not a {type(input_list)}")

    # astype always returns a new float64 array, so the caller's data is never
    # modified.
    a_vec = np.asarray(a_vec).astype("float64")
    b_vec = np.asarray(b_vec).astype("float64")

    if a_vec.shape != b_vec.shape:
        raise ValueError("a and b should have the same shape")
    if angle_format not in ["degree", "radian"]:
        raise ValueError(
            f"input 'format' must be 'degree' or 'radian', not '{angle_format}'."
        )

    if a_vec.ndim == 1:  # a single vector: treat it as one row
        a_vec = a_vec.reshape(1, -1)
        b_vec = b_vec.reshape(1, -1)

    # Both shapes are equal at this point, so checking a_vec is sufficient.
    # The ndim guard turns a 0-d input into a ValueError instead of letting
    # shape[1] fail with an IndexError.
    if a_vec.ndim < 2 or a_vec.shape[1] != 2:
        raise ValueError(
            f"a and b should have shape (n, 2), not {a_vec.shape} and {b_vec.shape}"
        )

    angle_a = np.arctan2(a_vec[:, 1], a_vec[:, 0])
    angle_b = np.arctan2(b_vec[:, 1], b_vec[:, 0])

    # arctan2 returns values in [-pi, pi], so the absolute difference can be up
    # to 2 * pi; the angle measured the other way around may be the smaller one.
    difference = np.abs(angle_a - angle_b)
    smallest_angle_radians = np.minimum(difference, 2 * np.pi - difference)
    if len(smallest_angle_radians) == 1:
        smallest_angle_radians = smallest_angle_radians[0]

    if angle_format == "radian":
        return smallest_angle_radians
    return np.rad2deg(smallest_angle_radians)
121 changes: 121 additions & 0 deletions databallpy/features/differentiate.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,121 @@
import numpy as np
import pandas as pd

from databallpy.utils.filters import filter_data
from databallpy.utils.utils import MISSING_INT


def get_velocity(
    df: pd.DataFrame,
    input_columns: list,
    framerate: float,
    filter_type: str = None,
    window: int = 7,
    poly_order: int = 2,
) -> pd.DataFrame:
    """Function that adds velocity columns based on the position columns

    For every entry `col` in input_columns the columns f"{col}_x" and
    f"{col}_y" are differentiated by `_differentiate`, which adds the columns
    f"{col}_vx", f"{col}_vy" and f"{col}_velocity".

    Args:
        df (pd.DataFrame): tracking data
        input_columns (list): columns for which velocity should be calculated
        framerate (float): framerate of the tracking data
        filter_type (str, optional): filter type to use, one of
            {"moving_average", "savitzky_golay", None}. Defaults to None.
        window (int, optional): window size for the filter. Defaults to 7.
        poly_order (int, optional): polynomial order for the filter. Defaults to 2.

    Raises:
        ValueError: if filter_type is not one of the supported values.

    Returns:
        pd.DataFrame: tracking data with the added velocity columns
    """

    if filter_type not in ["moving_average", "savitzky_golay", None]:
        # Both fragments are joined into one message; the second one must be an
        # f-string, otherwise the literal text "{filter_type}" is shown.
        raise ValueError(
            "filter_type should be one of: 'moving_average', "
            f"'savitzky_golay', None, got: {filter_type}"
        )

    return _differentiate(
        df,
        new_name="velocity",
        metric="",
        frame_rate=framerate,
        filter_type=filter_type,
        window=window,
        poly_order=poly_order,
        column_ids=input_columns,
    )


def _differentiate(
    df: pd.DataFrame,
    *,
    new_name: str,
    metric: str = "",
    frame_rate: int = 25,
    filter_type: str = "savitzky_golay",
    window: int = 7,
    max_val: int = MISSING_INT,
    poly_order: int = 2,
    column_ids: list = None,
):
    """
    Function to differentiate the metric in x and y direction and update the df with the
    differentiated values in x and y direction and the absolute magnitude.

    :param df: pandas df with position data in x and y direction of players and ball
    :param new_name: str, name of the magnitude, first letter will be used for the x and
        y directions: f"{player}_vx" and f"{player}_velocity" if new_name = "velocity"
    :param metric: str, over what metric to differentiate the value on, note that
        f"{player}_{metric}x" and f"{player}_{metric}y" should exist.
    :param frame_rate: int, sample frequency of the data
    :param filter_type: str, which filter to use:
        {"moving_average", "savitzky_golay", None}
    :param window: int, the window of the filter
    :param max_val: float, max value of the differentiated value, for instance, a speed
        of higher than 12 m/s is very unlikely. When left at MISSING_INT no
        clipping is applied.
    :param poly_order: int, polynomial for the Savitzky-Golay filter
    :param column_ids: list, the column prefixes (e.g. players, ball) to
        differentiate. When None, every column of df that ends with
        f"_{metric}x" is used.
    :returns: pandas df with added differentiated values
    """

    suffix_x = f"_{metric}x"
    suffix_y = f"_{metric}y"

    if column_ids is None:
        # Match on the suffix only: a substring match would also pick up
        # columns that merely contain the pattern and then strip the wrong
        # characters from their name.
        column_ids = [
            col[: -len(suffix_x)]
            for col in df.columns
            if isinstance(col, str) and col.endswith(suffix_x)
        ]

    dt = 1.0 / frame_rate

    res_dict = {}
    for player in column_ids:
        # .diff() returns a new Series, so df itself is never modified below.
        diff_x = df[player + suffix_x].diff() / dt
        diff_y = df[player + suffix_y].diff() / dt

        # remove outliers
        if max_val != MISSING_INT:
            # The mask is based on the raw magnitude and computed once, before
            # any component is clipped. Only components that exceed max_val on
            # their own are limited to +/- max_val.
            too_high = np.linalg.norm([diff_x, diff_y], axis=0) > max_val
            diff_x[too_high & (diff_x > max_val)] = max_val
            diff_x[too_high & (diff_x < -max_val)] = -max_val
            diff_y[too_high & (diff_y > max_val)] = max_val
            diff_y[too_high & (diff_y < -max_val)] = -max_val

        # smoothing the signal
        if filter_type is not None:
            diff_x = filter_data(
                diff_x.values,
                filter_type=filter_type,
                window_length=window,
                polyorder=poly_order,
            )
            diff_y = filter_data(
                diff_y.values,
                filter_type=filter_type,
                window_length=window,
                polyorder=poly_order,
            )

        res_dict[f"{player}_{new_name[0]}x"] = np.array(diff_x)
        res_dict[f"{player}_{new_name[0]}y"] = np.array(diff_y)
        res_dict[f"{player}_{new_name}"] = np.linalg.norm([diff_x, diff_y], axis=0)

    # Reuse the index of df: concat aligns on the index, so a default
    # RangeIndex would misalign the new columns whenever df is not indexed
    # 0..n-1 (e.g. after slicing or when indexed by frame).
    differentiated = pd.DataFrame(res_dict, index=df.index)
    return pd.concat([df, differentiated], axis=1)
Loading

0 comments on commit 1384b41

Please sign in to comment.