From 8f6eb9abbf2d86f1a6bf502217ccea6be08426dc Mon Sep 17 00:00:00 2001 From: Robert Clark Date: Sat, 25 Apr 2020 14:02:49 -0500 Subject: [PATCH] Enable NCAAF Team to be directly accessible Instead of requiring users to go through the Teams class to get a specific team, the NCAAF modules now enable a specific team to be directly queried by using the Team class. This reduces computational complexity by removing the need to instantiate every team while also making it more intuitive for users. Signed-Off-By: Robert Clark --- docs/ncaaf.rst | 11 ++ sportsreference/ncaaf/ncaaf_utils.py | 91 ++++++++++++ sportsreference/ncaaf/teams.py | 134 ++++++++---------- tests/integration/roster/test_ncaaf_roster.py | 2 +- .../teams/test_ncaaf_integration.py | 9 +- 5 files changed, 168 insertions(+), 79 deletions(-) create mode 100644 sportsreference/ncaaf/ncaaf_utils.py diff --git a/docs/ncaaf.rst b/docs/ncaaf.rst index 796bf867..dde82fdc 100644 --- a/docs/ncaaf.rst +++ b/docs/ncaaf.rst @@ -247,6 +247,17 @@ number of pass yards, and much more. print(team.name) # Prints the team's name print(team.pass_yards) # Prints the team's total passing yards +A team can also be requested directly by calling the ``Team`` class, which +returns a ``Team`` instance identical to the one the loop above yields for +that team. To request a specific team, pass the team's abbreviation when +calling the ``Team`` class. + +.. code-block:: python + + from sportsreference.ncaaf.teams import Team + + purdue = Team('PURDUE') + Each Team instance contains a link to the ``Schedule`` class which enables easy iteration over all games for a particular team. A Pandas DataFrame can also be queried to easily grab all stats for all games. 
diff --git a/sportsreference/ncaaf/ncaaf_utils.py b/sportsreference/ncaaf/ncaaf_utils.py new file mode 100644 index 00000000..48de9075 --- /dev/null +++ b/sportsreference/ncaaf/ncaaf_utils.py @@ -0,0 +1,91 @@ +from pyquery import PyQuery as pq +from sportsreference import utils +from .constants import (DEFENSIVE_STATS_URL, + OFFENSIVE_STATS_URL, + PARSING_SCHEME, + SEASON_PAGE_URL) + + +def _add_stats_data(teams_list, team_data_dict): + """ + Add a team's stats row to a dictionary. + + Pass table contents and a stats dictionary of all teams to accumulate all + stats for each team in a single variable. + + Parameters + ---------- + teams_list : generator + A generator of all row items in a given table. + team_data_dict : {str: {'data': str}} dictionary + A dictionary where every key is the team's abbreviation and every value + is another dictionary with a 'data' key which contains the string + version of the row data for the matched team. + + Returns + ------- + dictionary + An updated version of the team_data_dict with the passed table row + information included. + """ + if not teams_list: + return team_data_dict + for team_data in teams_list: + # Skip the sub-header rows + if 'class="over_header thead"' in str(team_data) or \ + 'class="thead"' in str(team_data): + continue + abbr = utils._parse_field(PARSING_SCHEME, team_data, 'abbreviation') + try: + team_data_dict[abbr]['data'] += team_data + except KeyError: + team_data_dict[abbr] = {'data': team_data} + return team_data_dict + + +def _retrieve_all_teams(year): + """ + Pull the raw stats rows for all teams in the given season. + + For a given season, parses the NCAAF standings, offense, and defense + tables and accumulates every row belonging to a team under that team's + abbreviation in a single dictionary. No Team instances are created + here; callers such as Team and Teams build them from the returned + data. 
+ + Note that this function is called automatically by Teams, and by Team when + a team name is passed, and does not need to be called manually. + + Parameters + ---------- + year : string + The requested year to pull stats from. + + Returns + ------- + tuple + Returns a ``tuple`` of the team_data_dict and year which represent all + stats for all teams, and the given year that should be used to pull + stats from, respectively. + """ + team_data_dict = {} + + if not year: + year = utils._find_year_for_season('ncaaf') + # If stats for the requested season do not exist yet (as is the case + # right before a new season begins), attempt to pull the previous + # year's stats. If it exists, use the previous year instead. + if not utils._url_exists(SEASON_PAGE_URL % year) and \ + utils._url_exists(SEASON_PAGE_URL % str(int(year) - 1)): + year = str(int(year) - 1) + doc = pq(SEASON_PAGE_URL % year) + teams_list = utils._get_stats_table(doc, 'div#div_standings') + offense_doc = pq(OFFENSIVE_STATS_URL % year) + offense_list = utils._get_stats_table(offense_doc, 'table#offense') + defense_doc = pq(DEFENSIVE_STATS_URL % year) + defense_list = utils._get_stats_table(defense_doc, 'table#defense') + if not teams_list and not offense_list and not defense_list: + utils._no_data_found() + for stats_list in [teams_list, offense_list, defense_list]: + team_data_dict = _add_stats_data(stats_list, team_data_dict) + return team_data_dict, year diff --git a/sportsreference/ncaaf/teams.py b/sportsreference/ncaaf/teams.py index 707cbe8a..348937af 100644 --- a/sportsreference/ncaaf/teams.py +++ b/sportsreference/ncaaf/teams.py @@ -1,11 +1,10 @@ import pandas as pd import re -from .constants import (PARSING_SCHEME, OFFENSIVE_STATS_URL, - DEFENSIVE_STATS_URL, SEASON_PAGE_URL) -from pyquery import PyQuery as pq +from .constants import PARSING_SCHEME from ..decorators import float_property_decorator, int_property_decorator from .. 
import utils from .conferences import Conferences +from .ncaaf_utils import _retrieve_all_teams from .roster import Roster from .schedule import Schedule @@ -18,9 +17,14 @@ class Team: and short names, and sets them as properties which can be directly read from for easy reference. + If calling directly, the team's abbreviation needs to be passed. Otherwise, + the Teams class will handle all arguments. + Parameters ---------- - team_data : string + team_name : string (optional) + The name of the team to pull if being called directly. + team_data : string (optional) A string containing all of the rows of stats for a given team. If multiple tables are being referenced, this will be comprised of multiple rows in a single string. @@ -29,7 +33,8 @@ class Team: year : string (optional) The requested year to pull stats from. """ - def __init__(self, team_data, team_conference=None, year=None): + def __init__(self, team_name=None, team_data=None, team_conference=None, + year=None): self._team_conference = team_conference self._year = year self._abbreviation = None @@ -88,8 +93,39 @@ def __init__(self, team_data, team_conference=None, year=None): self._opponents_penalties = None self._opponents_yards_from_penalties = None + if team_name: + team_data = self._retrieve_team_data(year, team_name) + conferences_dict = Conferences(year).team_conference + self._team_conference = conferences_dict[team_name.lower()] self._parse_team_data(team_data) + def _retrieve_team_data(self, year, team_name): + """ + Pull all stats for a specific team. + + By first retrieving a dictionary containing all information for all + teams in the league, only select the desired team for a specific year + and return only their relevant results. + + Parameters + ---------- + year : string + A ``string`` of the requested year to pull stats from. + team_name : string + A ``string`` of the team's abbreviation, such as 'PURDUE' for the + Purdue Boilermakers. 
+ + Returns + ------- + PyQuery object + Returns a PyQuery object containing all stats and information for + the specified team. + """ + team_data_dict, year = _retrieve_all_teams(year) + self._year = year + team_data = team_data_dict[team_name]['data'] + return team_data + def _parse_team_data(self, team_data): """ Parses a value for every attribute. @@ -669,7 +705,8 @@ def __init__(self, year=None): self._teams = [] self._conferences_dict = Conferences(year, True).team_conference - self._retrieve_all_teams(year) + team_data_dict, year = _retrieve_all_teams(year) + self._instantiate_teams(team_data_dict, year) def __getitem__(self, abbreviation): """ @@ -731,89 +768,32 @@ def __len__(self): """Returns the number of NCAAF teams for a given season.""" return len(self.__repr__()) - def _add_stats_data(self, teams_list, team_data_dict): + def _instantiate_teams(self, team_data_dict, year): """ - Add a team's stats row to a dictionary. + Create a Team instance for all teams. - Pass table contents and a stats dictionary of all teams to accumulate - all stats for each team in a single variable. - - Parameters - ---------- - teams_list : generator - A generator of all row items in a given table. - team_data_dict : {str: {'data': str}} dictionary - A dictionary where every key is the team's abbreviation and every - value is another dictionary with a 'data' key which contains the - string version of the row data for the matched team. - - Returns - ------- - dictionary - An updated version of the team_data_dict with the passed table row - information included. 
- """ - for team_data in teams_list: - # Skip the sub-header rows - if 'class="over_header thead"' in str(team_data) or \ - 'class="thead"' in str(team_data): - continue - abbr = utils._parse_field(PARSING_SCHEME, - team_data, - 'abbreviation') - try: - team_data_dict[abbr]['data'] += team_data - except KeyError: - team_data_dict[abbr] = {'data': team_data} - return team_data_dict - - def _retrieve_all_teams(self, year): - """ - Find and create Team instances for all teams in the given season. - - For a given season, parses the specified NCAAF stats table and finds - all requested stats. Each team then has a Team instance created which - includes all requested stats and a few identifiers, such as the team's - name and abbreviation. All of the individual Team instances are added - to a list. - - Note that this method is called directly once Teams is invoked and does - not need to be called manually. + Once all team information has been pulled from the various webpages, + create a Team instance for each team and append it to a larger list of + team instances for later use. Parameters ---------- + team_data_dict : dictionary + A ``dictionary`` containing all stats information in HTML format as + well as team rankings, indexed by team abbreviation. year : string - The requested year to pull stats from. - """ - team_data_dict = {} - - if not year: - year = utils._find_year_for_season('ncaaf') - # If stats for the requested season do not exist yet (as is the - # case right before a new season begins), attempt to pull the - # previous year's stats. If it exists, use the previous year - # instead. 
- if not utils._url_exists(SEASON_PAGE_URL % year) and \ - utils._url_exists(SEASON_PAGE_URL % str(int(year) - 1)): - year = str(int(year) - 1) - doc = pq(SEASON_PAGE_URL % year) - teams_list = utils._get_stats_table(doc, 'div#div_standings') - offense_doc = pq(OFFENSIVE_STATS_URL % year) - offense_list = utils._get_stats_table(offense_doc, 'table#offense') - defense_doc = pq(DEFENSIVE_STATS_URL % year) - defense_list = utils._get_stats_table(defense_doc, 'table#defense') - if not teams_list and not offense_list and not defense_list: - utils._no_data_found() + A ``string`` of the requested year to pull stats from. + """ + if not team_data_dict: return - for stats_list in [teams_list, offense_list, defense_list]: - team_data_dict = self._add_stats_data(stats_list, team_data_dict) - for team_name, team_data in team_data_dict.items(): if team_name.lower() not in self._conferences_dict: conference = None else: conference = self._conferences_dict[team_name.lower()] - team = Team(team_data['data'], conference, year) + team = Team(team_data=team_data['data'], + team_conference=conference, + year=year) self._teams.append(team) @property diff --git a/tests/integration/roster/test_ncaaf_roster.py b/tests/integration/roster/test_ncaaf_roster.py index 0c363c22..d2171328 100644 --- a/tests/integration/roster/test_ncaaf_roster.py +++ b/tests/integration/roster/test_ncaaf_roster.py @@ -515,7 +515,7 @@ def test_roster_from_team_class(self, *args, **kwargs): flexmock(Team) \ .should_receive('_parse_team_data') \ .and_return(None) - team = Team(None, 1, '2018') + team = Team(team_data=None, team_conference=None, year='2018') mock_abbreviation = mock.PropertyMock(return_value='PURDUE') type(team)._abbreviation = mock_abbreviation diff --git a/tests/integration/teams/test_ncaaf_integration.py b/tests/integration/teams/test_ncaaf_integration.py index 957416dd..91e746dc 100644 --- a/tests/integration/teams/test_ncaaf_integration.py +++ b/tests/integration/teams/test_ncaaf_integration.py 
@@ -8,7 +8,7 @@ from sportsreference.ncaaf.constants import (OFFENSIVE_STATS_URL, DEFENSIVE_STATS_URL, SEASON_PAGE_URL) -from sportsreference.ncaaf.teams import Teams +from sportsreference.ncaaf.teams import Team, Teams MONTH = 9 @@ -358,6 +358,13 @@ def test_ncaaf_empty_page_returns_no_teams(self): assert len(teams) == 0 + @mock.patch('requests.get', side_effect=mock_pyquery) + def test_pulling_team_directly(self, *args, **kwargs): + purdue = Team('PURDUE') + + for attribute, value in self.results.items(): + assert getattr(purdue, attribute) == value + class TestNCAAFIntegrationInvalidYear: @mock.patch('requests.get', side_effect=mock_pyquery)