Skip to content

Commit

Permalink
Merge pull request #26 from nflverse/clean_home_away
Browse files Browse the repository at this point in the history
clean_homeaway
  • Loading branch information
john-b-edwards authored Sep 20, 2024
2 parents 7ba76d6 + e0a38b6 commit cb68e6e
Show file tree
Hide file tree
Showing 7 changed files with 61 additions and 4 deletions.
2 changes: 1 addition & 1 deletion Project.toml
Original file line number Diff line number Diff line change
@@ -1,7 +1,7 @@
name = "NFLData"
uuid = "38e18452-fdda-4cae-b91e-088906595f57"
authors = ["John Edwards"]
version = "1.0.1"
version = "1.1.1"

[deps]
Artifacts = "56f22d72-fd6d-98f1-02f0-08ddc0907c33"
Expand Down
2 changes: 1 addition & 1 deletion docs/Manifest.toml
Original file line number Diff line number Diff line change
Expand Up @@ -2,7 +2,7 @@

julia_version = "1.10.5"
manifest_format = "2.0"
project_hash = "f97fbff8b171474864bc6f12303a9122440eaed6"
project_hash = "cddd238c9b58118c59f6d32cb36945f8eb1002a3"

[[deps.ANSIColoredPrinters]]
git-tree-sha1 = "574baf8110975760d391c710b6341da1afa48d8c"
Expand Down
1 change: 1 addition & 0 deletions docs/Project.toml
Original file line number Diff line number Diff line change
@@ -1,3 +1,4 @@
[deps]
DataFrames = "a93c6f00-e57d-5684-b7b6-d8193f3e46c0"
Documenter = "e30172f5-a6a5-5a46-863b-614d45cd2de4"
NFLData = "38e18452-fdda-4cae-b91e-088906595f57"
2 changes: 1 addition & 1 deletion docs/make.jl
Original file line number Diff line number Diff line change
@@ -1,4 +1,4 @@
using Documenter, NFLData
using Documenter, NFLData, DataFrames

makedocs(
sitename="NFLData.jl",
Expand Down
4 changes: 4 additions & 0 deletions docs/src/helpers.md
Original file line number Diff line number Diff line change
Expand Up @@ -6,6 +6,10 @@ Some helper functions are made available in `NFLData.jl` for use in loading and
clean_player_names(player_name::String; lowercase::Bool = false, convert_lastfirst::Bool = true, use_name_database::Bool = true, convert_to_ascii::Bool = true)
```

```@docs
clean_homeaway(dataframe::AbstractDataFrame;invert=missing)
```

```@docs
most_recent_season(roster::Bool = false)
```
Expand Down
1 change: 1 addition & 0 deletions src/NFLData.jl
Original file line number Diff line number Diff line change
Expand Up @@ -51,5 +51,6 @@ export get_current_week
export nflverse_game_id
export clean_team_abbrs
export clean_player_names
export clean_homeaway

end
53 changes: 52 additions & 1 deletion src/helpers.jl
Original file line number Diff line number Diff line change
Expand Up @@ -10,6 +10,7 @@ export compute_labor_day
export nflverse_game_id
export clean_team_abbrs
export clean_player_names
export clean_homeaway

function __init__()
global team_abbr_mapping = CSV.read(joinpath(artifact"data","team_abbr_mapping.csv"),DataFrame)
Expand Down Expand Up @@ -101,7 +102,6 @@ julia> clean_player_names("Gordon Jr., Melvin", convert_lastfirst = true)
"Melvin Gordon"
```
"""

function clean_player_names(player_name::String; lowercase::Bool = false, convert_lastfirst::Bool = true, use_name_database::Bool = true, convert_to_ascii::Bool = true)

player_name = strip(replace(player_name,r"\s+"=>" "))
Expand Down Expand Up @@ -158,4 +158,55 @@ function nflverse_game_id(season::Number,week::Number,away::String,home::String)
return ids
end

"""
    clean_homeaway(dataframe::AbstractDataFrame; invert = missing)

Pivot a one-record-per-game `dataframe` into a data frame with two records per game, one
from each team's point of view. All home-team records come first, followed by all
away-team records. The input `dataframe` is not modified.

Columns are renamed as follows (shown for the home-team records; the roles of "home" and
"away" are swapped for the away-team records):

- a `home_` prefix becomes `team_` and an `away_` prefix becomes `opponent_`;
- a `_home` suffix is dropped and an `_away` suffix becomes `_opponent`;
- any resulting `team_team` / `opponent_team` is shortened to `team` / `opponent`.

The result always has a `location` column holding `"home"` or `"away"`. If a `location`
column is already present after renaming, values equal to `"neutral"` (case-insensitive)
become `"neutral"`; every other value, including `missing`, is replaced by the record's
side.

# Keywords
- `invert`: a column name, or a collection of column names (strings or symbols), whose
  values are multiplied by -1 in the away-team records (such as margin of victory, which
  may be +7 for a home team and -7 for an away team in a given game). Names refer to the
  columns *after* renaming. Defaults to `missing`, meaning nothing is inverted.

# Throws
- `DomainError`: if `invert` names a column that does not exist after renaming.

# Examples
```julia
games = DataFrame(
    home_team = ["KC"], away_team = ["BAL"],
    home_score = [27], away_score = [20],
    result = [7],
)
# Two rows: (team = "KC", opponent = "BAL", result = 7, location = "home") and
#           (team = "BAL", opponent = "KC", result = -7, location = "away")
clean_homeaway(games; invert = ["result"])
```
"""
function clean_homeaway(dataframe::AbstractDataFrame; invert = missing)
    home = _homeaway_side(dataframe, "home", "away")
    away = _homeaway_side(dataframe, "away", "home")

    if !ismissing(invert)
        cols = _homeaway_colnames(invert)
        unknown = setdiff(cols, names(away))
        if !isempty(unknown)
            throw(DomainError(unknown, "Invalid cols passed to `invert`!"))
        end
        for col in cols
            # Replace the column rather than assigning in place, so that element types
            # that cannot hold negative values (e.g. `Bool`, unsigned) are promoted.
            away[!, col] = -1 .* away[!, col]
        end
    end

    return vcat(home, away)
end

# Build the records seen from `side` ("home" or "away"), with `other` as the opponent.
function _homeaway_side(
    dataframe::AbstractDataFrame, side::AbstractString, other::AbstractString
)
    # `copy` gives an independent `DataFrame` (also when `dataframe` is a view), so the
    # renaming and column replacement below never touch the caller's data.
    df = copy(dataframe)

    # Applied to every column name in this order.
    substitutions = (
        Regex("^$(side)_") => "team_",
        Regex("^$(other)_") => "opponent_",
        Regex("_$(side)\$") => "",
        Regex("_$(other)\$") => "_opponent",
        r"team_team" => "team",
        r"opponent_team" => "opponent",
    )
    newnames = String[foldl(replace, substitutions; init = name) for name in names(df)]
    rename!(df, newnames)

    if "location" in names(df)
        df[!, :location] = String[_homeaway_location(loc, side) for loc in df.location]
    else
        df[!, :location] .= side
    end

    return df
end

# Map one `location` value to "neutral" or `side`; `missing` falls back to `side`.
function _homeaway_location(location, side::AbstractString)
    if !ismissing(location) && uppercase(location) == "NEUTRAL"
        return "neutral"
    end
    return String(side)
end

# Normalise `invert` (a single name or a collection of names) to a vector of strings.
_homeaway_colnames(col::Union{AbstractString,Symbol}) = String[string(col)]
_homeaway_colnames(cols) = String[string(col) for col in cols]

end

0 comments on commit cb68e6e

Please sign in to comment.