Skip to content

Analysis of over 400,000 Manitoba master angler records

Notifications You must be signed in to change notification settings

cookm346/master_angler_analysis

Folders and files

NameName
Last commit message
Last commit date

Latest commit

 

History

1 Commit
 
 
 
 
 
 
 
 
 
 

Repository files navigation

Manitoba master angler analysis

library(tidyverse)
library(lubridate)
library(scales)
library(tidytext)
ma_data <- read_csv("data/master_angler_data.csv")

ma_data <- ma_data %>%
  mutate(year = year(date),
         month = month(date, label = TRUE),
         address = str_to_title(address))

This analysis includes 414,635 Manitoba master angler records that I scraped from the official website.


How many trophy fish has the average master angler caught?

ma_counts <- ma_data %>% 
  group_by(first_name, last_name, address, inquirer_id) %>% 
  summarize(n_submissions = n()) %>%
  ungroup() 

ma_counts %>%
  filter(n_submissions < 25) %>%
  ggplot(aes(n_submissions)) + 
  geom_histogram(binwidth = 1) +
  labs(x = "Number of master angler submissions", 
       y = "Number of anglers",
       title = "Most master anglers have one submission",
       subtitle = "Note: Only anglers with 25 or less submissions are shown") +
  scale_x_continuous(label = comma_format()) +
  scale_y_continuous(label = comma_format())

ma_counts <- ma_counts %>%
  summarize(submissions_1 = mean(n_submissions == 1),
            submissions_5 = mean(n_submissions >= 5),
            submissions_10 = mean(n_submissions >= 10),
            submissions_100 = mean(n_submissions >= 100))

Over 50% (53.31%) of master anglers have only one trophy catch, 17.55% have 5 or more trophy fish, 6.4% have 10 or more trophy fish, and only 0.21% have more than 100!


How many trophy fish have been caught over time?

ma_data %>%
  filter(date >= "1960-01-01") %>%
  count(year = year(date)) %>%
  ggplot(aes(year, n)) + 
  geom_line() +
  scale_x_continuous(breaks = seq(1960, 2021, 5)) +
  labs(x = NULL,
       y = "Number of trophy fish caught",
       title = "The 2010s have seen the most trophy fish catches",
       subtitle = "Note: 2021 has incomplete data") +
  scale_y_continuous(label = comma_format())


How has the number of trophy fish caught changed over time (by species)?

ma_data %>%
  filter(species != "Kokanee") %>%
  filter(date >= "1960-01-01") %>%
  count(year = year(date), species) %>%
  ggplot(aes(year, n)) + 
  geom_line(size = 1.2) +
  scale_x_continuous(breaks = seq(1960, 2021, 20)) +
  labs(x = NULL,
       y = "Number of trophy fish caught",
       title = "The trends of master angler catches differs drasticaly between species",
       subtitle = "Note: 2021 has incomplete data") +
  facet_wrap(~species) +
  scale_y_continuous(label = comma_format())


What trophy fish species have been caught the most?

ma_data %>%
  count(species) %>% 
  mutate(species = fct_reorder(species, n)) %>%
  ggplot(aes(n, species)) +
  geom_col() +
  labs(x = "Number of trophy fish caught",
       y = "Species",
       title = "Northern Pike, Walleye, and Catfish are the most frequently caught records") +
  scale_x_continuous(label = comma_format())


What bodies of waters produce the most trophy fish?

ma_data %>%
  mutate(location = fct_lump(location, 50)) %>%
  filter(location != "Other") %>%
  count(location) %>%
  mutate(location = fct_reorder(location, n)) %>%
  ggplot(aes(n, location)) + 
  geom_col() +
  labs(x = "Number of trophy fish caught",
       y = NULL,
       title = "The Red River is by far the biggest master angler producer in Manitoba",
       subtitle = "Top 50 locations shown") +
  scale_x_continuous(labels = comma_format())


What species are the top trophy fish waters producing?

ma_data %>%
  mutate(location = fct_lump(location, 9)) %>%
  filter(location != "Other") %>%
  count(location, species) %>%
  group_by(location) %>%
  slice_max(n = 5, n) %>%
  ungroup() %>%
  mutate(species = reorder_within(species, n, location)) %>% 
  ggplot(aes(n, species)) + 
  geom_col() +
  facet_wrap(~location, scales = "free") +
  scale_y_reordered() +
  scale_x_continuous(labels = comma_format()) +
  labs(x = "Number of trophy fish caught", 
       y = "Top 5 species", 
       title = "The top 5 species for the biggest trophy fish waters in Manitoba")


Which waters produce the largest variety of trophy fish species?

ma_data %>%
  group_by(location) %>%
  summarize(n_unique_species = n_distinct(species)) %>%
  slice_max(n = 40, n_unique_species) %>%
  ungroup() %>%
  mutate(location = fct_reorder(location, n_unique_species)) %>%
  ggplot(aes(n_unique_species, location)) + 
  geom_col() +
  labs(x = "Number of unique species",
       y = NULL,
       title = "The Winnipeg River, Red River, and Lake Winnipeg have 20+ varieties of master angler species",
       subtitle = "Top 40 locations shown")


How do trophy fish species vary in size?

ma_data %>%
  filter(length_in > 0) %>%
  mutate(species = fct_reorder(species, length_in)) %>%
  ggplot(aes(length_in, species)) +
  geom_boxplot() +
  labs(x = "Length in inches",
       y = NULL,
       title = "There are a lot of differences in size within and between species")


How often are trophy fish released?

ma_data %>%
  count(released) %>%
  mutate(released = ifelse(released, "Released", "Not released")) %>%
  ggplot(aes(n, released)) +
  geom_col() +
  labs(x = "Count",
       y = NULL, 
       title = "Most trophy fish are released") +
  scale_x_continuous(label = comma_format())


How often are trophy fish released for each species?

ma_data %>%
  count(species, released)  %>%
  mutate(released = ifelse(released, "Released", "Not released")) %>%
  mutate(species = fct_reorder(species, n)) %>%
  ggplot(aes(n, species, fill = released)) +
  geom_col() +
  labs(x = "Number of trophy fish caught",
       y = NULL, 
       title = "Catch and release varies greatly between species") +
  scale_x_continuous(label = comma_format())


Which species are released the most/least?

ma_data %>%
  group_by(species) %>%
  summarize(release_prop = sum(released) / n()) %>%
  ungroup() %>%
  mutate(species = fct_reorder(species, release_prop)) %>%
  ggplot(aes(release_prop, species)) + 
  geom_col() +
  labs(x = "Percent of fish released",
       y = NULL,
       title = "Catch and release varies greatly between species") +
  scale_x_continuous(labels = percent_format())


How often are trophy fish catches photographed?

ma_data %>%
  count(has_photo) %>%
  mutate(has_photo = ifelse(has_photo, "Photographed", "Not photographed")) %>%
  ggplot(aes(n, has_photo)) +
  geom_col() +
  labs(x = "Number of trophy fish caught",
       y = NULL, 
       title = "Most trophy fish are camera shy") +
  scale_x_continuous(label = comma_format())


Whch trophy fish species are photographed the most/least?

ma_data %>%
  group_by(species) %>%
  summarize(photo_prop = sum(has_photo) / n()) %>%
  ungroup() %>%
  mutate(species = fct_reorder(species, photo_prop)) %>%
  ggplot(aes(photo_prop, species)) + 
  geom_col() +
  labs(x = "Percent of fish photographed",
       y = NULL,
       title = "Including a photo submission varies greatly betwen species") +
  scale_x_continuous(labels = percent_format())


How many trophy fish are caught be woman/men?

library(babynames)

names <- babynames %>% 
  filter(prop > 0.001) %>%
  distinct(name, sex) %>% 
  add_count(name) %>%
  mutate(sex = ifelse(n == 1, sex, "Androgynous")) %>% 
  distinct(name, sex) %>%
  rename(first_name = name)

ma_data <- ma_data %>%
  left_join(names) %>% 
  mutate(sex = ifelse(is.na(sex) | sex == "Androgynous", "Unknown", sex)) %>%
  mutate(sex = fct_recode(sex, "Male" = "M", "Female" = "F"))

ma_data %>%
  count(sex) %>%
  mutate(prop = n / sum(n)) %>%
  mutate(sex = fct_reorder(sex, prop)) %>%
  mutate(sex = fct_relevel(sex, "Unknown")) %>%
  ggplot(aes(prop, sex)) + 
  geom_col() +
  labs(x = "Percent of master angler submissions",
       y = NULL,
       title = "Most master anglers are men") +
  scale_x_continuous(labels = percent_format())


How has the number of female anglers changed over time?

ma_data %>%
  filter(sex %in% c("Male", "Female")) %>%
  group_by(year = year(date)) %>%
  summarize(prop_female = sum(sex == "Female") / n()) %>%
  ungroup() %>% 
  ggplot(aes(year, prop_female)) + 
  geom_line() +
  scale_y_continuous(label = percent_format()) +
  scale_x_continuous(breaks = seq(1960, 2021, 10)) +
  labs(x = NULL,
       y = "Percent of female master anglers",
       title = "The number of female master anglers has increased over time") +
  expand_limits(y = .15)


What months are most trophy fish caught?

ma_data %>%
  mutate(month = month(date, label = TRUE)) %>% 
  count(month) %>%
  ggplot(aes(month, n)) + 
  geom_col() +
  labs(x = NULL, 
       y = "Number of trophy fish caught",
       title = "Most master anglers are caught in June") +
  scale_y_continuous(label = comma_format())


What days of the week are most trophy fish caught?

ma_data %>%
  mutate(weekday = wday(date, label = TRUE)) %>% 
  count(weekday) %>%
  ggplot(aes(weekday, n)) + 
  geom_col() +
  labs(x = NULL, 
       y = "Number of trophy fish caught",
       title = "Most master anglers are caught in on the weekends") +
  scale_y_continuous(label = comma_format())


What days of the week are most trophy fish caught?

ma_data %>%
  mutate(month = month(date, label = TRUE)) %>% 
  mutate(weekday = wday(date, label = TRUE)) %>% 
  count(month, weekday) %>%
  ggplot(aes(weekday, n)) + 
  geom_col() +
  labs(x = NULL, 
       y = "Number of trophy fish caught",
       title = "More master anglers are caught on the weekend",
       subtitle = "But this pattern is weaker during July and August") +
  scale_y_continuous(label = comma_format()) +
  facet_wrap(~month)


What combination of lakes and months produce the most trophy fish?

ma_data %>% 
  mutate(month = month(date, label = TRUE)) %>% 
  mutate(season = case_when(
    month %in% c("Jan", "Feb", "Mar") ~ "Winter",
    month %in% c("Apr", "May") ~ "Spring",
    month %in% c("Jun", "Jul", "Aug") ~ "Summer",
    month %in% c("Sep", "Oct", "Nov", "dec") ~ "Winter"
  )) %>%
  count(location, season, species, sort = TRUE) %>% 
  mutate(label = paste(location, "in", season, "for", species)) %>%
  mutate(label = fct_reorder(label, n)) %>% 
  slice_max(n, n = 25) %>% 
  ggplot(aes(n, label)) + 
  geom_col() +
  labs(x = "Number of trophy fish caught",
       y = NULL,
       title = "Top locations/seasons/species") +
  scale_x_continuous(labels = comma_format(), breaks = seq(0, 40000, 5000))


Where are most anglers from?

ma_data %>% 
  mutate(local = ifelse(address == "Manitoba", "Manitoban", "Out of province")) %>%
  count(local) %>%
  mutate(local = fct_reorder(local, n)) %>%
  ggplot(aes(n, local)) + 
  geom_col() +
  labs(x = "Number of master angler submissions", 
       y = NULL,
       title = "Most anglers submitting master anglers are Manitobans") +
  scale_x_continuous(label = comma_format())


Where are most out-of-province anglers from?

ma_data %>% 
  filter(address != "Manitoba") %>%
  mutate(address = str_to_title(address)) %>%
  mutate(address = fct_lump(address, 40)) %>% 
  count(address) %>% 
  mutate(address = fct_reorder(address, n)) %>% 
  ggplot(aes(n, address)) + 
  geom_col() +
  labs(x = "Number of master angler submissions",
       y = NULL,
       title = "Most out of province anglers are from Minnestoa, Illinois, and Wisconsin",
       subtitle = "Plot shows top 40 locations and an Other category") +
  scale_x_continuous(labels = comma_format())


How many trophy fish are caught ice fishing?

ma_data %>%
  mutate(method = ifelse(ice_fishing, "Ice fishing", "Open water")) %>%
  count(method) %>%
  ggplot(aes(n, method)) +
  geom_col() +
  labs(x = "Number of master angler submissions",
       y = NULL, 
       title = "Most master anglers are caught in open water") +
  scale_x_continuous(label = comma_format())


Which trophy fish species are most often caught ice fishing?

ma_data %>%
  filter(ice_fishing) %>%
  count(species) %>%
  mutate(species = fct_reorder(species, n)) %>%
  ggplot(aes(n, species)) + 
  geom_col() +
  labs(x = "Number of trophy fish caught",
       y = NULL,
       title = "Most master anglers caught through the ice are walleye, cisco, and perch") +
  scale_x_continuous(label = comma_format())


What combination of lakes and months produce the most trophy fish ice fishing?

ma_data %>% 
  filter(ice_fishing) %>% 
  mutate(month = month(date, label = TRUE, abbr = FALSE)) %>% 
  count(location, month, species, sort = TRUE) %>% 
  mutate(label = paste(location, "in", month, "for", species)) %>%
  mutate(label = fct_reorder(label, n)) %>% 
  slice_max(n, n = 25) %>% 
  ggplot(aes(n, label)) + 
  geom_col() +
  labs(x = "Number of trophy fish caught",
       y = NULL,
       title = "Top locations/seasons/species for ice fishing") +
  scale_x_continuous(labels = comma_format())






About

Analysis of over 400,000 Manitoba master angler records

Resources

Stars

Watchers

Forks

Releases

No releases published

Packages

No packages published

Languages