forked from gulfofmaine/continuous_plankton_recorder
-
Notifications
You must be signed in to change notification settings - Fork 0
/
17_noaa_sahfos_eda.R
228 lines (173 loc) · 7.92 KB
/
17_noaa_sahfos_eda.R
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
#### NOAA / SAHFOS EDA ####
# About:
# This script steps through comparisons between the NOAA and SAHFOS datasets.
# Differences in taxon groups are explored and resolved,
# and a combined dataset is then exported.
#### Packages ####
library(patchwork)
library(sf)
library(janitor)
library(tidyverse)
library(rnaturalearth)
#### Functions ####
# Project helper functions. Later code in this script calls floor_decade() and
# reads ccel_boxpath, neither of which is defined in this file
# (presumably both come from this helper script -- confirm).
source(here::here("R", "cpr_helper_funs.R"))

#### Load Data ####

#### 1. NOAA Consolidated ####
# Runs the NOAA cleanup script. This script uses `noaa_zoo_2` without defining
# it, so that object is expected to be created here.
# NOTE(review): this comment previously credited 15_NOAA_CPR_Key.R, but the
# file actually sourced is 15_NOAA_CPR_Cleanup.R -- confirm which is intended.
# The path is resolved with here::here() (like the helper script above) so the
# script does not depend on the current working directory.
source(here::here("R", "15_NOAA_CPR_Cleanup.R"))

# Loading targets for comparison
# library(targets)
# tar_load(noaa_taxa_resolved)

#### 2. SAHFOS Consolidated ####
# SAHFOS data is converted to number per meters cubed and combined across
# subsampling methods in 16_SAHFOS_CPR_Cleanup.R. This script uses
# `sahfos_zoo` without defining it, so that object is expected to be created
# here.
# NOTE(review): a plot label further down reads "#/100 cubic meters" --
# confirm the units of the SAHFOS abundances.
source(here::here("R", "16_SAHFOS_CPR_Cleanup.R"))

# # Loading targets for comparison
# tar_load(sahfos_zoo_100m)
####__####
#### Data Exploration ####
#### 1. Spatial Coverage ####
#devtools::install_github("https://github.com/ropensci/rnaturalearthhires") #need this to use ne_states()
# Basemap polygons: US states restricted to the "Northeast" region, plus all
# Canadian provinces/territories, both converted to sf objects.
northeast <- filter(
  st_as_sf(ne_states(country = "united states of america")),
  region == "Northeast"
)
canada <- st_as_sf(ne_states(country = "canada"))
# NOAA spatial coverage: sample positions colored by decade, drawn under the
# northeast US and Canada polygons.
noaa_coverage <- local({
  # Longitude is multiplied by -1 before it is used as the x coordinate
  # (presumably NOAA records degrees west as positive values -- confirm).
  noaa_points <- noaa_zoo_2 %>%
    mutate(
      `longitude (degrees)` = -1 * `longitude (degrees)`,
      decade = floor_decade(year)
    ) %>%
    st_as_sf(
      coords = c("longitude (degrees)", "latitude (degrees)"),
      crs = 4326,
      remove = FALSE
    )

  ggplot(noaa_points) +
    geom_sf(aes(color = decade)) +
    geom_sf(data = northeast) +
    geom_sf(data = canada) +
    coord_sf(xlim = c(-71, -62.5), ylim = c(41, 44.5)) +
    theme(
      panel.border = element_rect(colour = "black", fill = NA),
      axis.line = element_blank()
    ) +
    labs(caption = "NOAA CPR data")
})
# SAHFOS spatial coverage: sample positions colored by year (as a factor),
# drawn under the same polygons and map window as the NOAA panel.
sahfos_coverage <- local({
  # Unlike the NOAA table, longitudes are used as-is here.
  sahfos_points <- sahfos_zoo %>%
    mutate(year = factor(year)) %>%
    st_as_sf(
      coords = c("longitude (degrees)", "latitude (degrees)"),
      crs = 4326,
      remove = FALSE
    )

  ggplot(sahfos_points) +
    geom_sf(aes(color = year)) +
    geom_sf(data = northeast) +
    geom_sf(data = canada) +
    coord_sf(xlim = c(-71, -62.5), ylim = c(41, 44.5)) +
    theme(
      panel.border = element_rect(colour = "black", fill = NA),
      axis.line = element_blank()
    ) +
    labs(caption = "SAHFOS CPR data")
})

# Stack the two coverage maps (patchwork `/`: NOAA above SAHFOS)
noaa_coverage / sahfos_coverage
# If we decide to clip the SAHFOS data we can use this:
# bounding box of the NOAA sample positions, after applying the same
# longitude sign flip used for the NOAA coverage map.
# The stray second argument previously passed to st_bbox()
# (`noaa_zoo_2 %>% st_as_sf()`) has been removed: st_bbox() computes the box
# from the piped object, so the extra argument was not a bbox input. The
# unused `decade` column is no longer computed either.
noaa_zoo_2 %>%
  mutate(`longitude (degrees)` = -1 * `longitude (degrees)`) %>%
  st_as_sf(
    coords = c("longitude (degrees)", "latitude (degrees)"),
    crs = 4326,
    remove = FALSE
  ) %>%
  st_bbox()
#### 2. Abundance Units ####
#Catch differences
# Yearly sums of two NOAA Calanus columns, plotted as one point series each so
# their magnitudes can be compared against the SAHFOS panel.
# `na.rm = TRUE` is spelled out because `T` is an ordinary variable that can
# be reassigned.
noaa_calanus <- noaa_zoo_2 %>%
  group_by(year) %>%
  dplyr::summarise(
    `calanus_v-vi` = sum(`calanus finmarchicus v-vi`, na.rm = TRUE),
    `calanus_i-iv` = sum(`calanus i-iv`, na.rm = TRUE)
  ) %>%
  ggplot() +
  # Constant strings inside aes() give each series its own legend entry
  geom_point(aes(year, `calanus_v-vi`, color = "calanus_v-vi")) +
  geom_point(aes(year, `calanus_i-iv`, color = "calanus_i-iv")) +
  scale_y_continuous(labels = scales::comma_format()) +
  #xlim(c(2010,2018)) +
  labs(
    caption = "source: NOAA",
    y = "C. finmarchicus",
    x = NULL
  )
# Yearly sums of the corresponding SAHFOS Calanus columns. The SAHFOS column
# `calanus finmarchicus` is plotted under the same "calanus_v-vi" label that
# the NOAA panel uses for `calanus finmarchicus v-vi`.
# `na.rm = TRUE` is spelled out because `T` is an ordinary variable that can
# be reassigned.
sahfos_calanus <- sahfos_zoo %>%
  group_by(year) %>%
  dplyr::summarise(
    `calanus_v-vi` = sum(`calanus finmarchicus`, na.rm = TRUE),
    `calanus_i-iv` = sum(`calanus i-iv`, na.rm = TRUE)
  ) %>%
  ggplot() +
  # Constant strings inside aes() give each series its own legend entry
  geom_point(aes(year, `calanus_v-vi`, color = "calanus_v-vi")) +
  geom_point(aes(year, `calanus_i-iv`, color = "calanus_i-iv")) +
  scale_y_continuous(labels = scales::comma_format()) +
  #xlim(c(2010,2018)) +
  labs(
    caption = "source: SAHFOS",
    y = "C. finmarchicus",
    x = NULL
  )

# Stack the two panels (patchwork `/`: NOAA above SAHFOS)
noaa_calanus / sahfos_calanus
#### Taxon Mismatches ####
# Quick comparison of which columns match and/or are missing from either table
compare_df_cols(noaa_zoo_2, sahfos_zoo)

# Harmonise the SAHFOS columns with the refined NOAA list in three steps:
#   1. rename one-to-one matches,
#   2. build aggregate columns where several SAHFOS columns map to one taxon,
#   3. drop the source columns of those aggregates, plus sample_id.
sahfos_zoo_2 <- sahfos_zoo %>%
  rename(
    `acartia spp.` = `acartia spp. (unidentified)`,
    `amphipoda spp.` = `amphipoda (unidentified)`,
    `appendicularia spp.` = appendicularia,
    `bivalvia spp.` = `bivalvia larvae`,
    `calanus finmarchicus v-vi` = `calanus finmarchicus`,
    # `calanus i-iv` keeps its name: the former self-rename was a no-op
    `calanus spp.` = `calanus v-vi unidentified`,
    `centropages spp.` = `centropages spp. (unidentified)`,
    `cumacea spp.` = cumacea,
    `doliolidae spp.` = doliolidae,
    `euchaeta spp.` = `euchaetidae (unidentified)`,
    `euphausiacea spp.` = `euphausiacea total`,
    `foraminifera spp.` = `foraminifera (total)`,
    `gammaridea spp.` = gammaridea,
    `gastropoda spp.` = `gastropoda (unidentified)`,
    # NOTE(review): "gymnosomoata" differs from the source spelling
    # "gymnosomata"; left unchanged because the target name has to match the
    # NOAA column -- confirm against noaa_zoo_2.
    `gymnosomoata spp.` = `gymnosomata (unidentified)`,
    `harpacticoida spp.` = `harpacticoida total traverse`,
    `hyperiidea spp.` = `hyperiidea (total)`,
    `ischnocalanus spp.` = ischnocalanus,
    `lepas spp.` = `lepas nauplii`,
    `metridia spp.` = `metridia spp. (v-vi) (unidentified)`,
    `monstrilloida spp.` = monstrilloida,
    `ostracoda spp.` = ostracoda,
    `pleuromamma spp.` = `pleuromamma spp. (unidentified)`,
    `polychaeta larva` = `polychaete larvae (unidentified)`,
    `salpidae spp.` = `salpidae (total)`,
    `siphonostomatoida spp.` = siphonostomatoida,
    `thecosomata spp.` = `thecosomata (north atlantic)`,
    `tintinnidae spp.` = `tintinnida total`
  ) %>%
  # Aggregates: several SAHFOS columns reduced to a single column.
  # `+` propagates NA, so a row is NA whenever any component is NA.
  # NOTE(review): several sums add a "total" column to what look like its
  # subsets (decapoda, fish eggs, pseudocalanus, radiolaria) -- confirm this
  # does not double count.
  mutate(
    `candacia spp.` = `candacia i-iv` + `candacia spp. (unidentified)`,
    `copepoda spp.` = `copepod eggs` + `copepod nauplii`,
    `decapoda spp.` = `decapod megalopa` + `decapod zoea` + `decapoda larvae (total)`,
    `fish eggs` = `fish eggs (total)` + `fish eggs with oil globules` + `fish eggs without oil globules`,
    `pseudocalanus spp.` = `pseudocalanus spp. adult atlantic` + `pseudocalanus spp. adult total`,
    `radiolaria spp.` = `radiolaria non-acantharian` + `radiolaria total`
  ) %>%
  # Assigning NULL inside mutate() removes a column; done after the sums so
  # every component is still available when the aggregates are computed.
  mutate(
    `candacia i-iv` = NULL,
    `candacia spp. (unidentified)` = NULL,
    `copepod eggs` = NULL,
    `copepod nauplii` = NULL,
    `decapod megalopa` = NULL,
    `decapod zoea` = NULL,
    `decapoda larvae (total)` = NULL,
    `fish eggs (total)` = NULL,
    `fish eggs with oil globules` = NULL,
    `fish eggs without oil globules` = NULL,
    `pseudocalanus spp. adult atlantic` = NULL,
    `pseudocalanus spp. adult total` = NULL,
    `radiolaria non-acantharian` = NULL,
    `radiolaria total` = NULL,
    sample_id = NULL
  )

#### Compare Columns ####
# Re-run the column comparison against the harmonised SAHFOS table
compare_df_cols(noaa_zoo_2, sahfos_zoo_2)
####__####
#### Zooplankton Merge ####
# #Make sample id column present in sahfos data
# noaa_zoo_2 <- noaa_zoo_2 %>% mutate(sample_id = str_c(cruise, station, sep = "-"))

# Row-bind the two tables; the argument names become the values of the new
# "Data Source" column that tags each row with its origin.
combined_set <- bind_rows(
  "NOAA" = noaa_zoo_2,
  "SAHFOS" = sahfos_zoo_2,
  .id = "Data Source"
)
# Plot Calanus I-IV by year, colored by data source
ggplot(
  data = combined_set,
  mapping = aes(x = year, y = `calanus i-iv`, colour = `Data Source`)
) +
  geom_point() +
  scale_y_continuous(labels = scales::comma_format()) +
  labs(
    x = NULL,
    y = "Calanus I-IV (#/100 cubic meters)"
  ) +
  # theme_minimal() must come before theme() so the text sizes are not reset
  theme_minimal() +
  theme(
    legend.text = element_text(size = 12),
    axis.text = element_text(size = 12)
  )
#### Export ####
# Write the merged table to the shared data folder. `ccel_boxpath` is not
# defined in this script (presumably set by the sourced helper file -- confirm).
write_csv(
  combined_set,
  file.path(
    ccel_boxpath,
    "Data",
    "Gulf of Maine CPR",
    "2020_combined_data",
    "zooplankton_combined.csv"
  ),
  col_names = TRUE
)