Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Smol updates #330

Merged
merged 5 commits into from
Aug 21, 2023
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
24 changes: 20 additions & 4 deletions R/Figures.R
Original file line number Diff line number Diff line change
Expand Up @@ -56,7 +56,12 @@ TADA_Boxplot <- function(.data, id_cols = c("TADA.ComparableDataIdentifier")) {
TADA_CheckType(.data, "data.frame", "Input object")

# ensure comparable data identifier is in the id_col vector
id_cols <- unique(c("TADA.ComparableDataIdentifier", id_cols))
if(is.null(id_cols)){
id_cols = "TADA.ComparableDataIdentifier"
}
if(!"TADA.ComparableDataIdentifier"%in%id_cols){
print("TADA.ComparableDataIdentifier not found in id_cols argument and is highly recommended: plotting without it may produce errors in the plot.")
}

# check .data has required columns
TADA_CheckColumns(.data, id_cols)
Expand Down Expand Up @@ -218,7 +223,12 @@ TADA_Histogram <- function(.data, id_cols = c("TADA.ComparableDataIdentifier"))
TADA_CheckType(.data, "data.frame", "Input object")

# ensure comparable data identifier is in the id_col vector
id_cols <- unique(c("TADA.ComparableDataIdentifier", id_cols))
if(is.null(id_cols)){
id_cols = "TADA.ComparableDataIdentifier"
}
if(!"TADA.ComparableDataIdentifier"%in%id_cols){
print("TADA.ComparableDataIdentifier not found in id_cols argument and is highly recommended: plotting without it may produce errors in the plot.")
}

# check .data has required columns
TADA_CheckColumns(.data, id_cols)
Expand Down Expand Up @@ -514,13 +524,19 @@ TADA_Scatterplot <- function(.data, id_cols = c("TADA.ComparableDataIdentifier")
TADA_CheckType(.data, "data.frame", "Input object")

# ensure comparable data identifier is in the id_col vector
id_cols <- unique(c("TADA.ComparableDataIdentifier", id_cols))
if(is.null(id_cols)){
id_cols = "TADA.ComparableDataIdentifier"
}

if(!"TADA.ComparableDataIdentifier"%in%id_cols){
print("TADA.ComparableDataIdentifier not found in id_cols argument and is highly recommended: plotting without it may produce errors in the plot.")
}

# check .data has required columns
TADA_CheckColumns(.data, id_cols)

# check .data has required columns
TADA_CheckColumns(.data, c("TADA.ResultMeasureValue", "TADA.ResultMeasure.MeasureUnitCode"))
TADA_CheckColumns(.data, c("TADA.ResultMeasureValue", "TADA.ResultMeasure.MeasureUnitCode","ResultDepthHeightMeasure.MeasureValue", "ActivityStartDate", "ActivityStartTime.Time"))

start <- dim(.data)[1]

Expand Down
2 changes: 1 addition & 1 deletion R/GenerateRefTables.R
Original file line number Diff line number Diff line change
Expand Up @@ -715,6 +715,6 @@ TADA_UpdateMeasureQualifierCodeRef <- function() {
#' @export

TADA_GetNutrientSummationRef <- function() {
ref <- utils::read.csv(system.file("extdata", "Nsummation_key.csv", package = "TADA"))
ref <- utils::read.csv(system.file("extdata", "NPsummation_key.csv", package = "TADA"))
return(ref)
}
71 changes: 41 additions & 30 deletions R/Transformations.R
Original file line number Diff line number Diff line change
Expand Up @@ -325,7 +325,7 @@ TADA_HarmonizeSynonyms <- function(.data, ref, np_speciation = TRUE) {

#' Calculate Total Nitrogen and Phosphorus
#'
#' This function uses the [Nutrient Aggregation logic](https://echo.epa.gov/trends/loading-tool/resources/nutrient-aggregation#nitrogen)
#' This function applies the [Nutrient Aggregation logic](https://echo.epa.gov/trends/loading-tool/resources/nutrient-aggregation#nitrogen)
#' from ECHO's Water Pollutant Loading Tool to add nitrogen subspecies together
#' to approximate a total nitrogen value on a single day at a single site.
#' Before summing subspecies, this function runs TADA_AggregateMeasurements to
Expand All @@ -334,9 +334,29 @@ TADA_HarmonizeSynonyms <- function(.data, ref, np_speciation = TRUE) {
#' nitrogen subspecies expressed as nitrate, nitrite, ammonia, ammonium, etc. to
#' as nitrogen based on the atomic weights of the different elements in the
#' compound. The reference table is contained within the package but may be
#' edited/customized by users. Future development may include total P summations
#' as well.
#'
#' edited/customized by users. Nutrient equations are as follows:
#'
#' NITROGEN:
#' 1. TOTAL N (UNFILTERED)
#' 2. TOTAL N (FILTERED) + TOTAL N (PARTICULE)
#' 3. TOTAL KJELDAHL NITROGEN + NITRATE + NITRITE
#' 4. ORGANIC N + AMMONIA + NITRATE + NITRITE
#' 5. OTHER NITROGEN FORMS
#'
#' PHOSPHORUS:
#' 1. TOTAL PHOSPHORUS
#' 2. PHOSPHATE
#' 3. OTHER PHOSPHORUS FORMS
#'
#' Equations are applied in the order above. The function looks for groups of
#' nutrients that exactly match each equation before looking for every
#' combination within each equation (for example, a group of nitrogen subspecies
#' including AMMONIA and NITRATE will be passed over in an intial sweep of
#' groups of subspecies containing ORG N, AMMONIA, NITRATE, and NITRITE, but
#' will be caught as the function moves down the hierarchy of equations to fewer
#' and fewer subspecies). Eventually, even groups with only one subspecies will
#' be used to represent a TOTAL N value for that site/day/depth.
#'
#' @param .data TADA dataframe, ideally harmonized using TADA_HarmonizeSynonyms.
#' If user wants to consider grouping N or P subspecies across multiple
#' organizations, user should have run TADA_FindNearbySites and grouped all
Expand Down Expand Up @@ -402,13 +422,14 @@ TADA_CalculateTotalNP <- function(.data, sum_ref, daily_agg = c("max", "min", "m

# join data to summation table and keep only those that match for summations
sum_dat <- merge(dat, sum_ref, all.x = TRUE)
sum_dat <- subset(sum_dat, !is.na(sum_dat$SummationRank))
sum_dat <- subset(sum_dat, !is.na(sum_dat$NutrientGroup))

## REMINDER FOR TADA TEAM: NEED TO ENSURE ALL COMBOS PRESENT IN TABLE

# If the join results in matching rows
if (dim(sum_dat)[1] > 0) {
thecols <- grpcols[!grpcols %in% c("TADA.ComparableDataIdentifier")]

# # find nearby sites
# nearsites = unique(sum_dat[,c("MonitoringLocationIdentifier","TADA.LatitudeMeasure","TADA.LongitudeMeasure")])
# nearsites = TADA_FindNearbySites(nearsites)
Expand All @@ -418,41 +439,31 @@ TADA_CalculateTotalNP <- function(.data, sum_ref, daily_agg = c("max", "min", "m
sum_dat <- sum_dat %>%
dplyr::group_by(dplyr::across(dplyr::all_of(thecols))) %>%
dplyr::mutate(TADA.NutrientSummationGroup = dplyr::cur_group_id())

# bring in equations
eqns = utils::read.csv(system.file("extdata","NP_equations.csv", package = "TADA"))

# Create list of summation equations in order of preference, can also accommodate phosphorus
eqns <- list(
"N" = list(
N1 = "TOTAL N",
N2 = c("TKN", "NITRATE", "NITRITE"),
N2_a = c("TKN", "NITRATE + NITRITE"),
N3 = c("ORG N", "AMMON", "NITRATE", "NITRITE"),
N3_a = c("ORG N", "AMMON", "NITRATE + NITRITE"),
N4 = c("AMMON", "NITRATE", "NITRITE"),
N4_a = c("AMMON", "NITRATE + NITRITE")
),
"P" = list(
P1 = c("PHOSP"),
P2 = c("PO4")
)
)

# dataframe to hold results
summeddata <- data.frame()
grps <- vector()

for (i in 1:length(eqns)) {
eq <- eqns[[i]]
for (j in 1:length(eq)) {
eq1 <- eq[[j]]
nutrient <- ifelse(grepl("N", names(eq[j])), "Total Nitrogen as N", "Total Phosphorus as P")
for (i in 1:length(unique(eqns$Nutrient))) {
nut = unique(eqns$Nutrient)[i]
nutqns = subset(eqns, eqns$Nutrient==nut)
for (j in 1:length(unique(nutqns$EQN))) {
eqnum = unique(nutqns$EQN)[j]
eqn = subset(nutqns, nutqns$EQN==eqnum)$SummationName
nutrient <- ifelse(nut=="N", "Total Nitrogen as N", "Total Phosphorus as P")
# for each equation, see if any groups contain all required subspecies, and for each pick the variant with the lowest rank.
# combine group with other groups and remove group ID from consideration for the next equation
out <- sum_dat %>%
dplyr::filter(!TADA.NutrientSummationGroup %in% grps) %>%
dplyr::group_by(TADA.NutrientSummationGroup) %>%
dplyr::filter(all(eq1 %in% SummationName)) %>%
dplyr::filter(SummationName %in% eq1) %>%
dplyr::mutate(TADA.NutrientSummationEquation = paste0(eq1, collapse = " + "))
dplyr::filter(all(eqn %in% SummationName)) %>% # this line ensures that ALL subspecies are present within an equation group, not just one or more
dplyr::filter(SummationName %in% eqn) %>%
dplyr::mutate(TADA.NutrientSummationEquation = paste0(unique(SummationName), collapse = " + "))

out <- out %>%
dplyr::group_by(TADA.NutrientSummationGroup, SummationName) %>%
dplyr::slice_min(SummationRank, with_ties = FALSE)
Expand Down Expand Up @@ -493,7 +504,7 @@ TADA_CalculateTotalNP <- function(.data, sum_ref, daily_agg = c("max", "min", "m
# combine all data back into input dataset and get rid of unneeded columns
.data <- merge(.data, summeddata, all.x = TRUE)
.data <- plyr::rbind.fill(.data, Totals)
.data <- .data %>% dplyr::select(-c(SummationFractionNotes, SummationSpeciationNotes, SummationSpeciationConversionFactor, SummationName, SummationRank, SummationNote, nutrient))
.data <- .data %>% dplyr::select(-c(SummationFractionNotes, SummationSpeciationNotes, SummationSpeciationConversionFactor, SummationName, SummationRank, SummationNote, nutrient, NutrientGroup))
.data$TADA.NutrientSummation.Flag[is.na(.data$TADA.NutrientSummation.Flag)] <- "Not used to calculate Total N or P."
} else {
# if there are no data to sum
Expand Down
47 changes: 47 additions & 0 deletions inst/extdata/NP_equations.csv
Original file line number Diff line number Diff line change
@@ -0,0 +1,47 @@
Nutrient,EQN,SummationName
N,0,TOTAL N (UNFILTERED)
N,1,TOTAL N (FILTERED)
N,1,TOTAL N (PARTICLE)
N,2,TKN
N,2,NITRATE
N,2,NITRITE
N,3,TKN
N,3,NITRATE + NITRITE
N,4,ORG N
N,4,AMMON
N,4,NITRATE
N,4,NITRITE
N,5,ORG N
N,5,AMMON
N,5,NITRATE + NITRITE
N,6,AMMON
N,6,NITRATE
N,6,NITRITE
N,7,AMMON
N,7,NITRATE + NITRITE
N,8,TKN
N,8,NITRATE
N,9,TKN
N,9,NITRITE
N,10,ORG N
N,10,AMMON
N,11,ORG N
N,11,NITRATE
N,12,ORG N
N,12,NITRITE
N,13,ORG N
N,13,NITRATE + NITRITE
N,14,AMMON
N,14,NITRATE
N,15,AMMON
N,15,NITRITE
N,16,TKN
N,17,NITRATE
N,18,NITRITE
N,19,NITRATE + NITRITE
N,20,AMMON
N,21,ORG N
N,22,OTHER N
P,1,PHOSP
P,2,PO4
P,3,OTHER P
Loading
Loading