Skip to content

Commit

Permalink
Merge remote-tracking branch 'origin/small_summation_tweaks' into fig…
Browse files Browse the repository at this point in the history
…ures
  • Loading branch information
cristinamullin committed Aug 10, 2023
2 parents 52176a9 + f884f53 commit bf5f6fd
Show file tree
Hide file tree
Showing 3 changed files with 64 additions and 51 deletions.
10 changes: 9 additions & 1 deletion R/Tables.R
Original file line number Diff line number Diff line change
Expand Up @@ -82,7 +82,15 @@ TADA_Stats <- function(.data, group_cols = c("TADA.ComparableDataIdentifier")) {
.data <- TADA_IDCensoredData(.data)
}

group_cols <- unique(c("TADA.ComparableDataIdentifier", group_cols))
if(!"TADA.CensoredData.Flag"%in%names(.data)){
.data = TADA_IDCensoredData(.data)
}

if("TADA.NutrientSummation.Flag"%in%names(.data)){
print("Note: Your dataset contains TADA-generated total nutrient results, which have fewer columns populated with metadata. This might affect how groups are displayed in the stats table.")
}

group_cols = unique(c("TADA.ComparableDataIdentifier", group_cols))

StatsTable <- .data %>%
dplyr::filter(!is.na(TADA.ResultMeasureValue)) %>%
Expand Down
78 changes: 38 additions & 40 deletions R/Transformations.R
Original file line number Diff line number Diff line change
Expand Up @@ -71,15 +71,11 @@ TADA_GetSynonymRef <- function(.data, download = FALSE) {
}

# check to see if any invalid data flags exist
check_inv <- .data[, names(.data) %in% c("TADA.MethodSpeciation.Flag", "TADA.SampleFraction.Flag", "TADA.ResultUnit.Flag")]
check_inv <- check_inv %>%
tidyr::pivot_longer(cols = names(check_inv), names_to = "Flag Column") %>%
dplyr::filter(value == "Invalid")

if (dim(check_inv)[1] > 0) {
check_inv <- check_inv %>%
dplyr::group_by(Flag) %>%
dplyr::summarise("Result Count" = length(value))
check_inv = .data[,names(.data)%in%c("TADA.MethodSpeciation.Flag","TADA.SampleFraction.Flag","TADA.ResultUnit.Flag")]
check_inv = check_inv %>% tidyr::pivot_longer(cols = names(check_inv), names_to = "Flag_Column") %>% dplyr::filter(value == "Invalid")

if(dim(check_inv)[1]>0){
check_inv = check_inv %>% dplyr::group_by(Flag_Column) %>% dplyr::summarise("Result Count" = length(value))
print("Warning: Your dataframe contains invalid metadata combinations in the following flag columns:")
print(as.data.frame(check_inv))
}
Expand Down Expand Up @@ -322,13 +318,16 @@ TADA_HarmonizeSynonyms <- function(.data, ref, np_speciation = TRUE) {
#'
#' This function uses the [Nutrient Aggregation logic](https://echo.epa.gov/trends/loading-tool/resources/nutrient-aggregation#nitrogen)
#' from ECHO's Water Pollutant Loading Tool to add nitrogen subspecies together
#' to approximate a total nitrogen value on a single day at a single site. Where
#' necessary, it uses conversion factors to convert nitrogen subspecies
#' expressed as nitrate, nitrite, ammonia, ammonium, etc. to as nitrogen based
#' on the atomic weights of the different elements in the compound. The
#' reference table is contained within the package but may be edited/customized
#' by users. Future development may include total P summations as well.
#'
#' to approximate a total nitrogen value on a single day at a single site.
#' Before summing subspecies, this function runs TADA_AggregateMeasurements to
#' obtain the max value of a characteristic-fraction-speciation at a given site,
#' date, and depth. Where necessary, it uses conversion factors to convert
#' nitrogen subspecies expressed as nitrate, nitrite, ammonia, ammonium, etc. to
#' as nitrogen based on the atomic weights of the different elements in the
#' compound. The reference table is contained within the package but may be
#' edited/customized by users. Future development may include total P summations
#' as well.
#'
#' @param .data TADA dataframe, ideally harmonized using TADA_HarmonizeSynonyms.
#' If user wants to consider grouping N or P subspecies across multiple
#' organizations, user should have run TADA_FindNearbySites and grouped all
Expand All @@ -343,11 +342,15 @@ TADA_HarmonizeSynonyms <- function(.data, ref, np_speciation = TRUE) {
#' to 'min' or 'mean'.
#'
#' @return Input TADA dataframe with additional rows representing total N and P
#' summation values from adding up subspecies. These new rows share the same
#' date and monitoring location as the subspecies, but an additional note is
#' added in the TADA.NutrientSummation.Flag column describing how the total was
#' derived. Also adds TADA.NutrientSummationGroup and TADA.NutrientSummationEquation columns.
#'
#' summation values from adding up subspecies. Note that for total phosphorus,
#' these additional rows are simply a re-classification of phosphorus or
#' phosphate into the total phosphorus as P format. These new rows share the
#' same date and monitoring location as the subspecies, but an additional note
#' is added in the TADA.NutrientSummation.Flag column describing how the total
#' was derived. Also adds TADA.NutrientSummationGroup and
#' TADA.NutrientSummationEquation columns, which can be used to trace how the
#' total was calculated and from which subspecies.
#'
#' @export

TADA_CalculateTotalNP <- function(.data, sum_ref, daily_agg = c("max", "min", "mean")) {
Expand Down Expand Up @@ -382,11 +385,11 @@ TADA_CalculateTotalNP <- function(.data, sum_ref, daily_agg = c("max", "min", "m

# Get grouping cols for daily aggregation
# create nutrient groups by site, date, and depth
depths <- names(.data)[grepl("DepthHeightMeasure", names(.data))]
depths <- depths[grepl("TADA.", depths)]
grpcols <- c("ActivityStartDate", "MonitoringLocationIdentifier", "ActivityMediaSubdivisionName", "TADA.ComparableDataIdentifier", "TADA.ResultMeasure.MeasureUnitCode", depths)

dat <- suppressMessages(TADA_AggregateMeasurements(.data, grouping_cols = grpcols, agg_fun = daily_agg, clean = TRUE))
depths = names(.data)[grepl("DepthHeightMeasure", names(.data))]
depths = depths[grepl("TADA.", depths)]
grpcols = c("ActivityStartDate", "MonitoringLocationIdentifier", "TADA.LongitudeMeasure", "TADA.LatitudeMeasure", "ActivityMediaSubdivisionName","TADA.ComparableDataIdentifier","TADA.ResultMeasure.MeasureUnitCode",depths)
dat = suppressMessages(TADA_AggregateMeasurements(.data, grouping_cols = grpcols, agg_fun = daily_agg, clean = TRUE))

# join data to summation table and keep only those that match for summations
sum_dat <- merge(dat, sum_ref, all.x = TRUE)
Expand Down Expand Up @@ -460,21 +463,16 @@ TADA_CalculateTotalNP <- function(.data, sum_ref, daily_agg = c("max", "min", "m
summeddata$TADA.MethodSpecificationName <- ifelse(!is.na(summeddata$SummationSpeciationConversionFactor) & summeddata$nutrient == "Total Phosphorus as P", "AS P", summeddata$TADA.MethodSpecificationName)

# Get to total N or P
totncols <- c(thecols, "TADA.NutrientSummationGroup", "TADA.NutrientSummationEquation")
TotalN <- summeddata %>%
dplyr::filter(nutrient == "Total Nitrogen as N") %>%
dplyr::group_by(dplyr::across(dplyr::all_of(totncols))) %>%
dplyr::summarise(TADA.ResultMeasureValue = sum(TADA.ResultMeasureValue)) %>%
dplyr::mutate(TADA.CharacteristicName = "TOTAL NITROGEN, MIXED FORMS", TADA.ResultSampleFractionText = "UNFILTERED", TADA.MethodSpecificationName = "AS N", TADA.NutrientSummation.Flag = "Nutrient summation from subspecies.")
TotalP <- summeddata %>%
dplyr::filter(nutrient == "Total Phosphorus as P") %>%
dplyr::group_by(dplyr::across(dplyr::all_of(totncols))) %>%
dplyr::summarise(TADA.ResultMeasureValue = sum(TADA.ResultMeasureValue)) %>%
dplyr::mutate(TADA.CharacteristicName = "TOTAL PHOSPHORUS, MIXED FORMS", TADA.ResultSampleFractionText = "UNFILTERED", TADA.MethodSpecificationName = "AS P", TADA.NutrientSummation.Flag = "Nutrient summation from subspecies.")

Totals <- plyr::rbind.fill(TotalN, TotalP)
Totals$ResultIdentifier <- paste0("TADA-", sample(100000000:1000000000, dim(Totals)[1]))

totncols = c(thecols, "TADA.NutrientSummationGroup","TADA.NutrientSummationEquation")
TotalN = summeddata %>% dplyr::filter(nutrient=="Total Nitrogen as N") %>% dplyr::group_by(dplyr::across(dplyr::all_of(totncols))) %>% dplyr::summarise(TADA.ResultMeasureValue = sum(TADA.ResultMeasureValue)) %>% dplyr::mutate(TADA.CharacteristicName = "TOTAL NITROGEN, MIXED FORMS", TADA.ResultSampleFractionText = "UNFILTERED",TADA.MethodSpecificationName = "AS N", TADA.NutrientSummation.Flag = "Nutrient summation from one or more subspecies.")
TotalP = summeddata %>% dplyr::filter(nutrient=="Total Phosphorus as P") %>% dplyr::group_by(dplyr::across(dplyr::all_of(totncols))) %>% dplyr::summarise(TADA.ResultMeasureValue = sum(TADA.ResultMeasureValue)) %>% dplyr::mutate(TADA.CharacteristicName = "TOTAL PHOSPHORUS, MIXED FORMS", TADA.ResultSampleFractionText = "UNFILTERED",TADA.MethodSpecificationName = "AS P", TADA.NutrientSummation.Flag = "Nutrient summation from one subspecies.")

# if summation is zero....include anyway?

Totals = plyr::rbind.fill(TotalN, TotalP)
Totals$ResultIdentifier = paste0("TADA-",sample(100000000:1000000000,dim(Totals)[1])) # give TADA ResultIdentifier

# combine all data back into input dataset and get rid of unneeded columns
.data <- merge(.data, summeddata, all.x = TRUE)
.data <- plyr::rbind.fill(.data, Totals)
Expand Down
27 changes: 17 additions & 10 deletions man/TADA_CalculateTotalNP.Rd

Some generated files are not rendered by default. Learn more about how customized files appear on GitHub.

0 comments on commit bf5f6fd

Please sign in to comment.