Merge remote-tracking branch 'origin/small_summation_tweaks' into fig…

…ures
USEPA · Aug 10, 2023 · bf5f6fd · bf5f6fd
2 parents 52176a9 + f884f53
commit bf5f6fd
Show file tree

Hide file tree

Showing 3 changed files with 64 additions and 51 deletions.
diff --git a/R/Tables.R b/R/Tables.R
@@ -82,7 +82,15 @@ TADA_Stats <- function(.data, group_cols = c("TADA.ComparableDataIdentifier")) {
     .data <- TADA_IDCensoredData(.data)
   }
 
-  group_cols <- unique(c("TADA.ComparableDataIdentifier", group_cols))
+  if(!"TADA.CensoredData.Flag"%in%names(.data)){
+    .data = TADA_IDCensoredData(.data)
+  }
+
+  if("TADA.NutrientSummation.Flag"%in%names(.data)){
+    print("Note: Your dataset contains TADA-generated total nutrient results, which have fewer columns populated with metadata. This might affect how groups are displayed in the stats table.")
+  }
+
+  group_cols = unique(c("TADA.ComparableDataIdentifier", group_cols))
 
   StatsTable <- .data %>%
     dplyr::filter(!is.na(TADA.ResultMeasureValue)) %>%

diff --git a/R/Transformations.R b/R/Transformations.R
@@ -71,15 +71,11 @@ TADA_GetSynonymRef <- function(.data, download = FALSE) {
   }
 
   # check to see if any invalid data flags exist
-  check_inv <- .data[, names(.data) %in% c("TADA.MethodSpeciation.Flag", "TADA.SampleFraction.Flag", "TADA.ResultUnit.Flag")]
-  check_inv <- check_inv %>%
-    tidyr::pivot_longer(cols = names(check_inv), names_to = "Flag Column") %>%
-    dplyr::filter(value == "Invalid")
-
-  if (dim(check_inv)[1] > 0) {
-    check_inv <- check_inv %>%
-      dplyr::group_by(Flag) %>%
-      dplyr::summarise("Result Count" = length(value))
+  check_inv = .data[,names(.data)%in%c("TADA.MethodSpeciation.Flag","TADA.SampleFraction.Flag","TADA.ResultUnit.Flag")]
+  check_inv = check_inv %>% tidyr::pivot_longer(cols = names(check_inv), names_to = "Flag_Column") %>% dplyr::filter(value == "Invalid")
+
+  if(dim(check_inv)[1]>0){
+    check_inv = check_inv %>% dplyr::group_by(Flag_Column) %>% dplyr::summarise("Result Count" = length(value))
     print("Warning: Your dataframe contains invalid metadata combinations in the following flag columns:")
     print(as.data.frame(check_inv))
   }
@@ -322,13 +318,16 @@ TADA_HarmonizeSynonyms <- function(.data, ref, np_speciation = TRUE) {
 #'
 #' This function uses the [Nutrient Aggregation logic](https://echo.epa.gov/trends/loading-tool/resources/nutrient-aggregation#nitrogen)
 #' from ECHO's Water Pollutant Loading Tool to add nitrogen subspecies together
-#' to approximate a total nitrogen value on a single day at a single site. Where
-#' necessary, it uses conversion factors to convert nitrogen subspecies
-#' expressed as nitrate, nitrite, ammonia, ammonium, etc. to as nitrogen based
-#' on the atomic weights of the different elements in the compound. The
-#' reference table is contained within the package but may be edited/customized
-#' by users. Future development may include total P summations as well.
-#'
+#' to approximate a total nitrogen value on a single day at a single site.
+#' Before summing subspecies, this function runs TADA_AggregateMeasurements to
+#' obtain the max value of a characteristic-fraction-speciation at a given site,
+#' date, and depth. Where necessary, it uses conversion factors to convert
+#' nitrogen subspecies expressed as nitrate, nitrite, ammonia, ammonium, etc. to
+#' as nitrogen based on the atomic weights of the different elements in the
+#' compound. The reference table is contained within the package but may be
+#' edited/customized by users. Future development may include total P summations
+#' as well.
+#' 
 #' @param .data TADA dataframe, ideally harmonized using TADA_HarmonizeSynonyms.
 #'   If user wants to consider grouping N or P subspecies across multiple
 #'   organizations, user should have run TADA_FindNearbySites and grouped all
@@ -343,11 +342,15 @@ TADA_HarmonizeSynonyms <- function(.data, ref, np_speciation = TRUE) {
 #'   to 'min' or 'mean'.
 #'
 #' @return Input TADA dataframe with additional rows representing total N and P
-#'   summation values from adding up subspecies. These new rows share the same
-#'   date and monitoring location as the subspecies, but an additional note is
-#'   added in the TADA.NutrientSummation.Flag column describing how the total was
-#'   derived. Also adds TADA.NutrientSummationGroup and TADA.NutrientSummationEquation columns.
-#'
+#'   summation values from adding up subspecies. Note that for total phosphorus,
+#'   these additional rows are simply a re-classification of phosphorus or
+#'   phosphate into the total phosphorus as P format. These new rows share the
+#'   same date and monitoring location as the subspecies, but an additional note
+#'   is added in the TADA.NutrientSummation.Flag column describing how the total
+#'   was derived. Also adds TADA.NutrientSummationGroup and
+#'   TADA.NutrientSummationEquation columns, which can be used to trace how the
+#'   total was calculated and from which subspecies.
+#'   
 #' @export
 
 TADA_CalculateTotalNP <- function(.data, sum_ref, daily_agg = c("max", "min", "mean")) {
@@ -382,11 +385,11 @@ TADA_CalculateTotalNP <- function(.data, sum_ref, daily_agg = c("max", "min", "m
 
   # Get grouping cols for daily aggregation
   # create nutrient groups by site, date, and depth
-  depths <- names(.data)[grepl("DepthHeightMeasure", names(.data))]
-  depths <- depths[grepl("TADA.", depths)]
-  grpcols <- c("ActivityStartDate", "MonitoringLocationIdentifier", "ActivityMediaSubdivisionName", "TADA.ComparableDataIdentifier", "TADA.ResultMeasure.MeasureUnitCode", depths)
-
-  dat <- suppressMessages(TADA_AggregateMeasurements(.data, grouping_cols = grpcols, agg_fun = daily_agg, clean = TRUE))
+  depths = names(.data)[grepl("DepthHeightMeasure", names(.data))]
+  depths = depths[grepl("TADA.", depths)]
+  grpcols = c("ActivityStartDate", "MonitoringLocationIdentifier", "TADA.LongitudeMeasure", "TADA.LatitudeMeasure", "ActivityMediaSubdivisionName","TADA.ComparableDataIdentifier","TADA.ResultMeasure.MeasureUnitCode",depths)
+  
+  dat = suppressMessages(TADA_AggregateMeasurements(.data, grouping_cols = grpcols, agg_fun = daily_agg, clean = TRUE))
 
   # join data to summation table and keep only those that match for summations
   sum_dat <- merge(dat, sum_ref, all.x = TRUE)
@@ -460,21 +463,16 @@ TADA_CalculateTotalNP <- function(.data, sum_ref, daily_agg = c("max", "min", "m
     summeddata$TADA.MethodSpecificationName <- ifelse(!is.na(summeddata$SummationSpeciationConversionFactor) & summeddata$nutrient == "Total Phosphorus as P", "AS P", summeddata$TADA.MethodSpecificationName)
 
     # Get to total N or P
-    totncols <- c(thecols, "TADA.NutrientSummationGroup", "TADA.NutrientSummationEquation")
-    TotalN <- summeddata %>%
-      dplyr::filter(nutrient == "Total Nitrogen as N") %>%
-      dplyr::group_by(dplyr::across(dplyr::all_of(totncols))) %>%
-      dplyr::summarise(TADA.ResultMeasureValue = sum(TADA.ResultMeasureValue)) %>%
-      dplyr::mutate(TADA.CharacteristicName = "TOTAL NITROGEN, MIXED FORMS", TADA.ResultSampleFractionText = "UNFILTERED", TADA.MethodSpecificationName = "AS N", TADA.NutrientSummation.Flag = "Nutrient summation from subspecies.")
-    TotalP <- summeddata %>%
-      dplyr::filter(nutrient == "Total Phosphorus as P") %>%
-      dplyr::group_by(dplyr::across(dplyr::all_of(totncols))) %>%
-      dplyr::summarise(TADA.ResultMeasureValue = sum(TADA.ResultMeasureValue)) %>%
-      dplyr::mutate(TADA.CharacteristicName = "TOTAL PHOSPHORUS, MIXED FORMS", TADA.ResultSampleFractionText = "UNFILTERED", TADA.MethodSpecificationName = "AS P", TADA.NutrientSummation.Flag = "Nutrient summation from subspecies.")
-
-    Totals <- plyr::rbind.fill(TotalN, TotalP)
-    Totals$ResultIdentifier <- paste0("TADA-", sample(100000000:1000000000, dim(Totals)[1]))
 
+    totncols = c(thecols, "TADA.NutrientSummationGroup","TADA.NutrientSummationEquation")
+    TotalN = summeddata %>% dplyr::filter(nutrient=="Total Nitrogen as N") %>% dplyr::group_by(dplyr::across(dplyr::all_of(totncols))) %>% dplyr::summarise(TADA.ResultMeasureValue = sum(TADA.ResultMeasureValue)) %>% dplyr::mutate(TADA.CharacteristicName = "TOTAL NITROGEN, MIXED FORMS", TADA.ResultSampleFractionText = "UNFILTERED",TADA.MethodSpecificationName = "AS N", TADA.NutrientSummation.Flag = "Nutrient summation from one or more subspecies.")
+    TotalP = summeddata %>% dplyr::filter(nutrient=="Total Phosphorus as P") %>% dplyr::group_by(dplyr::across(dplyr::all_of(totncols))) %>% dplyr::summarise(TADA.ResultMeasureValue = sum(TADA.ResultMeasureValue)) %>% dplyr::mutate(TADA.CharacteristicName = "TOTAL PHOSPHORUS, MIXED FORMS", TADA.ResultSampleFractionText = "UNFILTERED",TADA.MethodSpecificationName = "AS P", TADA.NutrientSummation.Flag = "Nutrient summation from one subspecies.")
+
+    # if summation is zero....include anyway?
+
+    Totals = plyr::rbind.fill(TotalN, TotalP)
+    Totals$ResultIdentifier = paste0("TADA-",sample(100000000:1000000000,dim(Totals)[1])) # give TADA ResultIdentifier
+
     # combine all data back into input dataset and get rid of unneeded columns
     .data <- merge(.data, summeddata, all.x = TRUE)
     .data <- plyr::rbind.fill(.data, Totals)

diff --git a/man/TADA_CalculateTotalNP.Rd b/man/TADA_CalculateTotalNP.Rd