Skip to content

Commit

Permalink
Merge pull request #352 from USEPA/10302023-cm
Browse files Browse the repository at this point in the history
10302023 cm
  • Loading branch information
cristinamullin authored Nov 1, 2023
2 parents 24cd4bf + 42d64a0 commit af167ed
Show file tree
Hide file tree
Showing 30 changed files with 565 additions and 446 deletions.
8 changes: 8 additions & 0 deletions R/CensoredDataSuite.R
Original file line number Diff line number Diff line change
Expand Up @@ -7,6 +7,10 @@
#' "Other Condition/Limit Populated", and flags records where there is a
#' conflict between ResultDetectionConditionText and
#' DetectionQuantitationLimitTypeName as "Conflict between Condition and Limit".
#' A "Conflict between Condition and Limit" may occur if: A) the data submitter supplied
#' multiple detection limits and the wrong one is accidentally being associated
#' with the detection condition, or B) the data submitter accidentally chose
#' the wrong detection limit or detection condition to associate with the result.
#' Detection limit results missing ResultDetectionConditionText are flagged as
#' "Detection condition is missing and required for censored data ID." unless
#' the ResultMeasureValue field is populated with "ND" (indicating non-detect).
Expand Down Expand Up @@ -124,6 +128,10 @@ TADA_IDCensoredData <- function(.data) {
#' and the LOWER detection limit. These methods do NOT depend upon censored data frequency
#' in the dataset.
#'
#' This function runs TADA_IDCensoredData within it, which adds the column
#' TADA.CensoredData.Flag. Enter ?TADA_IDCensoredData into the console for more
#' information.
#'
#' @param .data A TADA dataframe
#' @param nd_method A text string indicating the type of method used to populate a non-detect (lower limit) data value. Can be set to "multiplier" (default), "randombelowlimit", or "as-is".
#' @param nd_multiplier A number to be multiplied to the LOWER detection limit for each entry to obtain the censored data value. Must be supplied if nd_method = "multiplier". Defaults to 0.5, or half the detection limit.
Expand Down
18 changes: 9 additions & 9 deletions R/DataDiscoveryRetrieval.R
Original file line number Diff line number Diff line change
Expand Up @@ -395,17 +395,17 @@ TADA_ReadWQPWebServices <- function(webservice) {
#' the date times to UTC. It also automatically converts the data to dates,
#' datetimes, numerics based on a standard algorithm. See: ?dataRetrieval::readWQPdata
#'
#' @param startDate Start Date string in the format YYYY-MM-DD, for example, "2020-01-01"
#' @param endDate End Date string in the format YYYY-MM-DD, for example, "2020-01-01"
#' @param startDate Start Date string in the format YYYY-MM-DD, for example, "2020-01-01".
#' @param endDate End Date string in the format YYYY-MM-DD, for example, "2020-01-01".
#' @param huc A numeric code denoting a hydrologic unit. Example: "04030202". Different size hucs can be entered.
#' @param siteid Unique monitoring station identifier
#' @param characteristicName Name of parameter
#' @param siteid Unique monitoring station identifier.
#' @param characteristicName Name of parameter.
#' @param characteristicType Groups of environmental measurements/parameters.
#' @param sampleMedia Sampling substrate such as water, air, or sediment
#' @param siteType Type of waterbody
#' @param statecode Code that identifies a state
#' @param sampleMedia Sampling substrate such as water, air, or sediment.
#' @param siteType Type of waterbody.
#' @param statecode Code that identifies a state.
#' @param maxrecs The maximum number of results queried within one call to dataRetrieval.
#' @param applyautoclean Logical, defaults to TRUE. Applies TADA_AutoClean function on the returned data profile.
#' @param applyautoclean Logical, defaults to FALSE. If TRUE, applies TADA_AutoClean function on the returned data profile.
#'
#' @return TADA-compatible dataframe
#'
Expand All @@ -426,7 +426,7 @@ TADA_BigDataRetrieval <- function(startDate = "null",
siteType = "null",
characteristicName = "null",
characteristicType = "null",
sampleMedia = "Water",
sampleMedia = "null",
statecode = "null",
maxrecs = 250000,
applyautoclean = FALSE) {
Expand Down
138 changes: 80 additions & 58 deletions R/Figures.R
Original file line number Diff line number Diff line change
Expand Up @@ -67,8 +67,10 @@ TADA_Boxplot <- function(.data, id_cols = c("TADA.ComparableDataIdentifier")) {
TADA_CheckColumns(.data, id_cols)

# check .data has required columns
TADA_CheckColumns(.data, c("TADA.ResultMeasureValue",
"TADA.ResultMeasure.MeasureUnitCode"))
TADA_CheckColumns(.data, c(
"TADA.ResultMeasureValue",
"TADA.ResultMeasure.MeasureUnitCode"
))

start <- dim(.data)[1]

Expand Down Expand Up @@ -235,11 +237,13 @@ TADA_Histogram <- function(.data, id_cols = c("TADA.ComparableDataIdentifier"))

# check .data has required columns
TADA_CheckColumns(.data, id_cols)

# check .data has required columns
TADA_CheckColumns(.data, c("TADA.ResultMeasureValue",
"TADA.ResultMeasure.MeasureUnitCode"))

TADA_CheckColumns(.data, c(
"TADA.ResultMeasureValue",
"TADA.ResultMeasure.MeasureUnitCode"
))

start <- dim(.data)[1]

.data <- subset(.data, !is.na(.data$TADA.ResultMeasureValue))
Expand Down Expand Up @@ -544,11 +548,13 @@ TADA_Scatterplot <- function(.data, id_cols = c("TADA.ComparableDataIdentifier")

# check .data has required columns
TADA_CheckColumns(.data, id_cols)

# check .data has required columns
TADA_CheckColumns(.data, c("ActivityStartDate",
"TADA.ResultMeasureValue",
"TADA.ResultMeasure.MeasureUnitCode"))
TADA_CheckColumns(.data, c(
"ActivityStartDate",
"TADA.ResultMeasureValue",
"TADA.ResultMeasure.MeasureUnitCode"
))

.data <- .data %>%
dplyr::group_by(dplyr::across(dplyr::all_of(id_cols))) %>%
Expand All @@ -574,7 +580,7 @@ TADA_Scatterplot <- function(.data, id_cols = c("TADA.ComparableDataIdentifier")
x = plot.data$ActivityStartDate, # currently uses start date only, may want to change to just ActivityStartDateTime in the future, but for now ActivityStartDateTime includes NAs when time is not available. Including ActivityStartDateTime in hover feature instead.
y = plot.data$TADA.ResultMeasureValue,
# consider adding color or shapes to make it easier to see sites and/or possible relative result values
# color = ~MonitoringLocationName,
# color = ~MonitoringLocationName,
# colors = RColorBrewer::brewer.pal(3, "Set2"),
marker = list(color = "#00bde3"), # marker color
stroke = I("#005ea2"), # marker border color
Expand All @@ -583,8 +589,8 @@ TADA_Scatterplot <- function(.data, id_cols = c("TADA.ComparableDataIdentifier")
hovertext = paste(
"Result:", paste0(plot.data$TADA.ResultMeasureValue, " ", plot.data$TADA.ResultMeasure.MeasureUnitCode), "<br>",
"Activity Start Date:", plot.data$ActivityStartDate, "<br>",
"Activity Start Date Time:", plot.data$ActivityStartDateTime, "<br>",
"Monitoring Location Name:", plot.data$MonitoringLocationName, "<br>",
"Activity Start Date Time:", plot.data$ActivityStartDateTime, "<br>",
"Monitoring Location Name:", plot.data$MonitoringLocationName, "<br>",
"Media:", plot.data$TADA.ActivityMediaName, "<br>",
"Media Subdivision:", plot.data$ActivityMediaSubdivisionName, "<br>",
"Result Depth:", paste0(
Expand Down Expand Up @@ -634,7 +640,7 @@ TADA_Scatterplot <- function(.data, id_cols = c("TADA.ComparableDataIdentifier")
margin = mrg
) %>%
# config options https://plotly.com/r/configuration-options/
plotly::config(displaylogo = FALSE) #, displayModeBar = TRUE) # TRUE makes bar always visible
plotly::config(displaylogo = FALSE) # , displayModeBar = TRUE) # TRUE makes bar always visible

# create plot for all groupid's
all_scatterplots[[i]] <- one_scatterplot
Expand Down Expand Up @@ -667,7 +673,7 @@ TADA_Scatterplot <- function(.data, id_cols = c("TADA.ComparableDataIdentifier")
#' @param groups A vector of two identifiers from the id_cols column. For
#' example, if the id_cols is 'TADA.ComparableDataIdentifier', the groups could
#' be 'DISSOLVED OXYGEN (DO)_NA_NA_UG/L' and 'PH_NA_NA_NA'. These groups will
#' be specific to your dataset. If the id_cols is 'MonitoringLocationName',
#' be specific to your dataset. If the id_cols is 'MonitoringLocationName',
#' the groups could be 'Upper Red Lake: West' and 'Upper Red Lake: West-Central'.
#'
#' @return A single plotly scatterplot figure with one x-axis (Date/Time) and a
Expand All @@ -688,31 +694,33 @@ TADA_Scatterplot <- function(.data, id_cols = c("TADA.ComparableDataIdentifier")
#' df <- dplyr::filter(Data_6Tribes_5y_Harmonized, TADA.ComparableDataIdentifier == "TOTAL PHOSPHORUS, MIXED FORMS_UNFILTERED_AS P_UG/L")
#' # Creates a scatterplot including the two specified sites in the same plot:
#' TADA_TwoCharacteristicScatterplot(df, id_cols = "MonitoringLocationName", groups = c("Upper Red Lake: West", "Upper Red Lake: West-Central"))
#'
#'
TADA_TwoCharacteristicScatterplot <- function(.data, id_cols = "TADA.ComparableDataIdentifier", groups) {
# check .data is data.frame
TADA_CheckType(.data, "data.frame", "Input object")

# check .data has required columns
TADA_CheckColumns(.data, id_cols)

# check .data has required columns
reqcols <- c("TADA.ResultMeasureValue",
"TADA.ResultMeasure.MeasureUnitCode",
"ActivityStartDate")

reqcols <- c(
"TADA.ResultMeasureValue",
"TADA.ResultMeasure.MeasureUnitCode",
"ActivityStartDate"
)

# check .data has required columns
TADA_CheckColumns(.data, reqcols)

# if left blank, ensure comparable data identifier is in the id_cols vector
if (is.null(id_cols)) {
id_cols <- "TADA.ComparableDataIdentifier"
}

if (!"TADA.ComparableDataIdentifier" %in% id_cols) {
print("Note: TADA.ComparableDataIdentifier not found in id_cols argument and is highly recommended.")
}

# check that groups are in id_cols
id <- unlist(unique(.data[, id_cols]))
if (any(!groups %in% id)) {
Expand All @@ -734,11 +742,15 @@ TADA_TwoCharacteristicScatterplot <- function(.data, id_cols = "TADA.ComparableD
param1 <- subset(plot.data, plot.data[, id_cols] %in% groups[1])
param2 <- subset(plot.data, plot.data[, id_cols] %in% groups[2])

title <- TADA::TADA_InsertBreaks(paste0(param1$TADA.CharacteristicName[1],
" and ",
param2$TADA.CharacteristicName[1],
" Over Time"),
len = 45)
title <- TADA::TADA_InsertBreaks(
paste0(
param1$TADA.CharacteristicName[1],
" and ",
param2$TADA.CharacteristicName[1],
" Over Time"
),
len = 45
)

# figure margin
mrg <- list(
Expand All @@ -750,7 +762,7 @@ TADA_TwoCharacteristicScatterplot <- function(.data, id_cols = "TADA.ComparableD
scatterplot <- plotly::plot_ly(type = "scatter", mode = "markers") %>%
plotly::layout(
xaxis = list(
#title = "Activity Start Date", # not necessary?
# title = "Activity Start Date", # not necessary?
titlefont = list(size = 16, family = "Arial"),
tickfont = list(size = 16, family = "Arial"),
hoverformat = ",.4r", linecolor = "black", rangemode = "tozero",
Expand All @@ -764,8 +776,8 @@ TADA_TwoCharacteristicScatterplot <- function(.data, id_cols = "TADA.ComparableD
showgrid = FALSE, tickcolor = "black"
),
yaxis2 = list(
side = "right",
overlaying = "y",
side = "right",
overlaying = "y",
title = paste0(param2$TADA.CharacteristicName[1], " ", param2$TADA.ResultMeasure.MeasureUnitCode[1]),
titlefont = list(size = 16, family = "Arial"),
tickfont = list(size = 16, family = "Arial"),
Expand All @@ -775,29 +787,35 @@ TADA_TwoCharacteristicScatterplot <- function(.data, id_cols = "TADA.ComparableD
hoverlabel = list(bgcolor = "white"),
title = title,
plot_bgcolor = "#e5ecf6",
margin = mrg,
legend = list(orientation = "h",
xanchor = "center",
x = 0.5)
margin = mrg,
legend = list(
orientation = "h",
xanchor = "center",
x = 0.5
)
) %>%
# config options https://plotly.com/r/configuration-options/
plotly::config(displaylogo = FALSE) %>% #, displayModeBar = TRUE) # TRUE makes bar always visible
plotly::config(displaylogo = FALSE) %>% # , displayModeBar = TRUE) # TRUE makes bar always visible
plotly::add_trace(
data = param1,
x = ~ActivityStartDate,
y = ~TADA.ResultMeasureValue,
name = paste0(param1$TADA.ResultSampleFractionText, " ",
param1$TADA.CharacteristicName, " ",
param1$TADA.MethodSpecificationName),
marker = list(size = 10,
color = "#E34234",
line = list(color = "#005ea2", width = 2)),
data = param1,
x = ~ActivityStartDate,
y = ~TADA.ResultMeasureValue,
name = paste0(
param1$TADA.ResultSampleFractionText, " ",
param1$TADA.CharacteristicName, " ",
param1$TADA.MethodSpecificationName
),
marker = list(
size = 10,
color = "#E34234",
line = list(color = "#005ea2", width = 2)
),
hoverinfo = "text",
hovertext = paste(
"Result:", paste0(param1$TADA.ResultMeasureValue, " ", param1$TADA.ResultMeasure.MeasureUnitCode), "<br>",
"Activity Start Date:", param1$ActivityStartDate, "<br>",
"Activity Start Date Time:", param1$ActivityStartDateTime, "<br>",
"Monitoring Location Name:", param1$MonitoringLocationName, "<br>",
"Activity Start Date Time:", param1$ActivityStartDateTime, "<br>",
"Monitoring Location Name:", param1$MonitoringLocationName, "<br>",
"Media:", param1$TADA.ActivityMediaName, "<br>",
"Media Subdivision:", param1$ActivityMediaSubdivisionName, "<br>",
"Result Depth:", paste0(
Expand All @@ -820,21 +838,25 @@ TADA_TwoCharacteristicScatterplot <- function(.data, id_cols = "TADA.ComparableD
)
) %>%
plotly::add_trace(
data = param2,
x = ~ActivityStartDate,
y = ~TADA.ResultMeasureValue,
name = paste0(param2$TADA.ResultSampleFractionText, " ",
param2$TADA.CharacteristicName, " ",
param2$TADA.MethodSpecificationName),
marker = list(size = 10, color = "#00bde3",
line = list(color = "#005ea2", width = 2)),
data = param2,
x = ~ActivityStartDate,
y = ~TADA.ResultMeasureValue,
name = paste0(
param2$TADA.ResultSampleFractionText, " ",
param2$TADA.CharacteristicName, " ",
param2$TADA.MethodSpecificationName
),
marker = list(
size = 10, color = "#00bde3",
line = list(color = "#005ea2", width = 2)
),
yaxis = "y2",
hoverinfo = "text",
hovertext = paste(
"Result:", paste0(param2$TADA.ResultMeasureValue, " ", param2$TADA.ResultMeasure.MeasureUnitCode), "<br>",
"Activity Start Date:", param2$ActivityStartDate, "<br>",
"Activity Start Date Time:", param2$ActivityStartDateTime, "<br>",
"Monitoring Location Name:", param2$MonitoringLocationName, "<br>",
"Activity Start Date Time:", param2$ActivityStartDateTime, "<br>",
"Monitoring Location Name:", param2$MonitoringLocationName, "<br>",
"Media:", param2$TADA.ActivityMediaName, "<br>",
"Media Subdivision:", param2$ActivityMediaSubdivisionName, "<br>",
"Result Depth:", paste0(
Expand Down
Loading

0 comments on commit af167ed

Please sign in to comment.