Skip to content

Commit

Permalink
version 1.0 checked
Browse files Browse the repository at this point in the history
  • Loading branch information
alexpkeil1 committed Aug 17, 2021
1 parent 3e9bf56 commit 5f2a221
Show file tree
Hide file tree
Showing 11 changed files with 121 additions and 45 deletions.
33 changes: 23 additions & 10 deletions R/base.R
Original file line number Diff line number Diff line change
Expand Up @@ -142,16 +142,19 @@ varimp <- function(X,
#' @export
#' @importFrom stats pnorm gaussian binomial
#' @examples
#' \dontrun{
#'
#' library(future)
#' plan(multisession) # fit models in parallel
#' data(metals, package="qgcomp")
#' XYlist = list(X=metals[,c(1:22, 23)], Y=metals$y)
#' XYlist = list(X=metals[,c(1:10, 15:23)], Y=metals$y)
#' Y_learners = .default_continuous_learners_big()
#' Xbinary_learners = .default_binary_learners_big()
#' Xdensity_learners = .default_density_learners_big()[c(1:4,6:7)]
#' vi <- varimp(X=XYlist$X,Y=XYlist$Y, delta=0.1, Y_learners = Y_learners,
#' Xdensity_learners=Xdensity_learners, Xbinary_learners=Xbinary_learners,
#' estimator="TMLE", updatetype="unweighted",estimand="diff")
#' vi
#' plan(transparent) # go back to standard evaluation
#' vi1 <- varimp_refit(vi, X=XYlist$X,Y=XYlist$Y, delta=0.1,
#' estimator="TMLE", updatetype="weighted", estimand="diff")
#' vi1
Expand All @@ -165,15 +168,25 @@ varimp <- function(X,
#' estimator="IPW")
#' vi4
#'
#' hist(metals$y)
#' hist(metals$calcium)
#' hist(metals$total_hardness)
#' # find the fit corresponding to calcium
#' caidx <- which(names(XYlist$X)=="calcium")
#' plot(metals$calcium, vi1$gfits[[4]]$predict()[[1]], pch=19, cex=0.2)
#' plot(metals$calcium, metals$y, pch=19, cex=0.2)
#' plot(metals$total_hardness, vi1$gfits[[21]]$predict()[[1]], pch=19, cex=0.2)
#' plot(metals$total_hardness, metals$y, pch=19, cex=0.2)
#' }
#' thidx <- which(names(XYlist$X)=="total_hardness")
#' # can confirm
#' # vi1$gfits[[caidx]]$training_task$nodes$outcome
#' calpredict = vi1$gfits[[caidx]]$predict()[[1]]
#' thpredict = vi1$gfits[[thidx]]$predict()[[1]]
#' # plot predicted density (not predicted value!) against original value,
#' # compare with kernel density
#' plot(metals$calcium, calpredict/max(calpredict), pch=19, cex=0.2,
#' ylab="scaled conditional density")
#' lines(density(metals$calcium))
#' plot(metals$total_hardness, thpredict/max(thpredict), pch=19, cex=0.2,
#' ylab="scaled conditional density")
#' lines(density(metals$total_hardness))
#' # note these are effectively measuring much of the same quantity
#' plot(metals$calcium, metals$total_hardness)
#' plot(calpredict, thpredict)
#'
varimp_refit <- function(vibr_fit,
X,
W = NULL,
Expand Down
26 changes: 16 additions & 10 deletions R/base_identify.R
Original file line number Diff line number Diff line change
Expand Up @@ -11,6 +11,7 @@
#' @param vibr_fit a fit from varimp
#' @param Acol (integer) which column of predictors in call to varimp to diagnose
#' @param delta (numeric, default=0.01) change in each column of predictors in call to varimp corresponding to stochastic intervention
#' @param ... not used
#'
#' @return ggplot2 plot object
#' @export
Expand All @@ -30,7 +31,7 @@
#' estimator="TMLE")
#' plotshift_dens(vi_ipw, Acol=1, delta=0.01)
#' }
plotshift_dens <- function(vibr_fit, Acol=1, delta){
plotshift_dens <- function(vibr_fit, Acol=1, delta, ...){
if(is.null(delta)) delta <- vibr_fit$delta
if(!is.null(vibr_fit$qfit)){
task <- vibr_fit$qfit$training_task
Expand Down Expand Up @@ -64,8 +65,8 @@ plotshift_dens <- function(vibr_fit, Acol=1, delta){

p1 <-
ggplot() + theme_classic() + scale_color_grey(name="") +
geom_step(data=X1,aes(x=ord, y=dens, color="Observed"))+
geom_step(data=X2,aes(x=ord, y=densshift, color="Shifted"))+
geom_step(data=X1,aes_string(x="ord", y="dens", color='"Observed"'))+
geom_step(data=X2,aes_string(x="ord", y="densshift", color='"Shifted"'))+
scale_y_continuous(name=paste0("density(",xnm[Acol[1]],")"), expand = expansion(0))+
scale_x_continuous(name="Sorted index", expand = expansion(.01))
print(p1)
Expand All @@ -80,6 +81,7 @@ plotshift_dens <- function(vibr_fit, Acol=1, delta){
#' @param vibr_fit a vibr_fit object from varimp
#' @param Acol (integer) which column of predictors in call to varimp to diagnose (can only be continuous column of predictors in call to varimp)
#' @param delta (numeric, default=0.01) change in each column of predictors in call to varimp corresponding to stochastic intervention
#' @param ... not used
#'
#' @export
#' @import ggplot2
Expand Down Expand Up @@ -110,7 +112,7 @@ plotshift_dens <- function(vibr_fit, Acol=1, delta){
#' # or density estimation (not very many non-zero weights even with large
#' # value of delta, or really large weights)
#' }
plotshift_wt <- function(vibr_fit, Acol=1, delta=0.01){
plotshift_wt <- function(vibr_fit, Acol=1, delta=0.01, ...){
if(is.null(delta)) delta <- vibr_fit$delta
if(!is.null(vibr_fit$qfit)){
task <- vibr_fit$qfit$training_task
Expand All @@ -120,8 +122,8 @@ plotshift_wt <- function(vibr_fit, Acol=1, delta=0.01){
varnms <- c(task$nodes$outcome, task$nodes$covariates)
}
dat <- task$data
ft <- vibr_fit$gfits[[Acol[1]]]
X <- data.frame(dat)[,varnms,drop=FALSE]
ft <- vibr_fit$gfits[[Acol[1]]]
xnm = names(X)
Xc <- .shift(X,Acol,shift = -delta)
X$set="obs"
Expand All @@ -139,7 +141,7 @@ plotshift_wt <- function(vibr_fit, Acol=1, delta=0.01){
X$wt = X$densshift/X$dens
p1 <-
ggplot(data = X) + theme_classic() + scale_color_grey() +
geom_point(aes(x=dens, y=wt), pch=19, size=1, alpha=0.5)+
geom_point(aes_string(x="dens", y="wt"), pch=19, size=1, alpha=0.5)+
scale_x_continuous(name=paste0("density(observed ",xnm[Acol[1]],")"))+
scale_y_continuous(name="density(shifted)/density(observed)")
print(p1)
Expand All @@ -153,6 +155,7 @@ plotshift_wt <- function(vibr_fit, Acol=1, delta=0.01){
#' @param Acol (integer) which column of predictors in call to varimp to diagnose
#' @param Bcol (integer) second column of predictors in call to varimp to diagnose
#' @param delta (numeric, default=0.01) change in each column of predictors in call to varimp corresponding to stochastic intervention
#' @param ... not used
#'
#' @export
#' @import ggplot2
Expand All @@ -177,7 +180,7 @@ plotshift_wt <- function(vibr_fit, Acol=1, delta=0.01){
#' plotshift_scatter(vi_ipw, Acol=1, Bcol=2, delta=1)
#' plotshift_scatter(vi_ipw, Acol=1, Bcol=5, delta=1)
#' }
plotshift_scatter <- function(vibr_fit, Acol, Bcol, delta=NULL, joint=FALSE){
plotshift_scatter <- function(vibr_fit, Acol, Bcol, delta=NULL, ...){
if(is.null(delta)) delta <- vibr_fit$delta
if(!is.null(vibr_fit$qfit)){
task <- vibr_fit$qfit$training_task
Expand All @@ -192,7 +195,7 @@ plotshift_scatter <- function(vibr_fit, Acol, Bcol, delta=NULL, joint=FALSE){
requireNamespace("ggplot2")
Xint <- data.frame(
x1a = X[,Acol]+delta,
x2a = X[,Bcol]+ifelse(joint, delta, 0)
x2a = X[,Bcol]
)
X$col <- "Observed"
Xint$col <- "Shifted"
Expand All @@ -214,10 +217,9 @@ plotshift_scatter <- function(vibr_fit, Acol, Bcol, delta=NULL, joint=FALSE){
#'
#' Give a numerical (but cruder) version of the diagnostics in plotshift_dens, where one can track the change in estimated exposure mass/density following a stochastic intervention on exposure.
#' @param vibr_fit a fit from varimp
#' @param X predictors from a varimp fit
#' @param Acol (integer) which column of predictors in call to varimp to diagnose
#' @param delta (numeric, default=0.01) change in each column of predictors in call to varimp corresponding to stochastic intervention
#' @param quantiles (numeric vector, default=c(0, 0.1, 0.9, 1)) cutpoints in the closed interval [0,1] that correspond to quantiles of the estimated density of observed values of a predictor. The length of this vector determines the size of the table. Using values close to 0 or 1 allows one to track whether "intervened" predictors are pushed toward the extreme of the estimated predictor density, which could indicate lack of support for the scale of the implied intervention (e.g. delta is too big).
#' @param quantiles (numeric vector, default=c(0, 0.1, 0.9, 1)) cutpoints in the closed interval \[0,1\] that correspond to quantiles of the estimated density of observed values of a predictor. The length of this vector determines the size of the table. Using values close to 0 or 1 allows one to track whether "intervened" predictors are pushed toward the extreme of the estimated predictor density, which could indicate lack of support for the scale of the implied intervention (e.g. delta is too big).
#'
#' @export
#' @examples
Expand Down Expand Up @@ -245,6 +247,8 @@ dx_dens <- function(vibr_fit, Acol=1, delta=0.01, quantiles=c(0, 0.1, 0.9, 1)){
task <- vibr_fit$gfits[[1]]$training_task
varnms <- c(task$nodes$outcome, task$nodes$covariates)
}
dat <- task$data
X <- data.frame(dat)[,varnms,drop=FALSE]
ft <- vibr_fit$gfits[[Acol[1]]]
if(is.null(delta)) delta = vibr_fit$delta
xnm = names(X)
Expand Down Expand Up @@ -337,6 +341,8 @@ dx_dens <- function(vibr_fit, Acol=1, delta=0.01, quantiles=c(0, 0.1, 0.9, 1)){
#' @param Xdensity_learners list of sl3 learners used to estimate the density of continuous predictors, conditional on all other predictors in X
#' @param Xbinary_learners list of sl3 learners used to estimate the probability mass of continuous predictors, conditional on all other predictors in X
#' @param verbose (logical) print extra information
#' @param scale_continuous (logical) scale continuous variables in X to have standard deviation of 0.5
#' @param threshold (numeric, default=10) threshold for high weights
#' @param ... passed to sl3::make_sl3_Task (e.g. weights)
#'
#' @export
Expand Down
2 changes: 1 addition & 1 deletion R/base_ipw.R
Original file line number Diff line number Diff line change
Expand Up @@ -149,7 +149,7 @@
isbin=FALSE,
...){
obj = .prelims(X=X, Y=Y, V=V, whichcols=whichcols, delta=delta, Y_learners=NULL, Xbinary_learners, Xdensity_learners, verbose=verbose, isbin=isbin, ...)
res = .trained_ipw(obj=obj,X=X,Y=Y,delta=delta,qfun=qfun,gfun=gfun,estimand=estimand,bounded=bounded,updatetype=updatetype)
res = .trained_ipw(obj=obj,X=X,Y=Y,delta=delta,qfun=.qfunction,gfun=.gfunction,estimand=estimand,bounded=bounded,updatetype=NULL)
res
}

Expand Down
2 changes: 1 addition & 1 deletion R/base_tmle.R
Original file line number Diff line number Diff line change
Expand Up @@ -291,7 +291,7 @@
isbin=FALSE,
...){
obj = .prelims(X=X, Y=Y, V=V, whichcols=whichcols, delta=delta, Y_learners, Xbinary_learners, Xdensity_learners, verbose=verbose, isbin=isbin, ...)
res = .trained_tmle(obj,X,Y,delta,qfun,gfun,estimand,bounded,updatetype)
res = .trained_tmle(obj=obj,X=X, Y=Y,delta=delta,qfun=.qfunction,gfun=.gfunction,estimand=estimand,bounded=bounded,updatetype=updatetype)
res
}

Expand Down
4 changes: 1 addition & 3 deletions man/dx_dens.Rd

Some generated files are not rendered by default. Learn more about how customized files appear on GitHub.

4 changes: 3 additions & 1 deletion man/plotshift_dens.Rd

Some generated files are not rendered by default. Learn more about how customized files appear on GitHub.

4 changes: 3 additions & 1 deletion man/plotshift_scatter.Rd

Some generated files are not rendered by default. Learn more about how customized files appear on GitHub.

4 changes: 3 additions & 1 deletion man/plotshift_wt.Rd

Some generated files are not rendered by default. Learn more about how customized files appear on GitHub.

4 changes: 4 additions & 0 deletions man/precheck_identification.Rd

Some generated files are not rendered by default. Learn more about how customized files appear on GitHub.

33 changes: 23 additions & 10 deletions man/varimp_refit.Rd

Some generated files are not rendered by default. Learn more about how customized files appear on GitHub.

Loading

0 comments on commit 5f2a221

Please sign in to comment.