Skip to content

Releases: robertthiesmeier/unmeasured_confounding

Code for the simulation of low driving mileage bias

21 Jun 10:37
29ecc9b
Compare
Choose a tag to compare

Code for the simulation of low driving mileage bias

Summary: Probabilistic sensitivity analysis using Monte Carlo simulations (MCS) has become an increasingly valuable tool to assess unmeasured confounding. This study applies MCSs to quantify suspected unmeasured confounding, i.e. low driving mileage, on the effect of polypharmacy and road traffic crashes (RTCs) in older adults.

Aim: To determine the impact of low driving mileage bias on the effect of polypharmacy on RTCs in older adults.

This study applies an MCS approach to the case of unmeasured confounding, namely low annual driving mileage, on the effect of polypharmacy on road traffic crashes (RTCs) in older adults. The term unmeasured confounding is used to refer to confounding due to a lack of data for adjustment and consequent omission from the statistical model. In addition, we define concurrent consumption of five or more prescribed medications in 90 days as polypharmacy. The code for the simulation can be used and edited based on different assumptions and parameters.

Stata Code

Loop for simulation

capture program drop sim_logit_driving
program define sim_logit_driving, rclass
version 16.1
syntax [, obs(integer 100000) ]
drop _all
set obs `obs'

*Parameters for K
gen age = rbinomial(1, .3) // >65yo
gen sex = rbinomial(1, .52) // female
gen area = rbinomial(1, .6) // urban
*Baseline b0 characteristics
scalar a0 = logit(.3) // low driving milage: 30%
scalar c0 = logit(0.17) // Polypharmacy:17%
scalar b0 = logit(.002) // RTC 0.2%

*Parameters for the ZK relationship
scalar a1 = log(1.2) // Age
scalar a2 = log(1.8) // Sex
scalar a3 = log(1.2) // Area

*Parameters for the XKZ relationship
scalar c1 = log(2) // Age
scalar c2 = log(0.9) // Sex
scalar c3 = log(1.2) // Area
scalar c4 = log(1.5) // XZ

*Parameters for the YXZK relationship
scalar b1 = rnormal(ln(1.35), (ln(1.56)-ln(1.17))/(2*1.96)) // X
scalar b2 = log(3) // Z
scalar b3 = log(0.6) // Age
scalar b4 = log(0.3) // Sex
scalar b5 = log(0.9) // Area

gen z = rbinomial(1, invlogit(a0 + a1*age + a2*sex + a3*area))
gen x = rbinomial(1, invlogit(c0 + c1*age + c2*sex + c3*area + c4*z))
gen y = rbinomial(1, invlogit(b0 + b1*x + b2*z + b3*age + b4*sex +
b5*area))

* DGM under an effect of X not adjusted for Z
di _n "Ignoring confounding by Z"
logit y x age sex area, or
return scalar b1_1_x = _b[x]
return scalar se_b1_1_x = _se[x]

* DGM under an effect of X adjusted for Z
di _n "Adjusting for confounding by Z"
logit y x z age sex area, or
test _b[x] = 0
return scalar p = r(p)
return scalar b1_1_z = _b[x]
return scalar se_b1_1_z = _se[x] 
end

Simulate one large prospective study of size n = 100000

sim_logit_driving

Simulate a 1000 studies, each n=100000

simulate p = r(p) b1_1_x = r(b1_1_x) se_b1_1_x = r(se_b1_1_x) ///
b1_1_z = r(b1_1_z) se_b1_1_z = r(se_b1_1_z), reps(1000):
sim_logit_driving
count if p < 0.05
display "Simulated Power = " r(N)/c(N)*100

OR1 (effect of X not adjusting for Z) (95%CI)

gen orx = exp(b1_1_x)
su b1_1_x
di exp(r(mean)-1.96*r(sd))
di exp(r(mean)+1.96*r(sd))

OR0 (effect of X adjusting for Z) (95%CI)

gen orz = exp(b1_1_z)
su b1_1_z
di exp(r(mean)-1.96*r(sd))
di exp(r(mean)+1.96*r(sd))

Graphical presentation

twoway ///
    (histogram orx, bin(60) fc(black%10) lc(black%30)) ///
    (histogram orz, bin(60) fc(black%25) lc(black%25)) ///
    , xlabel(#10) ylabel(#10, angle(horiz) format(%3.2f)) ///
    xline(1.37 1.53, lc(black%80) lp(-)) ///
    xtitle("Odds Ratio (ln)") xscale(log) ytitle("Simulated
    Distribution") ///
    scheme(s1mono) plotregion(style(none)) ///
    ylabel(#10) plotregion(style(none)) ///
    legend(order(1 2) label(1 "Crude OR{sub:{it:YX}}=1.53") label(2
    "Adjusted OR{sub:{it:YX|Z}}=1.37") ring(0) col(1) pos(1)
    size(small) region(style(none))) name(fig1, replace)

* Kernel Density: Comparison Effect of X vs no Effect of X after Adjustment of Z
twoway /// 
    (kdensity orx, color(black%10) cmissing(y) recast(area)) ///
    (kdensity orz, color(black%25) cmissing(y) recast(area)), ///
    xline(1.37 1.53, lc(black%50) lp(-)) ///
    ytitle("Simulated Distribution") ///
    xtitle("Odds Ratio (ln)") xscale(log) ///
    ylabel(#10, angle(horiz) format(%3.2f)) ///
    xlabel(#10) scheme(s1mono) plotregion(style(none)) ///
    legend(order(1 2) label(1 "Crude OR{sub:{it:YX}}=1.53") ///
    label(2 "Adjusted OR{sub:{it:YX|Z}}=1.37") ring(0) col(1)
    pos(1) size(small) region(style(none))) name(fig2, replace)
* Combined Graph
graph combine fig1 fig2 ///
    , ycommon cols(1) scheme(s1mono) name(fig_combine, replace)

Python Code

Packages

import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
from matplotlib.ticker import FormatStrFormatter
import seaborn as sns
from scipy import stats
import statsmodels.api as sm
import statsmodels.formula.api as smf

Define the mathematical function invlogit

def invlogit(x):
return np.exp(x)/(1+np.exp(x))

Define the mathematical function logit

def logit(x):
return np.log(x/(1-x))
def sim_logit_driving():
n = 100000

Parameters

# Parameters for K
age = np.random.binomial(1, .3, n) # >65yo
sex = np.random.binomial(1, .52, n) # female
area = np.random.binomial(1, .6, n) # urban
# Baseline b0 characteristics
a0 = logit(.3) # low driving mileage
c0 = logit(0.17) # Polypharmacy
b0 = logit(.002) # RTC
# Parameters for the ZK relationship
a1 = np.log(1.2)
a2 = np.log(1.8)
a3 = np.log(1.2)
# Parameters for the XKZ relationship
c1 = np.log(2)
c2 = np.log(0.9)
c3 = np.log(1.2)
c4 = np.log(1.5)
# Parameters for the YXZK relationship
b1 = np.random.normal(np.log(1.35), (np.log(1.56)-
np.log(1.17))/(2*1.96), 1)
b2 = np.log(3)
b3 = np.log(0.6)
b4 = np.log(0.3)
b5 = np.log(0.9)
z = np.random.binomial(1, invlogit(a0 + a1*age + a2*sex +
a3*area), n)
x = np.random.binomial(1, invlogit(c0 + c1*age + c2*sex +
c3*area + c4*z), n)
y = np.random.binomial(1, invlogit(b0 + b1*x + b2*z + b3*age +
b4*sex + b5*area), n)
df = pd.DataFrame(data = np.column_stack((y, x, z,age, sex,
area) ))

DGM under an effect of X

model1 = smf.logit('y~x+z+age+sex+area', data=df).fit()
se = np.sqrt(np.diag(model1.cov_params()))
b1_1 = model1.params[1]
se_b1_1 = se[1]
# print(model1.summary())

DGM under no effect of X

b1 = 0
y = np.random.binomial(1, invlogit(b0 + b1*x + b2*z + b3*age +
b4*sex + b5*area), n)
model0 = smf.logit('y~x+z+age+sex+area', data=df).fit()
b1_0 = model0.params[1]
se = np.sqrt(np.diag(model0.cov_params()))
b1_0 = model0.params[1]
se_b1_0 = se[1]
# print(model0.summary())
b = (b1_1, se_b1_1, b1_0, se_b1_0)
return(b)
nsim = 1000
b = []
for i in range(nsim):
b = np.append(b, np.array(sim_logit_driving()), axis=0)
sim_results = np.reshape(b, (nsim, 4))

Extract point estimates and std errors

b1 = sim_results[:,0]
se_b1 = sim_results[:,1]
or1 = np.exp(sim_results[:,0])
or0 = np.exp(sim_results[:,2])
fig, ax = plt.subplots(figsize=(5, 3))
ax.set(xscale="log")
ax.hist(or0, color='red', alpha=.2, density=True, bins=30, rwidth=.9)
ax.set_xticks((0.6, 0.8, 1.0, 1.3, 1.6, 2))
ax.xaxis.set_major_formatter(FormatStrFormatter('%3.2f'))
plt.axvline(np.exp(np.mean(np.log(or0))), color='green', alpha=.5,
linestyle='dashed', linewidth=3)
plt.hist(or1, color='blue', alpha=.2, density=True, bins=30, rwidth=.9)
plt.axvline(np.exp(np.mean(np.log(or1))), color='green', alpha=.5,
linestyle='dashed', linewidth=3)
plt.rcParams['axes.spines.right'] = False
plt.rcParams['axes.spines.top'] = False
plt.ylabel("Sampling distribution")
plt.xlabel(r"$\hat \beta_1$")
plt.show()

average adjusted odds ratio under OR_XY|Z=1.35

np.exp(np.mean(np.log(or1)))

average adjusted odds ratio under OR_XY|Z=0

np.exp(np.mean(np.log(or0)))

Simulated statistical power

z = (b1-0)/se_b1
p = stats.norm.cdf(-abs(z))*2
power = np.sum(p<.05)/nsim
print("Simulated power (%) = " , power*100)

R code

Define the mathematical function invlogit

 inv_logit <- function(x){1 / (1 + exp(-x))}

Loop for simulation

 sim_logit_driving <- function() {
 n = 100000

 # Parameters for K
 age <-  rbinom(n, 1, .3)  # >65yo
 sex <-  rbinom(n, 1, .52) # female
 area <-  rbinom(n, 1, .6) # urban

 # Baseline b0 characteristics
 a0 <-  logit(.3)   # low driving mileage
 c0 <-  logit(0.17) # Polypharmacy
 b0 <-  logit(.002) # RTC

 # Parameters for the ZK relationship
 a1 <-  log(1.2)
 a2 <-  log(1.8)
 a3 <-  log(1.2)

 # Parameters for the XKZ relationship
 c1 <-  log(2)
 c2 <-  log(0.9)
 c3 <-  log(1.2)
 c4 <-  log(1.5)

 # Parameters for the YXZK relationship
 b1 <-  rnorm(1, log(1.35), (log(1.56)-log(1.17))/(2*1.96))
 b2 <-  log(3)
 b3 <-  log(0.6)
 b4 <-  log(0.3)
 b5 <-  log(0.9)

 z <- rbinom(n, 1, inv_logit(a0 + a1*age + a2*sex + a3*area))
 x <- rbinom(n, 1, inv_logit(c0 + c1*age + c2*sex + c3*area + c4*z))
 y <- rbinom(n, 1, inv_logit(b0 + b1*x + b2*z + b3*age + b4*sex + b5*area))

# df = data.frame(data = cbind(y, x, z,age, sex, area) )
# DGM under an effect of X

...

Read more