##### simulation.R ##### # Example code for simulations: boostrap, power, sensitivity. # ##### Simulations: How ##### # 1. create (or find) a distribution to sample from d1 = rnorm(n=1000,sd=2,mean=10) d2 = c(rep(1,100),rep(2,200),rep(3,300),rep(4,400)) d3 = rbinom(n=1000,p=0.2,size=1) d4 = rpois(n=1000,lambda=1+d3) hist(d1) hist(d2) hist(d3) hist(d4) # 2. Sample from the distribution using a function s1 = sample(d1, 1000, replace=TRUE) s2 = sample(d2, 1000, replace=TRUE) s3 = sample(d3, 1000, replace=TRUE) s4 = sample(d4, 1000, replace=TRUE) # 3. Do something with the samples mean(s1) sd(s1) glm(s4 ~ s3) ##### Example: Bootstrap ##### # Bootstrap a confidence interval for the sample mean (n=100) # Step 1. write a function to take a sample my.sample = function(x) { sample(x,100) } # Step 2. write a function to calculate a statistic form a sample my.stat = function(x) { mean(x) } # Step 3. combine 1 and 2, and repeat many times sims = replicate(1000,my.sample(d1)) results = apply(sims,2,my.stat) # Step 4. Summarize the distribution of results mean(results) sd(results) 2/(100^0.5) # exact calculation quantile(results,probs = c(0.025,0.975)) # Boostrap a cauchy distribution for a ratio confidence interval # (Cauchy: can be expressed as a ratio of two normal variables) # Application: calculating a risk ratio form two modeled risks # Risk A: 2.0 (95% CI 1.8 to 2.2) # Risk B: 1.2 (95% CI 1.1 to 1.3) # What is the CI for RR A/B? sd.a = (2.2-1.8)/(2*qnorm(0.975)) sd.b = (1.3-1.1)/(2*qnorm(0.975)) # simulate distribution of risks (obtained from model) risk.a = rnorm(mean=2, sd=sd.a, n=100000) risk.b = rnorm(mean=1.2, sd=sd.b, n=10000) # inspect the modeled risk distribution hist(risk.a,xlim=c(0,4),ylim=c(0,10),freq=FALSE,col="red") hist(risk.b,add=TRUE,freq=FALSE,col="blue") # Step 1&2. write a function to sample and calculate statistic my.sample = function(a,b) { sample(a,1) / sample(b,1) } # Step 3. combine 1 and 2, and repeat many times results = replicate(1000,my.sample(risk.a,risk.b)) # Step 4. Summarize the distribution of results hist(results) mean(results) quantile(results,probs = c(0.025,0.975)) ##### Example: Sensitivity ##### # How sensitive are my results to measurement error? # Simulate an example dataset (you wouldn't do this part) exposure = rbinom(p=0.4,n=1000,size=1) # 40% exposure prevalence error = rnorm(n=1000) covariate = rbinom(p=0.2,n=1000,size=1) outcome = rbinom(p=0.2*exposure+0.1*covariate+0.1*error>0, size=1, n=1000) # you would just use your data instead # crude model m0 = glm(outcome ~ exposure + covariate) summary(m0) # measurement error model (20% Misclassification at random) exp.wrong = exposure # make a copy to mess up wrongs = sample(1:1000,size=200) # sample indices to flip exp.wrong[wrongs] = 1-exp.wrong[wrongs] # flip errors m1 = glm(outcome ~ exp.wrong + covariate) summary(m1) # Bias towards the null! But not enough information... # Characterize based on a simulation: what is the distribution under MAR? my.sample = function() { ex = exposure # make a copy to mess up flip = sample(1:1000,size=200) # sample indices to flip ex[flip] = 1-ex[flip] # flip errors me = glm(outcome ~ ex + covariate) coef(me)[2] - coef(m0)[2] } # run the simulation 1000 times (takes a while) results = replicate(1000,my.sample()) mean(results) # mean for ME quantile(results,probs = c(0.025,0.975)) # confidence bounds on ME ##### Example: Power ##### # How to calculate power for a study with confounding? # Select power assumptions (add your own): exposure.prevalence = 0.5 baseout.prevalence=0.1 outcome.prevalence = 0.2 prior.RD = 0.1 confounding = 0.1 confounder.prevalence = 0.3 # build model for data generation process (simulate this part) lower.bound = function() { confounder = rbinom(p=confounder.prevalence,n=1000,size=1) exposure = rbinom(p=exposure.prevalence+confounding*confounder,n=1000,size=1) outcome = rbinom(p=baseout.prevalence+prior.RD*exposure+confounding*confounder,n=1000,size=1) confint.default(glm(outcome~exposure+confounder,family=binomial("logit")))[2,1] } results = replicate(3000,lower.bound()) mean(results>0) # about 98% power to detect ##### Challenge: Estimate 95% CI for LB and UB (instead of power)