data <- readRDS("~/Documents/EPID 799 R/2018/flu_data.rds")
library(tidyverse)
#Take a look at a subset of observations
data %>%
dplyr::select(age_group, sex_f, health_seek_claims, comorbid, flu_vax, flu_inf, died) %>%
tail(n=10)
## age_group sex_f health_seek_claims comorbid flu_vax flu_inf died
## 19991 85+ Female 0 1 0 0 0
## 19992 80-84 Male 1 1 1 0 0
## 19993 80-84 Male 1 0 0 0 0
## 19994 80-84 Male 0 1 0 0 0
## 19995 80-84 Female 0 1 1 0 0
## 19996 80-84 Female 1 1 1 0 0
## 19997 85+ Male 1 1 0 0 0
## 19998 75-79 Female 1 0 0 0 0
## 19999 80-84 Female 1 1 1 0 0
## 20000 65-69 Male 0 0 1 0 0
#what is the number and percent of participants with influenza vaccination?
data %>%
dplyr::group_by(flu_vax) %>%
dplyr::summarise(number=n(), percent=n()/nrow(data)*100)
## # A tibble: 2 x 3
## flu_vax number percent
## <int> <int> <dbl>
## 1 0 9567 47.8
## 2 1 10433 52.2
#what is the number and percent of participants with influenza infection?
data %>%
dplyr::group_by(flu_inf) %>%
dplyr::summarise(number=n(), percent=n()/nrow(data)*100)
## # A tibble: 2 x 3
## flu_inf number percent
## <int> <int> <dbl>
## 1 0 18885 94.4
## 2 1 1115 5.58
#Number and percent of study participants in each age group
observed <- data %>%
dplyr::group_by(age_group) %>%
dplyr::summarise(size_observed=n(), percent_observed=n()/nrow(data)*100)
#Study population
observed
## # A tibble: 5 x 3
## age_group size_observed percent_observed
## <fct> <int> <dbl>
## 1 65-69 4000 20.0
## 2 70-74 4018 20.1
## 3 75-79 4000 20.0
## 4 80-84 3912 19.6
## 5 85+ 4070 20.3
#US population size for each age group based on census data
age <- data.frame(age_group=c("65-69","70-74","75-79","80-84","85+"),
size_target=c(16820083,11810247,8367895,5865639,6380331))
#Calculating the percent of the older adult population in each age group
age$percent_target <- age$size_target/sum(age$size_target) *100
#Adding variables for age groups in our study population
age <- left_join(age, observed, by="age_group")
#All age data
age
## age_group size_target percent_target size_observed percent_observed
## 1 65-69 16820083 34.15648 4000 20.00
## 2 70-74 11810247 23.98302 4018 20.09
## 3 75-79 8367895 16.99265 4000 20.00
## 4 80-84 5865639 11.91133 3912 19.56
## 5 85+ 6380331 12.95651 4070 20.35
#Going from wide to long data for plotting
a_long <- gather(age, key = key, value = value, percent_target, percent_observed)
#Adding labels
a_long$key_f <- factor(a_long$key,
levels = c("percent_observed", "percent_target"),
labels = c("Observed", "Target"))
#Plot the percentages of the target population and the study population in each age group
ggplot(data=a_long, aes(x=age_group, y=value, fill=key_f))+
geom_bar(stat = "identity",position = "dodge",width = .3)+
theme_classic()+
xlab("Age group")+
ylab("Percent of population")+
theme(legend.title = element_blank(),legend.position = c(.9,.9))
We observed that the prevalence of flu vaccination in our study population was 52%, and the incidence of flu infection was 5.6%. In order to infer these estimates to the target population, do we need to account for the differing age distributions?
#Vaccination and infection by age group
data %>%
dplyr::group_by(age_group) %>%
dplyr::summarise(percent_vaccination=sum(flu_vax)/n(),
percent_infection=sum(flu_inf)/n())
## # A tibble: 5 x 3
## age_group percent_vaccination percent_infection
## <fct> <dbl> <dbl>
## 1 65-69 0.794 0.0272
## 2 70-74 0.735 0.0356
## 3 75-79 0.508 0.0525
## 4 80-84 0.359 0.0713
## 5 85+ 0.212 0.0919
To account for the different age distribution of our study population, we can calculate the sampling fraction for each participant based on their age group. The sampling weight for each participant is then calculated as 1/sampling fraction.
#In our dataset, adding variables for sizes of observed age group and target age group
age <- age %>% select(age_group, size_target, size_observed)
data <- left_join(data, age, by="age_group")
#Then calculate the sampling fraction
data$sample_fraction <- data$size_observed / data$size_target
#The sampling weight is 1/sampling fraction
data$sample_weight <- 1 / data$sample_fraction
#These are the 5 values for sampling fraction and weight in our data, based on the 5 age groups
data %>%
dplyr::group_by(age_group) %>%
dplyr::summarise(sample_fraction=unique(sample_fraction), sample_weight=unique(sample_weight))
## # A tibble: 5 x 3
## age_group sample_fraction sample_weight
## <fct> <dbl> <dbl>
## 1 65-69 0.000238 4205
## 2 70-74 0.000340 2939
## 3 75-79 0.000478 2092
## 4 80-84 0.000667 1499
## 5 85+ 0.000638 1568
The weighted prevalence of vaccination (for the target population) is 60% (compared to 52% in the study population).
#What is the weighted prevalence estimate?
data %>%
dplyr::group_by(flu_vax) %>%
dplyr::summarise(n=n(), n_weight=sum(sample_weight)) %>%
dplyr::mutate(prev_unweight=n/sum(n), prev_weight=n_weight/sum(n_weight))
## # A tibble: 2 x 5
## flu_vax n n_weight prev_unweight prev_weight
## <int> <int> <dbl> <dbl> <dbl>
## 1 0 9567 19486108 0.478 0.396
## 2 1 10433 29758087 0.522 0.604
The weighted incidence of infection (for the target population) is 4.7% (compared to 5.6% in the study population).
#What is the weighted incidence estimate?
data %>%
dplyr::group_by(flu_inf) %>%
dplyr::summarise(n=n(), n_weight=sum(sample_weight)) %>%
dplyr::mutate(prev_unweight=n/sum(n), prev_weight=n_weight/sum(n_weight))
## # A tibble: 2 x 5
## flu_inf n n_weight prev_unweight prev_weight
## <int> <int> <dbl> <dbl> <dbl>
## 1 0 18885 46921576 0.944 0.953
## 2 1 1115 2322619 0.0558 0.0472
births <- readRDS("~/Documents/EPID 799 R/2018/R for epi 2018 data pack/birth_cohort.rds")
library(broom)
library(tableone)
library(dagitty)
library(ggdag)
library(ROCR)
library(plotROC)
library(survey)
library(geepack)
#Subset to variables of interest for our model, exclude missing
df <- births %>%
dplyr::select(x, wksgest, preterm, preterm_f, pnc5, pnc5_f, mage, raceeth_f, smoker_f) %>%
dplyr::filter(!is.na(preterm_f) & !is.na(pnc5_f) & !is.na(mage) & !is.na(raceeth_f) & !is.na(smoker_f))
dag <- dagitty::downloadGraph("dagitty.net/mAqKBcR")
ggdag::ggdag_adjustment_set(dag) +
theme_dag_blank() +
geom_dag_text(color="black")
tableone::CreateTableOne(vars = c("mage","raceeth_f","smoker_f"),
strata = "pnc5_f", test = F,
data = df)
## Stratified by pnc5_f
## No Early PNC Early PNC
## n 5433 56064
## mage (mean (sd)) 26.10 (6.11) 27.65 (5.90)
## raceeth_f (%)
## AA 1834 (33.8) 12763 (22.8)
## AI or AN 86 ( 1.6) 782 ( 1.4)
## Other 1238 (22.8) 8619 (15.4)
## WH 175 ( 3.2) 1387 ( 2.5)
## WnH 2100 (38.7) 32513 (58.0)
## smoker_f = Smoker (%) 898 (16.5) 5646 (10.1)
#Probability of early prenatal care, as a function of maternal age, race/ethnicity, and smoking status
summary(ps_model <- glm(data = df, pnc5_f ~ poly(mage, 2) + raceeth_f + smoker_f, family=binomial("logit")))
##
## Call:
## glm(formula = pnc5_f ~ poly(mage, 2) + raceeth_f + smoker_f,
## family = binomial("logit"), data = df)
##
## Deviance Residuals:
## Min 1Q Median 3Q Max
## -2.5008 0.3064 0.3758 0.4796 0.9484
##
## Coefficients:
## Estimate Std. Error z value Pr(>|z|)
## (Intercept) 2.11585 0.02658 79.610 < 2e-16 ***
## poly(mage, 2)1 47.37029 3.57311 13.257 < 2e-16 ***
## poly(mage, 2)2 -27.67095 3.42022 -8.090 5.95e-16 ***
## raceeth_fAI or AN 0.43583 0.11767 3.704 0.000212 ***
## raceeth_fOther -0.16177 0.04023 -4.021 5.79e-05 ***
## raceeth_fWH 0.02710 0.08460 0.320 0.748720
## raceeth_fWnH 0.75215 0.03439 21.871 < 2e-16 ***
## smoker_fSmoker -0.71256 0.04094 -17.407 < 2e-16 ***
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## (Dispersion parameter for binomial family taken to be 1)
##
## Null deviance: 36738 on 61496 degrees of freedom
## Residual deviance: 35404 on 61489 degrees of freedom
## AIC: 35420
##
## Number of Fisher Scoring iterations: 5
#Use the model to predict the probability of early prenatal care (propensity score) for each observation
df$ps <- predict(ps_model, type = "response")
#Distribution of propensity score...
summary(df$ps[df$pnc5==1]) #...among mothers with observed early prenatal care
## Min. 1st Qu. Median Mean 3rd Qu. Max.
## 0.6378 0.8901 0.9151 0.9136 0.9521 0.9562
summary(df$ps[df$pnc5==0]) #...among mothers with no observed early prenatal care
## Min. 1st Qu. Median Mean 3rd Qu. Max.
## 0.6949 0.8656 0.8953 0.8912 0.9270 0.9562
#Plot propensity score, by observed prenatal care
ggplot(data=df,aes(x=ps, group=factor(pnc5_f), fill=factor(pnc5_f)))+
geom_histogram(aes(y=..density..),alpha = 0.75,binwidth=0.02,position = position_dodge(width=0.01))+
theme_classic()+
xlab("Predicted probability of early prenatal care")+
labs(fill = "Observed")
#Function to calculate the AUC and MSPE (measures of predictive ability)
pred.eval <- function(preds,outcome)
{
pred.sum=prediction(preds,outcome)
auc=as.numeric(performance(pred.sum,"auc")@y.values)
mspe=mean((preds-outcome)^2)
list(auc=auc,mspe=mspe)
}
pred.eval(preds = df$ps, outcome = df$pnc5)
## $auc
## [1] 0.6461399
##
## $mspe
## [1] 0.07879287
#Use propensity score to calculate unstabilized IPTW
df$iptw_u <- ifelse(df$pnc5==1, 1/df$ps, 1/(1-df$ps))
#Distribution of weights in the exposed
summary(df$iptw_u[df$pnc5==1])
## Min. 1st Qu. Median Mean 3rd Qu. Max.
## 1.046 1.050 1.093 1.097 1.124 1.568
#Distribution of weights in the unexposed
summary(df$iptw_u[df$pnc5==0])
## Min. 1st Qu. Median Mean 3rd Qu. Max.
## 3.277 7.442 9.554 11.222 13.696 22.806
#Distribution of weights in the total population
summary(df$iptw_u)
## Min. 1st Qu. Median Mean 3rd Qu. Max.
## 1.046 1.051 1.098 1.992 1.139 22.806
#Used marginal probability of exposure (early prenatal care) to stabilize weights
p_exposure <- sum(df$pnc5) / nrow(df)
df$iptw_s <- ifelse(df$pnc5==1, p_exposure/df$ps, (1-p_exposure)/(1-df$ps))
#Distribution of weights in the exposed
summary(df$iptw_s[df$pnc5==1])
## Min. 1st Qu. Median Mean 3rd Qu. Max.
## 0.9535 0.9575 0.9962 1.0001 1.0243 1.4294
#Distribution of weights in the unexposed
summary(df$iptw_s[df$pnc5==0])
## Min. 1st Qu. Median Mean 3rd Qu. Max.
## 0.2895 0.6575 0.8441 0.9914 1.2100 2.0148
#Distribution of weights in the total population
summary(df$iptw_s)
## Min. 1st Qu. Median Mean 3rd Qu. Max.
## 0.2895 0.9566 0.9851 0.9993 1.0257 2.0148
weighted_df <- svydesign(~0, weights = df$iptw_s, data=df)
svyCreateTableOne(vars = c("mage","raceeth_f","smoker_f"),
strata = "pnc5_f", test = F,
data = weighted_df)
## Stratified by pnc5_f
## No Early PNC Early PNC
## n 5386.47 56069.25
## mage (mean (sd)) 27.36 (5.96) 27.51 (5.93)
## raceeth_f (%)
## AA 1293.7 (24.0) 13309.1 (23.7)
## AI or AN 79.8 ( 1.5) 791.8 ( 1.4)
## Other 879.2 (16.3) 8988.9 (16.0)
## WH 138.6 ( 2.6) 1424.2 ( 2.5)
## WnH 2995.2 (55.6) 31555.2 (56.3)
## smoker_f = Smoker (%) 613.0 (11.4) 5975.3 (10.7)
ggplot(data=df, aes(x=ps, weight=iptw_s, group=factor(pnc5), fill=factor(pnc5)))+
geom_histogram(aes(y=..density..),alpha = 0.75,binwidth=0.02,position = position_dodge(width=0.01))+
theme_classic()+
xlab("Predicted probability of early prenatal care")+
labs(fill = "Observed")
broom::tidy(geeglm(data = df, preterm ~ pnc5, family=binomial("identity"), weight=iptw_s, id = x))
## term estimate std.error statistic p.value
## 1 (Intercept) 0.11939288 0.004730875 636.90384 0.000000e+00
## 2 pnc5 -0.02411346 0.004892532 24.29136 8.280979e-07
#Compare to adjusted model
broom::tidy(glm(data = df, preterm ~ pnc5 + poly(mage, 2) + raceeth_f + smoker_f, family=binomial("identity")))
## term estimate std.error statistic p.value
## 1 (Intercept) 0.153139641 0.005063951 30.2411367 6.823307e-201
## 2 pnc5 -0.022394044 0.004637421 -4.8289870 1.372294e-06
## 3 poly(mage, 2)1 0.468630828 0.309838748 1.5124991 1.304069e-01
## 4 poly(mage, 2)2 2.516000907 0.313959366 8.0137788 1.112367e-15
## 5 raceeth_fAI or AN -0.007913101 0.011861707 -0.6671131 5.046999e-01
## 6 raceeth_fOther -0.037644700 0.004133554 -9.1071033 8.461084e-20
## 7 raceeth_fWH -0.055691625 0.007342586 -7.5847430 3.331469e-14
## 8 raceeth_fWnH -0.055744896 0.003236356 -17.2245897 1.736549e-66
## 9 smoker_fSmoker 0.033199906 0.004262331 7.7891435 6.746493e-15
library(networkD3)
library(magrittr)
library(reshape2)
load("~/Documents/Dissertation/nodes.Rda")
load("~/Documents/Dissertation/links.Rda")
a <- readRDS("~/Documents/Dissertation/data_for_msm_long.rds")
#Time-varying depressive symptoms
networkD3::sankeyNetwork(Links = links, Nodes = nodes, Source = "source",
Target = "target", Value = "value", NodeID ="name",
fontSize = 16, fontFamily = "Arial", nodeWidth = 20,nodePadding = 5)
tableone::CreateTableOne(vars = c("knowStatus_art", "arm_f", "marital_f", "employ_f", "age_bl", "history_od", "drink_f", "anysex_nocondom_3m", "injectrisk_any"),
strata = "cesd23", test = F,
data = a[a$visitnum==1,])
## Stratified by cesd23
## 0 1
## n 253 201
## knowStatus_art (%)
## New HIV+ 194 (76.7) 141 (70.1)
## Know HIV+/No ART 27 (10.7) 41 (20.4)
## Know HIV+/On ART 32 (12.6) 19 ( 9.5)
## arm_f (%)
## Control 48 (19.0) 41 (20.4)
## Subdistrict 80 (31.6) 59 (29.4)
## Individual 54 (21.3) 41 (20.4)
## Both 71 (28.1) 60 (29.9)
## marital_f (%)
## Single 83 (32.8) 92 (45.8)
## Married/cohabitating 141 (55.7) 73 (36.3)
## Widow/Divorce/Sep 29 (11.5) 36 (17.9)
## employ_f (%)
## Work full-time 183 (72.3) 131 (65.2)
## Work part-time 42 (16.6) 40 (19.9)
## Unemployed/other 28 (11.1) 30 (14.9)
## age_bl (mean (sd)) 35.26 (6.20) 35.10 (6.40)
## history_od (mean (sd)) 0.14 (0.35) 0.24 (0.43)
## drink_f = Alcohol use (%) 199 (78.7) 108 (53.7)
## anysex_nocondom_3m (mean (sd)) 0.27 (0.44) 0.19 (0.40)
## injectrisk_any (mean (sd)) 0.66 (0.47) 0.81 (0.39)
Model specifications. Logistic model for the probability of depressive symptoms (D) at each visit, as a of time-fixed confounders at baseline (C) and time-varying depressive symptoms (D), risk behavior (Y), and CD4 count (Z) at the last visit.
Propensity Score Model | Outcome Model |
---|---|
D(t) ~ C + Z(t-1) + Y(t-1) + D(t-1) | Y(t+1) ~ D(t) |
#Note for the purposes of this example, I'm just doing a complete case analysis. However, there is non-negligible missing data, and if not properly addressed, could bias these estimates. Two approaches to deal with this (that I'm currently working on!) are to combine the IPTW with IPOW (missingness weights), or to use multiple imputation and perform the IPTW analysis on the imputed data.
complete <- a %>%
dplyr::filter(!is.na(last_cd4)
& !is.na(last_cesd23)
& !is.na(last_injectrisk)
& !is.na(cesd23)
& !is.na(next_injectrisk))
complete %>%
dplyr::group_by(cesd23) %>%
dplyr::summarise(n=n(), outcome=sum(next_injectrisk, na.rm=T)) %>%
dplyr::mutate(risk=outcome/n)
## # A tibble: 2 x 4
## cesd23 n outcome risk
## <dbl> <int> <dbl> <dbl>
## 1 0 523 29.0 0.0554
## 2 1.00 293 34.0 0.116
broom::tidy(glm(next_injectrisk ~ cesd23, family = binomial(link = "identity"), data = complete))
## term estimate std.error statistic p.value
## 1 (Intercept) 0.05544933 0.01000714 5.540977 3.007883e-08
## 2 cesd23 0.06059162 0.02121862 2.855588 4.295724e-03
broom::tidy(glm(next_injectrisk ~ cesd23, family = binomial(link = "log"), data = complete), exponentiate = T)
## term estimate std.error statistic p.value
## 1 (Intercept) 0.05544933 0.1804678 -16.026598 8.332129e-58
## 2 cesd23 2.09273861 0.2420068 3.051457 2.277334e-03
#Probability of depression as a function of last values of depression, CD4 count, and risk behaviors (time-varying), and baseline values of time-fixed confounders
summary(depression_model <- glm(cesd23 ~ last_cesd23 + last_cd4 + last_injectrisk + arm_f + marital_f + employ_f + knowStatus_art + age_bl + history_od + drink_f, family=binomial("logit"), data = complete))
##
## Call:
## glm(formula = cesd23 ~ last_cesd23 + last_cd4 + last_injectrisk +
## arm_f + marital_f + employ_f + knowStatus_art + age_bl +
## history_od + drink_f, family = binomial("logit"), data = complete)
##
## Deviance Residuals:
## Min 1Q Median 3Q Max
## -1.8771 -0.8074 -0.5637 0.9524 2.2922
##
## Coefficients:
## Estimate Std. Error z value Pr(>|z|)
## (Intercept) -1.42422 0.54424 -2.617 0.00887 **
## last_cesd23 1.51168 0.16636 9.087 < 2e-16 ***
## last_cd4200<=CD4<500 -0.07543 0.17907 -0.421 0.67358
## last_cd4CD4>=500 -0.14918 0.27421 -0.544 0.58641
## last_injectrisk 0.74803 0.16746 4.467 7.93e-06 ***
## arm_fSubdistrict -0.34704 0.23787 -1.459 0.14458
## arm_fIndividual -0.67149 0.27758 -2.419 0.01556 *
## arm_fBoth -0.23792 0.22981 -1.035 0.30053
## marital_fMarried/cohabitating -0.34408 0.19604 -1.755 0.07923 .
## marital_fWidow/Divorce/Sep -0.46262 0.29711 -1.557 0.11945
## employ_fWork part-time 0.01070 0.23151 0.046 0.96312
## employ_fUnemployed/other 0.41752 0.26149 1.597 0.11033
## knowStatus_artKnow HIV+/No ART -0.21729 0.25537 -0.851 0.39484
## knowStatus_artKnow HIV+/On ART -0.51781 0.24470 -2.116 0.03434 *
## age_bl 0.01949 0.01419 1.374 0.16952
## history_od 0.09042 0.21663 0.417 0.67638
## drink_fAlcohol use -0.25299 0.19280 -1.312 0.18945
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## (Dispersion parameter for binomial family taken to be 1)
##
## Null deviance: 1065.50 on 815 degrees of freedom
## Residual deviance: 900.98 on 799 degrees of freedom
## AIC: 934.98
##
## Number of Fisher Scoring iterations: 4
#Use the model to predict the probability of depression (propensity score) for each observation
complete$predict <- predict(depression_model, type = "response")
#Plot propensity score, by observed depression
ggplot(data=complete,aes(x=predict, group=factor(cesd23), fill=factor(cesd23)))+
geom_histogram(aes(y=..density..),alpha = 0.75,binwidth=0.02,position = position_dodge(width=0.01))+
theme_classic()+
xlab("Predicted probability of CES-D≥23")+
labs(fill = "CES-D≥23")
pred.eval(complete$predict, complete$cesd23)
## $auc
## [1] 0.7595097
##
## $mspe
## [1] 0.1851221
#Used propensity score to calculate unstabilized inverse probability of depression weights
complete$iptw_u <- ifelse(complete$cesd23==1, 1/complete$predict, 1/(1-complete$predict))
#Distribution of unstabilized weights in the exposed
summary(complete$iptw_u[complete$cesd23==1])
## Min. 1st Qu. Median Mean 3rd Qu. Max.
## 1.204 1.516 1.885 2.814 3.249 13.833
#Distribution of unstabilized weights in the unexposed
summary(complete$iptw_u[complete$cesd23==0])
## Min. 1st Qu. Median Mean 3rd Qu. Max.
## 1.066 1.187 1.299 1.555 1.649 5.822
#Distribution of unstabilized weights in the total population
summary(complete$iptw_u)
## Min. 1st Qu. Median Mean 3rd Qu. Max.
## 1.066 1.229 1.455 2.007 2.103 13.833
#Used marginal probability of exposure (for complete cases) to stabilize weights
p_exposure <- sum(complete$cesd23) / nrow(complete)
complete$iptw_s <- ifelse(complete$cesd23==1, p_exposure/complete$predict, (1-p_exposure)/(1-complete$predict))
#Distribution of stabilized weights in the exposed
summary(complete$iptw_s[complete$cesd23==1])
## Min. 1st Qu. Median Mean 3rd Qu. Max.
## 0.4323 0.5442 0.6770 1.0104 1.1667 4.9668
#Distribution of unstabilized weights in the unexposed
summary(complete$iptw_s[complete$cesd23==0])
## Min. 1st Qu. Median Mean 3rd Qu. Max.
## 0.6834 0.7605 0.8328 0.9965 1.0570 3.7318
#Distribution of weights in the total population
summary(complete$iptw_s)
## Min. 1st Qu. Median Mean 3rd Qu. Max.
## 0.4323 0.7189 0.8048 1.0015 1.0936 4.9668
#This is the reweighted population
ggplot(data=complete, aes(x=predict, weight=iptw_s, group=factor(cesd23), fill=factor(cesd23)))+
geom_histogram(aes(y=..density..),alpha = 0.75,binwidth=0.02,position = position_dodge(width=0.01))+
theme_classic()+
xlab("Predicted probability of CES-D≥23")+
labs(fill = "CES-D≥23")
#Estimate the weighted measures of effect
broom::tidy(geeglm(next_injectrisk ~ cesd23, family=binomial(link = "identity"), weight=iptw_s, id = pidnum, data = complete))
## term estimate std.error statistic p.value
## 1 (Intercept) 0.06030404 0.01243601 23.514223 1.239938e-06
## 2 cesd23 0.03244441 0.02163592 2.248688 1.337278e-01
broom::tidy(geeglm(next_injectrisk ~ cesd23, family=binomial(link = "log"), weight=iptw_s, id = pidnum, data = complete), exponentiate = T)
## term estimate std.error statistic p.value
## 1 (Intercept) 0.06030404 0.2062219 185.453485 0.0000000
## 2 cesd23 1.53801383 0.2693021 2.555349 0.1099215