4  加权

https://ehsanx.github.io/psm/cat.html

\[ \begin{equation}\begin{aligned} Treatment\ Effect = \frac{\sum Y_{i}A_{i}w_{i}}{\sum A_{i} w_{i}} - \frac{\sum Y_{i}(1 - A_{i}) w_{i}}{\sum (1 - A_{i}) w_{i} } \end{aligned}\end{equation} \]

4.1 案例1

Show the code
data(lalonde, package='Matching')
ps_formula <- treat ~ age + I(age^2) + educ + I(educ^2) + black +
    hisp + married + nodegr + re74  + I(re74^2) + re75 + I(re75^2) + u74 + u75

ps_fit <- glm(formula = ps_formula,
              data = lalonde,
              family = binomial(link = 'logit'))

lalonde$ps <- fitted(ps_fit)

PSAgraphics::cv.bal.psa(covariates = lalonde[,all.vars(ps_formula)[-1]],
                        treatment = lalonde$treat,
                        propensity = lalonde$ps,
                        strata = 5)

4.1.1 平均治疗效应ATE公式

\[ \begin{equation}\begin{aligned} w_{ATE} = \frac{A_i}{PS_i} + \frac{1 - A_i}{1 - PS_i} \end{aligned}\end{equation} \]

Show the code

dat <- lalonde |> mutate(
    ate_weight = psa::calculate_ps_weights(treat, ps, estimand = 'ATE'),
    att_weight = psa::calculate_ps_weights(treat, ps, estimand = 'ATT'),
    atc_weight = psa::calculate_ps_weights(treat, ps, estimand = 'ATC'),
    atm_weight = psa::calculate_ps_weights(treat, ps, estimand = 'ATM')
)
psa::weighting_plot(ps = dat$ps,
                    treatment = dat$treat,
                    outcome = dat$re78, 
                    estimand = 'ATE')

Show the code
# Check Balance with ATE Weights
glm(formula = ps_formula,
    data = dat,
    family = quasibinomial(link = 'logit'),
    weights = ate_weight
) |> summary()
#> 
#> Call:
#> glm(formula = ps_formula, family = quasibinomial(link = "logit"), 
#>     data = dat, weights = ate_weight)
#> 
#> Coefficients:
#>               Estimate Std. Error t value Pr(>|t|)
#> (Intercept) -2.000e-01  1.977e+00  -0.101    0.919
#> age          2.686e-02  8.513e-02   0.316    0.753
#> I(age^2)    -4.695e-04  1.397e-03  -0.336    0.737
#> educ        -6.326e-02  4.024e-01  -0.157    0.875
#> I(educ^2)    3.510e-03  2.259e-02   0.155    0.877
#> black       -3.695e-03  3.714e-01  -0.010    0.992
#> hisp         2.232e-02  4.904e-01   0.046    0.964
#> married     -8.664e-03  2.784e-01  -0.031    0.975
#> nodegr       4.154e-02  3.889e-01   0.107    0.915
#> re74         2.291e-05  7.493e-05   0.306    0.760
#> I(re74^2)   -9.734e-10  2.337e-09  -0.416    0.677
#> re75         5.001e-06  1.015e-04   0.049    0.961
#> I(re75^2)   -3.543e-10  5.032e-09  -0.070    0.944
#> u74          8.163e-02  4.449e-01   0.183    0.854
#> u75         -4.153e-04  3.566e-01  -0.001    0.999
#> 
#> (Dispersion parameter for quasibinomial family taken to be 2.067727)
#> 
#>     Null deviance: 1232.6  on 444  degrees of freedom
#> Residual deviance: 1231.9  on 430  degrees of freedom
#> AIC: NA
#> 
#> Number of Fisher Scoring iterations: 4

# 估计 ATE
lm(formula = re78 ~ treat, 
   data = dat,
   weights = ate_weight) |> summary()
#> 
#> Call:
#> lm(formula = re78 ~ treat, data = dat, weights = ate_weight)
#> 
#> Weighted Residuals:
#>    Min     1Q Median     3Q    Max 
#> -10856  -5892  -2478   4075  81529 
#> 
#> Coefficients:
#>             Estimate Std. Error t value Pr(>|t|)    
#> (Intercept)     4556        450  10.125   <2e-16 ***
#> treat           1558        637   2.446   0.0148 *  
#> ---
#> Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
#> 
#> Residual standard error: 9497 on 443 degrees of freedom
#> Multiple R-squared:  0.01333,    Adjusted R-squared:  0.0111 
#> F-statistic: 5.983 on 1 and 443 DF,  p-value: 0.01483

psa::treatment_effect(treatment = dat$treat,
                      outcome = dat$re78,
                      weights = dat$ate_weight)
#> [1] 1558.088

4.1.2 ATT

\[ \begin{equation}\begin{aligned} w_{ATT} = \frac{PS_i A_i}{PS_i} + \frac{PS_i (1 - A_i)}{1 - PS_i} \end{aligned}\end{equation} \]

Show the code
# Check Balance with ATT Weights
glm(formula = ps_formula,
    data = dat,
    family = quasibinomial(link = 'logit'),
    weights = att_weight
) |> summary()
#> 
#> Call:
#> glm(formula = ps_formula, family = quasibinomial(link = "logit"), 
#>     data = dat, weights = att_weight)
#> 
#> Coefficients:
#>               Estimate Std. Error t value Pr(>|t|)
#> (Intercept)  1.122e-01  1.754e+00   0.064    0.949
#> age          2.350e-02  8.362e-02   0.281    0.779
#> I(age^2)    -4.382e-04  1.352e-03  -0.324    0.746
#> educ        -1.279e-01  3.424e-01  -0.374    0.709
#> I(educ^2)    7.725e-03  1.931e-02   0.400    0.689
#> black       -5.090e-02  3.388e-01  -0.150    0.881
#> hisp        -7.925e-02  5.202e-01  -0.152    0.879
#> married     -2.667e-02  2.691e-01  -0.099    0.921
#> nodegr       1.449e-01  3.623e-01   0.400    0.689
#> re74         9.327e-06  7.444e-05   0.125    0.900
#> I(re74^2)   -9.597e-11  2.521e-09  -0.038    0.970
#> re75        -1.340e-05  9.575e-05  -0.140    0.889
#> I(re75^2)    7.444e-10  4.817e-09   0.155    0.877
#> u74         -6.362e-02  4.320e-01  -0.147    0.883
#> u75          8.469e-02  3.390e-01   0.250    0.803
#> 
#> (Dispersion parameter for quasibinomial family taken to be 0.8614842)
#> 
#>     Null deviance: 513.54  on 444  degrees of freedom
#> Residual deviance: 513.06  on 430  degrees of freedom
#> AIC: NA
#> 
#> Number of Fisher Scoring iterations: 4

# Estimate ATT
lm(formula = re78 ~ treat, 
   data = dat,
   weights = att_weight) |> summary()
#> 
#> Call:
#> lm(formula = re78 ~ treat, data = dat, weights = att_weight)
#> 
#> Weighted Residuals:
#>    Min     1Q Median     3Q    Max 
#>  -6349  -3869  -1506   2400  53959 
#> 
#> Coefficients:
#>             Estimate Std. Error t value Pr(>|t|)    
#> (Intercept)   4557.4      454.2  10.033  < 2e-16 ***
#> treat         1791.7      642.8   2.787  0.00554 ** 
#> ---
#> Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
#> 
#> Residual standard error: 6186 on 443 degrees of freedom
#> Multiple R-squared:  0.01724,    Adjusted R-squared:  0.01502 
#> F-statistic:  7.77 on 1 and 443 DF,  p-value: 0.00554

psa::treatment_effect(treatment = dat$treat,
                      outcome = dat$re78,
                      weights = dat$att_weight)
#> [1] 1791.724

4.1.3 ATC

\[ \begin{equation}\begin{aligned} w_{ATC} = \frac{(1 - PS_i) A_i}{PS_i} + \frac{(1 - e_i)(1 - A_i)}{1 - PS_i} \end{aligned}\end{equation} \]

Show the code
# Check Balance with ATC Weights
glm(formula = ps_formula,
    data = dat,
    family = quasibinomial(link = 'logit'),
    weights = atc_weight
) |> summary()
#> 
#> Call:
#> glm(formula = ps_formula, family = quasibinomial(link = "logit"), 
#>     data = dat, weights = atc_weight)
#> 
#> Coefficients:
#>               Estimate Std. Error t value Pr(>|t|)
#> (Intercept) -6.598e-01  2.390e+00  -0.276    0.783
#> age          3.097e-02  8.728e-02   0.355    0.723
#> I(age^2)    -5.201e-04  1.449e-03  -0.359    0.720
#> educ         4.722e-02  4.975e-01   0.095    0.924
#> I(educ^2)   -3.225e-03  2.766e-02  -0.117    0.907
#> black        3.598e-02  4.033e-01   0.089    0.929
#> hisp         7.912e-02  4.941e-01   0.160    0.873
#> married      7.290e-03  2.868e-01   0.025    0.980
#> nodegr      -7.488e-02  4.205e-01  -0.178    0.859
#> re74         2.763e-05  7.658e-05   0.361    0.718
#> I(re74^2)   -1.296e-09  2.319e-09  -0.559    0.577
#> re75         2.037e-05  1.073e-04   0.190    0.849
#> I(re75^2)   -1.341e-09  5.282e-09  -0.254    0.800
#> u74          1.831e-01  4.577e-01   0.400    0.689
#> u75         -7.234e-02  3.730e-01  -0.194    0.846
#> 
#> (Dispersion parameter for quasibinomial family taken to be 1.206136)
#> 
#>     Null deviance: 719.09  on 444  degrees of freedom
#> Residual deviance: 717.84  on 430  degrees of freedom
#> AIC: NA
#> 
#> Number of Fisher Scoring iterations: 4

# Estimate ATC
lm(formula = re78 ~ treat, 
   data = dat,
   weights = atc_weight) |> summary()
#> 
#> Call:
#> lm(formula = re78 ~ treat, data = dat, weights = atc_weight)
#> 
#> Weighted Residuals:
#>    Min     1Q Median     3Q    Max 
#>  -9104  -4555  -1672   3055  61098 
#> 
#> Coefficients:
#>             Estimate Std. Error t value Pr(>|t|)    
#> (Intercept)   4554.8      446.8  10.195   <2e-16 ***
#> treat         1391.0      632.6   2.199   0.0284 *  
#> ---
#> Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
#> 
#> Residual standard error: 7204 on 443 degrees of freedom
#> Multiple R-squared:  0.0108, Adjusted R-squared:  0.008564 
#> F-statistic: 4.835 on 1 and 443 DF,  p-value: 0.0284

psa::treatment_effect(treatment = dat$treat,
                      outcome = dat$re78,
                      weights = dat$atc_weight)
#> [1] 1391.019

4.1.4 Average Treatment Effect Among the Evenly Matched (ATM)

\[ \begin{equation}\begin{aligned} w_{ATM} = \frac{min\{PS_i, 1 - PS_i\}}{A_i PS_i (1 - A_i)(1 - PS_i)} \end{aligned}\end{equation} \]

Show the code
# Check Balance with ATC Weights
glm(formula = ps_formula,
    data = dat,
    family = quasibinomial(link = 'logit'),
    weights = atm_weight
) |> summary()
#> 
#> Call:
#> glm(formula = ps_formula, family = quasibinomial(link = "logit"), 
#>     data = dat, weights = atm_weight)
#> 
#> Coefficients:
#>               Estimate Std. Error t value Pr(>|t|)
#> (Intercept)  1.657e-01  2.129e+00   0.078    0.938
#> age         -2.418e-02  8.893e-02  -0.272    0.786
#> I(age^2)     4.192e-04  1.465e-03   0.286    0.775
#> educ         6.308e-02  4.304e-01   0.147    0.884
#> I(educ^2)   -3.347e-03  2.420e-02  -0.138    0.890
#> black        1.480e-02  3.577e-01   0.041    0.967
#> hisp        -3.495e-02  5.203e-01  -0.067    0.946
#> married     -3.486e-03  2.736e-01  -0.013    0.990
#> nodegr      -4.659e-02  3.825e-01  -0.122    0.903
#> re74        -2.333e-05  7.526e-05  -0.310    0.757
#> I(re74^2)    8.298e-10  2.510e-09   0.331    0.741
#> re75        -7.419e-06  9.758e-05  -0.076    0.939
#> I(re75^2)    7.957e-10  4.775e-09   0.167    0.868
#> u74         -6.630e-02  4.360e-01  -0.152    0.879
#> u75         -3.051e-02  3.436e-01  -0.089    0.929
#> 
#> (Dispersion parameter for quasibinomial family taken to be 0.7850194)
#> 
#>     Null deviance: 467.94  on 444  degrees of freedom
#> Residual deviance: 467.73  on 430  degrees of freedom
#> AIC: NA
#> 
#> Number of Fisher Scoring iterations: 3

# Estimate ATC
lm(formula = re78 ~ treat, 
   data = dat,
   weights = atm_weight) |> summary()
#> 
#> Call:
#> lm(formula = re78 ~ treat, data = dat, weights = atm_weight)
#> 
#> Weighted Residuals:
#>    Min     1Q Median     3Q    Max 
#>  -6212  -3734  -1398   2354  54096 
#> 
#> Coefficients:
#>             Estimate Std. Error t value Pr(>|t|)    
#> (Intercept)   4504.6      459.8   9.797  < 2e-16 ***
#> treat         1707.7      648.8   2.632  0.00878 ** 
#> ---
#> Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
#> 
#> Residual standard error: 5960 on 443 degrees of freedom
#> Multiple R-squared:  0.0154, Adjusted R-squared:  0.01318 
#> F-statistic: 6.928 on 1 and 443 DF,  p-value: 0.008783

psa::treatment_effect(treatment = dat$treat,
                      outcome = dat$re78,
                      weights = dat$atm_weight)
#> [1] 1707.689

5 案例2

Show the code
library(twang)

data(AOD)
head(AOD)
#>       treat        suf12      illact   crimjust   subprob    subdep white
#> 1 community  0.008280087 -0.93095580  0.9439362  1.477879 2.1423973     0
#> 2 community -0.660202154  0.41718868  0.1864991 -0.561166 0.6560225     0
#> 3 community  0.628781873  0.08444314 -0.5298800  1.136902 0.9940437     0
#> 4 community -0.133334668  1.09399427 -0.6323299  1.203522 0.2401439     0
#> 5 community  2.408125409  1.41257274 -1.0493772 -0.165523 0.8834611     0
#> 6 community  0.354360794  1.20514029 -1.9882374  1.445932 0.5566021     0
Show the code
table(AOD$treat)
#> 
#> community   metcbt5       scy 
#>       200       200       200

5.1 倾向分数的估计

多分类变量

5.1.1 VGLM

拟合向量广义线性模型 (VGLMs) 来拟合具有 3 类暴露变量的倾向得分模型:

Show the code
require(VGAM)
psFormula <- "treat ~ illact + crimjust + subprob + subdep + white"
ps.model <- vglm(psFormula,family=multinomial, data=AOD)
AOD$ps <- data.frame(fitted(ps.model))

AOD %>% head() %>% 
    unnest(cols = c(ps),names_sep = "_")
#> # A tibble: 6 × 10
#>   treat      suf12  illact crimjust subprob subdep white ps_community ps_metcbt5
#>   <fct>      <dbl>   <dbl>    <dbl>   <dbl>  <dbl> <int>        <dbl>      <dbl>
#> 1 commun…  0.00828 -0.931     0.944   1.48   2.14      0        0.325      0.427
#> 2 commun… -0.660    0.417     0.186  -0.561  0.656     0        0.360      0.324
#> 3 commun…  0.629    0.0844   -0.530   1.14   0.994     0        0.334      0.336
#> 4 commun… -0.133    1.09     -0.632   1.20   0.240     0        0.333      0.303
#> 5 commun…  2.41     1.41     -1.05   -0.166  0.883     0        0.361      0.274
#> 6 commun…  0.354    1.21     -1.99    1.45   0.557     0        0.329      0.266
#> # ℹ 1 more variable: ps_scy <dbl>
Show the code
require(ggplot2)
ggplot(AOD, aes(x=ps[,1], fill=factor(treat))) +
  geom_density(alpha=0.5) +
  scale_fill_discrete(name="Treatment Group") +
  labs(title="Density Plot of Propensity Scores by Treatment Group",
       x="Propensity Score",
       y="Density") +
  theme_minimal()

5.1.2 GBM

通过 GBM 估计倾向分数

Show the code
set.seed(1235)
mnps.AOD.ATT <- mnps(treat ~ illact + crimjust + subprob + subdep + white, 
                 data = AOD, 
                 interaction.depth = 3,
                 estimand = "ATT", 
                 treatATT = "community", # the treated
                 verbose = FALSE, 
                 stop.method = "es.mean", 
                 n.trees = 1000)

5.2 IPW 计算

将PS转换为IPW

Show the code
AOD$w.ATT <- twang::get.weights(mnps.AOD.ATT, stop.method = "es.mean")
summary(AOD$w.ATT)
#>    Min. 1st Qu.  Median    Mean 3rd Qu.    Max. 
#>  0.1494  0.7265  1.0000  0.8896  1.0000  1.8823

by(AOD$w.ATT, AOD$treat, summary)
#> AOD$treat: community
#>    Min. 1st Qu.  Median    Mean 3rd Qu.    Max. 
#>       1       1       1       1       1       1 
#> ------------------------------------------------------------ 
#> AOD$treat: metcbt5
#>    Min. 1st Qu.  Median    Mean 3rd Qu.    Max. 
#>  0.1494  0.4939  0.7204  0.7529  0.9460  1.8386 
#> ------------------------------------------------------------ 
#> AOD$treat: scy
#>    Min. 1st Qu.  Median    Mean 3rd Qu.    Max. 
#>  0.3548  0.7619  0.9630  0.9159  1.0263  1.8823

5.3 weighted_data平衡诊断

Show the code
twang::bal.table(mnps.AOD.ATT, 
                 digits = 2, 
                 collapse.to = "covariate")[,c("max.std.eff.sz",
                                               "stop.method")]
#>    max.std.eff.sz stop.method
#> 1            0.09         unw
#> 2            0.10         unw
#> 3            0.09         unw
#> 4            0.10         unw
#> 5            0.11         unw
#> 6            0.01     es.mean
#> 7            0.03     es.mean
#> 8            0.03     es.mean
#> 9            0.06     es.mean
#> 10           0.07     es.mean
Show the code
plot(mnps.AOD.ATT, plots = 3)

5.4 加权结局模型的效应估计

Show the code
require(survey)
design.mnps.ATT <- svydesign(ids=~1, weights=~w.ATT, data=AOD)
fit <- svyglm(suf12 ~ treat, design = design.mnps.ATT)
require(Publish)
publish(fit, intercept = FALSE)
#>  Variable     Units Coefficient        CI.95   p-value 
#>     treat community         Ref                        
#>             metcbt5        0.20 [-0.00;0.41]   0.05129 
#>                 scy        0.08 [-0.11;0.27]   0.41503

5.5 连续变量

Show the code
require(twangContinuous)