Immunosuppression, Organ Transplant, HIV Positivity, Diabetes Mellitus Without End Organ Damage, Diabetes Mellitus With End Organ Damage, Connective Tissue Disease
# One-to-one (M = 1) matching on the propensity score `lr_ps`, with
# replacement and a caliper of 0.1 SDs, estimating the ATE on `re78`.
lalonde_match <- Match(
  Y = lalonde$re78,
  Tr = lalonde$treat,
  X = lalonde$lr_ps,
  M = 1,
  caliper = 0.1,
  replace = TRUE,
  estimand = "ATE"
)
summary(lalonde_match)
#>
#> Estimate... 2053.1
#> AI SE...... 803.05
#> T-stat..... 2.5566
#> p.val...... 0.010569
#>
#> Original number of observations.............. 445
#> Original number of treated obs............... 185
#> Matched number of observations............... 433
#> Matched number of observations (unweighted). 744
#>
#> Caliper (SDs)........................................ 0.1
#> Number of obs dropped by 'exact' or 'caliper' 12

# One row per matched pair: the propensity score of the treated unit and of
# its matched control, plus the y positions (1 and 0) used to draw the pair.
lalonde_match_df <- data.frame(
  treated.ps = lalonde[lalonde_match$index.treated, ]$lr_ps,
  control.ps = lalonde[lalonde_match$index.control, ]$lr_ps,
  treated.y = 1,
  control.y = 0
)
lalonde_match_df <- lalonde_match_df[order(lalonde_match_df$control.ps), ]

# Logical mask that is TRUE for every floor(n / 5)-th row, starting at row 1.
# NOTE(review): `rows` is not used by the plot below; presumably it is meant
# for subsetting the pairs elsewhere -- confirm.
n_pairs <- nrow(lalonde_match_df)
rows <- (seq_len(n_pairs) - 1) %% floor(n_pairs / 5) == 0

# Treatment indicator against propensity score, with a fitted logistic curve
# and one faint segment joining each matched treated/control pair.
ggplot(lalonde, aes(x = lr_ps, y = treat)) +
  geom_point(alpha = 0.5) +
  geom_smooth(
    method = glm,
    formula = y ~ x,
    method.args = list(family = binomial(link = "logit")),
    se = FALSE
  ) +
  xlim(c(0, 1)) +
  xlab("Propensity Score") +
  ylab("Treatment") +
  geom_segment(
    data = lalonde_match_df,
    aes(
      x = treated.ps, xend = control.ps,
      y = treated.y, yend = control.y
    ),
    color = "purple",
    alpha = 0.1
  )
匹配后,治疗组和对照组应具有非常相似的特征。可以使用简单的回归模型来估计治疗对结果的影响。
24.3.2 一对一匹配ATT
Estimating the treatment effect on the treated (default is ATT)
Code
# One-to-one matching on the propensity score `lr_ps`, estimating the
# average treatment effect on the treated (ATT) for the outcome `re78`.
rr_att <- Match(
  Y = lalonde$re78,
  Tr = lalonde$treat,
  X = lalonde$lr_ps,
  M = 1,
  estimand = "ATT"
)
summary(rr_att) # The default estimate is ATT here
#>
#> Estimate... 2153.3
#> AI SE...... 825.4
#> T-stat..... 2.6088
#> p.val...... 0.0090858
#>
#> Original number of observations.............. 445
#> Original number of treated obs............... 185
#> Matched number of observations............... 185
#> Matched number of observations (unweighted). 346

# Covariate balance for the matched pairs found above; the treated/control
# row indices are taken straight from the `Match()` result.
rr_att_mb <- psa::MatchBalance(
  df = lalonde,
  formu = lalonde_formu,
  formu.Y = update.formula(lalonde_formu, re78 ~ .),
  index.treated = rr_att$index.treated,
  index.control = rr_att$index.control,
  tolerance = 0.25,
  M = 1,
  estimand = "ATT"
)
plot(rr_att_mb)
# One-to-one matching on the propensity score `lr_ps`, estimating the
# average treatment effect (ATE) for the outcome `re78`.
rr.ate <- Match(
  Y = lalonde$re78,
  Tr = lalonde$treat,
  X = lalonde$lr_ps,
  M = 1,
  estimand = "ATE"
)
summary(rr.ate)
#>
#> Estimate... 2013.3
#> AI SE...... 817.76
#> T-stat..... 2.4619
#> p.val...... 0.013819
#>
#> Original number of observations.............. 445
#> Original number of treated obs............... 185
#> Matched number of observations............... 445
#> Matched number of observations (unweighted). 756
24.3.4 一对多匹配 (ATT)
Code
# ATT matching on the propensity score `lr_ps` with replacement. With
# `ties = TRUE` the unweighted matched count (346) exceeds the number of
# treated units (185), as the summary below shows.
rr2 <- Match(
  Y = lalonde$re78,
  Tr = lalonde$treat,
  X = lalonde$lr_ps,
  M = 1,
  ties = TRUE,
  replace = TRUE,
  estimand = "ATT"
)
summary(rr2) # The default estimate is ATT here
#>
#> Estimate... 2153.3
#> AI SE...... 825.4
#> T-stat..... 2.6088
#> p.val...... 0.0090858
#>
#> Original number of observations.............. 445
#> Original number of treated obs............... 185
#> Matched number of observations............... 185
#> Matched number of observations (unweighted). 346
# Matching with MatchIt. Only the formula and the data are supplied, so
# every other setting is whatever `matchit()` uses by default.
matchit.out <- MatchIt::matchit(lalonde_formu, data = lalonde)
summary(matchit.out)
#>
#> Call:
#> MatchIt::matchit(formula = lalonde_formu, data = lalonde)
#>
#> Summary of Balance for All Data:
#>           Means Treated Means Control Std. Mean Diff. Var. Ratio eCDF Mean
#> distance         0.4468        0.3936          0.4533     1.2101    0.1340
#> age             25.8162       25.0538          0.1066     1.0278    0.0254
#> I(age^2)       717.3946      677.3154          0.0929     1.0115    0.0254
#> educ            10.3459       10.0885          0.1281     1.5513    0.0287
#> I(educ^2)      111.0595      104.3731          0.1701     1.6625    0.0287
#> black            0.8432        0.8269          0.0449          .    0.0163
#> hisp             0.0595        0.1077         -0.2040          .    0.0482
#> married          0.1892        0.1538          0.0902          .    0.0353
#> nodegr           0.7081        0.8346         -0.2783          .    0.1265
#> re74          2095.5740     2107.0268         -0.0023     0.7381    0.0192
#> I(re74^2) 28141433.9907 36667413.1577         -0.0747     0.5038    0.0192
#> re75          1532.0556     1266.9092          0.0824     1.0763    0.0508
#> I(re75^2) 12654752.6909 11196530.0057          0.0260     1.4609    0.0508
#> u74              0.7081        0.7500         -0.0921          .    0.0419
#> u75              0.6000        0.6846         -0.1727          .    0.0846
#>           eCDF Max
#> distance    0.2244
#> age         0.0652
#> I(age^2)    0.0652
#> educ        0.1265
#> I(educ^2)   0.1265
#> black       0.0163
#> hisp        0.0482
#> married     0.0353
#> nodegr      0.1265
#> re74        0.0471
#> I(re74^2)   0.0471
#> re75        0.1075
#> I(re75^2)   0.1075
#> u74         0.0419
#> u75         0.0846
#>
#> Summary of Balance for Matched Data:
#>           Means Treated Means Control Std. Mean Diff. Var. Ratio eCDF Mean
#> distance         0.4468        0.4284          0.1571     1.3077    0.0387
#> age             25.8162       25.1351          0.0952     1.1734    0.0243
#> I(age^2)       717.3946      675.1676          0.0979     1.1512    0.0243
#> educ            10.3459       10.2649          0.0403     1.2869    0.0174
#> I(educ^2)      111.0595      108.4919          0.0653     1.3938    0.0174
#> black            0.8432        0.8486         -0.0149          .    0.0054
#> hisp             0.0595        0.0703         -0.0457          .    0.0108
#> married          0.1892        0.1892          0.0000          .    0.0000
#> nodegr           0.7081        0.7676         -0.1308          .    0.0595
#> re74          2095.5740     1741.2109          0.0725     1.5797    0.0146
#> I(re74^2) 28141433.9907 18066538.6428          0.0883     3.5436    0.0146
#> re75          1532.0556     1314.8073          0.0675     1.3933    0.0264
#> I(re75^2) 12654752.6909  9126579.7979          0.0630     3.4873    0.0264
#> u74              0.7081        0.7243         -0.0357          .    0.0162
#> u75              0.6000        0.6108         -0.0221          .    0.0108
#>           eCDF Max Std. Pair Dist.
#> distance    0.1189          0.1585
#> age         0.0541          0.8159
#> I(age^2)    0.0541          0.7701
#> educ        0.0595          0.7662
#> I(educ^2)   0.0595          0.7604
#> black       0.0054          0.5798
#> hisp        0.0108          0.2286
#> married     0.0000          0.2378
#> nodegr      0.0595          0.5588
#> re74        0.0432          0.6080
#> I(re74^2)   0.0432          0.3620
#> re75        0.0649          0.7292
#> I(re75^2)   0.0649          0.3690
#> u74         0.0162          0.7728
#> u75         0.0108          0.7282
#>
#> Sample Sizes:
#>           Control Treated
#> All           260     185
#> Matched       185     185
#> Unmatched      75       0
#> Discarded       0       0
Code
# Same as above but calculate average treatment effect.
# Ties are not kept and matching is without replacement, so the weighted
# and unweighted matched counts in the summary are equal (370).
rr.ate <- Match(
  Y = lalonde$re78,
  Tr = lalonde$treat,
  X = lalonde$lr_ps,
  M = 1,
  ties = FALSE,
  replace = FALSE,
  estimand = "ATE"
)
summary(rr.ate) # Here the estimate is ATE
#>
#> Estimate... 2036.6
#> SE......... 501.71
#> T-stat..... 4.0592
#> p.val...... 4.9233e-05
#>
#> Original number of observations.............. 445
#> Original number of treated obs............... 185
#> Matched number of observations............... 370
#> Matched number of observations (unweighted). 370
Code
## Genetic Matching
# `GenMatch()` searches for a weight matrix using the covariates named on
# the right-hand side of `lalonde_formu` as the balance matrix.
# NOTE(review): the search is presumably stochastic, so the numbers below
# may not reproduce exactly unless a seed is fixed -- confirm.
rr.gen <- GenMatch(
  Tr = lalonde$treat,
  X = lalonde$lr_ps,
  BalanceMatrix = lalonde[, all.vars(lalonde_formu)[-1]],
  estimand = "ATE",
  M = 1,
  pop.size = 16,
  print.level = 0
)

# Re-run the matching with the weight matrix found by `GenMatch()`.
rr.gen.mout <- Match(
  Y = lalonde$re78,
  Tr = lalonde$treat,
  X = lalonde$lr_ps,
  estimand = "ATE",
  Weight.matrix = rr.gen
)
summary(rr.gen.mout)
#>
#> Estimate... 2086.5
#> AI SE...... 815.65
#> T-stat..... 2.5581
#> p.val...... 0.010524
#>
#> Original number of observations.............. 445
#> Original number of treated obs............... 185
#> Matched number of observations............... 445
#> Matched number of observations (unweighted). 671
Code
## Partial exact matching
# Match on the propensity score `lr_ps` separately within each level of
# `nodegr`, so matched pairs always share the same `nodegr` value.
rr2 <- Matchby(
  Y = lalonde$re78,
  Tr = lalonde$treat,
  X = lalonde$lr_ps,
  by = factor(lalonde$nodegr),
  print.level = 0
)
summary(rr2)
#>
#> Estimate... 2014.4
#> SE......... 702.05
#> T-stat..... 2.8693
#> p.val...... 0.0041132
#>
#> Original number of observations.............. 445
#> Original number of treated obs............... 185
#> Matched number of observations............... 185
#> Matched number of observations (unweighted). 185
Code
## Partial exact matching on two covariates
# Match on the propensity score `lr_ps` within the groups defined jointly
# by `nodegr` and `married`.
rr3 <- Matchby(
  Y = lalonde$re78,
  Tr = lalonde$treat,
  X = lalonde$lr_ps,
  by = lalonde[, c("nodegr", "married")],
  print.level = 0
)
summary(rr3)
#>
#> Estimate... 1894
#> SE......... 705.3
#> T-stat..... 2.6853
#> p.val...... 0.0072455
#>
#> Original number of observations.............. 445
#> Original number of treated obs............... 185
#> Matched number of observations............... 185
#> Matched number of observations (unweighted). 185
24.4 示例
变量名
描述
age
年龄
educ
受教育年限
black
分类变量,1为黑人
hisp
分类变量,1为西班牙裔
married
分类变量,1为已婚
nodegr
分类变量,1为没有高中学历证书
re74
1974年的收入
re75
1975年的收入
re78
1978年的收入
u74
分类变量,1为1974年收入为零
u75
分类变量,1为1975年收入为零
treat
分类变量,1为实验组
24.4.1 估计倾向值分数
Code
# Estimate the propensity score with a logistic regression of treatment on
# the covariates. The data frame is passed through `data =` and columns are
# referenced as `lalonde$...` instead of calling `attach(lalonde)`, so the
# search path is left untouched.
glm_ps <- glm(
  formula = treat ~ age + educ + black + hisp + married + nodegr +
    re74 + re75,
  family = binomial(link = "logit"),
  data = lalonde
)

# ATT via one-to-one matching on the fitted propensity score,
# with replacement.
psm1 <- Match(
  Y = lalonde$re78,
  Tr = lalonde$treat,
  X = glm_ps$fitted.values,
  estimand = "ATT",
  M = 1,
  replace = TRUE
)
summary(psm1)
#>
#> Estimate... 2624.3
#> AI SE...... 802.19
#> T-stat..... 3.2714
#> p.val...... 0.0010702
#>
#> Original number of observations.............. 445
#> Original number of treated obs............... 185
#> Matched number of observations............... 185
#> Matched number of observations (unweighted). 344

# Same matching, but without replacement.
psm2 <- Match(
  Y = lalonde$re78,
  Tr = lalonde$treat,
  X = glm_ps$fitted.values,
  estimand = "ATT",
  M = 1,
  replace = FALSE
)
summary(psm2)
#>
#> Estimate... 1996.3
#> SE......... 643.88
#> T-stat..... 3.1005
#> p.val...... 0.0019319
#>
#> Original number of observations.............. 445
#> Original number of treated obs............... 185
#> Matched number of observations............... 185
#> Matched number of observations (unweighted). 185
***** (V1) age ***** Before Matching After Matching
mean treatment........ 25.816 25.816
mean control.......... 25.054 25.692
std mean diff......... 10.655 1.7342
mean raw eQQ diff..... 0.94054 0.73837
med raw eQQ diff..... 1 0
max raw eQQ diff..... 7 9
mean eCDF diff........ 0.025364 0.021893
med eCDF diff........ 0.022193 0.020349
max eCDF diff........ 0.065177 0.061047
var ratio (Tr/Co)..... 1.0278 1.083
T-test p-value........ 0.26594 0.84975
KS Bootstrap p-value.. 0.526 0.355
KS Naive p-value...... 0.7481 0.54314
KS Statistic.......... 0.065177 0.061047
Code
# Covariate balance before and after the `psm1` matching, for every
# covariate in the propensity-score model.
# NOTE(review): the KS bootstrap p-values come from `nboots = 1000`
# resamples and will presumably differ slightly between runs unless a seed
# is set -- confirm.
check_balance <- MatchBalance(
  formul = treat ~ age + educ + black + hisp + married + nodegr +
    re74 + re75,
  match.out = psm1,
  nboots = 1000,
  data = lalonde
)
#>
#> ***** (V1) age *****
#>                        Before Matching    After Matching
#> mean treatment........     25.816            25.816
#> mean control..........     25.054            25.692
#> std mean diff.........     10.655            1.7342
#>
#> mean raw eQQ diff.....    0.94054           0.73837
#> med raw eQQ diff.....           1                 0
#> max raw eQQ diff.....           7                 9
#>
#> mean eCDF diff........   0.025364          0.021893
#> med eCDF diff........   0.022193          0.020349
#> max eCDF diff........   0.065177          0.061047
#>
#> var ratio (Tr/Co).....     1.0278             1.083
#> T-test p-value........    0.26594           0.84975
#> KS Bootstrap p-value..      0.514             0.364
#> KS Naive p-value......     0.7481           0.54314
#> KS Statistic..........   0.065177          0.061047
#>
#>
#> ***** (V2) educ *****
#>                        Before Matching    After Matching
#> mean treatment........     10.346            10.346
#> mean control..........     10.088            10.146
#> std mean diff.........     12.806            9.9664
#>
#> mean raw eQQ diff.....    0.40541           0.23256
#> med raw eQQ diff.....           0                 0
#> max raw eQQ diff.....           2                 2
#>
#> mean eCDF diff........   0.028698          0.016611
#> med eCDF diff........   0.012682          0.010174
#> max eCDF diff........    0.12651          0.061047
#>
#> var ratio (Tr/Co).....     1.5513            1.2344
#> T-test p-value........    0.15017            0.1842
#> KS Bootstrap p-value..      0.003             0.183
#> KS Naive p-value......   0.062873           0.54314
#> KS Statistic..........    0.12651          0.061047
#>
#>
#> ***** (V3) black *****
#>                        Before Matching    After Matching
#> mean treatment........    0.84324           0.84324
#> mean control..........    0.82692           0.86847
#> std mean diff.........     4.4767           -6.9194
#>
#> mean raw eQQ diff.....   0.016216          0.026163
#> med raw eQQ diff.....           0                 0
#> max raw eQQ diff.....           1                 1
#>
#> mean eCDF diff........  0.0081601          0.013081
#> med eCDF diff........  0.0081601          0.013081
#> max eCDF diff........    0.01632          0.026163
#>
#> var ratio (Tr/Co).....    0.92503            1.1572
#> T-test p-value........    0.64736           0.40214
#>
#>
#> ***** (V4) hisp *****
#>                        Before Matching    After Matching
#> mean treatment........   0.059459          0.059459
#> mean control..........    0.10769           0.04955
#> std mean diff.........    -20.341            4.1792
#>
#> mean raw eQQ diff.....   0.048649          0.011628
#> med raw eQQ diff.....           0                 0
#> max raw eQQ diff.....           1                 1
#>
#> mean eCDF diff........   0.024116          0.005814
#> med eCDF diff........   0.024116          0.005814
#> max eCDF diff........   0.048233          0.011628
#>
#> var ratio (Tr/Co).....    0.58288            1.1875
#> T-test p-value........   0.064043           0.46063
#>
#>
#> ***** (V5) married *****
#>                        Before Matching    After Matching
#> mean treatment........    0.18919           0.18919
#> mean control..........    0.15385           0.18423
#> std mean diff.........     8.9995            1.2617
#>
#> mean raw eQQ diff.....   0.037838          0.026163
#> med raw eQQ diff.....           0                 0
#> max raw eQQ diff.....           1                 1
#>
#> mean eCDF diff........   0.017672          0.013081
#> med eCDF diff........   0.017672          0.013081
#> max eCDF diff........   0.035343          0.026163
#>
#> var ratio (Tr/Co).....     1.1802            1.0207
#> T-test p-value........    0.33425           0.89497
#>
#>
#> ***** (V6) nodegr *****
#>                        Before Matching    After Matching
#> mean treatment........    0.70811           0.70811
#> mean control..........    0.83462           0.76757
#> std mean diff.........    -27.751           -13.043
#>
#> mean raw eQQ diff.....    0.12432          0.043605
#> med raw eQQ diff.....           0                 0
#> max raw eQQ diff.....           1                 1
#>
#> mean eCDF diff........   0.063254          0.021802
#> med eCDF diff........   0.063254          0.021802
#> max eCDF diff........    0.12651          0.043605
#>
#> var ratio (Tr/Co).....     1.4998            1.1585
#> T-test p-value........  0.0020368         0.0071385
#>
#>
#> ***** (V7) re74 *****
#>                        Before Matching    After Matching
#> mean treatment........     2095.6            2095.6
#> mean control..........       2107            2193.3
#> std mean diff.........   -0.23437           -2.0004
#>
#> mean raw eQQ diff.....     487.98            869.16
#> med raw eQQ diff.....           0                 0
#> max raw eQQ diff.....        8413             10305
#>
#> mean eCDF diff........   0.019223          0.054701
#> med eCDF diff........     0.0158          0.050872
#> max eCDF diff........   0.047089           0.12209
#>
#> var ratio (Tr/Co).....     0.7381           0.75054
#> T-test p-value........    0.98186           0.84996
#> KS Bootstrap p-value..      0.575        < 2.22e-16
#> KS Naive p-value......    0.97023          0.011858
#> KS Statistic..........   0.047089           0.12209
#>
#>
#> ***** (V8) re75 *****
#>                        Before Matching    After Matching
#> mean treatment........     1532.1            1532.1
#> mean control..........     1266.9            2179.9
#> std mean diff.........     8.2363           -20.125
#>
#> mean raw eQQ diff.....     367.61            590.34
#> med raw eQQ diff.....           0                 0
#> max raw eQQ diff.....      2110.2            8092.9
#>
#> mean eCDF diff........   0.050834          0.050338
#> med eCDF diff........   0.061954          0.049419
#> max eCDF diff........    0.10748          0.098837
#>
#> var ratio (Tr/Co).....     1.0763           0.56563
#> T-test p-value........    0.38527          0.079002
#> KS Bootstrap p-value..      0.049              0.01
#> KS Naive p-value......    0.16449          0.069435
#> KS Statistic..........    0.10748          0.098837
#>
#>
#> Before Matching Minimum p.value: 0.0020368
#> Variable Name(s): nodegr Number(s): 6
#>
#> After Matching Minimum p.value: < 2.22e-16
#> Variable Name(s): re74 Number(s): 7
Code
# `age` is balanced after matching: QQ plot of age for the matched controls
# against age for the matched treated units, with the 45-degree reference
# line (points on the line mean identical quantiles in both groups).
qqplot(
  lalonde$age[psm1$index.control],
  lalonde$age[psm1$index.treated]
)
abline(a = 0, b = 1)
# NOTE(review): `PSM` and `gen_match` are not created in this chunk; they
# are assumed to exist already (presumably a `Match()` result and the
# `GenMatch()` weight matrix it used) -- confirm.
summary(PSM)
#>
#> Estimate... 2439.3
#> AI SE...... 813.4
#> T-stat..... 2.9989
#> p.val...... 0.0027099
#>
#> Original number of observations.............. 445
#> Original number of treated obs............... 185
#> Matched number of observations............... 185
#> Matched number of observations (unweighted). 489

# Same ATT matching restricted to the region of common support. Columns are
# referenced as `lalonde$...` rather than through `attach()`/`detach()`.
# With `CommonSupport = TRUE` the summary reports 430 observations
# (181 treated) instead of 445 (185 treated).
PSM_CS <- Match(
  Y = lalonde$re78,
  Tr = lalonde$treat,
  X = glm_ps$fitted.values,
  estimand = "ATT",
  Weight.matrix = gen_match,
  replace = TRUE,
  CommonSupport = TRUE
)
summary(PSM_CS)
#>
#> Estimate... 2330
#> AI SE...... 821.6
#> T-stat..... 2.836
#> p.val...... 0.0045684
#>
#> Original number of observations.............. 430
#> Original number of treated obs............... 181
#> Matched number of observations............... 181
#> Matched number of observations (unweighted). 468