24  匹配(Matching)

倾向评分匹配根据他们的倾向评分将治疗组中的每个人与对照组中的个体相匹配。对于每个人来说,倾向得分可以直观地视为从一系列协变量(和潜在混杂因素confounding)计算出来的最近治疗的概率。两个人,一个来自治疗组,一个来自对照组,如果他们的倾向评分之间的差异很小,则被认为是匹配的。不匹配的参与者将被丢弃。

24.1 数据来源

https://hbiostat.org/data/

https://hbiostat.org/data/repo/rhc

Code
rhc <- read_csv("data/PS/rhc.csv")

rhc %>% 
    select(age, sex, race, edu, income, ninsclas, cat1,das2d3pc, dnr1, 
           ca, surv2md1, aps1, scoma1, wtkilo1, temp1, meanbp1, resp1,
           hrt1, pafi1, paco21, ph1, wblc1, hema1, sod1, pot1, crea1,
           bili1, alb1, resp, card, neuro, gastr, renal, meta, hema,
           seps, trauma, ortho, cardiohx, chfhx, dementhx, psychhx,
           chrpulhx, renalhx, liverhx, gibledhx, malighx, immunhx, 
           transhx, amihx, swang1) %>% 
    mutate(
        across(where(is.character), as.factor),
        across(ends_with("hx"), as.factor)
    )->
    rhc2
rhc2 %>% select(where(is.factor)) %>% 
    map(table)
#> $sex
#> 
#> Female   Male 
#>   2543   3192 
#> 
#> $race
#> 
#> black other white 
#>   920   355  4460 
#> 
#> $income
#> 
#>   $11-$25k   $25-$50k     > $50k Under $11k 
#>       1165        893        451       3226 
#> 
#> $ninsclas
#> 
#>            Medicaid            Medicare Medicare & Medicaid        No insurance 
#>                 647                1458                 374                 322 
#>             Private  Private & Medicare 
#>                1698                1236 
#> 
#> $cat1
#> 
#>               ARF               CHF         Cirrhosis      Colon Cancer 
#>              2490               456               224                 7 
#>              Coma              COPD       Lung Cancer MOSF w/Malignancy 
#>               436               457                39               399 
#>     MOSF w/Sepsis 
#>              1227 
#> 
#> $dnr1
#> 
#>   No  Yes 
#> 5081  654 
#> 
#> $ca
#> 
#> Metastatic         No        Yes 
#>        384       4379        972 
#> 
#> $resp
#> 
#>   No  Yes 
#> 3622 2113 
#> 
#> $card
#> 
#>   No  Yes 
#> 3804 1931 
#> 
#> $neuro
#> 
#>   No  Yes 
#> 5042  693 
#> 
#> $gastr
#> 
#>   No  Yes 
#> 4793  942 
#> 
#> $renal
#> 
#>   No  Yes 
#> 5440  295 
#> 
#> $meta
#> 
#>   No  Yes 
#> 5470  265 
#> 
#> $hema
#> 
#>   No  Yes 
#> 5381  354 
#> 
#> $seps
#> 
#>   No  Yes 
#> 4704 1031 
#> 
#> $trauma
#> 
#>   No  Yes 
#> 5683   52 
#> 
#> $ortho
#> 
#>   No  Yes 
#> 5728    7 
#> 
#> $cardiohx
#> 
#>    0    1 
#> 4722 1013 
#> 
#> $chfhx
#> 
#>    0    1 
#> 4714 1021 
#> 
#> $dementhx
#> 
#>    0    1 
#> 5171  564 
#> 
#> $psychhx
#> 
#>    0    1 
#> 5349  386 
#> 
#> $chrpulhx
#> 
#>    0    1 
#> 4646 1089 
#> 
#> $renalhx
#> 
#>    0    1 
#> 5480  255 
#> 
#> $liverhx
#> 
#>    0    1 
#> 5334  401 
#> 
#> $gibledhx
#> 
#>    0    1 
#> 5550  185 
#> 
#> $malighx
#> 
#>    0    1 
#> 4419 1316 
#> 
#> $immunhx
#> 
#>    0    1 
#> 4192 1543 
#> 
#> $transhx
#> 
#>    0    1 
#> 5073  662 
#> 
#> $amihx
#> 
#>    0    1 
#> 5535  200 
#> 
#> $swang1
#> 
#> No RHC    RHC 
#>   3551   2184

rhc2 %>% select(where(is.numeric)) %>% 
    summary()
#>       age              edu           das2d3pc        surv2md1     
#>  Min.   : 18.04   Min.   : 0.00   Min.   :11.00   Min.   :0.0000  
#>  1st Qu.: 50.15   1st Qu.:10.00   1st Qu.:16.06   1st Qu.:0.4709  
#>  Median : 64.05   Median :12.00   Median :19.75   Median :0.6280  
#>  Mean   : 61.38   Mean   :11.68   Mean   :20.50   Mean   :0.5925  
#>  3rd Qu.: 73.93   3rd Qu.:13.00   3rd Qu.:23.43   3rd Qu.:0.7430  
#>  Max.   :101.85   Max.   :30.00   Max.   :33.00   Max.   :0.9620  
#>       aps1            scoma1       wtkilo1           temp1      
#>  Min.   :  3.00   Min.   :  0   Min.   :  0.00   Min.   :27.00  
#>  1st Qu.: 41.00   1st Qu.:  0   1st Qu.: 56.30   1st Qu.:36.09  
#>  Median : 54.00   Median :  0   Median : 70.00   Median :38.09  
#>  Mean   : 54.67   Mean   : 21   Mean   : 67.83   Mean   :37.62  
#>  3rd Qu.: 67.00   3rd Qu.: 41   3rd Qu.: 83.70   3rd Qu.:39.00  
#>  Max.   :147.00   Max.   :100   Max.   :244.00   Max.   :43.00  
#>     meanbp1           resp1             hrt1           pafi1      
#>  Min.   :  0.00   Min.   :  0.00   Min.   :  0.0   Min.   : 11.6  
#>  1st Qu.: 50.00   1st Qu.: 14.00   1st Qu.: 97.0   1st Qu.:133.3  
#>  Median : 63.00   Median : 30.00   Median :124.0   Median :202.5  
#>  Mean   : 78.52   Mean   : 28.09   Mean   :115.2   Mean   :222.3  
#>  3rd Qu.:115.00   3rd Qu.: 38.00   3rd Qu.:141.0   3rd Qu.:316.6  
#>  Max.   :259.00   Max.   :100.00   Max.   :250.0   Max.   :937.5  
#>      paco21            ph1            wblc1             hema1      
#>  Min.   :  1.00   Min.   :6.579   Min.   :  0.000   Min.   : 2.00  
#>  1st Qu.: 31.00   1st Qu.:7.340   1st Qu.:  8.398   1st Qu.:26.10  
#>  Median : 37.00   Median :7.400   Median : 14.100   Median :30.00  
#>  Mean   : 38.75   Mean   :7.388   Mean   : 15.645   Mean   :31.87  
#>  3rd Qu.: 42.00   3rd Qu.:7.460   3rd Qu.: 20.049   3rd Qu.:36.30  
#>  Max.   :156.00   Max.   :7.770   Max.   :192.000   Max.   :66.19  
#>       sod1            pot1            crea1              bili1         
#>  Min.   :101.0   Min.   : 1.100   Min.   : 0.09999   Min.   : 0.09999  
#>  1st Qu.:132.0   1st Qu.: 3.400   1st Qu.: 1.00000   1st Qu.: 0.79993  
#>  Median :136.0   Median : 3.800   Median : 1.50000   Median : 1.00977  
#>  Mean   :136.8   Mean   : 4.067   Mean   : 2.13302   Mean   : 2.26707  
#>  3rd Qu.:142.0   3rd Qu.: 4.600   3rd Qu.: 2.39990   3rd Qu.: 1.39990  
#>  Max.   :178.0   Max.   :11.898   Max.   :25.09766   Max.   :58.19531  
#>       alb1       
#>  Min.   : 0.300  
#>  1st Qu.: 2.600  
#>  Median : 3.500  
#>  Mean   : 3.093  
#>  3rd Qu.: 3.500  
#>  Max.   :29.000



library(tableone)

CreateTableOne(strata = "swang1",
               data = rhc2
                ) %>% print(.,showAllLevels = T,smd = T, addOverall = T)
#>                       Stratified by swang1
#>                        level               No RHC          RHC            
#>   n                                          3551            2184         
#>   age (mean (SD))                           61.76 (17.29)   60.75 (15.63) 
#>   sex (%)              Female                1637 ( 46.1)     906 ( 41.5) 
#>                        Male                  1914 ( 53.9)    1278 ( 58.5) 
#>   race (%)             black                  585 ( 16.5)     335 ( 15.3) 
#>                        other                  213 (  6.0)     142 (  6.5) 
#>                        white                 2753 ( 77.5)    1707 ( 78.2) 
#>   edu (mean (SD))                           11.57 (3.13)    11.86 (3.16)  
#>   income (%)           $11-$25k               713 ( 20.1)     452 ( 20.7) 
#>                        $25-$50k               500 ( 14.1)     393 ( 18.0) 
#>                        > $50k                 257 (  7.2)     194 (  8.9) 
#>                        Under $11k            2081 ( 58.6)    1145 ( 52.4) 
#>   ninsclas (%)         Medicaid               454 ( 12.8)     193 (  8.8) 
#>                        Medicare               947 ( 26.7)     511 ( 23.4) 
#>                        Medicare & Medicaid    251 (  7.1)     123 (  5.6) 
#>                        No insurance           186 (  5.2)     136 (  6.2) 
#>                        Private                967 ( 27.2)     731 ( 33.5) 
#>                        Private & Medicare     746 ( 21.0)     490 ( 22.4) 
#>   cat1 (%)             ARF                   1581 ( 44.5)     909 ( 41.6) 
#>                        CHF                    247 (  7.0)     209 (  9.6) 
#>                        Cirrhosis              175 (  4.9)      49 (  2.2) 
#>                        Colon Cancer             6 (  0.2)       1 (  0.0) 
#>                        Coma                   341 (  9.6)      95 (  4.3) 
#>                        COPD                   399 ( 11.2)      58 (  2.7) 
#>                        Lung Cancer             34 (  1.0)       5 (  0.2) 
#>                        MOSF w/Malignancy      241 (  6.8)     158 (  7.2) 
#>                        MOSF w/Sepsis          527 ( 14.8)     700 ( 32.1) 
#>   das2d3pc (mean (SD))                      20.37 (5.48)    20.70 (5.03)  
#>   dnr1 (%)             No                    3052 ( 85.9)    2029 ( 92.9) 
#>                        Yes                    499 ( 14.1)     155 (  7.1) 
#>   ca (%)               Metastatic             261 (  7.4)     123 (  5.6) 
#>                        No                    2652 ( 74.7)    1727 ( 79.1) 
#>                        Yes                    638 ( 18.0)     334 ( 15.3) 
#>   surv2md1 (mean (SD))                       0.61 (0.19)     0.57 (0.20)  
#>   aps1 (mean (SD))                          50.93 (18.81)   60.74 (20.27) 
#>   scoma1 (mean (SD))                        22.25 (31.37)   18.97 (28.26) 
#>   wtkilo1 (mean (SD))                       65.04 (29.50)   72.36 (27.73) 
#>   temp1 (mean (SD))                         37.63 (1.74)    37.59 (1.83)  
#>   meanbp1 (mean (SD))                       84.87 (38.87)   68.20 (34.24) 
#>   resp1 (mean (SD))                         28.98 (13.95)   26.65 (14.17) 
#>   hrt1 (mean (SD))                         112.87 (40.94)  118.93 (41.47) 
#>   pafi1 (mean (SD))                        240.63 (116.66) 192.43 (105.54)
#>   paco21 (mean (SD))                        39.95 (14.24)   36.79 (10.97) 
#>   ph1 (mean (SD))                            7.39 (0.11)     7.38 (0.11)  
#>   wblc1 (mean (SD))                         15.26 (11.41)   16.27 (12.55) 
#>   hema1 (mean (SD))                         32.70 (8.79)    30.51 (7.42)  
#>   sod1 (mean (SD))                         137.04 (7.68)   136.33 (7.60)  
#>   pot1 (mean (SD))                           4.08 (1.04)     4.05 (1.01)  
#>   crea1 (mean (SD))                          1.92 (2.03)     2.47 (2.05)  
#>   bili1 (mean (SD))                          2.00 (4.43)     2.71 (5.33)  
#>   alb1 (mean (SD))                           3.16 (0.67)     2.98 (0.93)  
#>   resp (%)             No                    2070 ( 58.3)    1552 ( 71.1) 
#>                        Yes                   1481 ( 41.7)     632 ( 28.9) 
#>   card (%)             No                    2544 ( 71.6)    1260 ( 57.7) 
#>                        Yes                   1007 ( 28.4)     924 ( 42.3) 
#>   neuro (%)            No                    2976 ( 83.8)    2066 ( 94.6) 
#>                        Yes                    575 ( 16.2)     118 (  5.4) 
#>   gastr (%)            No                    3029 ( 85.3)    1764 ( 80.8) 
#>                        Yes                    522 ( 14.7)     420 ( 19.2) 
#>   renal (%)            No                    3404 ( 95.9)    2036 ( 93.2) 
#>                        Yes                    147 (  4.1)     148 (  6.8) 
#>   meta (%)             No                    3379 ( 95.2)    2091 ( 95.7) 
#>                        Yes                    172 (  4.8)      93 (  4.3) 
#>   hema (%)             No                    3312 ( 93.3)    2069 ( 94.7) 
#>                        Yes                    239 (  6.7)     115 (  5.3) 
#>   seps (%)             No                    3036 ( 85.5)    1668 ( 76.4) 
#>                        Yes                    515 ( 14.5)     516 ( 23.6) 
#>   trauma (%)           No                    3533 ( 99.5)    2150 ( 98.4) 
#>                        Yes                     18 (  0.5)      34 (  1.6) 
#>   ortho (%)            No                    3548 ( 99.9)    2180 ( 99.8) 
#>                        Yes                      3 (  0.1)       4 (  0.2) 
#>   cardiohx (%)         0                     2984 ( 84.0)    1738 ( 79.6) 
#>                        1                      567 ( 16.0)     446 ( 20.4) 
#>   chfhx (%)            0                     2955 ( 83.2)    1759 ( 80.5) 
#>                        1                      596 ( 16.8)     425 ( 19.5) 
#>   dementhx (%)         0                     3138 ( 88.4)    2033 ( 93.1) 
#>                        1                      413 ( 11.6)     151 (  6.9) 
#>   psychhx (%)          0                     3265 ( 91.9)    2084 ( 95.4) 
#>                        1                      286 (  8.1)     100 (  4.6) 
#>   chrpulhx (%)         0                     2777 ( 78.2)    1869 ( 85.6) 
#>                        1                      774 ( 21.8)     315 ( 14.4) 
#>   renalhx (%)          0                     3402 ( 95.8)    2078 ( 95.1) 
#>                        1                      149 (  4.2)     106 (  4.9) 
#>   liverhx (%)          0                     3286 ( 92.5)    2048 ( 93.8) 
#>                        1                      265 (  7.5)     136 (  6.2) 
#>   gibledhx (%)         0                     3420 ( 96.3)    2130 ( 97.5) 
#>                        1                      131 (  3.7)      54 (  2.5) 
#>   malighx (%)          0                     2679 ( 75.4)    1740 ( 79.7) 
#>                        1                      872 ( 24.6)     444 ( 20.3) 
#>   immunhx (%)          0                     2644 ( 74.5)    1548 ( 70.9) 
#>                        1                      907 ( 25.5)     636 ( 29.1) 
#>   transhx (%)          0                     3216 ( 90.6)    1857 ( 85.0) 
#>                        1                      335 (  9.4)     327 ( 15.0) 
#>   amihx (%)            0                     3446 ( 97.0)    2089 ( 95.7) 
#>                        1                      105 (  3.0)      95 (  4.3) 
#>   swang1 (%)           No RHC                3551 (100.0)       0 (  0.0) 
#>                        RHC                      0 (  0.0)    2184 (100.0) 
#>                       Stratified by swang1
#>                        p      test SMD   
#>   n                                      
#>   age (mean (SD))       0.026       0.061
#>   sex (%)               0.001       0.093
#>                                          
#>   race (%)              0.425       0.036
#>                                          
#>                                          
#>   edu (mean (SD))       0.001       0.091
#>   income (%)           <0.001       0.142
#>                                          
#>                                          
#>                                          
#>   ninsclas (%)         <0.001       0.194
#>                                          
#>                                          
#>                                          
#>                                          
#>                                          
#>   cat1 (%)             <0.001       0.583
#>                                          
#>                                          
#>                                          
#>                                          
#>                                          
#>                                          
#>                                          
#>                                          
#>   das2d3pc (mean (SD))  0.023       0.063
#>   dnr1 (%)             <0.001       0.228
#>                                          
#>   ca (%)                0.001       0.107
#>                                          
#>                                          
#>   surv2md1 (mean (SD)) <0.001       0.198
#>   aps1 (mean (SD))     <0.001       0.501
#>   scoma1 (mean (SD))   <0.001       0.110
#>   wtkilo1 (mean (SD))  <0.001       0.256
#>   temp1 (mean (SD))     0.429       0.021
#>   meanbp1 (mean (SD))  <0.001       0.455
#>   resp1 (mean (SD))    <0.001       0.165
#>   hrt1 (mean (SD))     <0.001       0.147
#>   pafi1 (mean (SD))    <0.001       0.433
#>   paco21 (mean (SD))   <0.001       0.249
#>   ph1 (mean (SD))      <0.001       0.120
#>   wblc1 (mean (SD))     0.002       0.084
#>   hema1 (mean (SD))    <0.001       0.269
#>   sod1 (mean (SD))      0.001       0.092
#>   pot1 (mean (SD))      0.321       0.027
#>   crea1 (mean (SD))    <0.001       0.270
#>   bili1 (mean (SD))    <0.001       0.145
#>   alb1 (mean (SD))     <0.001       0.230
#>   resp (%)             <0.001       0.270
#>                                          
#>   card (%)             <0.001       0.295
#>                                          
#>   neuro (%)            <0.001       0.353
#>                                          
#>   gastr (%)            <0.001       0.121
#>                                          
#>   renal (%)            <0.001       0.116
#>                                          
#>   meta (%)              0.337       0.028
#>                                          
#>   hema (%)              0.029       0.062
#>                                          
#>   seps (%)             <0.001       0.234
#>                                          
#>   trauma (%)           <0.001       0.104
#>                                          
#>   ortho (%)             0.516       0.027
#>                                          
#>   cardiohx (%)         <0.001       0.116
#>                                          
#>   chfhx (%)             0.011       0.070
#>                                          
#>   dementhx (%)         <0.001       0.163
#>                                          
#>   psychhx (%)          <0.001       0.143
#>                                          
#>   chrpulhx (%)         <0.001       0.192
#>                                          
#>   renalhx (%)           0.268       0.032
#>                                          
#>   liverhx (%)           0.084       0.049
#>                                          
#>   gibledhx (%)          0.014       0.070
#>                                          
#>   malighx (%)          <0.001       0.101
#>                                          
#>   immunhx (%)           0.003       0.080
#>                                          
#>   transhx (%)          <0.001       0.170
#>                                          
#>   amihx (%)             0.007       0.074
#>                                          
#>   swang1 (%)           <0.001         NaN
#> 
Variable name Variable Definition
Age Age
Sex Sex
Race Race
Edu Years of education
Income Income
Ninsclas Medical insurance
Cat1 Primary disease category
Cat2 Secondary disease category
Categories of admission diagnosis:  
Resp Respiratory Diagnosis
Card Cardiovascular Diagnosis
Neuro Neurological Diagnosis
Gastr Gastrointestinal Diagnosis
Renal Renal Diagnosis
Meta Metabolic Diagnosis
Hema Hematologic Diagnosis
Seps Sepsis Diagnosis
Trauma Trauma Diagnosis
Ortho Orthopedic Diagnosis
   
Adld3p ADL
Das2d3pc DASI ( Duke Activity Status Index)
Dnr1 DNR status on day1
Ca Cancer
Surv2md1 Support model estimate of the prob. of surviving 2 months
Aps1 APACHE score
Scoma1 Glasgow Coma Score
Wtkilo1 Weight
Temp1 Temperature
Meanbp1 Mean blood pressure
Resp1 Respiratory rate
Hrt1 Heart rate
Pafi1 PaO2/FIO2 ratio
Paco21 PaCo2
Ph1 PH
Wblc1 WBC
Hema1 Hematocrit
Sod1 Sodium
Pot1 Potassium
Crea1 Creatinine
Bili1 Bilirubin
Alb1 Albumin
Urin1 Urine output
Categories of comorbidities illness:  
Cardiohx Acute MI, Peripheral Vascular Disease, Severe Cardiovascular Symptoms (NYHA-Class III), Very Severe Cardiovascular Symptoms (NYHA-Class IV)
Chfhx Congestive Heart Failure
Dementhx Dementia, Stroke or Cerebral Infarct, Parkinson�s Disease
Psychhx Psychiatric History, Active Psychosis or Severe Depression
Chrpulhx Chronic Pulmonary Disease, Severe Pulmonary Disease, Very Severe Pulmonary Disease
Renalhx Chronic Renal Disease, Chronic Hemodialysis or Peritoneal Dialysis
Liverhx Cirrhosis, Hepatic Failure
Gibledhx Upper GI Bleeding
Malighx Solid Tumor, Metastatic Disease, Chronic Leukemia/Myeloma, Acute Leukemia, Lymphoma
Immunhx Immunosupperssion, Organ Transplant, HIV Positivity, Diabetes Mellitus Without End Organ Damage, Diabetes Mellitus With End Organ Damage, Connective Tissue Disease
Transhx Transfer (> 24 Hours) from Another Hospital
Amihx Definite Myocardial Infarction
   
Swang1 Right Heart Catheterization (rhc2)
Sadmdte Study Admission Date
Dthdte Date of Death
Lstctdte Date of Last Contact
Dschdte Hospital Discharge Date
Death Death at any time up to 180 Days
Ptid Patient ID

24.2 估计倾向得分

Code

library(tidymodels)
rhc2_formula <- swang1 ~ .



logit_ps <- logistic_reg() %>%
    fit(rhc2_formula, data = rhc2)

summary(logit_ps$fit)
#> 
#> Call:
#> stats::glm(formula = swang1 ~ ., family = stats::binomial, data = data)
#> 
#> Coefficients:
#>                               Estimate Std. Error z value Pr(>|z|)    
#> (Intercept)                 17.2203728  3.1137929   5.530 3.20e-08 ***
#> age                         -0.0043737  0.0028619  -1.528 0.126457    
#> sexMale                      0.0478361  0.0679017   0.704 0.481128    
#> raceother                    0.0238498  0.1519260   0.157 0.875258    
#> racewhite                   -0.0480662  0.0939149  -0.512 0.608787    
#> edu                          0.0261939  0.0116913   2.240 0.025061 *  
#> income$25-$50k               0.0888363  0.1094906   0.811 0.417159    
#> income> $50k                 0.0312483  0.1382814   0.226 0.821220    
#> incomeUnder $11k             0.0548697  0.0868242   0.632 0.527411    
#> ninsclasMedicare             0.2328987  0.1351961   1.723 0.084948 .  
#> ninsclasMedicare & Medicaid  0.4012458  0.1691375   2.372 0.017677 *  
#> ninsclasNo insurance         0.5264711  0.1673868   3.145 0.001660 ** 
#> ninsclasPrivate              0.4138771  0.1247856   3.317 0.000911 ***
#> ninsclasPrivate & Medicare   0.3490696  0.1391972   2.508 0.012151 *  
#> cat1CHF                      0.7833692  0.1596299   4.907 9.23e-07 ***
#> cat1Cirrhosis               -1.1607975  0.2378358  -4.881 1.06e-06 ***
#> cat1Colon Cancer            -0.0864465  1.1193521  -0.077 0.938441    
#> cat1Coma                    -0.5829321  0.1819159  -3.204 0.001353 ** 
#> cat1COPD                    -0.4719080  0.1901740  -2.481 0.013085 *  
#> cat1Lung Cancer             -0.4138852  0.5445513  -0.760 0.447226    
#> cat1MOSF w/Malignancy        0.0143100  0.1614131   0.089 0.929356    
#> cat1MOSF w/Sepsis            0.5339948  0.0910519   5.865 4.50e-09 ***
#> das2d3pc                    -0.0020607  0.0066432  -0.310 0.756416    
#> dnr1Yes                     -0.6777513  0.1166464  -5.810 6.24e-09 ***
#> caNo                         1.0485967  0.4350633   2.410 0.015943 *  
#> caYes                        0.2805522  0.1619387   1.732 0.083192 .  
#> surv2md1                    -1.8855344  0.3535424  -5.333 9.65e-08 ***
#> aps1                         0.0037687  0.0029458   1.279 0.200771    
#> scoma1                      -0.0041620  0.0016024  -2.597 0.009396 ** 
#> wtkilo1                      0.0063327  0.0012171   5.203 1.96e-07 ***
#> temp1                       -0.0284681  0.0197651  -1.440 0.149776    
#> meanbp1                     -0.0063608  0.0010216  -6.226 4.78e-10 ***
#> resp1                       -0.0207690  0.0025956  -8.002 1.23e-15 ***
#> hrt1                         0.0054615  0.0008942   6.108 1.01e-09 ***
#> pafi1                       -0.0047235  0.0003463 -13.639  < 2e-16 ***
#> paco21                      -0.0261850  0.0036694  -7.136 9.60e-13 ***
#> ph1                         -1.6756614  0.3847432  -4.355 1.33e-05 ***
#> wblc1                        0.0001465  0.0027568   0.053 0.957609    
#> hema1                       -0.0115223  0.0047296  -2.436 0.014841 *  
#> sod1                        -0.0108807  0.0043040  -2.528 0.011470 *  
#> pot1                        -0.1720083  0.0341922  -5.031 4.89e-07 ***
#> crea1                        0.0523499  0.0218175   2.399 0.016420 *  
#> bili1                        0.0072293  0.0074332   0.973 0.330766    
#> alb1                        -0.0964314  0.0481135  -2.004 0.045044 *  
#> respYes                     -0.2725098  0.0827259  -3.294 0.000987 ***
#> cardYes                      0.5575983  0.0856700   6.509 7.58e-11 ***
#> neuroYes                    -0.4873660  0.1343763  -3.627 0.000287 ***
#> gastrYes                     0.3505067  0.1054000   3.325 0.000883 ***
#> renalYes                     0.3004368  0.1490678   2.015 0.043859 *  
#> metaYes                     -0.1129081  0.1554971  -0.726 0.467771    
#> hemaYes                     -0.5131993  0.1470678  -3.490 0.000484 ***
#> sepsYes                      0.2844857  0.0919399   3.094 0.001973 ** 
#> traumaYes                    1.2560843  0.3346498   3.753 0.000174 ***
#> orthoYes                     1.1814997  0.9691723   1.219 0.222813    
#> cardiohx1                    0.0482934  0.0953604   0.506 0.612554    
#> chfhx1                       0.0936948  0.1042890   0.898 0.368964    
#> dementhx1                   -0.4121592  0.1213400  -3.397 0.000682 ***
#> psychhx1                    -0.3913426  0.1382534  -2.831 0.004646 ** 
#> chrpulhx1                    0.0122390  0.1007226   0.122 0.903286    
#> renalhx1                    -0.3352647  0.1814102  -1.848 0.064588 .  
#> liverhx1                    -0.0410152  0.1877313  -0.218 0.827057    
#> gibledhx1                   -0.1856252  0.2283797  -0.813 0.416337    
#> malighx1                     0.2298479  0.3846516   0.598 0.550141    
#> immunhx1                     0.0454368  0.0742516   0.612 0.540584    
#> transhx1                     0.4716832  0.0994237   4.744 2.09e-06 ***
#> amihx1                       0.1317831  0.1749886   0.753 0.451392    
#> ---
#> Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
#> 
#> (Dispersion parameter for binomial family taken to be 1)
#> 
#>     Null deviance: 7621.4  on 5734  degrees of freedom
#> Residual deviance: 5993.2  on 5669  degrees of freedom
#> AIC: 6125.2
#> 
#> Number of Fisher Scoring iterations: 5
rhc2$PS <- fitted(logit_ps$fit)

predict(logit_ps, new_data = rhc2, type = "prob") %>% head()
.pred_No RHC .pred_RHC
0.6489847 0.3510153
0.3308540 0.6691460
0.3664559 0.6335441
0.6310644 0.3689356
0.5543195 0.4456805
0.9447431 0.0552569

24.3 匹配方法

24.3.1 Matching

Code
library(Matching)
data(lalonde, package='Matching')
glimpse(lalonde)
#> Rows: 445
#> Columns: 12
#> $ age     <int> 37, 22, 30, 27, 33, 22, 23, 32, 22, 33, 19, 21, 18, 27, 17, 19…
#> $ educ    <int> 11, 9, 12, 11, 8, 9, 12, 11, 16, 12, 9, 13, 8, 10, 7, 10, 13, …
#> $ black   <int> 1, 0, 1, 1, 1, 1, 1, 1, 1, 0, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,…
#> $ hisp    <int> 0, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,…
#> $ married <int> 1, 0, 0, 0, 0, 0, 0, 0, 0, 1, 0, 0, 0, 1, 0, 0, 0, 0, 0, 0, 0,…
#> $ nodegr  <int> 1, 1, 0, 1, 1, 1, 0, 1, 0, 0, 1, 0, 1, 1, 1, 1, 0, 1, 0, 0, 1,…
#> $ re74    <dbl> 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,…
#> $ re75    <dbl> 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,…
#> $ re78    <dbl> 9930.05, 3595.89, 24909.50, 7506.15, 289.79, 4056.49, 0.00, 84…
#> $ u74     <int> 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,…
#> $ u75     <int> 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,…
#> $ treat   <int> 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,…

lalonde_formu <- treat ~ age + I(age^2) + educ + I(educ^2) + black +
    hisp + married + nodegr + re74  + I(re74^2) + re75 + I(re75^2) + u74 + u75
lr_out <- glm(formula = lalonde_formu,
              data = lalonde,
              family = binomial(link = 'logit'))

summary(lr_out)
#> 
#> Call:
#> glm(formula = lalonde_formu, family = binomial(link = "logit"), 
#>     data = lalonde)
#> 
#> Coefficients:
#>               Estimate Std. Error z value Pr(>|z|)  
#> (Intercept)  4.269e+00  2.173e+00   1.965   0.0494 *
#> age          2.143e-02  9.037e-02   0.237   0.8126  
#> I(age^2)    -3.448e-04  1.484e-03  -0.232   0.8163  
#> educ        -8.713e-01  4.150e-01  -2.099   0.0358 *
#> I(educ^2)    4.499e-02  2.330e-02   1.931   0.0535 .
#> black       -2.613e-01  3.708e-01  -0.705   0.4809  
#> hisp        -8.974e-01  5.184e-01  -1.731   0.0835 .
#> married      1.829e-01  2.831e-01   0.646   0.5183  
#> nodegr      -4.285e-01  3.930e-01  -1.090   0.2756  
#> re74        -2.168e-05  7.739e-05  -0.280   0.7793  
#> I(re74^2)   -8.553e-10  2.424e-09  -0.353   0.7242  
#> re75         6.577e-05  1.025e-04   0.642   0.5210  
#> I(re75^2)   -1.968e-09  5.042e-09  -0.390   0.6963  
#> u74         -8.315e-02  4.521e-01  -0.184   0.8541  
#> u75         -3.060e-01  3.591e-01  -0.852   0.3942  
#> ---
#> Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
#> 
#> (Dispersion parameter for binomial family taken to be 1)
#> 
#>     Null deviance: 604.20  on 444  degrees of freedom
#> Residual deviance: 580.02  on 430  degrees of freedom
#> AIC: 610.02
#> 
#> Number of Fisher Scoring iterations: 4
lalonde$lr_ps <- fitted(lr_out)
Code
lalonde_match <- Match(
    Y = lalonde$re78,
    Tr = lalonde$treat,
    X = lalonde$lr_ps,
    M = 1,
    caliper = 0.1,
    replace = TRUE,
    estimand = 'ATE'
)

summary(lalonde_match)
#> 
#> Estimate...  2053.1 
#> AI SE......  803.05 
#> T-stat.....  2.5566 
#> p.val......  0.010569 
#> 
#> Original number of observations..............  445 
#> Original number of treated obs...............  185 
#> Matched number of observations...............  433 
#> Matched number of observations  (unweighted).  744 
#> 
#> Caliper (SDs)........................................   0.1 
#> Number of obs dropped by 'exact' or 'caliper'  12

lalonde_match_df <- data.frame(
    treated.ps = lalonde[lalonde_match$index.treated, ]$lr_ps,
    control.ps = lalonde[lalonde_match$index.control, ]$lr_ps,
    treated.y = 1,
    control.y = 0
)
lalonde_match_df <- lalonde_match_df[order(lalonde_match_df$control.ps), ]


rows <- (1:nrow(lalonde_match_df) - 1) %% floor(nrow(lalonde_match_df) / 5) == 0

ggplot(lalonde, aes(x = lr_ps, y = treat)) +
    geom_point(alpha = 0.5) +
    geom_smooth(
        method = glm,
        formula = y ~ x,
        method.args = list(family = binomial(link = 'logit')),
        se = FALSE
    ) +
    xlim(c(0, 1)) +
    xlab('Propensity Score') + ylab('Treatment') +
    geom_segment(
        data = lalonde_match_df,
        aes(
            x = treated.ps,
            xend = control.ps,
            y = treated.y,
            yend = control.y
        ),
        color = 'purple',
        alpha = 0.1
    )

匹配后,治疗组和对照组应具有非常相似的特征。可以使用简单的回归模型来估计治疗对结果的影响。

24.3.2 一对一匹配ATT

Estimating the treatment effect on the treated (default is ATT)

Code
rr_att <- Match(Y = lalonde$re78, 
                Tr = lalonde$treat, 
                X = lalonde$lr_ps,
                M = 1,
                estimand='ATT')
summary(rr_att) # The default estimate is ATT here
#> 
#> Estimate...  2153.3 
#> AI SE......  825.4 
#> T-stat.....  2.6088 
#> p.val......  0.0090858 
#> 
#> Original number of observations..............  445 
#> Original number of treated obs...............  185 
#> Matched number of observations...............  185 
#> Matched number of observations  (unweighted).  346

rr_att_mb <- psa::MatchBalance(
    df = lalonde,
    formu = lalonde_formu,
    formu.Y = update.formula(lalonde_formu, re78 ~ .),
    index.treated = rr_att$index.treated,
    index.control = rr_att$index.control,
    tolerance = 0.25,
    M = 1,
    estimand = 'ATT')
plot(rr_att_mb)

Code
summary(rr_att_mb)
#> Sample sizes and number of matches:
#>    Group   n n.matched n.percent.matched
#>  Treated 185       185         1.0000000
#>  Control 260       173         0.6653846
#>    Total 445       358         0.8044944
#> 
#> Covariate importance and t-tests for matched pairs:
#>           Import.Treat Import.Y Import.Total std.estimate       t p.value
#> I(educ^2)        1.931   1.5228        3.453     -0.04903 -0.8916  0.3732
#> educ             2.099   1.2121        3.311     -0.05483 -0.9577  0.3389
#> black            0.705   1.8424        2.547     -0.02326 -0.5383  0.5907
#> I(re74^2)        0.353   1.6415        1.994      0.07581  2.0955  0.0369
#> u75              0.852   0.9435        1.796     -0.06655 -1.8144  0.0705
#> hisp             1.731   0.0404        1.771      0.02042  0.8161  0.4150
#> nodegr           1.090   0.5011        1.591      0.03496  1.0914  0.2759
#> re74             0.280   1.1019        1.382      0.07979  1.7483  0.0813
#> re75             0.642   0.5903        1.232      0.06147  1.3171  0.1887
#> age              0.237   0.6729        0.910      0.00896  0.1374  0.8908
#> married          0.646   0.1406        0.787      0.04627  1.0000  0.3180
#> I(re75^2)        0.390   0.3817        0.772      0.05125  1.0364  0.3007
#> I(age^2)         0.232   0.5096        0.742      0.00297  0.0438  0.9651
#> u74              0.184   0.0702        0.254      0.03913  0.7495  0.4541
#>             ci.min  ci.max PercentMatched
#> I(educ^2) -0.15719 0.05913           60.4
#> educ      -0.16744 0.05778           60.1
#> black     -0.10826 0.06173           91.0
#> I(re74^2)  0.00465 0.14696           86.7
#> u75       -0.13870 0.00559           89.3
#> hisp      -0.02879 0.06963           98.3
#> nodegr    -0.02804 0.09797           93.9
#> re74      -0.00998 0.16956           77.2
#> re75      -0.03032 0.15326           78.0
#> age       -0.11922 0.13713           44.8
#> married   -0.04474 0.13728           89.6
#> I(re75^2) -0.04602 0.14852           88.7
#> I(age^2)  -0.13062 0.13656           49.7
#> u74       -0.06356 0.14183           81.5

24.3.3 一对一匹配ATE

average treatment effect

Code
rr.ate <- Match(Y = lalonde$re78, 
                Tr = lalonde$treat, 
                X = lalonde$lr_ps,
                M = 1,
                estimand = 'ATE')
summary(rr.ate)
#> 
#> Estimate...  2013.3 
#> AI SE......  817.76 
#> T-stat.....  2.4619 
#> p.val......  0.013819 
#> 
#> Original number of observations..............  445 
#> Original number of treated obs...............  185 
#> Matched number of observations...............  445 
#> Matched number of observations  (unweighted).  756

24.3.4 一对多匹配 (ATT)

Code
rr2 <- Match(Y = lalonde$re78,      
             Tr = lalonde$treat, 
             X = lalonde$lr_ps,
             M = 1, 
             ties = TRUE, 
             replace = TRUE,
             estimand = 'ATT')
summary(rr2) # The default estimate is ATT here
#> 
#> Estimate...  2153.3 
#> AI SE......  825.4 
#> T-stat.....  2.6088 
#> p.val......  0.0090858 
#> 
#> Original number of observations..............  445 
#> Original number of treated obs...............  185 
#> Matched number of observations...............  185 
#> Matched number of observations  (unweighted).  346

24.3.5 MachIt

Code
MatchIt::matchit(method = "nearest")
MatchIt::matchit(method = 'optimal')
MatchIt::matchit(method = 'full')
MatchIt::matchit(method = 'quick')
MatchIt::matchit(method = 'genetic')
MatchIt::matchit(method = 'exact')
MatchIt::matchit(method = 'subclass')
Code
matchit.out <- MatchIt::matchit(lalonde_formu, data = lalonde )
summary(matchit.out)
#> 
#> Call:
#> MatchIt::matchit(formula = lalonde_formu, data = lalonde)
#> 
#> Summary of Balance for All Data:
#>           Means Treated Means Control Std. Mean Diff. Var. Ratio eCDF Mean
#> distance         0.4468        0.3936          0.4533     1.2101    0.1340
#> age             25.8162       25.0538          0.1066     1.0278    0.0254
#> I(age^2)       717.3946      677.3154          0.0929     1.0115    0.0254
#> educ            10.3459       10.0885          0.1281     1.5513    0.0287
#> I(educ^2)      111.0595      104.3731          0.1701     1.6625    0.0287
#> black            0.8432        0.8269          0.0449          .    0.0163
#> hisp             0.0595        0.1077         -0.2040          .    0.0482
#> married          0.1892        0.1538          0.0902          .    0.0353
#> nodegr           0.7081        0.8346         -0.2783          .    0.1265
#> re74          2095.5740     2107.0268         -0.0023     0.7381    0.0192
#> I(re74^2) 28141433.9907 36667413.1577         -0.0747     0.5038    0.0192
#> re75          1532.0556     1266.9092          0.0824     1.0763    0.0508
#> I(re75^2) 12654752.6909 11196530.0057          0.0260     1.4609    0.0508
#> u74              0.7081        0.7500         -0.0921          .    0.0419
#> u75              0.6000        0.6846         -0.1727          .    0.0846
#>           eCDF Max
#> distance    0.2244
#> age         0.0652
#> I(age^2)    0.0652
#> educ        0.1265
#> I(educ^2)   0.1265
#> black       0.0163
#> hisp        0.0482
#> married     0.0353
#> nodegr      0.1265
#> re74        0.0471
#> I(re74^2)   0.0471
#> re75        0.1075
#> I(re75^2)   0.1075
#> u74         0.0419
#> u75         0.0846
#> 
#> Summary of Balance for Matched Data:
#>           Means Treated Means Control Std. Mean Diff. Var. Ratio eCDF Mean
#> distance         0.4468        0.4284          0.1571     1.3077    0.0387
#> age             25.8162       25.1351          0.0952     1.1734    0.0243
#> I(age^2)       717.3946      675.1676          0.0979     1.1512    0.0243
#> educ            10.3459       10.2649          0.0403     1.2869    0.0174
#> I(educ^2)      111.0595      108.4919          0.0653     1.3938    0.0174
#> black            0.8432        0.8486         -0.0149          .    0.0054
#> hisp             0.0595        0.0703         -0.0457          .    0.0108
#> married          0.1892        0.1892          0.0000          .    0.0000
#> nodegr           0.7081        0.7676         -0.1308          .    0.0595
#> re74          2095.5740     1741.2109          0.0725     1.5797    0.0146
#> I(re74^2) 28141433.9907 18066538.6428          0.0883     3.5436    0.0146
#> re75          1532.0556     1314.8073          0.0675     1.3933    0.0264
#> I(re75^2) 12654752.6909  9126579.7979          0.0630     3.4873    0.0264
#> u74              0.7081        0.7243         -0.0357          .    0.0162
#> u75              0.6000        0.6108         -0.0221          .    0.0108
#>           eCDF Max Std. Pair Dist.
#> distance    0.1189          0.1585
#> age         0.0541          0.8159
#> I(age^2)    0.0541          0.7701
#> educ        0.0595          0.7662
#> I(educ^2)   0.0595          0.7604
#> black       0.0054          0.5798
#> hisp        0.0108          0.2286
#> married     0.0000          0.2378
#> nodegr      0.0595          0.5588
#> re74        0.0432          0.6080
#> I(re74^2)   0.0432          0.3620
#> re75        0.0649          0.7292
#> I(re75^2)   0.0649          0.3690
#> u74         0.0162          0.7728
#> u75         0.0108          0.7282
#> 
#> Sample Sizes:
#>           Control Treated
#> All           260     185
#> Matched       185     185
#> Unmatched      75       0
#> Discarded       0       0
Code
# Same as above but calculate average treatment effect
rr.ate <- Match(Y = lalonde$re78, 
                Tr = lalonde$treat, 
                X = lalonde$lr_ps,
                M = 1,
                ties = FALSE, 
                replace = FALSE, 
                estimand='ATE')
summary(rr.ate) # Here the estimate is ATE
#> 
#> Estimate...  2036.6 
#> SE.........  501.71 
#> T-stat.....  4.0592 
#> p.val......  4.9233e-05 
#> 
#> Original number of observations..............  445 
#> Original number of treated obs...............  185 
#> Matched number of observations...............  370 
#> Matched number of observations  (unweighted).  370
Code
## Genetic Matching
rr.gen <- GenMatch(Tr = lalonde$treat, 
                   X = lalonde$lr_ps, 
                   BalanceMatrix = lalonde[,all.vars(lalonde_formu)[-1]],
                   estimand = 'ATE', 
                   M = 1, 
                   pop.size = 16,
                   print.level = 0)
rr.gen.mout <- Match(Y = lalonde$re78, 
                     Tr = lalonde$treat, 
                     X = lalonde$lr_ps,
                     estimand = 'ATE',
                     Weight.matrix = rr.gen)
summary(rr.gen.mout)
#> 
#> Estimate...  2086.5 
#> AI SE......  815.65 
#> T-stat.....  2.5581 
#> p.val......  0.010524 
#> 
#> Original number of observations..............  445 
#> Original number of treated obs...............  185 
#> Matched number of observations...............  445 
#> Matched number of observations  (unweighted).  671
Code
## Partial exact matching
rr2 <- Matchby(Y = lalonde$re78, 
               Tr = lalonde$treat, 
               X = lalonde$lr_ps, 
               by = factor(lalonde$nodegr),
               print.level = 0)
summary(rr2)
#> 
#> Estimate...  2014.4 
#> SE.........  702.05 
#> T-stat.....  2.8693 
#> p.val......  0.0041132 
#> 
#> Original number of observations..............  445 
#> Original number of treated obs...............  185 
#> Matched number of observations...............  185 
#> Matched number of observations  (unweighted).  185
Code
## Partial exact matching on two covariates
rr3 <- Matchby(Y = lalonde$re78, 
               Tr = lalonde$treat, 
               X = lalonde$lr_ps, 
               by = lalonde[,c('nodegr','married')],
               print.level = 0)
summary(rr3)
#> 
#> Estimate...  1894 
#> SE.........  705.3 
#> T-stat.....  2.6853 
#> p.val......  0.0072455 
#> 
#> Original number of observations..............  445 
#> Original number of treated obs...............  185 
#> Matched number of observations...............  185 
#> Matched number of observations  (unweighted).  185

24.4 示例

变量名 描述
age 年龄
educ 受教育年限
black 分类变量,1为黑人
hisp 分类变量,1为西班牙裔
married 分类变量,1为已婚
nodegr 分类变量,1为有高中学历证书
re74 1974年的收入
re75 1975年的收入
re78 1978年的收入
u74 分类变量,1为1974年收入为零
u75 分类变量,1为1975年收入为零
treat 分类变量,1为实验组

24.4.1 估计倾向值分数

Code
attach(lalonde)
glm_ps <- glm(
    formula = treat ~ age + educ + black + hisp + married + nodegr + re74  + re75,
    family = binomial(link = 'logit')
)

psm1 <- Match(Y=re78,
             Tr = treat,
             X=glm_ps$fitted.values,
             estimand = "ATT",
             M=1,
             replace = TRUE)
summary(psm1)
#> 
#> Estimate...  2624.3 
#> AI SE......  802.19 
#> T-stat.....  3.2714 
#> p.val......  0.0010702 
#> 
#> Original number of observations..............  445 
#> Original number of treated obs...............  185 
#> Matched number of observations...............  185 
#> Matched number of observations  (unweighted).  344

如上所示,使用1对1样本可替代匹配法,实验组平均效应为2624.3,因果效应的标准误为803.19,t值为3.2714,p值为0.0010702<0.05,表明估计的实验组平均处理效应有统计学差异。

Code
psm2 <- Match(Y=re78,
             Tr = treat,
             X=glm_ps$fitted.values,
             estimand = "ATT",
             M=1,
             replace = FALSE)
summary(psm2)
#> 
#> Estimate...  1996.3 
#> SE.........  643.88 
#> T-stat.....  3.1005 
#> p.val......  0.0019319 
#> 
#> Original number of observations..............  445 
#> Original number of treated obs...............  185 
#> Matched number of observations...............  185 
#> Matched number of observations  (unweighted).  185

24.4.2 检验平衡

受试者个体同质性,是否随机分配

协变量分布是否平衡,是否重合:

age 为例,实验组匹配前25.816匹配后25.816,对照组匹配前25.054匹配后25.692 ,匹配后实验组与对照组更接近了;T-test p-value > 0.05 ,表示匹配前后age 均值无统计学差异;KS Bootstrap p-value > 0.05 ,表示匹配前后age 分布无统计学差异

***** (V1) age *****                Before Matching          After Matching 

mean   treatment........          25.816             25.816  
mean control..........     25.054            25.692 
std mean diff.........     10.655            1.7342 

mean raw eQQ diff.....    0.94054           0.73837  
med  raw eQQ diff.....          1                 0  
max  raw eQQ diff.....          7                 9   

mean eCDF diff........   0.025364          0.021893  
med  eCDF diff........   0.022193          0.020349  
max  eCDF diff........   0.065177          0.061047   

var ratio (Tr/Co).....     1.0278             1.083  
T-test p-value........    0.26594           0.84975  
KS Bootstrap p-value..      0.526             0.355  
KS Naive p-value......     0.7481           0.54314  
KS Statistic..........   0.065177          0.061047 
Code
check_balance <- MatchBalance(
    formul = treat ~ age + educ + black + hisp + married + nodegr + re74  + re75,
    match.out = psm1,
    nboots = 1000,data = lalonde
)
#> 
#> ***** (V1) age *****
#>                        Before Matching        After Matching
#> mean treatment........     25.816             25.816 
#> mean control..........     25.054             25.692 
#> std mean diff.........     10.655             1.7342 
#> 
#> mean raw eQQ diff.....    0.94054            0.73837 
#> med  raw eQQ diff.....          1                  0 
#> max  raw eQQ diff.....          7                  9 
#> 
#> mean eCDF diff........   0.025364           0.021893 
#> med  eCDF diff........   0.022193           0.020349 
#> max  eCDF diff........   0.065177           0.061047 
#> 
#> var ratio (Tr/Co).....     1.0278              1.083 
#> T-test p-value........    0.26594            0.84975 
#> KS Bootstrap p-value..      0.514              0.364 
#> KS Naive p-value......     0.7481            0.54314 
#> KS Statistic..........   0.065177           0.061047 
#> 
#> 
#> ***** (V2) educ *****
#>                        Before Matching        After Matching
#> mean treatment........     10.346             10.346 
#> mean control..........     10.088             10.146 
#> std mean diff.........     12.806             9.9664 
#> 
#> mean raw eQQ diff.....    0.40541            0.23256 
#> med  raw eQQ diff.....          0                  0 
#> max  raw eQQ diff.....          2                  2 
#> 
#> mean eCDF diff........   0.028698           0.016611 
#> med  eCDF diff........   0.012682           0.010174 
#> max  eCDF diff........    0.12651           0.061047 
#> 
#> var ratio (Tr/Co).....     1.5513             1.2344 
#> T-test p-value........    0.15017             0.1842 
#> KS Bootstrap p-value..      0.003              0.183 
#> KS Naive p-value......   0.062873            0.54314 
#> KS Statistic..........    0.12651           0.061047 
#> 
#> 
#> ***** (V3) black *****
#>                        Before Matching        After Matching
#> mean treatment........    0.84324            0.84324 
#> mean control..........    0.82692            0.86847 
#> std mean diff.........     4.4767            -6.9194 
#> 
#> mean raw eQQ diff.....   0.016216           0.026163 
#> med  raw eQQ diff.....          0                  0 
#> max  raw eQQ diff.....          1                  1 
#> 
#> mean eCDF diff........  0.0081601           0.013081 
#> med  eCDF diff........  0.0081601           0.013081 
#> max  eCDF diff........    0.01632           0.026163 
#> 
#> var ratio (Tr/Co).....    0.92503             1.1572 
#> T-test p-value........    0.64736            0.40214 
#> 
#> 
#> ***** (V4) hisp *****
#>                        Before Matching        After Matching
#> mean treatment........   0.059459           0.059459 
#> mean control..........    0.10769            0.04955 
#> std mean diff.........    -20.341             4.1792 
#> 
#> mean raw eQQ diff.....   0.048649           0.011628 
#> med  raw eQQ diff.....          0                  0 
#> max  raw eQQ diff.....          1                  1 
#> 
#> mean eCDF diff........   0.024116           0.005814 
#> med  eCDF diff........   0.024116           0.005814 
#> max  eCDF diff........   0.048233           0.011628 
#> 
#> var ratio (Tr/Co).....    0.58288             1.1875 
#> T-test p-value........   0.064043            0.46063 
#> 
#> 
#> ***** (V5) married *****
#>                        Before Matching        After Matching
#> mean treatment........    0.18919            0.18919 
#> mean control..........    0.15385            0.18423 
#> std mean diff.........     8.9995             1.2617 
#> 
#> mean raw eQQ diff.....   0.037838           0.026163 
#> med  raw eQQ diff.....          0                  0 
#> max  raw eQQ diff.....          1                  1 
#> 
#> mean eCDF diff........   0.017672           0.013081 
#> med  eCDF diff........   0.017672           0.013081 
#> max  eCDF diff........   0.035343           0.026163 
#> 
#> var ratio (Tr/Co).....     1.1802             1.0207 
#> T-test p-value........    0.33425            0.89497 
#> 
#> 
#> ***** (V6) nodegr *****
#>                        Before Matching        After Matching
#> mean treatment........    0.70811            0.70811 
#> mean control..........    0.83462            0.76757 
#> std mean diff.........    -27.751            -13.043 
#> 
#> mean raw eQQ diff.....    0.12432           0.043605 
#> med  raw eQQ diff.....          0                  0 
#> max  raw eQQ diff.....          1                  1 
#> 
#> mean eCDF diff........   0.063254           0.021802 
#> med  eCDF diff........   0.063254           0.021802 
#> max  eCDF diff........    0.12651           0.043605 
#> 
#> var ratio (Tr/Co).....     1.4998             1.1585 
#> T-test p-value........  0.0020368          0.0071385 
#> 
#> 
#> ***** (V7) re74 *****
#>                        Before Matching        After Matching
#> mean treatment........     2095.6             2095.6 
#> mean control..........       2107             2193.3 
#> std mean diff.........   -0.23437            -2.0004 
#> 
#> mean raw eQQ diff.....     487.98             869.16 
#> med  raw eQQ diff.....          0                  0 
#> max  raw eQQ diff.....       8413              10305 
#> 
#> mean eCDF diff........   0.019223           0.054701 
#> med  eCDF diff........     0.0158           0.050872 
#> max  eCDF diff........   0.047089            0.12209 
#> 
#> var ratio (Tr/Co).....     0.7381            0.75054 
#> T-test p-value........    0.98186            0.84996 
#> KS Bootstrap p-value..      0.575         < 2.22e-16 
#> KS Naive p-value......    0.97023           0.011858 
#> KS Statistic..........   0.047089            0.12209 
#> 
#> 
#> ***** (V8) re75 *****
#>                        Before Matching        After Matching
#> mean treatment........     1532.1             1532.1 
#> mean control..........     1266.9             2179.9 
#> std mean diff.........     8.2363            -20.125 
#> 
#> mean raw eQQ diff.....     367.61             590.34 
#> med  raw eQQ diff.....          0                  0 
#> max  raw eQQ diff.....     2110.2             8092.9 
#> 
#> mean eCDF diff........   0.050834           0.050338 
#> med  eCDF diff........   0.061954           0.049419 
#> max  eCDF diff........    0.10748           0.098837 
#> 
#> var ratio (Tr/Co).....     1.0763            0.56563 
#> T-test p-value........    0.38527           0.079002 
#> KS Bootstrap p-value..      0.049               0.01 
#> KS Naive p-value......    0.16449           0.069435 
#> KS Statistic..........    0.10748           0.098837 
#> 
#> 
#> Before Matching Minimum p.value: 0.0020368 
#> Variable Name(s): nodegr  Number(s): 6 
#> 
#> After Matching Minimum p.value: < 2.22e-16 
#> Variable Name(s): re74  Number(s): 7
Code
# age 变平衡了
qqplot(lalonde$age[psm1$index.control],lalonde$age[psm1$index.treated])
abline(a=0,b=1)

Code

# re74 更不平衡了
qqplot(lalonde$re74[psm1$index.control],lalonde$re74[psm1$index.treated])
abline(a=0,b=1)

Code
# # The covariates we want to match on
x <- cbind(age , educ , black , hisp , married , nodegr , re74  , re75)

# The covariates we want to obtain balance on
BalanceMatrix = x
set.seed(100)



# Genetic Matching 自动适配平衡
gen_match <- GenMatch(Tr=treat,
                      X=glm_ps$fitted.values,
                      BalanceMatrix = x,
                      estimand = "ATT")
#> 
#> 
#> Sat Aug 31 15:02:33 2024
#> Domains:
#>  0.000000e+00   <=  X1   <=    1.000000e+03 
#> 
#> Data Type: Floating Point
#> Operators (code number, name, population) 
#>  (1) Cloning...........................  15
#>  (2) Uniform Mutation..................  12
#>  (3) Boundary Mutation.................  12
#>  (4) Non-Uniform Mutation..............  12
#>  (5) Polytope Crossover................  12
#>  (6) Simple Crossover..................  12
#>  (7) Whole Non-Uniform Mutation........  12
#>  (8) Heuristic Crossover...............  12
#>  (9) Local-Minimum Crossover...........  0
#> 
#> SOFT Maximum Number of Generations: 100
#> Maximum Nonchanging Generations: 4
#> Population size       : 100
#> Convergence Tolerance: 1.000000e-03
#> 
#> Not Using the BFGS Derivative Based Optimizer on the Best Individual Each Generation.
#> Not Checking Gradients before Stopping.
#> Using Out of Bounds Individuals.
#> 
#> Maximization Problem.
#> GENERATION: 0 (initializing the population)
#> Lexical Fit..... 1.180299e-02  1.180299e-02  1.155612e-01  1.186261e-01  1.822343e-01  3.842491e-01  3.842491e-01  4.144313e-01  4.144313e-01  4.610718e-01  5.977650e-01  7.731052e-01  7.731052e-01  8.523060e-01  9.184104e-01  9.749549e-01  
#> #unique......... 100, #Total UniqueCount: 100
#> var 1:
#> best............ 3.729259e+01
#> mean............ 4.810920e+02
#> variance........ 7.636997e+04
#> 
#> GENERATION: 1
#> Lexical Fit..... 1.180299e-02  1.180299e-02  1.155612e-01  1.186261e-01  1.822343e-01  3.842491e-01  3.842491e-01  4.144313e-01  4.144313e-01  4.610718e-01  5.977650e-01  7.731052e-01  7.731052e-01  8.523060e-01  9.184104e-01  9.749549e-01  
#> #unique......... 61, #Total UniqueCount: 161
#> var 1:
#> best............ 3.729259e+01
#> mean............ 2.015230e+02
#> variance........ 6.039634e+04
#> 
#> GENERATION: 2
#> Lexical Fit..... 1.180299e-02  1.180299e-02  1.155612e-01  1.186261e-01  1.822343e-01  3.842491e-01  3.842491e-01  4.144313e-01  4.144313e-01  4.610718e-01  5.977650e-01  7.731052e-01  7.731052e-01  8.523060e-01  9.184104e-01  9.749549e-01  
#> #unique......... 56, #Total UniqueCount: 217
#> var 1:
#> best............ 3.729259e+01
#> mean............ 9.873041e+01
#> variance........ 3.291501e+04
#> 
#> GENERATION: 3
#> Lexical Fit..... 1.180299e-02  1.180299e-02  1.155612e-01  1.186261e-01  1.822343e-01  3.842491e-01  3.842491e-01  4.144313e-01  4.144313e-01  4.610718e-01  5.977650e-01  7.731052e-01  7.731052e-01  8.523060e-01  9.184104e-01  9.749549e-01  
#> #unique......... 56, #Total UniqueCount: 273
#> var 1:
#> best............ 3.729259e+01
#> mean............ 1.042351e+02
#> variance........ 2.899583e+04
#> 
#> GENERATION: 4
#> Lexical Fit..... 1.180299e-02  1.180299e-02  1.155612e-01  1.186261e-01  1.822343e-01  3.842491e-01  3.842491e-01  4.144313e-01  4.144313e-01  4.610718e-01  5.977650e-01  7.731052e-01  7.731052e-01  8.523060e-01  9.184104e-01  9.749549e-01  
#> #unique......... 58, #Total UniqueCount: 331
#> var 1:
#> best............ 3.729259e+01
#> mean............ 1.050645e+02
#> variance........ 2.721152e+04
#> 
#> GENERATION: 5
#> Lexical Fit..... 1.541148e-02  2.397991e-02  2.418950e-02  2.418950e-02  1.729273e-01  1.956942e-01  3.942807e-01  3.942807e-01  6.059370e-01  6.059370e-01  6.074259e-01  6.137460e-01  8.414918e-01  8.538318e-01  9.621995e-01  9.621995e-01  
#> #unique......... 58, #Total UniqueCount: 389
#> var 1:
#> best............ 2.941808e-01
#> mean............ 8.418536e+01
#> variance........ 1.578216e+04
#> 
#> GENERATION: 6
#> Lexical Fit..... 4.145421e-02  4.145421e-02  4.499068e-02  4.499068e-02  1.608408e-01  1.806532e-01  2.764640e-01  4.729068e-01  4.729068e-01  6.940013e-01  6.940013e-01  7.762162e-01  8.635587e-01  8.667712e-01  8.667712e-01  9.507460e-01  
#> #unique......... 62, #Total UniqueCount: 451
#> var 1:
#> best............ 7.310932e-02
#> mean............ 9.403781e+01
#> variance........ 2.447312e+04
#> 
#> GENERATION: 7
#> Lexical Fit..... 4.177047e-02  4.177047e-02  4.419097e-02  6.248288e-02  1.198352e-01  2.880778e-01  3.513309e-01  4.150287e-01  4.150287e-01  8.316757e-01  8.316757e-01  8.667712e-01  8.667712e-01  8.836843e-01  8.920699e-01  8.920699e-01  
#> #unique......... 61, #Total UniqueCount: 512
#> var 1:
#> best............ 8.760968e-02
#> mean............ 1.016562e+02
#> variance........ 4.437002e+04
#> 
#> GENERATION: 8
#> Lexical Fit..... 4.177047e-02  4.177047e-02  4.559074e-02  6.427954e-02  1.215371e-01  2.846955e-01  3.519026e-01  4.150287e-01  4.150287e-01  8.316757e-01  8.316757e-01  8.667712e-01  8.667712e-01  8.794500e-01  8.948772e-01  8.948772e-01  
#> #unique......... 60, #Total UniqueCount: 572
#> var 1:
#> best............ 8.627748e-02
#> mean............ 7.473970e+01
#> variance........ 2.676016e+04
#> 
#> GENERATION: 9
#> Lexical Fit..... 4.177047e-02  4.177047e-02  4.559074e-02  6.427954e-02  1.215371e-01  2.846955e-01  3.519026e-01  4.150287e-01  4.150287e-01  8.316757e-01  8.316757e-01  8.667712e-01  8.667712e-01  8.794500e-01  8.948772e-01  8.948772e-01  
#> #unique......... 55, #Total UniqueCount: 627
#> var 1:
#> best............ 8.627748e-02
#> mean............ 4.192386e+01
#> variance........ 1.737331e+04
#> 
#> GENERATION: 10
#> Lexical Fit..... 4.177047e-02  4.177047e-02  4.559074e-02  6.427954e-02  1.215371e-01  2.846955e-01  3.519026e-01  4.150287e-01  4.150287e-01  8.316757e-01  8.316757e-01  8.667712e-01  8.667712e-01  8.794500e-01  8.948772e-01  8.948772e-01  
#> #unique......... 51, #Total UniqueCount: 678
#> var 1:
#> best............ 8.627748e-02
#> mean............ 6.242257e+01
#> variance........ 2.722981e+04
#> 
#> GENERATION: 11
#> Lexical Fit..... 4.177047e-02  4.177047e-02  4.559074e-02  6.427954e-02  1.215371e-01  2.846955e-01  3.519026e-01  4.150287e-01  4.150287e-01  8.316757e-01  8.316757e-01  8.667712e-01  8.667712e-01  8.794500e-01  8.948772e-01  8.948772e-01  
#> #unique......... 49, #Total UniqueCount: 727
#> var 1:
#> best............ 8.627748e-02
#> mean............ 5.280735e+01
#> variance........ 1.784927e+04
#> 
#> GENERATION: 12
#> Lexical Fit..... 4.177047e-02  4.177047e-02  4.559074e-02  6.427954e-02  1.215371e-01  2.846955e-01  3.519026e-01  4.150287e-01  4.150287e-01  8.316757e-01  8.316757e-01  8.667712e-01  8.667712e-01  8.794500e-01  8.948772e-01  8.948772e-01  
#> #unique......... 50, #Total UniqueCount: 777
#> var 1:
#> best............ 8.627748e-02
#> mean............ 5.682777e+01
#> variance........ 2.131922e+04
#> 
#> GENERATION: 13
#> Lexical Fit..... 4.177047e-02  4.177047e-02  4.559074e-02  6.427954e-02  1.215371e-01  2.846955e-01  3.519026e-01  4.150287e-01  4.150287e-01  8.316757e-01  8.316757e-01  8.667712e-01  8.667712e-01  8.794500e-01  8.948772e-01  8.948772e-01  
#> #unique......... 53, #Total UniqueCount: 830
#> var 1:
#> best............ 8.627748e-02
#> mean............ 3.877940e+01
#> variance........ 1.599211e+04
#> 
#> 'wait.generations' limit reached.
#> No significant improvement in 4 generations.
#> 
#> Solution Lexical Fitness Value:
#> 4.177047e-02  4.177047e-02  4.559074e-02  6.427954e-02  1.215371e-01  2.846955e-01  3.519026e-01  4.150287e-01  4.150287e-01  8.316757e-01  8.316757e-01  8.667712e-01  8.667712e-01  8.794500e-01  8.948772e-01  8.948772e-01  
#> 
#> Parameters at the Solution:
#> 
#>  X[ 1] : 8.627748e-02
#> 
#> Solution Found Generation 8
#> Number of Generations Run 13
#> 
#> Sat Aug 31 15:02:38 2024
#> Total run time : 0 hours 0 minutes and 5 seconds

PSM <-  Match(Y=re78,
             Tr = treat,
             X=glm_ps$fitted.values,
             estimand = "ATT",
             Weight.matrix = gen_match,
             replace = TRUE,)
summary(PSM)
#> 
#> Estimate...  2439.3 
#> AI SE......  813.4 
#> T-stat.....  2.9989 
#> p.val......  0.0027099 
#> 
#> Original number of observations..............  445 
#> Original number of treated obs...............  185 
#> Matched number of observations...............  185 
#> Matched number of observations  (unweighted).  489

check_balance2 <- MatchBalance(
    formul = treat ~ age + educ + black + hisp + married + nodegr + re74  + re75,
    match.out = PSM,
    nboots = 1000,data = lalonde
)
#> 
#> ***** (V1) age *****
#>                        Before Matching        After Matching
#> mean treatment........     25.816             25.816 
#> mean control..........     25.054             25.217 
#> std mean diff.........     10.655             8.3769 
#> 
#> mean raw eQQ diff.....    0.94054            0.46217 
#> med  raw eQQ diff.....          1                  0 
#> max  raw eQQ diff.....          7                  9 
#> 
#> mean eCDF diff........   0.025364           0.012952 
#> med  eCDF diff........   0.022193           0.010225 
#> max  eCDF diff........   0.065177            0.03681 
#> 
#> var ratio (Tr/Co).....     1.0278             1.2224 
#> T-test p-value........    0.26594             0.3519 
#> KS Bootstrap p-value..      0.503              0.734 
#> KS Naive p-value......     0.7481            0.89488 
#> KS Statistic..........   0.065177            0.03681 
#> 
#> 
#> ***** (V2) educ *****
#>                        Before Matching        After Matching
#> mean treatment........     10.346             10.346 
#> mean control..........     10.088             10.188 
#> std mean diff.........     12.806             7.8605 
#> 
#> mean raw eQQ diff.....    0.40541            0.17587 
#> med  raw eQQ diff.....          0                  0 
#> max  raw eQQ diff.....          2                  2 
#> 
#> mean eCDF diff........   0.028698           0.012562 
#> med  eCDF diff........   0.012682           0.010225 
#> max  eCDF diff........    0.12651            0.03681 
#> 
#> var ratio (Tr/Co).....     1.5513             1.2791 
#> T-test p-value........    0.15017             0.2847 
#> KS Bootstrap p-value..      0.011              0.456 
#> KS Naive p-value......   0.062873            0.89488 
#> KS Statistic..........    0.12651            0.03681 
#> 
#> 
#> ***** (V3) black *****
#>                        Before Matching        After Matching
#> mean treatment........    0.84324            0.84324 
#> mean control..........    0.82692              0.868 
#> std mean diff.........     4.4767            -6.7917 
#> 
#> mean raw eQQ diff.....   0.016216           0.034765 
#> med  raw eQQ diff.....          0                  0 
#> max  raw eQQ diff.....          1                  1 
#> 
#> mean eCDF diff........  0.0081601           0.017382 
#> med  eCDF diff........  0.0081601           0.017382 
#> max  eCDF diff........    0.01632           0.034765 
#> 
#> var ratio (Tr/Co).....    0.92503             1.1537 
#> T-test p-value........    0.64736            0.41503 
#> 
#> 
#> ***** (V4) hisp *****
#>                        Before Matching        After Matching
#> mean treatment........   0.059459           0.059459 
#> mean control..........    0.10769           0.057132 
#> std mean diff.........    -20.341            0.98148 
#> 
#> mean raw eQQ diff.....   0.048649            0.00818 
#> med  raw eQQ diff.....          0                  0 
#> max  raw eQQ diff.....          1                  1 
#> 
#> mean eCDF diff........   0.024116            0.00409 
#> med  eCDF diff........   0.024116            0.00409 
#> max  eCDF diff........   0.048233            0.00818 
#> 
#> var ratio (Tr/Co).....    0.58288             1.0382 
#> T-test p-value........   0.064043            0.86677 
#> 
#> 
#> ***** (V5) married *****
#>                        Before Matching        After Matching
#> mean treatment........    0.18919            0.18919 
#> mean control..........    0.15385            0.18101 
#> std mean diff.........     8.9995             2.0837 
#> 
#> mean raw eQQ diff.....   0.037838           0.018405 
#> med  raw eQQ diff.....          0                  0 
#> max  raw eQQ diff.....          1                  1 
#> 
#> mean eCDF diff........   0.017672          0.0092025 
#> med  eCDF diff........   0.017672          0.0092025 
#> max  eCDF diff........   0.035343           0.018405 
#> 
#> var ratio (Tr/Co).....     1.1802             1.0348 
#> T-test p-value........    0.33425            0.83168 
#> 
#> 
#> ***** (V6) nodegr *****
#>                        Before Matching        After Matching
#> mean treatment........    0.70811            0.70811 
#> mean control..........    0.83462            0.75333 
#> std mean diff.........    -27.751            -9.9207 
#> 
#> mean raw eQQ diff.....    0.12432           0.034765 
#> med  raw eQQ diff.....          0                  0 
#> max  raw eQQ diff.....          1                  1 
#> 
#> mean eCDF diff........   0.063254           0.017382 
#> med  eCDF diff........   0.063254           0.017382 
#> max  eCDF diff........    0.12651           0.034765 
#> 
#> var ratio (Tr/Co).....     1.4998             1.1123 
#> T-test p-value........  0.0020368            0.04177 
#> 
#> 
#> ***** (V7) re74 *****
#>                        Before Matching        After Matching
#> mean treatment........     2095.6             2095.6 
#> mean control..........       2107             2018.1 
#> std mean diff.........   -0.23437             1.5857 
#> 
#> mean raw eQQ diff.....     487.98             648.91 
#> med  raw eQQ diff.....          0                  0 
#> max  raw eQQ diff.....       8413              10305 
#> 
#> mean eCDF diff........   0.019223           0.037077 
#> med  eCDF diff........     0.0158           0.033742 
#> max  eCDF diff........   0.047089           0.087935 
#> 
#> var ratio (Tr/Co).....     0.7381            0.86668 
#> T-test p-value........    0.98186            0.87945 
#> KS Bootstrap p-value..      0.555              0.002 
#> KS Naive p-value......    0.97023           0.045591 
#> KS Statistic..........   0.047089           0.087935 
#> 
#> 
#> ***** (V8) re75 *****
#>                        Before Matching        After Matching
#> mean treatment........     1532.1             1532.1 
#> mean control..........     1266.9             2079.5 
#> std mean diff.........     8.2363            -17.005 
#> 
#> mean raw eQQ diff.....     367.61             532.46 
#> med  raw eQQ diff.....          0                  0 
#> max  raw eQQ diff.....     2110.2             8092.9 
#> 
#> mean eCDF diff........   0.050834           0.040137 
#> med  eCDF diff........   0.061954             0.0409 
#> max  eCDF diff........    0.10748           0.083845 
#> 
#> var ratio (Tr/Co).....     1.0763            0.64518 
#> T-test p-value........    0.38527            0.12154 
#> KS Bootstrap p-value..      0.038               0.02 
#> KS Naive p-value......    0.16449            0.06428 
#> KS Statistic..........    0.10748           0.083845 
#> 
#> 
#> Before Matching Minimum p.value: 0.0020368 
#> Variable Name(s): nodegr  Number(s): 6 
#> 
#> After Matching Minimum p.value: 0.002 
#> Variable Name(s): re74  Number(s): 7

# age 变平衡了
qqplot(lalonde$age[PSM$index.control],lalonde$age[PSM$index.treated])
abline(a=0,b=1)

Code

# re74 也变平衡了
qqplot(lalonde$re74[PSM$index.control],lalonde$re74[PSM$index.treated])
abline(a=0,b=1)

24.4.3 敏感性分析

Code
library(rbounds)
psens(x =lalonde[PSM$index.treated,"re78"],
      y =lalonde[PSM$index.control,"re78"] ,
      Gamma = 2,
      GammaInc = 0.1)
#> 
#>  Rosenbaum Sensitivity Test for Wilcoxon Signed Rank P-Value 
#>  
#> Unconfounded estimate ....  1e-04 
#> 
#>  Gamma Lower bound Upper bound
#>    1.0       1e-04      0.0001
#>    1.1       0e+00      0.0015
#>    1.2       0e+00      0.0142
#>    1.3       0e+00      0.0693
#>    1.4       0e+00      0.2048
#>    1.5       0e+00      0.4152
#>    1.6       0e+00      0.6393
#>    1.7       0e+00      0.8140
#>    1.8       0e+00      0.9191
#>    1.9       0e+00      0.9699
#>    2.0       0e+00      0.9903
#> 
#>  Note: Gamma is Odds of Differential Assignment To
#>  Treatment Due to Unobserved Factors 
#> 
# psens

PSM(Y=re78)使用psens()进行Wilcoxon 符合秩检验,当τ=1.3,p值就大于0.05了,说明处理发生比为1.3时,就可以改变原先对于处理效应的结论,也就是说,这个隐藏性偏差不必太大就可以改变原来的结论,因此分析结果对隐藏性排除的影响非常敏感,结论不可靠。

PSM(Y=re78)使用Hodges-Lehmann点估计检验法 hlsens() ,当τ=1.5,其95%置信区间包含零,说明此时处理效应是无效的。说明处理发生比为1.5时,隐藏性偏差就可以改变原来的结论,因此匹配后的结论不可靠。

Code
x = lalonde[PSM$index.treated, "re78"]
y = lalonde[PSM$index.control, "re78"]
hlsens(x, y,Gamma = 2,GammaInc = 0.1)
#> 
#>  Rosenbaum Sensitivity Test for Hodges-Lehmann Point Estimate 
#>  
#> Unconfounded estimate ....  1527.95 
#> 
#>  Gamma Lower bound Upper bound
#>    1.0 1527.900000      1527.9
#>    1.1  917.250000      1580.2
#>    1.2  608.050000      1918.7
#>    1.3  338.450000      2141.0
#>    1.4  114.850000      2407.3
#>    1.5   -0.050046      2631.4
#>    1.6 -154.050000      2850.3
#>    1.7 -378.150000      3072.7
#>    1.8 -545.350000      3258.1
#>    1.9 -706.350000      3474.2
#>    2.0 -867.650000      3678.9
#> 
#>  Note: Gamma is Odds of Differential Assignment To
#>  Treatment Due to Unobserved Factors 
#> 

共同支持域的查验

Code
sum(glm_ps$fitted.values[lalonde$treat==1]> 
        max(glm_ps$fitted.values[lalonde$treat==0]))
#> [1] 4

sum(glm_ps$fitted.values[lalonde$treat==1]< 
        min(glm_ps$fitted.values[lalonde$treat==0]))
#> [1] 0

丢弃的实验组样本共有4个。185-181

Code
attach(lalonde)
summary(PSM)
#> 
#> Estimate...  2439.3 
#> AI SE......  813.4 
#> T-stat.....  2.9989 
#> p.val......  0.0027099 
#> 
#> Original number of observations..............  445 
#> Original number of treated obs...............  185 
#> Matched number of observations...............  185 
#> Matched number of observations  (unweighted).  489
PSM_CS <-  Match(Y=re78,
             Tr = treat,
             X=glm_ps$fitted.values,
             estimand = "ATT",
             Weight.matrix = gen_match,
             replace = TRUE,
             CommonSupport = TRUE)
summary(PSM_CS)
#> 
#> Estimate...  2330 
#> AI SE......  821.6 
#> T-stat.....  2.836 
#> p.val......  0.0045684 
#> 
#> Original number of observations..............  430 
#> Original number of treated obs...............  181 
#> Matched number of observations...............  181 
#> Matched number of observations  (unweighted).  468
detach(lalonde)

有查验共同支持域的ATT(2330),与无查验共同支持域(2439.3)存在差异,因此必须重新改进倾向值分析。

24.4.4 MatchIt

24.4.4.1 匹配数据

Code
library(MatchIt)
NM <- MatchIt::matchit(
    formula = treat ~ age + educ + black + hisp + married + nodegr + re74  + re75,
    data = lalonde,
    method = "nearest", # 最近邻匹配
    ratio = 1, # 1:1
    replace = FALSE
)
summary(NM)
#> 
#> Call:
#> MatchIt::matchit(formula = treat ~ age + educ + black + hisp + 
#>     married + nodegr + re74 + re75, data = lalonde, method = "nearest", 
#>     replace = FALSE, ratio = 1)
#> 
#> Summary of Balance for All Data:
#>          Means Treated Means Control Std. Mean Diff. Var. Ratio eCDF Mean
#> distance        0.4377        0.4001          0.3935     1.0471    0.1117
#> age            25.8162       25.0538          0.1066     1.0278    0.0254
#> educ           10.3459       10.0885          0.1281     1.5513    0.0287
#> black           0.8432        0.8269          0.0449          .    0.0163
#> hisp            0.0595        0.1077         -0.2040          .    0.0482
#> married         0.1892        0.1538          0.0902          .    0.0353
#> nodegr          0.7081        0.8346         -0.2783          .    0.1265
#> re74         2095.5740     2107.0268         -0.0023     0.7381    0.0192
#> re75         1532.0556     1266.9092          0.0824     1.0763    0.0508
#>          eCDF Max
#> distance   0.2140
#> age        0.0652
#> educ       0.1265
#> black      0.0163
#> hisp       0.0482
#> married    0.0353
#> nodegr     0.1265
#> re74       0.0471
#> re75       0.1075
#> 
#> Summary of Balance for Matched Data:
#>          Means Treated Means Control Std. Mean Diff. Var. Ratio eCDF Mean
#> distance        0.4377        0.4267          0.1152     1.0987    0.0298
#> age            25.8162       25.8757         -0.0083     0.9551    0.0121
#> educ           10.3459       10.1459          0.0995     1.3748    0.0220
#> black           0.8432        0.8486         -0.0149          .    0.0054
#> hisp            0.0595        0.0649         -0.0229          .    0.0054
#> married         0.1892        0.2000         -0.0276          .    0.0108
#> nodegr          0.7081        0.7730         -0.1427          .    0.0649
#> re74         2095.5740     1659.5326          0.0892     1.1752    0.0351
#> re75         1532.0556     1359.6980          0.0535     0.9270    0.0502
#>          eCDF Max Std. Pair Dist.
#> distance   0.1027          0.1302
#> age        0.0432          0.8711
#> educ       0.0757          0.6533
#> black      0.0054          0.4906
#> hisp       0.0054          0.2057
#> married    0.0108          0.7177
#> nodegr     0.0649          0.2378
#> re74       0.0865          0.6297
#> re75       0.1081          0.7019
#> 
#> Sample Sizes:
#>           Control Treated
#> All           260     185
#> Matched       185     185
#> Unmatched      75       0
#> Discarded       0       0

24.4.4.2 评估质量

Code
# 散点图展示了匹配后实验组和对照组样本倾向值的分布,凸显了分布平衡与不平衡,分布缺乏重合
plot(NM,type = "jitter")

#> To identify the units, use first mouse button; to stop, use second.
Code
# QQ图 展示了 匹配前(All)匹配后(Matched)的平衡情况
plot(NM,type = "QQ")

Code

# 直方图展示了匹配前后倾向值的分布
plot(NM,type = "hist")

Code
# 标准化平衡统计值,Std. Mean Diff.
summary(NM,standardize = TRUE)
#> 
#> Call:
#> MatchIt::matchit(formula = treat ~ age + educ + black + hisp + 
#>     married + nodegr + re74 + re75, data = lalonde, method = "nearest", 
#>     replace = FALSE, ratio = 1)
#> 
#> Summary of Balance for All Data:
#>          Means Treated Means Control Std. Mean Diff. Var. Ratio eCDF Mean
#> distance        0.4377        0.4001          0.3935     1.0471    0.1117
#> age            25.8162       25.0538          0.1066     1.0278    0.0254
#> educ           10.3459       10.0885          0.1281     1.5513    0.0287
#> black           0.8432        0.8269          0.0449          .    0.0163
#> hisp            0.0595        0.1077         -0.2040          .    0.0482
#> married         0.1892        0.1538          0.0902          .    0.0353
#> nodegr          0.7081        0.8346         -0.2783          .    0.1265
#> re74         2095.5740     2107.0268         -0.0023     0.7381    0.0192
#> re75         1532.0556     1266.9092          0.0824     1.0763    0.0508
#>          eCDF Max
#> distance   0.2140
#> age        0.0652
#> educ       0.1265
#> black      0.0163
#> hisp       0.0482
#> married    0.0353
#> nodegr     0.1265
#> re74       0.0471
#> re75       0.1075
#> 
#> Summary of Balance for Matched Data:
#>          Means Treated Means Control Std. Mean Diff. Var. Ratio eCDF Mean
#> distance        0.4377        0.4267          0.1152     1.0987    0.0298
#> age            25.8162       25.8757         -0.0083     0.9551    0.0121
#> educ           10.3459       10.1459          0.0995     1.3748    0.0220
#> black           0.8432        0.8486         -0.0149          .    0.0054
#> hisp            0.0595        0.0649         -0.0229          .    0.0054
#> married         0.1892        0.2000         -0.0276          .    0.0108
#> nodegr          0.7081        0.7730         -0.1427          .    0.0649
#> re74         2095.5740     1659.5326          0.0892     1.1752    0.0351
#> re75         1532.0556     1359.6980          0.0535     0.9270    0.0502
#>          eCDF Max Std. Pair Dist.
#> distance   0.1027          0.1302
#> age        0.0432          0.8711
#> educ       0.0757          0.6533
#> black      0.0054          0.4906
#> hisp       0.0054          0.2057
#> married    0.0108          0.7177
#> nodegr     0.0649          0.2378
#> re74       0.0865          0.6297
#> re75       0.1081          0.7019
#> 
#> Sample Sizes:
#>           Control Treated
#> All           260     185
#> Matched       185     185
#> Unmatched      75       0
#> Discarded       0       0

继续改进,调整模型和协变量

24.4.4.3 计算平均处理效应

为了简化步骤,以当前的结果进行匹配后分析。

Code

# 提取匹配后的样本
mData <- match.data(NM,group = "all")
mData_trt <- match.data(NM,group = "treat")
mData_ctrl <- match.data(NM,group = "control")
 
# 包从CRAN剔除了
Code
library(rbounds)
psens(x =mData_trt$re78,
      y =mData_ctrl$re78 ,Gamma = 2,GammaInc = 0.1)
#> 
#>  Rosenbaum Sensitivity Test for Wilcoxon Signed Rank P-Value 
#>  
#> Unconfounded estimate ....  0.0199 
#> 
#>  Gamma Lower bound Upper bound
#>    1.0      0.0199      0.0199
#>    1.1      0.0048      0.0639
#>    1.2      0.0010      0.1491
#>    1.3      0.0002      0.2749
#>    1.4      0.0000      0.4248
#>    1.5      0.0000      0.5755
#>    1.6      0.0000      0.7076
#>    1.7      0.0000      0.8108
#>    1.8      0.0000      0.8844
#>    1.9      0.0000      0.9328
#>    2.0      0.0000      0.9627
#> 
#>  Note: Gamma is Odds of Differential Assignment To
#>  Treatment Due to Unobserved Factors 
#> 

hlsens(x =mData_trt$re78,
      y =mData_ctrl$re78 ,Gamma = 2,GammaInc = 0.1)
#> 
#>  Rosenbaum Sensitivity Test for Hodges-Lehmann Point Estimate 
#>  
#> Unconfounded estimate ....  1435.08 
#> 
#>  Gamma Lower bound Upper bound
#>    1.0  1.4351e+03      1435.1
#>    1.1  8.0828e+02      1515.9
#>    1.2  4.6298e+02      1809.0
#>    1.3  1.8238e+02      2178.1
#>    1.4 -2.0266e-02      2439.4
#>    1.5 -2.4892e+02      2678.5
#>    1.6 -4.5162e+02      2937.5
#>    1.7 -6.7212e+02      3184.0
#>    1.8 -8.9732e+02      3462.6
#>    1.9 -1.1328e+03      3651.1
#>    2.0 -1.3022e+03      3848.9
#> 
#>  Note: Gamma is Odds of Differential Assignment To
#>  Treatment Due to Unobserved Factors 
#>