24 匹配（Matching）

倾向评分匹配根据他们的倾向评分将治疗组中的每个人与对照组中的个体相匹配。对于每个人来说，倾向得分可以直观地视为从一系列协变量（和潜在混杂因素confounding）计算出来的最近治疗的概率。两个人，一个来自治疗组，一个来自对照组，如果他们的倾向评分之间的差异很小，则被认为是匹配的。不匹配的参与者将被丢弃。

24.1 数据来源

https://hbiostat.org/data/

https://hbiostat.org/data/repo/rhc

Code

rhc <- read_csv("data/PS/rhc.csv")

rhc %>% 
    select(age, sex, race, edu, income, ninsclas, cat1,das2d3pc, dnr1, 
           ca, surv2md1, aps1, scoma1, wtkilo1, temp1, meanbp1, resp1,
           hrt1, pafi1, paco21, ph1, wblc1, hema1, sod1, pot1, crea1,
           bili1, alb1, resp, card, neuro, gastr, renal, meta, hema,
           seps, trauma, ortho, cardiohx, chfhx, dementhx, psychhx,
           chrpulhx, renalhx, liverhx, gibledhx, malighx, immunhx, 
           transhx, amihx, swang1) %>% 
    mutate(
        across(where(is.character), as.factor),
        across(ends_with("hx"), as.factor)
    )->
    rhc2
rhc2 %>% select(where(is.factor)) %>% 
    map(table)
#> $sex
#> 
#> Female   Male 
#>   2543   3192 
#> 
#> $race
#> 
#> black other white 
#>   920   355  4460 
#> 
#> $income
#> 
#>   $11-$25k   $25-$50k     > $50k Under $11k 
#>       1165        893        451       3226 
#> 
#> $ninsclas
#> 
#>            Medicaid            Medicare Medicare & Medicaid        No insurance 
#>                 647                1458                 374                 322 
#>             Private  Private & Medicare 
#>                1698                1236 
#> 
#> $cat1
#> 
#>               ARF               CHF         Cirrhosis      Colon Cancer 
#>              2490               456               224                 7 
#>              Coma              COPD       Lung Cancer MOSF w/Malignancy 
#>               436               457                39               399 
#>     MOSF w/Sepsis 
#>              1227 
#> 
#> $dnr1
#> 
#>   No  Yes 
#> 5081  654 
#> 
#> $ca
#> 
#> Metastatic         No        Yes 
#>        384       4379        972 
#> 
#> $resp
#> 
#>   No  Yes 
#> 3622 2113 
#> 
#> $card
#> 
#>   No  Yes 
#> 3804 1931 
#> 
#> $neuro
#> 
#>   No  Yes 
#> 5042  693 
#> 
#> $gastr
#> 
#>   No  Yes 
#> 4793  942 
#> 
#> $renal
#> 
#>   No  Yes 
#> 5440  295 
#> 
#> $meta
#> 
#>   No  Yes 
#> 5470  265 
#> 
#> $hema
#> 
#>   No  Yes 
#> 5381  354 
#> 
#> $seps
#> 
#>   No  Yes 
#> 4704 1031 
#> 
#> $trauma
#> 
#>   No  Yes 
#> 5683   52 
#> 
#> $ortho
#> 
#>   No  Yes 
#> 5728    7 
#> 
#> $cardiohx
#> 
#>    0    1 
#> 4722 1013 
#> 
#> $chfhx
#> 
#>    0    1 
#> 4714 1021 
#> 
#> $dementhx
#> 
#>    0    1 
#> 5171  564 
#> 
#> $psychhx
#> 
#>    0    1 
#> 5349  386 
#> 
#> $chrpulhx
#> 
#>    0    1 
#> 4646 1089 
#> 
#> $renalhx
#> 
#>    0    1 
#> 5480  255 
#> 
#> $liverhx
#> 
#>    0    1 
#> 5334  401 
#> 
#> $gibledhx
#> 
#>    0    1 
#> 5550  185 
#> 
#> $malighx
#> 
#>    0    1 
#> 4419 1316 
#> 
#> $immunhx
#> 
#>    0    1 
#> 4192 1543 
#> 
#> $transhx
#> 
#>    0    1 
#> 5073  662 
#> 
#> $amihx
#> 
#>    0    1 
#> 5535  200 
#> 
#> $swang1
#> 
#> No RHC    RHC 
#>   3551   2184

rhc2 %>% select(where(is.numeric)) %>% 
    summary()
#>       age              edu           das2d3pc        surv2md1     
#>  Min.   : 18.04   Min.   : 0.00   Min.   :11.00   Min.   :0.0000  
#>  1st Qu.: 50.15   1st Qu.:10.00   1st Qu.:16.06   1st Qu.:0.4709  
#>  Median : 64.05   Median :12.00   Median :19.75   Median :0.6280  
#>  Mean   : 61.38   Mean   :11.68   Mean   :20.50   Mean   :0.5925  
#>  3rd Qu.: 73.93   3rd Qu.:13.00   3rd Qu.:23.43   3rd Qu.:0.7430  
#>  Max.   :101.85   Max.   :30.00   Max.   :33.00   Max.   :0.9620  
#>       aps1            scoma1       wtkilo1           temp1      
#>  Min.   :  3.00   Min.   :  0   Min.   :  0.00   Min.   :27.00  
#>  1st Qu.: 41.00   1st Qu.:  0   1st Qu.: 56.30   1st Qu.:36.09  
#>  Median : 54.00   Median :  0   Median : 70.00   Median :38.09  
#>  Mean   : 54.67   Mean   : 21   Mean   : 67.83   Mean   :37.62  
#>  3rd Qu.: 67.00   3rd Qu.: 41   3rd Qu.: 83.70   3rd Qu.:39.00  
#>  Max.   :147.00   Max.   :100   Max.   :244.00   Max.   :43.00  
#>     meanbp1           resp1             hrt1           pafi1      
#>  Min.   :  0.00   Min.   :  0.00   Min.   :  0.0   Min.   : 11.6  
#>  1st Qu.: 50.00   1st Qu.: 14.00   1st Qu.: 97.0   1st Qu.:133.3  
#>  Median : 63.00   Median : 30.00   Median :124.0   Median :202.5  
#>  Mean   : 78.52   Mean   : 28.09   Mean   :115.2   Mean   :222.3  
#>  3rd Qu.:115.00   3rd Qu.: 38.00   3rd Qu.:141.0   3rd Qu.:316.6  
#>  Max.   :259.00   Max.   :100.00   Max.   :250.0   Max.   :937.5  
#>      paco21            ph1            wblc1             hema1      
#>  Min.   :  1.00   Min.   :6.579   Min.   :  0.000   Min.   : 2.00  
#>  1st Qu.: 31.00   1st Qu.:7.340   1st Qu.:  8.398   1st Qu.:26.10  
#>  Median : 37.00   Median :7.400   Median : 14.100   Median :30.00  
#>  Mean   : 38.75   Mean   :7.388   Mean   : 15.645   Mean   :31.87  
#>  3rd Qu.: 42.00   3rd Qu.:7.460   3rd Qu.: 20.049   3rd Qu.:36.30  
#>  Max.   :156.00   Max.   :7.770   Max.   :192.000   Max.   :66.19  
#>       sod1            pot1            crea1              bili1         
#>  Min.   :101.0   Min.   : 1.100   Min.   : 0.09999   Min.   : 0.09999  
#>  1st Qu.:132.0   1st Qu.: 3.400   1st Qu.: 1.00000   1st Qu.: 0.79993  
#>  Median :136.0   Median : 3.800   Median : 1.50000   Median : 1.00977  
#>  Mean   :136.8   Mean   : 4.067   Mean   : 2.13302   Mean   : 2.26707  
#>  3rd Qu.:142.0   3rd Qu.: 4.600   3rd Qu.: 2.39990   3rd Qu.: 1.39990  
#>  Max.   :178.0   Max.   :11.898   Max.   :25.09766   Max.   :58.19531  
#>       alb1       
#>  Min.   : 0.300  
#>  1st Qu.: 2.600  
#>  Median : 3.500  
#>  Mean   : 3.093  
#>  3rd Qu.: 3.500  
#>  Max.   :29.000



library(tableone)

CreateTableOne(strata = "swang1",
               data = rhc2
                ) %>% print(.,showAllLevels = T,smd = T, addOverall = T)
#>                       Stratified by swang1
#>                        level               No RHC          RHC            
#>   n                                          3551            2184         
#>   age (mean (SD))                           61.76 (17.29)   60.75 (15.63) 
#>   sex (%)              Female                1637 ( 46.1)     906 ( 41.5) 
#>                        Male                  1914 ( 53.9)    1278 ( 58.5) 
#>   race (%)             black                  585 ( 16.5)     335 ( 15.3) 
#>                        other                  213 (  6.0)     142 (  6.5) 
#>                        white                 2753 ( 77.5)    1707 ( 78.2) 
#>   edu (mean (SD))                           11.57 (3.13)    11.86 (3.16)  
#>   income (%)           $11-$25k               713 ( 20.1)     452 ( 20.7) 
#>                        $25-$50k               500 ( 14.1)     393 ( 18.0) 
#>                        > $50k                 257 (  7.2)     194 (  8.9) 
#>                        Under $11k            2081 ( 58.6)    1145 ( 52.4) 
#>   ninsclas (%)         Medicaid               454 ( 12.8)     193 (  8.8) 
#>                        Medicare               947 ( 26.7)     511 ( 23.4) 
#>                        Medicare & Medicaid    251 (  7.1)     123 (  5.6) 
#>                        No insurance           186 (  5.2)     136 (  6.2) 
#>                        Private                967 ( 27.2)     731 ( 33.5) 
#>                        Private & Medicare     746 ( 21.0)     490 ( 22.4) 
#>   cat1 (%)             ARF                   1581 ( 44.5)     909 ( 41.6) 
#>                        CHF                    247 (  7.0)     209 (  9.6) 
#>                        Cirrhosis              175 (  4.9)      49 (  2.2) 
#>                        Colon Cancer             6 (  0.2)       1 (  0.0) 
#>                        Coma                   341 (  9.6)      95 (  4.3) 
#>                        COPD                   399 ( 11.2)      58 (  2.7) 
#>                        Lung Cancer             34 (  1.0)       5 (  0.2) 
#>                        MOSF w/Malignancy      241 (  6.8)     158 (  7.2) 
#>                        MOSF w/Sepsis          527 ( 14.8)     700 ( 32.1) 
#>   das2d3pc (mean (SD))                      20.37 (5.48)    20.70 (5.03)  
#>   dnr1 (%)             No                    3052 ( 85.9)    2029 ( 92.9) 
#>                        Yes                    499 ( 14.1)     155 (  7.1) 
#>   ca (%)               Metastatic             261 (  7.4)     123 (  5.6) 
#>                        No                    2652 ( 74.7)    1727 ( 79.1) 
#>                        Yes                    638 ( 18.0)     334 ( 15.3) 
#>   surv2md1 (mean (SD))                       0.61 (0.19)     0.57 (0.20)  
#>   aps1 (mean (SD))                          50.93 (18.81)   60.74 (20.27) 
#>   scoma1 (mean (SD))                        22.25 (31.37)   18.97 (28.26) 
#>   wtkilo1 (mean (SD))                       65.04 (29.50)   72.36 (27.73) 
#>   temp1 (mean (SD))                         37.63 (1.74)    37.59 (1.83)  
#>   meanbp1 (mean (SD))                       84.87 (38.87)   68.20 (34.24) 
#>   resp1 (mean (SD))                         28.98 (13.95)   26.65 (14.17) 
#>   hrt1 (mean (SD))                         112.87 (40.94)  118.93 (41.47) 
#>   pafi1 (mean (SD))                        240.63 (116.66) 192.43 (105.54)
#>   paco21 (mean (SD))                        39.95 (14.24)   36.79 (10.97) 
#>   ph1 (mean (SD))                            7.39 (0.11)     7.38 (0.11)  
#>   wblc1 (mean (SD))                         15.26 (11.41)   16.27 (12.55) 
#>   hema1 (mean (SD))                         32.70 (8.79)    30.51 (7.42)  
#>   sod1 (mean (SD))                         137.04 (7.68)   136.33 (7.60)  
#>   pot1 (mean (SD))                           4.08 (1.04)     4.05 (1.01)  
#>   crea1 (mean (SD))                          1.92 (2.03)     2.47 (2.05)  
#>   bili1 (mean (SD))                          2.00 (4.43)     2.71 (5.33)  
#>   alb1 (mean (SD))                           3.16 (0.67)     2.98 (0.93)  
#>   resp (%)             No                    2070 ( 58.3)    1552 ( 71.1) 
#>                        Yes                   1481 ( 41.7)     632 ( 28.9) 
#>   card (%)             No                    2544 ( 71.6)    1260 ( 57.7) 
#>                        Yes                   1007 ( 28.4)     924 ( 42.3) 
#>   neuro (%)            No                    2976 ( 83.8)    2066 ( 94.6) 
#>                        Yes                    575 ( 16.2)     118 (  5.4) 
#>   gastr (%)            No                    3029 ( 85.3)    1764 ( 80.8) 
#>                        Yes                    522 ( 14.7)     420 ( 19.2) 
#>   renal (%)            No                    3404 ( 95.9)    2036 ( 93.2) 
#>                        Yes                    147 (  4.1)     148 (  6.8) 
#>   meta (%)             No                    3379 ( 95.2)    2091 ( 95.7) 
#>                        Yes                    172 (  4.8)      93 (  4.3) 
#>   hema (%)             No                    3312 ( 93.3)    2069 ( 94.7) 
#>                        Yes                    239 (  6.7)     115 (  5.3) 
#>   seps (%)             No                    3036 ( 85.5)    1668 ( 76.4) 
#>                        Yes                    515 ( 14.5)     516 ( 23.6) 
#>   trauma (%)           No                    3533 ( 99.5)    2150 ( 98.4) 
#>                        Yes                     18 (  0.5)      34 (  1.6) 
#>   ortho (%)            No                    3548 ( 99.9)    2180 ( 99.8) 
#>                        Yes                      3 (  0.1)       4 (  0.2) 
#>   cardiohx (%)         0                     2984 ( 84.0)    1738 ( 79.6) 
#>                        1                      567 ( 16.0)     446 ( 20.4) 
#>   chfhx (%)            0                     2955 ( 83.2)    1759 ( 80.5) 
#>                        1                      596 ( 16.8)     425 ( 19.5) 
#>   dementhx (%)         0                     3138 ( 88.4)    2033 ( 93.1) 
#>                        1                      413 ( 11.6)     151 (  6.9) 
#>   psychhx (%)          0                     3265 ( 91.9)    2084 ( 95.4) 
#>                        1                      286 (  8.1)     100 (  4.6) 
#>   chrpulhx (%)         0                     2777 ( 78.2)    1869 ( 85.6) 
#>                        1                      774 ( 21.8)     315 ( 14.4) 
#>   renalhx (%)          0                     3402 ( 95.8)    2078 ( 95.1) 
#>                        1                      149 (  4.2)     106 (  4.9) 
#>   liverhx (%)          0                     3286 ( 92.5)    2048 ( 93.8) 
#>                        1                      265 (  7.5)     136 (  6.2) 
#>   gibledhx (%)         0                     3420 ( 96.3)    2130 ( 97.5) 
#>                        1                      131 (  3.7)      54 (  2.5) 
#>   malighx (%)          0                     2679 ( 75.4)    1740 ( 79.7) 
#>                        1                      872 ( 24.6)     444 ( 20.3) 
#>   immunhx (%)          0                     2644 ( 74.5)    1548 ( 70.9) 
#>                        1                      907 ( 25.5)     636 ( 29.1) 
#>   transhx (%)          0                     3216 ( 90.6)    1857 ( 85.0) 
#>                        1                      335 (  9.4)     327 ( 15.0) 
#>   amihx (%)            0                     3446 ( 97.0)    2089 ( 95.7) 
#>                        1                      105 (  3.0)      95 (  4.3) 
#>   swang1 (%)           No RHC                3551 (100.0)       0 (  0.0) 
#>                        RHC                      0 (  0.0)    2184 (100.0) 
#>                       Stratified by swang1
#>                        p      test SMD   
#>   n                                      
#>   age (mean (SD))       0.026       0.061
#>   sex (%)               0.001       0.093
#>                                          
#>   race (%)              0.425       0.036
#>                                          
#>                                          
#>   edu (mean (SD))       0.001       0.091
#>   income (%)           <0.001       0.142
#>                                          
#>                                          
#>                                          
#>   ninsclas (%)         <0.001       0.194
#>                                          
#>                                          
#>                                          
#>                                          
#>                                          
#>   cat1 (%)             <0.001       0.583
#>                                          
#>                                          
#>                                          
#>                                          
#>                                          
#>                                          
#>                                          
#>                                          
#>   das2d3pc (mean (SD))  0.023       0.063
#>   dnr1 (%)             <0.001       0.228
#>                                          
#>   ca (%)                0.001       0.107
#>                                          
#>                                          
#>   surv2md1 (mean (SD)) <0.001       0.198
#>   aps1 (mean (SD))     <0.001       0.501
#>   scoma1 (mean (SD))   <0.001       0.110
#>   wtkilo1 (mean (SD))  <0.001       0.256
#>   temp1 (mean (SD))     0.429       0.021
#>   meanbp1 (mean (SD))  <0.001       0.455
#>   resp1 (mean (SD))    <0.001       0.165
#>   hrt1 (mean (SD))     <0.001       0.147
#>   pafi1 (mean (SD))    <0.001       0.433
#>   paco21 (mean (SD))   <0.001       0.249
#>   ph1 (mean (SD))      <0.001       0.120
#>   wblc1 (mean (SD))     0.002       0.084
#>   hema1 (mean (SD))    <0.001       0.269
#>   sod1 (mean (SD))      0.001       0.092
#>   pot1 (mean (SD))      0.321       0.027
#>   crea1 (mean (SD))    <0.001       0.270
#>   bili1 (mean (SD))    <0.001       0.145
#>   alb1 (mean (SD))     <0.001       0.230
#>   resp (%)             <0.001       0.270
#>                                          
#>   card (%)             <0.001       0.295
#>                                          
#>   neuro (%)            <0.001       0.353
#>                                          
#>   gastr (%)            <0.001       0.121
#>                                          
#>   renal (%)            <0.001       0.116
#>                                          
#>   meta (%)              0.337       0.028
#>                                          
#>   hema (%)              0.029       0.062
#>                                          
#>   seps (%)             <0.001       0.234
#>                                          
#>   trauma (%)           <0.001       0.104
#>                                          
#>   ortho (%)             0.516       0.027
#>                                          
#>   cardiohx (%)         <0.001       0.116
#>                                          
#>   chfhx (%)             0.011       0.070
#>                                          
#>   dementhx (%)         <0.001       0.163
#>                                          
#>   psychhx (%)          <0.001       0.143
#>                                          
#>   chrpulhx (%)         <0.001       0.192
#>                                          
#>   renalhx (%)           0.268       0.032
#>                                          
#>   liverhx (%)           0.084       0.049
#>                                          
#>   gibledhx (%)          0.014       0.070
#>                                          
#>   malighx (%)          <0.001       0.101
#>                                          
#>   immunhx (%)           0.003       0.080
#>                                          
#>   transhx (%)          <0.001       0.170
#>                                          
#>   amihx (%)             0.007       0.074
#>                                          
#>   swang1 (%)           <0.001         NaN
#>

Variable name	Variable Definition
Age	Age
Sex	Sex
Race	Race
Edu	Years of education
Income	Income
Ninsclas	Medical insurance
Cat1	Primary disease category
Cat2	Secondary disease category
Categories of admission diagnosis:
Resp	Respiratory Diagnosis
Card	Cardiovascular Diagnosis
Neuro	Neurological Diagnosis
Gastr	Gastrointestinal Diagnosis
Renal	Renal Diagnosis
Meta	Metabolic Diagnosis
Hema	Hematologic Diagnosis
Seps	Sepsis Diagnosis
Trauma	Trauma Diagnosis
Ortho	Orthopedic Diagnosis

Adld3p	ADL
Das2d3pc	DASI ( Duke Activity Status Index)
Dnr1	DNR status on day1
Ca	Cancer
Surv2md1	Support model estimate of the prob. of surviving 2 months
Aps1	APACHE score
Scoma1	Glasgow Coma Score
Wtkilo1	Weight
Temp1	Temperature
Meanbp1	Mean blood pressure
Resp1	Respiratory rate
Hrt1	Heart rate
Pafi1	PaO2/FIO2 ratio
Paco21	PaCo2
Ph1	PH
Wblc1	WBC
Hema1	Hematocrit
Sod1	Sodium
Pot1	Potassium
Crea1	Creatinine
Bili1	Bilirubin
Alb1	Albumin
Urin1	Urine output
Categories of comorbidities illness:
Cardiohx	Acute MI, Peripheral Vascular Disease, Severe Cardiovascular Symptoms (NYHA-Class III), Very Severe Cardiovascular Symptoms (NYHA-Class IV)
Chfhx	Congestive Heart Failure
Dementhx	Dementia, Stroke or Cerebral Infarct, Parkinson�s Disease
Psychhx	Psychiatric History, Active Psychosis or Severe Depression
Chrpulhx	Chronic Pulmonary Disease, Severe Pulmonary Disease, Very Severe Pulmonary Disease
Renalhx	Chronic Renal Disease, Chronic Hemodialysis or Peritoneal Dialysis
Liverhx	Cirrhosis, Hepatic Failure
Gibledhx	Upper GI Bleeding
Malighx	Solid Tumor, Metastatic Disease, Chronic Leukemia/Myeloma, Acute Leukemia, Lymphoma
Immunhx	Immunosupperssion, Organ Transplant, HIV Positivity, Diabetes Mellitus Without End Organ Damage, Diabetes Mellitus With End Organ Damage, Connective Tissue Disease
Transhx	Transfer (> 24 Hours) from Another Hospital
Amihx	Definite Myocardial Infarction

Swang1	Right Heart Catheterization (rhc2)
Sadmdte	Study Admission Date
Dthdte	Date of Death
Lstctdte	Date of Last Contact
Dschdte	Hospital Discharge Date
Death	Death at any time up to 180 Days
Ptid	Patient ID

24.2 估计倾向得分

Code


library(tidymodels)
rhc2_formula <- swang1 ~ .



logit_ps <- logistic_reg() %>%
    fit(rhc2_formula, data = rhc2)

summary(logit_ps$fit)
#> 
#> Call:
#> stats::glm(formula = swang1 ~ ., family = stats::binomial, data = data)
#> 
#> Coefficients:
#>                               Estimate Std. Error z value Pr(>|z|)    
#> (Intercept)                 17.2203728  3.1137929   5.530 3.20e-08 ***
#> age                         -0.0043737  0.0028619  -1.528 0.126457    
#> sexMale                      0.0478361  0.0679017   0.704 0.481128    
#> raceother                    0.0238498  0.1519260   0.157 0.875258    
#> racewhite                   -0.0480662  0.0939149  -0.512 0.608787    
#> edu                          0.0261939  0.0116913   2.240 0.025061 *  
#> income$25-$50k               0.0888363  0.1094906   0.811 0.417159    
#> income> $50k                 0.0312483  0.1382814   0.226 0.821220    
#> incomeUnder $11k             0.0548697  0.0868242   0.632 0.527411    
#> ninsclasMedicare             0.2328987  0.1351961   1.723 0.084948 .  
#> ninsclasMedicare & Medicaid  0.4012458  0.1691375   2.372 0.017677 *  
#> ninsclasNo insurance         0.5264711  0.1673868   3.145 0.001660 ** 
#> ninsclasPrivate              0.4138771  0.1247856   3.317 0.000911 ***
#> ninsclasPrivate & Medicare   0.3490696  0.1391972   2.508 0.012151 *  
#> cat1CHF                      0.7833692  0.1596299   4.907 9.23e-07 ***
#> cat1Cirrhosis               -1.1607975  0.2378358  -4.881 1.06e-06 ***
#> cat1Colon Cancer            -0.0864465  1.1193521  -0.077 0.938441    
#> cat1Coma                    -0.5829321  0.1819159  -3.204 0.001353 ** 
#> cat1COPD                    -0.4719080  0.1901740  -2.481 0.013085 *  
#> cat1Lung Cancer             -0.4138852  0.5445513  -0.760 0.447226    
#> cat1MOSF w/Malignancy        0.0143100  0.1614131   0.089 0.929356    
#> cat1MOSF w/Sepsis            0.5339948  0.0910519   5.865 4.50e-09 ***
#> das2d3pc                    -0.0020607  0.0066432  -0.310 0.756416    
#> dnr1Yes                     -0.6777513  0.1166464  -5.810 6.24e-09 ***
#> caNo                         1.0485967  0.4350633   2.410 0.015943 *  
#> caYes                        0.2805522  0.1619387   1.732 0.083192 .  
#> surv2md1                    -1.8855344  0.3535424  -5.333 9.65e-08 ***
#> aps1                         0.0037687  0.0029458   1.279 0.200771    
#> scoma1                      -0.0041620  0.0016024  -2.597 0.009396 ** 
#> wtkilo1                      0.0063327  0.0012171   5.203 1.96e-07 ***
#> temp1                       -0.0284681  0.0197651  -1.440 0.149776    
#> meanbp1                     -0.0063608  0.0010216  -6.226 4.78e-10 ***
#> resp1                       -0.0207690  0.0025956  -8.002 1.23e-15 ***
#> hrt1                         0.0054615  0.0008942   6.108 1.01e-09 ***
#> pafi1                       -0.0047235  0.0003463 -13.639  < 2e-16 ***
#> paco21                      -0.0261850  0.0036694  -7.136 9.60e-13 ***
#> ph1                         -1.6756614  0.3847432  -4.355 1.33e-05 ***
#> wblc1                        0.0001465  0.0027568   0.053 0.957609    
#> hema1                       -0.0115223  0.0047296  -2.436 0.014841 *  
#> sod1                        -0.0108807  0.0043040  -2.528 0.011470 *  
#> pot1                        -0.1720083  0.0341922  -5.031 4.89e-07 ***
#> crea1                        0.0523499  0.0218175   2.399 0.016420 *  
#> bili1                        0.0072293  0.0074332   0.973 0.330766    
#> alb1                        -0.0964314  0.0481135  -2.004 0.045044 *  
#> respYes                     -0.2725098  0.0827259  -3.294 0.000987 ***
#> cardYes                      0.5575983  0.0856700   6.509 7.58e-11 ***
#> neuroYes                    -0.4873660  0.1343763  -3.627 0.000287 ***
#> gastrYes                     0.3505067  0.1054000   3.325 0.000883 ***
#> renalYes                     0.3004368  0.1490678   2.015 0.043859 *  
#> metaYes                     -0.1129081  0.1554971  -0.726 0.467771    
#> hemaYes                     -0.5131993  0.1470678  -3.490 0.000484 ***
#> sepsYes                      0.2844857  0.0919399   3.094 0.001973 ** 
#> traumaYes                    1.2560843  0.3346498   3.753 0.000174 ***
#> orthoYes                     1.1814997  0.9691723   1.219 0.222813    
#> cardiohx1                    0.0482934  0.0953604   0.506 0.612554    
#> chfhx1                       0.0936948  0.1042890   0.898 0.368964    
#> dementhx1                   -0.4121592  0.1213400  -3.397 0.000682 ***
#> psychhx1                    -0.3913426  0.1382534  -2.831 0.004646 ** 
#> chrpulhx1                    0.0122390  0.1007226   0.122 0.903286    
#> renalhx1                    -0.3352647  0.1814102  -1.848 0.064588 .  
#> liverhx1                    -0.0410152  0.1877313  -0.218 0.827057    
#> gibledhx1                   -0.1856252  0.2283797  -0.813 0.416337    
#> malighx1                     0.2298479  0.3846516   0.598 0.550141    
#> immunhx1                     0.0454368  0.0742516   0.612 0.540584    
#> transhx1                     0.4716832  0.0994237   4.744 2.09e-06 ***
#> amihx1                       0.1317831  0.1749886   0.753 0.451392    
#> ---
#> Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
#> 
#> (Dispersion parameter for binomial family taken to be 1)
#> 
#>     Null deviance: 7621.4  on 5734  degrees of freedom
#> Residual deviance: 5993.2  on 5669  degrees of freedom
#> AIC: 6125.2
#> 
#> Number of Fisher Scoring iterations: 5
rhc2$PS <- fitted(logit_ps$fit)

predict(logit_ps, new_data = rhc2, type = "prob") %>% head()

.pred_No RHC	.pred_RHC
0.6489847	0.3510153
0.3308540	0.6691460
0.3664559	0.6335441
0.6310644	0.3689356
0.5543195	0.4456805
0.9447431	0.0552569

24.3 匹配方法

24.3.1 `Matching`

Code

library(Matching)
data(lalonde, package='Matching')
glimpse(lalonde)
#> Rows: 445
#> Columns: 12
#> $ age     <int> 37, 22, 30, 27, 33, 22, 23, 32, 22, 33, 19, 21, 18, 27, 17, 19…
#> $ educ    <int> 11, 9, 12, 11, 8, 9, 12, 11, 16, 12, 9, 13, 8, 10, 7, 10, 13, …
#> $ black   <int> 1, 0, 1, 1, 1, 1, 1, 1, 1, 0, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,…
#> $ hisp    <int> 0, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,…
#> $ married <int> 1, 0, 0, 0, 0, 0, 0, 0, 0, 1, 0, 0, 0, 1, 0, 0, 0, 0, 0, 0, 0,…
#> $ nodegr  <int> 1, 1, 0, 1, 1, 1, 0, 1, 0, 0, 1, 0, 1, 1, 1, 1, 0, 1, 0, 0, 1,…
#> $ re74    <dbl> 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,…
#> $ re75    <dbl> 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,…
#> $ re78    <dbl> 9930.05, 3595.89, 24909.50, 7506.15, 289.79, 4056.49, 0.00, 84…
#> $ u74     <int> 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,…
#> $ u75     <int> 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,…
#> $ treat   <int> 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,…

lalonde_formu <- treat ~ age + I(age^2) + educ + I(educ^2) + black +
    hisp + married + nodegr + re74  + I(re74^2) + re75 + I(re75^2) + u74 + u75
lr_out <- glm(formula = lalonde_formu,
              data = lalonde,
              family = binomial(link = 'logit'))

summary(lr_out)
#> 
#> Call:
#> glm(formula = lalonde_formu, family = binomial(link = "logit"), 
#>     data = lalonde)
#> 
#> Coefficients:
#>               Estimate Std. Error z value Pr(>|z|)  
#> (Intercept)  4.269e+00  2.173e+00   1.965   0.0494 *
#> age          2.143e-02  9.037e-02   0.237   0.8126  
#> I(age^2)    -3.448e-04  1.484e-03  -0.232   0.8163  
#> educ        -8.713e-01  4.150e-01  -2.099   0.0358 *
#> I(educ^2)    4.499e-02  2.330e-02   1.931   0.0535 .
#> black       -2.613e-01  3.708e-01  -0.705   0.4809  
#> hisp        -8.974e-01  5.184e-01  -1.731   0.0835 .
#> married      1.829e-01  2.831e-01   0.646   0.5183  
#> nodegr      -4.285e-01  3.930e-01  -1.090   0.2756  
#> re74        -2.168e-05  7.739e-05  -0.280   0.7793  
#> I(re74^2)   -8.553e-10  2.424e-09  -0.353   0.7242  
#> re75         6.577e-05  1.025e-04   0.642   0.5210  
#> I(re75^2)   -1.968e-09  5.042e-09  -0.390   0.6963  
#> u74         -8.315e-02  4.521e-01  -0.184   0.8541  
#> u75         -3.060e-01  3.591e-01  -0.852   0.3942  
#> ---
#> Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
#> 
#> (Dispersion parameter for binomial family taken to be 1)
#> 
#>     Null deviance: 604.20  on 444  degrees of freedom
#> Residual deviance: 580.02  on 430  degrees of freedom
#> AIC: 610.02
#> 
#> Number of Fisher Scoring iterations: 4
lalonde$lr_ps <- fitted(lr_out)

Code

lalonde_match <- Match(
    Y = lalonde$re78,
    Tr = lalonde$treat,
    X = lalonde$lr_ps,
    M = 1,
    caliper = 0.1,
    replace = TRUE,
    estimand = 'ATE'
)

summary(lalonde_match)
#> 
#> Estimate...  2053.1 
#> AI SE......  803.05 
#> T-stat.....  2.5566 
#> p.val......  0.010569 
#> 
#> Original number of observations..............  445 
#> Original number of treated obs...............  185 
#> Matched number of observations...............  433 
#> Matched number of observations  (unweighted).  744 
#> 
#> Caliper (SDs)........................................   0.1 
#> Number of obs dropped by 'exact' or 'caliper'  12

lalonde_match_df <- data.frame(
    treated.ps = lalonde[lalonde_match$index.treated, ]$lr_ps,
    control.ps = lalonde[lalonde_match$index.control, ]$lr_ps,
    treated.y = 1,
    control.y = 0
)
lalonde_match_df <- lalonde_match_df[order(lalonde_match_df$control.ps), ]


rows <- (1:nrow(lalonde_match_df) - 1) %% floor(nrow(lalonde_match_df) / 5) == 0

ggplot(lalonde, aes(x = lr_ps, y = treat)) +
    geom_point(alpha = 0.5) +
    geom_smooth(
        method = glm,
        formula = y ~ x,
        method.args = list(family = binomial(link = 'logit')),
        se = FALSE
    ) +
    xlim(c(0, 1)) +
    xlab('Propensity Score') + ylab('Treatment') +
    geom_segment(
        data = lalonde_match_df,
        aes(
            x = treated.ps,
            xend = control.ps,
            y = treated.y,
            yend = control.y
        ),
        color = 'purple',
        alpha = 0.1
    )

匹配后，治疗组和对照组应具有非常相似的特征。可以使用简单的回归模型来估计治疗对结果的影响。

24.3.2 一对一匹配ATT

Estimating the treatment effect on the treated (default is ATT)

Code

rr_att <- Match(Y = lalonde$re78, 
                Tr = lalonde$treat, 
                X = lalonde$lr_ps,
                M = 1,
                estimand='ATT')
summary(rr_att) # The default estimate is ATT here
#> 
#> Estimate...  2153.3 
#> AI SE......  825.4 
#> T-stat.....  2.6088 
#> p.val......  0.0090858 
#> 
#> Original number of observations..............  445 
#> Original number of treated obs...............  185 
#> Matched number of observations...............  185 
#> Matched number of observations  (unweighted).  346

rr_att_mb <- psa::MatchBalance(
    df = lalonde,
    formu = lalonde_formu,
    formu.Y = update.formula(lalonde_formu, re78 ~ .),
    index.treated = rr_att$index.treated,
    index.control = rr_att$index.control,
    tolerance = 0.25,
    M = 1,
    estimand = 'ATT')
plot(rr_att_mb)

Code

summary(rr_att_mb)
#> Sample sizes and number of matches:
#>    Group   n n.matched n.percent.matched
#>  Treated 185       185         1.0000000
#>  Control 260       173         0.6653846
#>    Total 445       358         0.8044944
#> 
#> Covariate importance and t-tests for matched pairs:
#>           Import.Treat Import.Y Import.Total std.estimate       t p.value
#> I(educ^2)        1.931   1.5228        3.453     -0.04903 -0.8916  0.3732
#> educ             2.099   1.2121        3.311     -0.05483 -0.9577  0.3389
#> black            0.705   1.8424        2.547     -0.02326 -0.5383  0.5907
#> I(re74^2)        0.353   1.6415        1.994      0.07581  2.0955  0.0369
#> u75              0.852   0.9435        1.796     -0.06655 -1.8144  0.0705
#> hisp             1.731   0.0404        1.771      0.02042  0.8161  0.4150
#> nodegr           1.090   0.5011        1.591      0.03496  1.0914  0.2759
#> re74             0.280   1.1019        1.382      0.07979  1.7483  0.0813
#> re75             0.642   0.5903        1.232      0.06147  1.3171  0.1887
#> age              0.237   0.6729        0.910      0.00896  0.1374  0.8908
#> married          0.646   0.1406        0.787      0.04627  1.0000  0.3180
#> I(re75^2)        0.390   0.3817        0.772      0.05125  1.0364  0.3007
#> I(age^2)         0.232   0.5096        0.742      0.00297  0.0438  0.9651
#> u74              0.184   0.0702        0.254      0.03913  0.7495  0.4541
#>             ci.min  ci.max PercentMatched
#> I(educ^2) -0.15719 0.05913           60.4
#> educ      -0.16744 0.05778           60.1
#> black     -0.10826 0.06173           91.0
#> I(re74^2)  0.00465 0.14696           86.7
#> u75       -0.13870 0.00559           89.3
#> hisp      -0.02879 0.06963           98.3
#> nodegr    -0.02804 0.09797           93.9
#> re74      -0.00998 0.16956           77.2
#> re75      -0.03032 0.15326           78.0
#> age       -0.11922 0.13713           44.8
#> married   -0.04474 0.13728           89.6
#> I(re75^2) -0.04602 0.14852           88.7
#> I(age^2)  -0.13062 0.13656           49.7
#> u74       -0.06356 0.14183           81.5

24.3.3 一对一匹配ATE

average treatment effect

Code

rr.ate <- Match(Y = lalonde$re78, 
                Tr = lalonde$treat, 
                X = lalonde$lr_ps,
                M = 1,
                estimand = 'ATE')
summary(rr.ate)
#> 
#> Estimate...  2013.3 
#> AI SE......  817.76 
#> T-stat.....  2.4619 
#> p.val......  0.013819 
#> 
#> Original number of observations..............  445 
#> Original number of treated obs...............  185 
#> Matched number of observations...............  445 
#> Matched number of observations  (unweighted).  756

24.3.4 一对多匹配（ATT）

Code

rr2 <- Match(Y = lalonde$re78,      
             Tr = lalonde$treat, 
             X = lalonde$lr_ps,
             M = 1, 
             ties = TRUE, 
             replace = TRUE,
             estimand = 'ATT')
summary(rr2) # The default estimate is ATT here
#> 
#> Estimate...  2153.3 
#> AI SE......  825.4 
#> T-stat.....  2.6088 
#> p.val......  0.0090858 
#> 
#> Original number of observations..............  445 
#> Original number of treated obs...............  185 
#> Matched number of observations...............  185 
#> Matched number of observations  (unweighted).  346

24.3.5 `MachIt`

Code

MatchIt::matchit(method = "nearest")
MatchIt::matchit(method = 'optimal')
MatchIt::matchit(method = 'full')
MatchIt::matchit(method = 'quick')
MatchIt::matchit(method = 'genetic')
MatchIt::matchit(method = 'exact')
MatchIt::matchit(method = 'subclass')

Code

matchit.out <- MatchIt::matchit(lalonde_formu, data = lalonde )
summary(matchit.out)
#> 
#> Call:
#> MatchIt::matchit(formula = lalonde_formu, data = lalonde)
#> 
#> Summary of Balance for All Data:
#>           Means Treated Means Control Std. Mean Diff. Var. Ratio eCDF Mean
#> distance         0.4468        0.3936          0.4533     1.2101    0.1340
#> age             25.8162       25.0538          0.1066     1.0278    0.0254
#> I(age^2)       717.3946      677.3154          0.0929     1.0115    0.0254
#> educ            10.3459       10.0885          0.1281     1.5513    0.0287
#> I(educ^2)      111.0595      104.3731          0.1701     1.6625    0.0287
#> black            0.8432        0.8269          0.0449          .    0.0163
#> hisp             0.0595        0.1077         -0.2040          .    0.0482
#> married          0.1892        0.1538          0.0902          .    0.0353
#> nodegr           0.7081        0.8346         -0.2783          .    0.1265
#> re74          2095.5740     2107.0268         -0.0023     0.7381    0.0192
#> I(re74^2) 28141433.9907 36667413.1577         -0.0747     0.5038    0.0192
#> re75          1532.0556     1266.9092          0.0824     1.0763    0.0508
#> I(re75^2) 12654752.6909 11196530.0057          0.0260     1.4609    0.0508
#> u74              0.7081        0.7500         -0.0921          .    0.0419
#> u75              0.6000        0.6846         -0.1727          .    0.0846
#>           eCDF Max
#> distance    0.2244
#> age         0.0652
#> I(age^2)    0.0652
#> educ        0.1265
#> I(educ^2)   0.1265
#> black       0.0163
#> hisp        0.0482
#> married     0.0353
#> nodegr      0.1265
#> re74        0.0471
#> I(re74^2)   0.0471
#> re75        0.1075
#> I(re75^2)   0.1075
#> u74         0.0419
#> u75         0.0846
#> 
#> Summary of Balance for Matched Data:
#>           Means Treated Means Control Std. Mean Diff. Var. Ratio eCDF Mean
#> distance         0.4468        0.4284          0.1571     1.3077    0.0387
#> age             25.8162       25.1351          0.0952     1.1734    0.0243
#> I(age^2)       717.3946      675.1676          0.0979     1.1512    0.0243
#> educ            10.3459       10.2649          0.0403     1.2869    0.0174
#> I(educ^2)      111.0595      108.4919          0.0653     1.3938    0.0174
#> black            0.8432        0.8486         -0.0149          .    0.0054
#> hisp             0.0595        0.0703         -0.0457          .    0.0108
#> married          0.1892        0.1892          0.0000          .    0.0000
#> nodegr           0.7081        0.7676         -0.1308          .    0.0595
#> re74          2095.5740     1741.2109          0.0725     1.5797    0.0146
#> I(re74^2) 28141433.9907 18066538.6428          0.0883     3.5436    0.0146
#> re75          1532.0556     1314.8073          0.0675     1.3933    0.0264
#> I(re75^2) 12654752.6909  9126579.7979          0.0630     3.4873    0.0264
#> u74              0.7081        0.7243         -0.0357          .    0.0162
#> u75              0.6000        0.6108         -0.0221          .    0.0108
#>           eCDF Max Std. Pair Dist.
#> distance    0.1189          0.1585
#> age         0.0541          0.8159
#> I(age^2)    0.0541          0.7701
#> educ        0.0595          0.7662
#> I(educ^2)   0.0595          0.7604
#> black       0.0054          0.5798
#> hisp        0.0108          0.2286
#> married     0.0000          0.2378
#> nodegr      0.0595          0.5588
#> re74        0.0432          0.6080
#> I(re74^2)   0.0432          0.3620
#> re75        0.0649          0.7292
#> I(re75^2)   0.0649          0.3690
#> u74         0.0162          0.7728
#> u75         0.0108          0.7282
#> 
#> Sample Sizes:
#>           Control Treated
#> All           260     185
#> Matched       185     185
#> Unmatched      75       0
#> Discarded       0       0

Code

# Same as above but calculate average treatment effect
rr.ate <- Match(Y = lalonde$re78, 
                Tr = lalonde$treat, 
                X = lalonde$lr_ps,
                M = 1,
                ties = FALSE, 
                replace = FALSE, 
                estimand='ATE')
summary(rr.ate) # Here the estimate is ATE
#> 
#> Estimate...  2036.6 
#> SE.........  501.71 
#> T-stat.....  4.0592 
#> p.val......  4.9233e-05 
#> 
#> Original number of observations..............  445 
#> Original number of treated obs...............  185 
#> Matched number of observations...............  370 
#> Matched number of observations  (unweighted).  370

Code

## Genetic Matching
rr.gen <- GenMatch(Tr = lalonde$treat, 
                   X = lalonde$lr_ps, 
                   BalanceMatrix = lalonde[,all.vars(lalonde_formu)[-1]],
                   estimand = 'ATE', 
                   M = 1, 
                   pop.size = 16,
                   print.level = 0)
rr.gen.mout <- Match(Y = lalonde$re78, 
                     Tr = lalonde$treat, 
                     X = lalonde$lr_ps,
                     estimand = 'ATE',
                     Weight.matrix = rr.gen)
summary(rr.gen.mout)
#> 
#> Estimate...  2086.5 
#> AI SE......  815.65 
#> T-stat.....  2.5581 
#> p.val......  0.010524 
#> 
#> Original number of observations..............  445 
#> Original number of treated obs...............  185 
#> Matched number of observations...............  445 
#> Matched number of observations  (unweighted).  671

Code

## Partial exact matching
rr2 <- Matchby(Y = lalonde$re78, 
               Tr = lalonde$treat, 
               X = lalonde$lr_ps, 
               by = factor(lalonde$nodegr),
               print.level = 0)
summary(rr2)
#> 
#> Estimate...  2014.4 
#> SE.........  702.05 
#> T-stat.....  2.8693 
#> p.val......  0.0041132 
#> 
#> Original number of observations..............  445 
#> Original number of treated obs...............  185 
#> Matched number of observations...............  185 
#> Matched number of observations  (unweighted).  185

Code

## Partial exact matching on two covariates
rr3 <- Matchby(Y = lalonde$re78, 
               Tr = lalonde$treat, 
               X = lalonde$lr_ps, 
               by = lalonde[,c('nodegr','married')],
               print.level = 0)
summary(rr3)
#> 
#> Estimate...  1894 
#> SE.........  705.3 
#> T-stat.....  2.6853 
#> p.val......  0.0072455 
#> 
#> Original number of observations..............  445 
#> Original number of treated obs...............  185 
#> Matched number of observations...............  185 
#> Matched number of observations  (unweighted).  185

24.4 示例

变量名	描述
`age`	年龄
`educ`	受教育年限
`black`	分类变量，1为黑人
`hisp`	分类变量，1为西班牙裔
`married`	分类变量，1为已婚
`nodegr`	分类变量，1为有高中学历证书
`re74`	1974年的收入
`re75`	1975年的收入
`re78`	1978年的收入
`u74`	分类变量，1为1974年收入为零
`u75`	分类变量，1为1975年收入为零
`treat`	分类变量，1为实验组

24.4.1 估计倾向值分数

Code

attach(lalonde)
glm_ps <- glm(
    formula = treat ~ age + educ + black + hisp + married + nodegr + re74  + re75,
    family = binomial(link = 'logit')
)

psm1 <- Match(Y=re78,
             Tr = treat,
             X=glm_ps$fitted.values,
             estimand = "ATT",
             M=1,
             replace = TRUE)
summary(psm1)
#> 
#> Estimate...  2624.3 
#> AI SE......  802.19 
#> T-stat.....  3.2714 
#> p.val......  0.0010702 
#> 
#> Original number of observations..............  445 
#> Original number of treated obs...............  185 
#> Matched number of observations...............  185 
#> Matched number of observations  (unweighted).  344

如上所示，使用1对1样本可替代匹配法，实验组平均效应为2624.3，因果效应的标准误为803.19，t值为3.2714，p值为0.0010702<0.05，表明估计的实验组平均处理效应有统计学差异。

Code

psm2 <- Match(Y=re78,
             Tr = treat,
             X=glm_ps$fitted.values,
             estimand = "ATT",
             M=1,
             replace = FALSE)
summary(psm2)
#> 
#> Estimate...  1996.3 
#> SE.........  643.88 
#> T-stat.....  3.1005 
#> p.val......  0.0019319 
#> 
#> Original number of observations..............  445 
#> Original number of treated obs...............  185 
#> Matched number of observations...............  185 
#> Matched number of observations  (unweighted).  185

24.4.2 检验平衡

受试者个体同质性，是否随机分配

协变量分布是否平衡，是否重合：

以age 为例，实验组匹配前25.816匹配后25.816，对照组匹配前25.054匹配后25.692 ，匹配后实验组与对照组更接近了；T-test p-value > 0.05 ，表示匹配前后age 均值无统计学差异；KS Bootstrap p-value > 0.05 ，表示匹配前后age 分布无统计学差异

***** (V1) age *****                Before Matching          After Matching 

mean   treatment........          25.816             25.816  
mean control..........     25.054            25.692 
std mean diff.........     10.655            1.7342 

mean raw eQQ diff.....    0.94054           0.73837  
med  raw eQQ diff.....          1                 0  
max  raw eQQ diff.....          7                 9   

mean eCDF diff........   0.025364          0.021893  
med  eCDF diff........   0.022193          0.020349  
max  eCDF diff........   0.065177          0.061047   

var ratio (Tr/Co).....     1.0278             1.083  
T-test p-value........    0.26594           0.84975  
KS Bootstrap p-value..      0.526             0.355  
KS Naive p-value......     0.7481           0.54314  
KS Statistic..........   0.065177          0.061047

Code

check_balance <- MatchBalance(
    formul = treat ~ age + educ + black + hisp + married + nodegr + re74  + re75,
    match.out = psm1,
    nboots = 1000,data = lalonde
)
#> 
#> ***** (V1) age *****
#>                        Before Matching        After Matching
#> mean treatment........     25.816             25.816 
#> mean control..........     25.054             25.692 
#> std mean diff.........     10.655             1.7342 
#> 
#> mean raw eQQ diff.....    0.94054            0.73837 
#> med  raw eQQ diff.....          1                  0 
#> max  raw eQQ diff.....          7                  9 
#> 
#> mean eCDF diff........   0.025364           0.021893 
#> med  eCDF diff........   0.022193           0.020349 
#> max  eCDF diff........   0.065177           0.061047 
#> 
#> var ratio (Tr/Co).....     1.0278              1.083 
#> T-test p-value........    0.26594            0.84975 
#> KS Bootstrap p-value..      0.514              0.364 
#> KS Naive p-value......     0.7481            0.54314 
#> KS Statistic..........   0.065177           0.061047 
#> 
#> 
#> ***** (V2) educ *****
#>                        Before Matching        After Matching
#> mean treatment........     10.346             10.346 
#> mean control..........     10.088             10.146 
#> std mean diff.........     12.806             9.9664 
#> 
#> mean raw eQQ diff.....    0.40541            0.23256 
#> med  raw eQQ diff.....          0                  0 
#> max  raw eQQ diff.....          2                  2 
#> 
#> mean eCDF diff........   0.028698           0.016611 
#> med  eCDF diff........   0.012682           0.010174 
#> max  eCDF diff........    0.12651           0.061047 
#> 
#> var ratio (Tr/Co).....     1.5513             1.2344 
#> T-test p-value........    0.15017             0.1842 
#> KS Bootstrap p-value..      0.003              0.183 
#> KS Naive p-value......   0.062873            0.54314 
#> KS Statistic..........    0.12651           0.061047 
#> 
#> 
#> ***** (V3) black *****
#>                        Before Matching        After Matching
#> mean treatment........    0.84324            0.84324 
#> mean control..........    0.82692            0.86847 
#> std mean diff.........     4.4767            -6.9194 
#> 
#> mean raw eQQ diff.....   0.016216           0.026163 
#> med  raw eQQ diff.....          0                  0 
#> max  raw eQQ diff.....          1                  1 
#> 
#> mean eCDF diff........  0.0081601           0.013081 
#> med  eCDF diff........  0.0081601           0.013081 
#> max  eCDF diff........    0.01632           0.026163 
#> 
#> var ratio (Tr/Co).....    0.92503             1.1572 
#> T-test p-value........    0.64736            0.40214 
#> 
#> 
#> ***** (V4) hisp *****
#>                        Before Matching        After Matching
#> mean treatment........   0.059459           0.059459 
#> mean control..........    0.10769            0.04955 
#> std mean diff.........    -20.341             4.1792 
#> 
#> mean raw eQQ diff.....   0.048649           0.011628 
#> med  raw eQQ diff.....          0                  0 
#> max  raw eQQ diff.....          1                  1 
#> 
#> mean eCDF diff........   0.024116           0.005814 
#> med  eCDF diff........   0.024116           0.005814 
#> max  eCDF diff........   0.048233           0.011628 
#> 
#> var ratio (Tr/Co).....    0.58288             1.1875 
#> T-test p-value........   0.064043            0.46063 
#> 
#> 
#> ***** (V5) married *****
#>                        Before Matching        After Matching
#> mean treatment........    0.18919            0.18919 
#> mean control..........    0.15385            0.18423 
#> std mean diff.........     8.9995             1.2617 
#> 
#> mean raw eQQ diff.....   0.037838           0.026163 
#> med  raw eQQ diff.....          0                  0 
#> max  raw eQQ diff.....          1                  1 
#> 
#> mean eCDF diff........   0.017672           0.013081 
#> med  eCDF diff........   0.017672           0.013081 
#> max  eCDF diff........   0.035343           0.026163 
#> 
#> var ratio (Tr/Co).....     1.1802             1.0207 
#> T-test p-value........    0.33425            0.89497 
#> 
#> 
#> ***** (V6) nodegr *****
#>                        Before Matching        After Matching
#> mean treatment........    0.70811            0.70811 
#> mean control..........    0.83462            0.76757 
#> std mean diff.........    -27.751            -13.043 
#> 
#> mean raw eQQ diff.....    0.12432           0.043605 
#> med  raw eQQ diff.....          0                  0 
#> max  raw eQQ diff.....          1                  1 
#> 
#> mean eCDF diff........   0.063254           0.021802 
#> med  eCDF diff........   0.063254           0.021802 
#> max  eCDF diff........    0.12651           0.043605 
#> 
#> var ratio (Tr/Co).....     1.4998             1.1585 
#> T-test p-value........  0.0020368          0.0071385 
#> 
#> 
#> ***** (V7) re74 *****
#>                        Before Matching        After Matching
#> mean treatment........     2095.6             2095.6 
#> mean control..........       2107             2193.3 
#> std mean diff.........   -0.23437            -2.0004 
#> 
#> mean raw eQQ diff.....     487.98             869.16 
#> med  raw eQQ diff.....          0                  0 
#> max  raw eQQ diff.....       8413              10305 
#> 
#> mean eCDF diff........   0.019223           0.054701 
#> med  eCDF diff........     0.0158           0.050872 
#> max  eCDF diff........   0.047089            0.12209 
#> 
#> var ratio (Tr/Co).....     0.7381            0.75054 
#> T-test p-value........    0.98186            0.84996 
#> KS Bootstrap p-value..      0.575         < 2.22e-16 
#> KS Naive p-value......    0.97023           0.011858 
#> KS Statistic..........   0.047089            0.12209 
#> 
#> 
#> ***** (V8) re75 *****
#>                        Before Matching        After Matching
#> mean treatment........     1532.1             1532.1 
#> mean control..........     1266.9             2179.9 
#> std mean diff.........     8.2363            -20.125 
#> 
#> mean raw eQQ diff.....     367.61             590.34 
#> med  raw eQQ diff.....          0                  0 
#> max  raw eQQ diff.....     2110.2             8092.9 
#> 
#> mean eCDF diff........   0.050834           0.050338 
#> med  eCDF diff........   0.061954           0.049419 
#> max  eCDF diff........    0.10748           0.098837 
#> 
#> var ratio (Tr/Co).....     1.0763            0.56563 
#> T-test p-value........    0.38527           0.079002 
#> KS Bootstrap p-value..      0.049               0.01 
#> KS Naive p-value......    0.16449           0.069435 
#> KS Statistic..........    0.10748           0.098837 
#> 
#> 
#> Before Matching Minimum p.value: 0.0020368 
#> Variable Name(s): nodegr  Number(s): 6 
#> 
#> After Matching Minimum p.value: < 2.22e-16 
#> Variable Name(s): re74  Number(s): 7

Code

# age 变平衡了
qqplot(lalonde$age[psm1$index.control],lalonde$age[psm1$index.treated])
abline(a=0,b=1)

Code


# re74 更不平衡了
qqplot(lalonde$re74[psm1$index.control],lalonde$re74[psm1$index.treated])
abline(a=0,b=1)

Code

# # The covariates we want to match on
x <- cbind(age , educ , black , hisp , married , nodegr , re74  , re75)

# The covariates we want to obtain balance on
BalanceMatrix = x
set.seed(100)



# Genetic Matching 自动适配平衡
gen_match <- GenMatch(Tr=treat,
                      X=glm_ps$fitted.values,
                      BalanceMatrix = x,
                      estimand = "ATT")
#> 
#> 
#> Sat Aug 31 15:02:33 2024
#> Domains:
#>  0.000000e+00   <=  X1   <=    1.000000e+03 
#> 
#> Data Type: Floating Point
#> Operators (code number, name, population) 
#>  (1) Cloning...........................  15
#>  (2) Uniform Mutation..................  12
#>  (3) Boundary Mutation.................  12
#>  (4) Non-Uniform Mutation..............  12
#>  (5) Polytope Crossover................  12
#>  (6) Simple Crossover..................  12
#>  (7) Whole Non-Uniform Mutation........  12
#>  (8) Heuristic Crossover...............  12
#>  (9) Local-Minimum Crossover...........  0
#> 
#> SOFT Maximum Number of Generations: 100
#> Maximum Nonchanging Generations: 4
#> Population size       : 100
#> Convergence Tolerance: 1.000000e-03
#> 
#> Not Using the BFGS Derivative Based Optimizer on the Best Individual Each Generation.
#> Not Checking Gradients before Stopping.
#> Using Out of Bounds Individuals.
#> 
#> Maximization Problem.
#> GENERATION: 0 (initializing the population)
#> Lexical Fit..... 1.180299e-02  1.180299e-02  1.155612e-01  1.186261e-01  1.822343e-01  3.842491e-01  3.842491e-01  4.144313e-01  4.144313e-01  4.610718e-01  5.977650e-01  7.731052e-01  7.731052e-01  8.523060e-01  9.184104e-01  9.749549e-01  
#> #unique......... 100, #Total UniqueCount: 100
#> var 1:
#> best............ 3.729259e+01
#> mean............ 4.810920e+02
#> variance........ 7.636997e+04
#> 
#> GENERATION: 1
#> Lexical Fit..... 1.180299e-02  1.180299e-02  1.155612e-01  1.186261e-01  1.822343e-01  3.842491e-01  3.842491e-01  4.144313e-01  4.144313e-01  4.610718e-01  5.977650e-01  7.731052e-01  7.731052e-01  8.523060e-01  9.184104e-01  9.749549e-01  
#> #unique......... 61, #Total UniqueCount: 161
#> var 1:
#> best............ 3.729259e+01
#> mean............ 2.015230e+02
#> variance........ 6.039634e+04
#> 
#> GENERATION: 2
#> Lexical Fit..... 1.180299e-02  1.180299e-02  1.155612e-01  1.186261e-01  1.822343e-01  3.842491e-01  3.842491e-01  4.144313e-01  4.144313e-01  4.610718e-01  5.977650e-01  7.731052e-01  7.731052e-01  8.523060e-01  9.184104e-01  9.749549e-01  
#> #unique......... 56, #Total UniqueCount: 217
#> var 1:
#> best............ 3.729259e+01
#> mean............ 9.873041e+01
#> variance........ 3.291501e+04
#> 
#> GENERATION: 3
#> Lexical Fit..... 1.180299e-02  1.180299e-02  1.155612e-01  1.186261e-01  1.822343e-01  3.842491e-01  3.842491e-01  4.144313e-01  4.144313e-01  4.610718e-01  5.977650e-01  7.731052e-01  7.731052e-01  8.523060e-01  9.184104e-01  9.749549e-01  
#> #unique......... 56, #Total UniqueCount: 273
#> var 1:
#> best............ 3.729259e+01
#> mean............ 1.042351e+02
#> variance........ 2.899583e+04
#> 
#> GENERATION: 4
#> Lexical Fit..... 1.180299e-02  1.180299e-02  1.155612e-01  1.186261e-01  1.822343e-01  3.842491e-01  3.842491e-01  4.144313e-01  4.144313e-01  4.610718e-01  5.977650e-01  7.731052e-01  7.731052e-01  8.523060e-01  9.184104e-01  9.749549e-01  
#> #unique......... 58, #Total UniqueCount: 331
#> var 1:
#> best............ 3.729259e+01
#> mean............ 1.050645e+02
#> variance........ 2.721152e+04
#> 
#> GENERATION: 5
#> Lexical Fit..... 1.541148e-02  2.397991e-02  2.418950e-02  2.418950e-02  1.729273e-01  1.956942e-01  3.942807e-01  3.942807e-01  6.059370e-01  6.059370e-01  6.074259e-01  6.137460e-01  8.414918e-01  8.538318e-01  9.621995e-01  9.621995e-01  
#> #unique......... 58, #Total UniqueCount: 389
#> var 1:
#> best............ 2.941808e-01
#> mean............ 8.418536e+01
#> variance........ 1.578216e+04
#> 
#> GENERATION: 6
#> Lexical Fit..... 4.145421e-02  4.145421e-02  4.499068e-02  4.499068e-02  1.608408e-01  1.806532e-01  2.764640e-01  4.729068e-01  4.729068e-01  6.940013e-01  6.940013e-01  7.762162e-01  8.635587e-01  8.667712e-01  8.667712e-01  9.507460e-01  
#> #unique......... 62, #Total UniqueCount: 451
#> var 1:
#> best............ 7.310932e-02
#> mean............ 9.403781e+01
#> variance........ 2.447312e+04
#> 
#> GENERATION: 7
#> Lexical Fit..... 4.177047e-02  4.177047e-02  4.419097e-02  6.248288e-02  1.198352e-01  2.880778e-01  3.513309e-01  4.150287e-01  4.150287e-01  8.316757e-01  8.316757e-01  8.667712e-01  8.667712e-01  8.836843e-01  8.920699e-01  8.920699e-01  
#> #unique......... 61, #Total UniqueCount: 512
#> var 1:
#> best............ 8.760968e-02
#> mean............ 1.016562e+02
#> variance........ 4.437002e+04
#> 
#> GENERATION: 8
#> Lexical Fit..... 4.177047e-02  4.177047e-02  4.559074e-02  6.427954e-02  1.215371e-01  2.846955e-01  3.519026e-01  4.150287e-01  4.150287e-01  8.316757e-01  8.316757e-01  8.667712e-01  8.667712e-01  8.794500e-01  8.948772e-01  8.948772e-01  
#> #unique......... 60, #Total UniqueCount: 572
#> var 1:
#> best............ 8.627748e-02
#> mean............ 7.473970e+01
#> variance........ 2.676016e+04
#> 
#> GENERATION: 9
#> Lexical Fit..... 4.177047e-02  4.177047e-02  4.559074e-02  6.427954e-02  1.215371e-01  2.846955e-01  3.519026e-01  4.150287e-01  4.150287e-01  8.316757e-01  8.316757e-01  8.667712e-01  8.667712e-01  8.794500e-01  8.948772e-01  8.948772e-01  
#> #unique......... 55, #Total UniqueCount: 627
#> var 1:
#> best............ 8.627748e-02
#> mean............ 4.192386e+01
#> variance........ 1.737331e+04
#> 
#> GENERATION: 10
#> Lexical Fit..... 4.177047e-02  4.177047e-02  4.559074e-02  6.427954e-02  1.215371e-01  2.846955e-01  3.519026e-01  4.150287e-01  4.150287e-01  8.316757e-01  8.316757e-01  8.667712e-01  8.667712e-01  8.794500e-01  8.948772e-01  8.948772e-01  
#> #unique......... 51, #Total UniqueCount: 678
#> var 1:
#> best............ 8.627748e-02
#> mean............ 6.242257e+01
#> variance........ 2.722981e+04
#> 
#> GENERATION: 11
#> Lexical Fit..... 4.177047e-02  4.177047e-02  4.559074e-02  6.427954e-02  1.215371e-01  2.846955e-01  3.519026e-01  4.150287e-01  4.150287e-01  8.316757e-01  8.316757e-01  8.667712e-01  8.667712e-01  8.794500e-01  8.948772e-01  8.948772e-01  
#> #unique......... 49, #Total UniqueCount: 727
#> var 1:
#> best............ 8.627748e-02
#> mean............ 5.280735e+01
#> variance........ 1.784927e+04
#> 
#> GENERATION: 12
#> Lexical Fit..... 4.177047e-02  4.177047e-02  4.559074e-02  6.427954e-02  1.215371e-01  2.846955e-01  3.519026e-01  4.150287e-01  4.150287e-01  8.316757e-01  8.316757e-01  8.667712e-01  8.667712e-01  8.794500e-01  8.948772e-01  8.948772e-01  
#> #unique......... 50, #Total UniqueCount: 777
#> var 1:
#> best............ 8.627748e-02
#> mean............ 5.682777e+01
#> variance........ 2.131922e+04
#> 
#> GENERATION: 13
#> Lexical Fit..... 4.177047e-02  4.177047e-02  4.559074e-02  6.427954e-02  1.215371e-01  2.846955e-01  3.519026e-01  4.150287e-01  4.150287e-01  8.316757e-01  8.316757e-01  8.667712e-01  8.667712e-01  8.794500e-01  8.948772e-01  8.948772e-01  
#> #unique......... 53, #Total UniqueCount: 830
#> var 1:
#> best............ 8.627748e-02
#> mean............ 3.877940e+01
#> variance........ 1.599211e+04
#> 
#> 'wait.generations' limit reached.
#> No significant improvement in 4 generations.
#> 
#> Solution Lexical Fitness Value:
#> 4.177047e-02  4.177047e-02  4.559074e-02  6.427954e-02  1.215371e-01  2.846955e-01  3.519026e-01  4.150287e-01  4.150287e-01  8.316757e-01  8.316757e-01  8.667712e-01  8.667712e-01  8.794500e-01  8.948772e-01  8.948772e-01  
#> 
#> Parameters at the Solution:
#> 
#>  X[ 1] : 8.627748e-02
#> 
#> Solution Found Generation 8
#> Number of Generations Run 13
#> 
#> Sat Aug 31 15:02:38 2024
#> Total run time : 0 hours 0 minutes and 5 seconds

PSM <-  Match(Y=re78,
             Tr = treat,
             X=glm_ps$fitted.values,
             estimand = "ATT",
             Weight.matrix = gen_match,
             replace = TRUE,)
summary(PSM)
#> 
#> Estimate...  2439.3 
#> AI SE......  813.4 
#> T-stat.....  2.9989 
#> p.val......  0.0027099 
#> 
#> Original number of observations..............  445 
#> Original number of treated obs...............  185 
#> Matched number of observations...............  185 
#> Matched number of observations  (unweighted).  489

check_balance2 <- MatchBalance(
    formul = treat ~ age + educ + black + hisp + married + nodegr + re74  + re75,
    match.out = PSM,
    nboots = 1000,data = lalonde
)
#> 
#> ***** (V1) age *****
#>                        Before Matching        After Matching
#> mean treatment........     25.816             25.816 
#> mean control..........     25.054             25.217 
#> std mean diff.........     10.655             8.3769 
#> 
#> mean raw eQQ diff.....    0.94054            0.46217 
#> med  raw eQQ diff.....          1                  0 
#> max  raw eQQ diff.....          7                  9 
#> 
#> mean eCDF diff........   0.025364           0.012952 
#> med  eCDF diff........   0.022193           0.010225 
#> max  eCDF diff........   0.065177            0.03681 
#> 
#> var ratio (Tr/Co).....     1.0278             1.2224 
#> T-test p-value........    0.26594             0.3519 
#> KS Bootstrap p-value..      0.503              0.734 
#> KS Naive p-value......     0.7481            0.89488 
#> KS Statistic..........   0.065177            0.03681 
#> 
#> 
#> ***** (V2) educ *****
#>                        Before Matching        After Matching
#> mean treatment........     10.346             10.346 
#> mean control..........     10.088             10.188 
#> std mean diff.........     12.806             7.8605 
#> 
#> mean raw eQQ diff.....    0.40541            0.17587 
#> med  raw eQQ diff.....          0                  0 
#> max  raw eQQ diff.....          2                  2 
#> 
#> mean eCDF diff........   0.028698           0.012562 
#> med  eCDF diff........   0.012682           0.010225 
#> max  eCDF diff........    0.12651            0.03681 
#> 
#> var ratio (Tr/Co).....     1.5513             1.2791 
#> T-test p-value........    0.15017             0.2847 
#> KS Bootstrap p-value..      0.011              0.456 
#> KS Naive p-value......   0.062873            0.89488 
#> KS Statistic..........    0.12651            0.03681 
#> 
#> 
#> ***** (V3) black *****
#>                        Before Matching        After Matching
#> mean treatment........    0.84324            0.84324 
#> mean control..........    0.82692              0.868 
#> std mean diff.........     4.4767            -6.7917 
#> 
#> mean raw eQQ diff.....   0.016216           0.034765 
#> med  raw eQQ diff.....          0                  0 
#> max  raw eQQ diff.....          1                  1 
#> 
#> mean eCDF diff........  0.0081601           0.017382 
#> med  eCDF diff........  0.0081601           0.017382 
#> max  eCDF diff........    0.01632           0.034765 
#> 
#> var ratio (Tr/Co).....    0.92503             1.1537 
#> T-test p-value........    0.64736            0.41503 
#> 
#> 
#> ***** (V4) hisp *****
#>                        Before Matching        After Matching
#> mean treatment........   0.059459           0.059459 
#> mean control..........    0.10769           0.057132 
#> std mean diff.........    -20.341            0.98148 
#> 
#> mean raw eQQ diff.....   0.048649            0.00818 
#> med  raw eQQ diff.....          0                  0 
#> max  raw eQQ diff.....          1                  1 
#> 
#> mean eCDF diff........   0.024116            0.00409 
#> med  eCDF diff........   0.024116            0.00409 
#> max  eCDF diff........   0.048233            0.00818 
#> 
#> var ratio (Tr/Co).....    0.58288             1.0382 
#> T-test p-value........   0.064043            0.86677 
#> 
#> 
#> ***** (V5) married *****
#>                        Before Matching        After Matching
#> mean treatment........    0.18919            0.18919 
#> mean control..........    0.15385            0.18101 
#> std mean diff.........     8.9995             2.0837 
#> 
#> mean raw eQQ diff.....   0.037838           0.018405 
#> med  raw eQQ diff.....          0                  0 
#> max  raw eQQ diff.....          1                  1 
#> 
#> mean eCDF diff........   0.017672          0.0092025 
#> med  eCDF diff........   0.017672          0.0092025 
#> max  eCDF diff........   0.035343           0.018405 
#> 
#> var ratio (Tr/Co).....     1.1802             1.0348 
#> T-test p-value........    0.33425            0.83168 
#> 
#> 
#> ***** (V6) nodegr *****
#>                        Before Matching        After Matching
#> mean treatment........    0.70811            0.70811 
#> mean control..........    0.83462            0.75333 
#> std mean diff.........    -27.751            -9.9207 
#> 
#> mean raw eQQ diff.....    0.12432           0.034765 
#> med  raw eQQ diff.....          0                  0 
#> max  raw eQQ diff.....          1                  1 
#> 
#> mean eCDF diff........   0.063254           0.017382 
#> med  eCDF diff........   0.063254           0.017382 
#> max  eCDF diff........    0.12651           0.034765 
#> 
#> var ratio (Tr/Co).....     1.4998             1.1123 
#> T-test p-value........  0.0020368            0.04177 
#> 
#> 
#> ***** (V7) re74 *****
#>                        Before Matching        After Matching
#> mean treatment........     2095.6             2095.6 
#> mean control..........       2107             2018.1 
#> std mean diff.........   -0.23437             1.5857 
#> 
#> mean raw eQQ diff.....     487.98             648.91 
#> med  raw eQQ diff.....          0                  0 
#> max  raw eQQ diff.....       8413              10305 
#> 
#> mean eCDF diff........   0.019223           0.037077 
#> med  eCDF diff........     0.0158           0.033742 
#> max  eCDF diff........   0.047089           0.087935 
#> 
#> var ratio (Tr/Co).....     0.7381            0.86668 
#> T-test p-value........    0.98186            0.87945 
#> KS Bootstrap p-value..      0.555              0.002 
#> KS Naive p-value......    0.97023           0.045591 
#> KS Statistic..........   0.047089           0.087935 
#> 
#> 
#> ***** (V8) re75 *****
#>                        Before Matching        After Matching
#> mean treatment........     1532.1             1532.1 
#> mean control..........     1266.9             2079.5 
#> std mean diff.........     8.2363            -17.005 
#> 
#> mean raw eQQ diff.....     367.61             532.46 
#> med  raw eQQ diff.....          0                  0 
#> max  raw eQQ diff.....     2110.2             8092.9 
#> 
#> mean eCDF diff........   0.050834           0.040137 
#> med  eCDF diff........   0.061954             0.0409 
#> max  eCDF diff........    0.10748           0.083845 
#> 
#> var ratio (Tr/Co).....     1.0763            0.64518 
#> T-test p-value........    0.38527            0.12154 
#> KS Bootstrap p-value..      0.038               0.02 
#> KS Naive p-value......    0.16449            0.06428 
#> KS Statistic..........    0.10748           0.083845 
#> 
#> 
#> Before Matching Minimum p.value: 0.0020368 
#> Variable Name(s): nodegr  Number(s): 6 
#> 
#> After Matching Minimum p.value: 0.002 
#> Variable Name(s): re74  Number(s): 7

# age 变平衡了
qqplot(lalonde$age[PSM$index.control],lalonde$age[PSM$index.treated])
abline(a=0,b=1)

Code


# re74 也变平衡了
qqplot(lalonde$re74[PSM$index.control],lalonde$re74[PSM$index.treated])
abline(a=0,b=1)

24.4.3 敏感性分析

Code

library(rbounds)
psens(x =lalonde[PSM$index.treated,"re78"],
      y =lalonde[PSM$index.control,"re78"] ,
      Gamma = 2,
      GammaInc = 0.1)
#> 
#>  Rosenbaum Sensitivity Test for Wilcoxon Signed Rank P-Value 
#>  
#> Unconfounded estimate ....  1e-04 
#> 
#>  Gamma Lower bound Upper bound
#>    1.0       1e-04      0.0001
#>    1.1       0e+00      0.0015
#>    1.2       0e+00      0.0142
#>    1.3       0e+00      0.0693
#>    1.4       0e+00      0.2048
#>    1.5       0e+00      0.4152
#>    1.6       0e+00      0.6393
#>    1.7       0e+00      0.8140
#>    1.8       0e+00      0.9191
#>    1.9       0e+00      0.9699
#>    2.0       0e+00      0.9903
#> 
#>  Note: Gamma is Odds of Differential Assignment To
#>  Treatment Due to Unobserved Factors 
#> 
# psens

对PSM（Y=re78）使用psens()进行Wilcoxon 符合秩检验，当τ=1.3，p值就大于0.05了，说明处理发生比为1.3时，就可以改变原先对于处理效应的结论，也就是说，这个隐藏性偏差不必太大就可以改变原来的结论，因此分析结果对隐藏性排除的影响非常敏感，结论不可靠。

对 PSM（Y=re78）使用Hodges-Lehmann点估计检验法 hlsens() ，当τ=1.5，其95%置信区间包含零，说明此时处理效应是无效的。说明处理发生比为1.5时，隐藏性偏差就可以改变原来的结论，因此匹配后的结论不可靠。

Code

x = lalonde[PSM$index.treated, "re78"]
y = lalonde[PSM$index.control, "re78"]
hlsens(x, y,Gamma = 2,GammaInc = 0.1)
#> 
#>  Rosenbaum Sensitivity Test for Hodges-Lehmann Point Estimate 
#>  
#> Unconfounded estimate ....  1527.95 
#> 
#>  Gamma Lower bound Upper bound
#>    1.0 1527.900000      1527.9
#>    1.1  917.250000      1580.2
#>    1.2  608.050000      1918.7
#>    1.3  338.450000      2141.0
#>    1.4  114.850000      2407.3
#>    1.5   -0.050046      2631.4
#>    1.6 -154.050000      2850.3
#>    1.7 -378.150000      3072.7
#>    1.8 -545.350000      3258.1
#>    1.9 -706.350000      3474.2
#>    2.0 -867.650000      3678.9
#> 
#>  Note: Gamma is Odds of Differential Assignment To
#>  Treatment Due to Unobserved Factors 
#>

共同支持域的查验

Code

sum(glm_ps$fitted.values[lalonde$treat==1]> 
        max(glm_ps$fitted.values[lalonde$treat==0]))
#> [1] 4

sum(glm_ps$fitted.values[lalonde$treat==1]< 
        min(glm_ps$fitted.values[lalonde$treat==0]))
#> [1] 0

丢弃的实验组样本共有4个。185-181

Code

attach(lalonde)
summary(PSM)
#> 
#> Estimate...  2439.3 
#> AI SE......  813.4 
#> T-stat.....  2.9989 
#> p.val......  0.0027099 
#> 
#> Original number of observations..............  445 
#> Original number of treated obs...............  185 
#> Matched number of observations...............  185 
#> Matched number of observations  (unweighted).  489
PSM_CS <-  Match(Y=re78,
             Tr = treat,
             X=glm_ps$fitted.values,
             estimand = "ATT",
             Weight.matrix = gen_match,
             replace = TRUE,
             CommonSupport = TRUE)
summary(PSM_CS)
#> 
#> Estimate...  2330 
#> AI SE......  821.6 
#> T-stat.....  2.836 
#> p.val......  0.0045684 
#> 
#> Original number of observations..............  430 
#> Original number of treated obs...............  181 
#> Matched number of observations...............  181 
#> Matched number of observations  (unweighted).  468
detach(lalonde)

有查验共同支持域的ATT（2330），与无查验共同支持域（2439.3）存在差异，因此必须重新改进倾向值分析。

24.4.4 MatchIt

24.4.4.1 匹配数据

Code

library(MatchIt)
NM <- MatchIt::matchit(
    formula = treat ~ age + educ + black + hisp + married + nodegr + re74  + re75,
    data = lalonde,
    method = "nearest", # 最近邻匹配
    ratio = 1, # 1:1
    replace = FALSE
)
summary(NM)
#> 
#> Call:
#> MatchIt::matchit(formula = treat ~ age + educ + black + hisp + 
#>     married + nodegr + re74 + re75, data = lalonde, method = "nearest", 
#>     replace = FALSE, ratio = 1)
#> 
#> Summary of Balance for All Data:
#>          Means Treated Means Control Std. Mean Diff. Var. Ratio eCDF Mean
#> distance        0.4377        0.4001          0.3935     1.0471    0.1117
#> age            25.8162       25.0538          0.1066     1.0278    0.0254
#> educ           10.3459       10.0885          0.1281     1.5513    0.0287
#> black           0.8432        0.8269          0.0449          .    0.0163
#> hisp            0.0595        0.1077         -0.2040          .    0.0482
#> married         0.1892        0.1538          0.0902          .    0.0353
#> nodegr          0.7081        0.8346         -0.2783          .    0.1265
#> re74         2095.5740     2107.0268         -0.0023     0.7381    0.0192
#> re75         1532.0556     1266.9092          0.0824     1.0763    0.0508
#>          eCDF Max
#> distance   0.2140
#> age        0.0652
#> educ       0.1265
#> black      0.0163
#> hisp       0.0482
#> married    0.0353
#> nodegr     0.1265
#> re74       0.0471
#> re75       0.1075
#> 
#> Summary of Balance for Matched Data:
#>          Means Treated Means Control Std. Mean Diff. Var. Ratio eCDF Mean
#> distance        0.4377        0.4267          0.1152     1.0987    0.0298
#> age            25.8162       25.8757         -0.0083     0.9551    0.0121
#> educ           10.3459       10.1459          0.0995     1.3748    0.0220
#> black           0.8432        0.8486         -0.0149          .    0.0054
#> hisp            0.0595        0.0649         -0.0229          .    0.0054
#> married         0.1892        0.2000         -0.0276          .    0.0108
#> nodegr          0.7081        0.7730         -0.1427          .    0.0649
#> re74         2095.5740     1659.5326          0.0892     1.1752    0.0351
#> re75         1532.0556     1359.6980          0.0535     0.9270    0.0502
#>          eCDF Max Std. Pair Dist.
#> distance   0.1027          0.1302
#> age        0.0432          0.8711
#> educ       0.0757          0.6533
#> black      0.0054          0.4906
#> hisp       0.0054          0.2057
#> married    0.0108          0.7177
#> nodegr     0.0649          0.2378
#> re74       0.0865          0.6297
#> re75       0.1081          0.7019
#> 
#> Sample Sizes:
#>           Control Treated
#> All           260     185
#> Matched       185     185
#> Unmatched      75       0
#> Discarded       0       0

24.4.4.2 评估质量

Code

# 散点图展示了匹配后实验组和对照组样本倾向值的分布，凸显了分布平衡与不平衡，分布缺乏重合
plot(NM,type = "jitter")

#> To identify the units, use first mouse button; to stop, use second.

Code

# QQ图 展示了 匹配前（All）匹配后（Matched）的平衡情况
plot(NM,type = "QQ")

Code


# 直方图展示了匹配前后倾向值的分布
plot(NM,type = "hist")

Code

# 标准化平衡统计值，Std. Mean Diff.
summary(NM,standardize = TRUE)
#> 
#> Call:
#> MatchIt::matchit(formula = treat ~ age + educ + black + hisp + 
#>     married + nodegr + re74 + re75, data = lalonde, method = "nearest", 
#>     replace = FALSE, ratio = 1)
#> 
#> Summary of Balance for All Data:
#>          Means Treated Means Control Std. Mean Diff. Var. Ratio eCDF Mean
#> distance        0.4377        0.4001          0.3935     1.0471    0.1117
#> age            25.8162       25.0538          0.1066     1.0278    0.0254
#> educ           10.3459       10.0885          0.1281     1.5513    0.0287
#> black           0.8432        0.8269          0.0449          .    0.0163
#> hisp            0.0595        0.1077         -0.2040          .    0.0482
#> married         0.1892        0.1538          0.0902          .    0.0353
#> nodegr          0.7081        0.8346         -0.2783          .    0.1265
#> re74         2095.5740     2107.0268         -0.0023     0.7381    0.0192
#> re75         1532.0556     1266.9092          0.0824     1.0763    0.0508
#>          eCDF Max
#> distance   0.2140
#> age        0.0652
#> educ       0.1265
#> black      0.0163
#> hisp       0.0482
#> married    0.0353
#> nodegr     0.1265
#> re74       0.0471
#> re75       0.1075
#> 
#> Summary of Balance for Matched Data:
#>          Means Treated Means Control Std. Mean Diff. Var. Ratio eCDF Mean
#> distance        0.4377        0.4267          0.1152     1.0987    0.0298
#> age            25.8162       25.8757         -0.0083     0.9551    0.0121
#> educ           10.3459       10.1459          0.0995     1.3748    0.0220
#> black           0.8432        0.8486         -0.0149          .    0.0054
#> hisp            0.0595        0.0649         -0.0229          .    0.0054
#> married         0.1892        0.2000         -0.0276          .    0.0108
#> nodegr          0.7081        0.7730         -0.1427          .    0.0649
#> re74         2095.5740     1659.5326          0.0892     1.1752    0.0351
#> re75         1532.0556     1359.6980          0.0535     0.9270    0.0502
#>          eCDF Max Std. Pair Dist.
#> distance   0.1027          0.1302
#> age        0.0432          0.8711
#> educ       0.0757          0.6533
#> black      0.0054          0.4906
#> hisp       0.0054          0.2057
#> married    0.0108          0.7177
#> nodegr     0.0649          0.2378
#> re74       0.0865          0.6297
#> re75       0.1081          0.7019
#> 
#> Sample Sizes:
#>           Control Treated
#> All           260     185
#> Matched       185     185
#> Unmatched      75       0
#> Discarded       0       0

继续改进，调整模型和协变量

24.4.4.3 计算平均处理效应

为了简化步骤，以当前的结果进行匹配后分析。

Code


# 提取匹配后的样本
mData <- match.data(NM,group = "all")
mData_trt <- match.data(NM,group = "treat")
mData_ctrl <- match.data(NM,group = "control")
 
# 包从CRAN剔除了

Code

library(rbounds)
psens(x =mData_trt$re78,
      y =mData_ctrl$re78 ,Gamma = 2,GammaInc = 0.1)
#> 
#>  Rosenbaum Sensitivity Test for Wilcoxon Signed Rank P-Value 
#>  
#> Unconfounded estimate ....  0.0199 
#> 
#>  Gamma Lower bound Upper bound
#>    1.0      0.0199      0.0199
#>    1.1      0.0048      0.0639
#>    1.2      0.0010      0.1491
#>    1.3      0.0002      0.2749
#>    1.4      0.0000      0.4248
#>    1.5      0.0000      0.5755
#>    1.6      0.0000      0.7076
#>    1.7      0.0000      0.8108
#>    1.8      0.0000      0.8844
#>    1.9      0.0000      0.9328
#>    2.0      0.0000      0.9627
#> 
#>  Note: Gamma is Odds of Differential Assignment To
#>  Treatment Due to Unobserved Factors 
#> 

hlsens(x =mData_trt$re78,
      y =mData_ctrl$re78 ,Gamma = 2,GammaInc = 0.1)
#> 
#>  Rosenbaum Sensitivity Test for Hodges-Lehmann Point Estimate 
#>  
#> Unconfounded estimate ....  1435.08 
#> 
#>  Gamma Lower bound Upper bound
#>    1.0  1.4351e+03      1435.1
#>    1.1  8.0828e+02      1515.9
#>    1.2  4.6298e+02      1809.0
#>    1.3  1.8238e+02      2178.1
#>    1.4 -2.0266e-02      2439.4
#>    1.5 -2.4892e+02      2678.5
#>    1.6 -4.5162e+02      2937.5
#>    1.7 -6.7212e+02      3184.0
#>    1.8 -8.9732e+02      3462.6
#>    1.9 -1.1328e+03      3651.1
#>    2.0 -1.3022e+03      3848.9
#> 
#>  Note: Gamma is Odds of Differential Assignment To
#>  Treatment Due to Unobserved Factors 
#>

24.1 数据来源

24.2 估计倾向得分

24.3 匹配方法

24.3.1 Matching

24.3.2 一对一匹配ATT

24.3.3 一对一匹配ATE

24.3.4 一对多匹配 （ATT）

24.3.5 MachIt

24.4 示例

24.4.1 估计倾向值分数

24.4.2 检验平衡

24.4.3 敏感性分析

24.4.4 MatchIt

24.4.4.1 匹配数据

24.4.4.2 评估质量

24.4.4.3 计算平均处理效应

24.3.1 `Matching`

24.3.4 一对多匹配（ATT）

24.3.5 `MachIt`