1  PSM

1.1 数据

Show the code
####################       数据模拟     ################################

# 模拟了吸烟与心血管疾病 (CVD) 之间的关系,其中年龄和性别是潜在的混杂协变量

# CVD 是结果变量(当患者没有 CVD 时,CVD = 0;否则,CVD =1)。
# 吸烟是一个二进制变量,表示患者是否吸烟。性别(0 = 女性,1 = 男性)和年龄是两个混杂的协变量


x.Gender <-rep(0:1,c(400,600))# 400 females and 600 males

set.seed(2020)
x.Age <-round(abs(rnorm(1000,mean=45,sd=15)))


# 模拟数据的真实PS值
z<-(x.Age-45)/15-(x.Age-45)^2/100+2*x.Gender
tps <- exp(z)/(1+exp(z)) # The true PS

set.seed(123)
Smoke <- as.numeric(runif(1000)< tps)

z.y<-x.Gender +0.3*x.Age +5*Smoke-20
y<- exp(z.y)/(1+exp(z.y))
set.seed(123)
CVD <- as.numeric(runif(1000)< y)

set.seed(124)
x.Age.mask <-rbinom(1000, 1,0.2)# Missing completely at random
x.Age <- ifelse(x.Age.mask==1,NA, x.Age)


data <-data.frame(x.Age,x.Gender, Smoke, CVD)
head(data)
#>   x.Age x.Gender Smoke CVD
#> 1    51        0     1   1
#> 2    50        0     0   0
#> 3    29        0     0   0
#> 4    28        0     0   0
#> 5     3        0     0   0
#> 6    56        0     1   1

library(tableone)
tbl1 <- CreateTableOne(vars = c("x.Age","x.Gender","CVD"),
                       data = data,
                       factorVars = c("x.Gender","CVD"),
                       strata = "Smoke",smd = T)

tbl1 <- print(tbl1, showAllLevels = T, smd = T)
#>                    Stratified by Smoke
#>                     level 0             1             p      test SMD   
#>   n                         546           454                           
#>   x.Age (mean (SD))       43.30 (19.09) 46.73 (8.32)   0.001       0.233
#>   x.Gender (%)      0       297 (54.4)    103 (22.7)  <0.001       0.689
#>                     1       249 (45.6)    351 (77.3)                    
#>   CVD (%)           0       470 (86.1)    204 (44.9)  <0.001       0.960
#>                     1        76 (13.9)    250 (55.1)
tbl1
#>                    Stratified by Smoke
#>                     level 0               1               p        test
#>   n                 ""    "  546"         "  454"         ""       ""  
#>   x.Age (mean (SD)) ""    "43.30 (19.09)" "46.73 (8.32)"  " 0.001" ""  
#>   x.Gender (%)      "0"   "  297 (54.4) " "  103 (22.7) " "<0.001" ""  
#>                     "1"   "  249 (45.6) " "  351 (77.3) " ""       ""  
#>   CVD (%)           "0"   "  470 (86.1) " "  204 (44.9) " "<0.001" ""  
#>                     "1"   "   76 (13.9) " "  250 (55.1) " ""       ""  
#>                    Stratified by Smoke
#>                     SMD     
#>   n                 ""      
#>   x.Age (mean (SD)) " 0.233"
#>   x.Gender (%)      " 0.689"
#>                     ""      
#>   CVD (%)           " 0.960"
#>                     ""
# 吸烟组和非吸烟组之间的年龄和性别协变量存在显著差异



######################################## 缺失值处理和数据整洁          ################

map_df(data, ~sum(is.na(.x)))
#> # A tibble: 1 × 4
#>   x.Age x.Gender Smoke   CVD
#>   <int>    <int> <int> <int>
#> 1   185        0     0     0

# Gmd 是指基尼均值差
Hmisc::describe(data)
#> data 
#> 
#>  4  Variables      1000  Observations
#> --------------------------------------------------------------------------------
#> x.Age 
#>        n  missing distinct     Info     Mean      Gmd      .05      .10 
#>      815      185       83        1    44.83    17.22       20       26 
#>      .25      .50      .75      .90      .95 
#>       35       44       55       64       70 
#> 
#> lowest :   1   2   3   5   6, highest:  84  85  86  93 101
#> --------------------------------------------------------------------------------
#> x.Gender 
#>        n  missing distinct     Info      Sum     Mean      Gmd 
#>     1000        0        2     0.72      600      0.6   0.4805 
#> 
#> --------------------------------------------------------------------------------
#> Smoke 
#>        n  missing distinct     Info      Sum     Mean      Gmd 
#>     1000        0        2    0.744      454    0.454   0.4963 
#> 
#> --------------------------------------------------------------------------------
#> CVD 
#>        n  missing distinct     Info      Sum     Mean      Gmd 
#>     1000        0        2    0.659      326    0.326   0.4399 
#> 
#> --------------------------------------------------------------------------------

library(VIM)

set.seed(123)
data_knnimp <- kNN(data = data, k = 10, weights = "auto", imp_var = F)
map_df(data_knnimp, ~sum(is.na(.x)))
#> # A tibble: 1 × 4
#>   x.Age x.Gender Smoke   CVD
#>   <int>    <int> <int> <int>
#> 1     0        0     0     0

data<- data_knnimp %>% 
    mutate(across(-x.Age,as.factor))

1.2 PS估计模型 distance

1.2.1 逻辑回归

Show the code
############################      ########################
library(MatchIt)

glm(formula = Smoke ~ x.Age + x.Gender, family = binomial("logit") ,data = data)
#> 
#> Call:  glm(formula = Smoke ~ x.Age + x.Gender, family = binomial("logit"), 
#>     data = data)
#> 
#> Coefficients:
#> (Intercept)        x.Age    x.Gender1  
#>    -1.98523      0.02005      1.44400  
#> 
#> Degrees of Freedom: 999 Total (i.e. Null);  997 Residual
#> Null Deviance:       1378 
#> Residual Deviance: 1252  AIC: 1258


m.out <- matchit(formula = Smoke ~ x.Age + x.Gender, data = data)
m.out$model
#> 
#> Call:  glm(formula = Smoke ~ x.Age + x.Gender, family = structure(list(
#>     family = "quasibinomial", link = "logit", linkfun = function (mu) 
#>     .Call(C_logit_link, mu), linkinv = function (eta) 
#>     .Call(C_logit_linkinv, eta), variance = function (mu) 
#>     mu * (1 - mu), dev.resids = function (y, mu, wt) 
#>     .Call(C_binomial_dev_resids, y, mu, wt), aic = function (y, 
#>         n, mu, wt, dev) 
#>     NA, mu.eta = function (eta) 
#>     .Call(C_logit_mu_eta, eta), initialize = {
#>         if (NCOL(y) == 1) {
#>             if (is.factor(y)) 
#>                 y <- y != levels(y)[1L]
#>             n <- rep.int(1, nobs)
#>             y[weights == 0] <- 0
#>             if (any(y < 0 | y > 1)) 
#>                 stop("y values must be 0 <= y <= 1")
#>             mustart <- (weights * y + 0.5)/(weights + 1)
#>             m <- weights * y
#>             if ("quasibinomial" == "binomial" && any(abs(m - 
#>                 round(m)) > 0.001)) 
#>                 warning(gettextf("non-integer #successes in a %s glm!", 
#>                   "quasibinomial"), domain = NA)
#>         }
#>         else if (NCOL(y) == 2) {
#>             if ("quasibinomial" == "binomial" && any(abs(y - 
#>                 round(y)) > 0.001)) 
#>                 warning(gettextf("non-integer counts in a %s glm!", 
#>                   "quasibinomial"), domain = NA)
#>             n <- (y1 <- y[, 1L]) + y[, 2L]
#>             y <- y1/n
#>             if (any(n0 <- n == 0)) 
#>                 y[n0] <- 0
#>             weights <- weights * n
#>             mustart <- (n * y + 0.5)/(n + 1)
#>         }
#>         else stop(gettextf("for the '%s' family, y must be a vector of 0 and 1's\nor a 2 column matrix where col 1 is no. successes and col 2 is no. failures", 
#>             "quasibinomial"), domain = NA)
#>     }, validmu = function (mu) 
#>     all(is.finite(mu)) && all(0 < mu & mu < 1), valideta = function (eta) 
#>     TRUE, dispersion = NA_real_), class = "family"), data = structure(list(
#>     x.Age = c(51, 50, 29, 28, 3, 56, 59, 42, 78, 47, 32, 37, 
#>     63, 37, 43, 72, 71, 1, 37, 46, 78, 61, 50, 44, 58, 48, 37, 
#>     59, 45, 47, 37, 34, 61, 78, 51, 45, 41, 51, 37, 52, 59, 37, 
#>     40, 34, 27, 49, 45, 45, 55, 52, 42, 51, 35, 52, 47, 47, 42, 
#>     25, 36, 54, 74, 49, 21, 93, 59, 51, 59, 42, 46, 48, 57, 78, 
#>     37, 51, 31, 34, 18, 37, 46, 49, 54, 45, 78, 48, 30, 78, 17, 
#>     61, 73, 53, 22, 16, 52, 64, 42, 33, 50, 55, 37, 35, 19, 30, 
#>     36, 51, 56, 37, 40, 37, 49, 49, 40, 67, 60, 37, 49, 33, 27, 
#>     26, 43, 78, 40, 51, 78, 37, 26, 48, 56, 78, 78, 31, 47, 36, 
#>     44, 43, 29, 44, 71, 37, 85, 26, 45, 44, 78, 15, 52, 41, 28, 
#>     48, 46, 37, 40, 37, 45, 23, 51, 45, 28, 46, 35, 55, 54, 37, 
#>     37, 45, 16, 64, 50, 32, 17, 58, 64, 49, 37, 37, 37, 37, 37, 
#>     27, 61, 45, 41, 58, 29, 19, 81, 42, 59, 34, 26, 58, 63, 27, 
#>     56, 58, 37, 54, 29, 37, 51, 55, 34, 37, 39, 37, 37, 26, 62, 
#>     61, 44, 37, 53, 41, 78, 34, 52, 37, 30, 37, 28, 37, 53, 24, 
#>     43, 37, 33, 51, 62, 52, 37, 45, 53, 52, 20, 70, 6, 57, 52, 
#>     55, 51, 70, 46, 45, 78, 69, 47, 41, 15, 67, 37, 33, 45, 36, 
#>     68, 28, 26, 37, 64, 47, 51, 57, 37, 20, 42, 53, 43, 37, 22, 
#>     44, 60, 40, 37, 27, 36, 37, 9, 79, 37, 48, 32, 25, 47, 3, 
#>     56, 33, 61, 39, 51, 60, 58, 13, 2, 36, 64, 42, 50, 35, 37, 
#>     78, 37, 36, 59, 42, 63, 68, 67, 80, 78, 58, 47, 37, 46, 45, 
#>     48, 51, 44, 38, 42, 27, 37, 53, 47, 37, 41, 57, 55, 46, 15, 
#>     51, 59, 71, 40, 57, 38, 23, 37, 73, 46, 37, 39, 37, 25, 34, 
#>     30, 47, 56, 28, 48, 43, 47, 35, 32, 43, 53, 39, 40, 43, 35, 
#>     37, 70, 35, 51, 7, 27, 43, 55, 44, 84, 43, 51, 37, 60, 39, 
#>     48, 51, 45, 52, 78, 37, 30, 29, 73, 43, 35, 37, 28, 54, 21, 
#>     27, 63, 36, 55, 52, 45, 41, 45, 59, 72, 45, 45, 37, 33.5, 
#>     50, 61, 41, 41, 41, 48, 11, 34, 42, 41, 41, 40, 50, 42, 43, 
#>     61, 35, 25, 41, 59, 17, 46, 50, 51, 41, 27, 41, 41, 37, 53, 
#>     36, 57, 30, 60, 50, 42, 47, 32, 40, 35, 46, 33.5, 68, 54, 
#>     41, 36, 29, 67, 37, 36, 59, 40, 50, 50, 41, 46, 33, 50, 45, 
#>     65, 50, 50, 64, 41, 63, 53, 21, 41, 40, 43, 33.5, 41, 37, 
#>     50, 33, 43, 16, 68, 41, 31, 55, 42, 42, 42, 55, 66, 44, 33.5, 
#>     43, 31, 68, 38, 51, 22, 45, 41, 41, 45, 79, 63, 45, 63, 47, 
#>     68, 41, 39, 5, 21, 60, 16, 67, 46, 54, 35, 41, 43, 46, 45, 
#>     44, 47, 53, 38, 43, 32, 25, 23, 51, 57, 50, 69, 63, 56, 53, 
#>     40, 19, 44, 39, 41, 41, 36, 33, 50, 43, 78, 33.5, 39, 43, 
#>     59, 41, 55, 41, 41, 26, 54, 38, 70, 60, 42, 48, 24, 33.5, 
#>     36, 47, 50, 68, 41, 54, 82, 33, 41, 54, 51, 59, 25, 33.5, 
#>     82, 52, 26, 22, 35, 74, 41, 41, 53, 29, 51, 53, 33.5, 40, 
#>     14, 40, 63, 50, 48, 45, 38, 22, 59, 65, 49, 78, 50, 25, 50, 
#>     60, 37, 50, 42, 45, 52, 50, 29, 33.5, 32, 58, 52, 68, 43, 
#>     52, 69, 32, 47, 50, 28, 50, 46, 18, 50, 50, 41, 33.5, 20, 
#>     61, 41, 40, 13, 28, 49, 27, 53, 62, 33.5, 37, 51, 40, 64, 
#>     82, 57, 45, 22, 33.5, 33.5, 72, 42, 55, 30, 39, 21, 40, 34, 
#>     33.5, 50, 11, 50, 21, 52, 34, 49, 48, 44, 25, 52, 26, 43, 
#>     73, 61, 34, 35, 67, 18, 44, 42, 43, 25, 41, 47, 60, 50, 42, 
#>     32, 27, 26, 56, 54, 24, 50, 44, 20, 41, 47, 47, 39, 38, 56, 
#>     40, 36, 60, 59, 28, 79, 35, 29, 47, 101, 69, 50, 63, 54, 
#>     56, 78, 39, 70, 37, 49, 50, 31, 37, 41, 55, 69, 40, 44, 37, 
#>     33, 31, 67, 24, 41, 41, 68, 37, 50, 51, 67, 42, 41, 50, 44, 
#>     31, 81, 52, 33.5, 29, 32, 51, 33, 33.5, 39, 71, 57, 57, 63, 
#>     62, 65, 34, 55, 34, 2, 18, 57, 43, 63, 57, 59, 35, 41, 50, 
#>     28, 46, 37, 33.5, 40, 52, 11, 33, 50, 50, 56, 48, 39, 50, 
#>     33, 50, 73, 39, 26, 48, 54, 69, 51, 50, 45, 57, 56, 48, 27, 
#>     33.5, 51, 23, 33.5, 66, 39, 40, 23, 45, 26, 84, 54, 49, 64, 
#>     47, 47, 13, 66, 20, 40, 41, 39, 50, 41, 69, 48, 59, 35, 51, 
#>     33.5, 35, 47, 54, 30, 56, 34, 41, 50, 76, 33.5, 47, 41, 44, 
#>     58, 59, 44, 33.5, 31, 40, 43, 56, 50, 33.5, 54, 58, 52, 50, 
#>     34, 41, 51, 38, 69, 53, 41, 31, 40, 34, 33.5, 49, 34, 41, 
#>     22, 43, 54, 51, 42, 57, 48, 40, 65, 33, 27, 50, 69, 77, 31, 
#>     45, 60, 20, 59, 50, 50, 32, 33.5, 55, 55, 56, 26, 67, 46, 
#>     33.5, 40, 33, 30, 41, 57, 73, 20, 40, 26, 33.5, 49, 33.5, 
#>     34, 65, 50, 41, 37, 66, 41, 50, 50, 30, 33, 41, 49, 48, 37, 
#>     64, 32, 42, 48, 22, 33.5, 35, 34, 24, 41, 51, 86, 33.5, 41, 
#>     35, 55, 50, 57, 8, 41, 41, 61, 23, 43, 54, 27, 50, 58, 31, 
#>     46, 43, 52, 41, 51, 43, 41, 50, 58, 39, 41, 33.5, 35, 23, 
#>     42, 50, 19, 33, 32, 64, 25, 36, 63, 20, 24, 34, 82, 36, 41, 
#>     33.5, 51, 43, 70, 39, 57, 31), x.Gender = structure(c(1L, 
#>     1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 
#>     1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 
#>     1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 
#>     1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 
#>     1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 
#>     1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 
#>     1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 
#>     1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 
#>     1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 
#>     1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 
#>     1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 
#>     1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 
#>     1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 
#>     1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 
#>     1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 
#>     1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 
#>     1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 
#>     1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 
#>     1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 
#>     1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 
#>     1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 
#>     1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 
#>     1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 
#>     1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 
#>     1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 
#>     1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 
#>     1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 2L, 2L, 2L, 2L, 2L, 2L, 
#>     2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 
#>     2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 
#>     2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 
#>     2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 
#>     2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 
#>     2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 
#>     2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 
#>     2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 
#>     2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 
#>     2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 
#>     2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 
#>     2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 
#>     2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 
#>     2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 
#>     2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 
#>     2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 
#>     2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 
#>     2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 
#>     2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 
#>     2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 
#>     2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 
#>     2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 
#>     2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 
#>     2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 
#>     2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 
#>     2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 
#>     2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 
#>     2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 
#>     2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 
#>     2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 
#>     2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 
#>     2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 
#>     2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 
#>     2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 
#>     2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 
#>     2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 
#>     2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 
#>     2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 
#>     2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 
#>     2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L), levels = c("0", "1"), class = "factor"), 
#>     Smoke = structure(c(2L, 1L, 1L, 1L, 1L, 2L, 1L, 1L, 1L, 2L, 
#>     1L, 1L, 1L, 1L, 2L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 
#>     1L, 1L, 1L, 2L, 2L, 1L, 1L, 1L, 1L, 2L, 2L, 1L, 2L, 1L, 2L, 
#>     2L, 1L, 1L, 1L, 1L, 2L, 2L, 2L, 2L, 1L, 2L, 2L, 1L, 2L, 1L, 
#>     2L, 2L, 1L, 1L, 2L, 1L, 2L, 1L, 1L, 1L, 2L, 1L, 1L, 1L, 2L, 
#>     1L, 1L, 1L, 2L, 1L, 1L, 1L, 1L, 2L, 2L, 2L, 1L, 1L, 1L, 1L, 
#>     1L, 1L, 1L, 1L, 2L, 1L, 1L, 2L, 1L, 2L, 1L, 1L, 2L, 1L, 1L, 
#>     1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 2L, 2L, 1L, 1L, 2L, 1L, 1L, 
#>     1L, 1L, 1L, 1L, 1L, 1L, 2L, 1L, 1L, 1L, 1L, 2L, 1L, 1L, 1L, 
#>     1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 2L, 2L, 1L, 1L, 1L, 
#>     2L, 1L, 2L, 2L, 1L, 1L, 1L, 2L, 1L, 2L, 2L, 1L, 2L, 1L, 2L, 
#>     1L, 1L, 1L, 2L, 1L, 1L, 1L, 1L, 1L, 2L, 1L, 2L, 1L, 1L, 1L, 
#>     1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 
#>     1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 2L, 1L, 1L, 1L, 1L, 1L, 1L, 
#>     1L, 1L, 1L, 2L, 1L, 1L, 2L, 1L, 1L, 2L, 1L, 1L, 1L, 1L, 1L, 
#>     2L, 1L, 1L, 1L, 2L, 2L, 1L, 1L, 1L, 1L, 1L, 2L, 1L, 1L, 1L, 
#>     1L, 2L, 1L, 2L, 1L, 1L, 1L, 1L, 1L, 2L, 1L, 1L, 1L, 1L, 1L, 
#>     2L, 1L, 1L, 1L, 1L, 1L, 1L, 2L, 2L, 1L, 1L, 1L, 1L, 1L, 1L, 
#>     1L, 1L, 2L, 2L, 1L, 1L, 2L, 2L, 1L, 1L, 1L, 1L, 2L, 1L, 1L, 
#>     1L, 1L, 1L, 1L, 2L, 1L, 2L, 1L, 2L, 1L, 1L, 2L, 2L, 1L, 1L, 
#>     1L, 1L, 1L, 1L, 1L, 1L, 2L, 1L, 1L, 1L, 1L, 1L, 2L, 2L, 1L, 
#>     2L, 2L, 1L, 2L, 2L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 2L, 1L, 2L, 
#>     2L, 1L, 2L, 1L, 1L, 1L, 2L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 
#>     1L, 2L, 1L, 2L, 1L, 1L, 1L, 2L, 2L, 1L, 1L, 1L, 1L, 2L, 1L, 
#>     1L, 1L, 1L, 1L, 1L, 2L, 1L, 1L, 2L, 2L, 1L, 1L, 2L, 2L, 1L, 
#>     1L, 1L, 1L, 2L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 
#>     1L, 1L, 1L, 2L, 1L, 1L, 2L, 1L, 1L, 2L, 2L, 1L, 2L, 2L, 1L, 
#>     1L, 2L, 1L, 2L, 2L, 2L, 2L, 1L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 
#>     2L, 1L, 2L, 1L, 2L, 2L, 1L, 2L, 2L, 1L, 2L, 1L, 2L, 2L, 1L, 
#>     1L, 2L, 2L, 1L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 1L, 1L, 1L, 
#>     2L, 1L, 2L, 1L, 2L, 2L, 2L, 2L, 2L, 2L, 1L, 2L, 1L, 2L, 2L, 
#>     1L, 2L, 2L, 2L, 2L, 2L, 2L, 1L, 2L, 1L, 2L, 1L, 2L, 1L, 2L, 
#>     1L, 2L, 1L, 1L, 2L, 2L, 2L, 2L, 2L, 1L, 2L, 2L, 2L, 1L, 2L, 
#>     1L, 2L, 2L, 2L, 1L, 1L, 2L, 2L, 2L, 1L, 2L, 2L, 2L, 2L, 1L, 
#>     2L, 2L, 1L, 1L, 2L, 1L, 2L, 2L, 2L, 1L, 2L, 2L, 2L, 2L, 2L, 
#>     2L, 2L, 2L, 2L, 1L, 1L, 1L, 2L, 1L, 2L, 1L, 1L, 2L, 2L, 2L, 
#>     1L, 2L, 1L, 2L, 2L, 1L, 2L, 2L, 2L, 1L, 1L, 2L, 2L, 1L, 2L, 
#>     2L, 2L, 2L, 1L, 2L, 2L, 1L, 2L, 2L, 2L, 1L, 1L, 1L, 2L, 2L, 
#>     1L, 2L, 2L, 1L, 1L, 2L, 1L, 2L, 2L, 1L, 1L, 1L, 2L, 1L, 1L, 
#>     1L, 1L, 2L, 2L, 1L, 2L, 2L, 2L, 1L, 2L, 1L, 2L, 1L, 2L, 2L, 
#>     2L, 2L, 1L, 1L, 2L, 2L, 1L, 2L, 1L, 2L, 1L, 2L, 2L, 2L, 2L, 
#>     2L, 2L, 2L, 1L, 1L, 1L, 1L, 1L, 1L, 2L, 1L, 1L, 2L, 2L, 2L, 
#>     2L, 2L, 1L, 2L, 2L, 2L, 1L, 1L, 1L, 2L, 2L, 1L, 1L, 2L, 1L, 
#>     2L, 1L, 1L, 2L, 2L, 2L, 1L, 1L, 2L, 2L, 1L, 1L, 1L, 1L, 2L, 
#>     1L, 1L, 2L, 1L, 2L, 1L, 1L, 2L, 1L, 2L, 1L, 1L, 2L, 2L, 2L, 
#>     2L, 1L, 2L, 1L, 2L, 1L, 1L, 2L, 2L, 1L, 1L, 2L, 2L, 1L, 1L, 
#>     2L, 2L, 2L, 2L, 2L, 2L, 1L, 1L, 2L, 2L, 1L, 2L, 2L, 1L, 2L, 
#>     2L, 2L, 2L, 1L, 2L, 2L, 2L, 2L, 2L, 1L, 1L, 2L, 1L, 2L, 1L, 
#>     1L, 2L, 2L, 1L, 2L, 1L, 2L, 1L, 1L, 2L, 2L, 1L, 2L, 2L, 2L, 
#>     1L, 1L, 2L, 1L, 2L, 1L, 1L, 1L, 2L, 2L, 1L, 2L, 2L, 2L, 1L, 
#>     2L, 2L, 2L, 2L, 1L, 1L, 2L, 1L, 2L, 1L, 2L, 1L, 1L, 2L, 1L, 
#>     2L, 2L, 1L, 2L, 1L, 2L, 2L, 2L, 1L, 1L, 1L, 2L, 1L, 1L, 2L, 
#>     2L, 2L, 2L, 1L, 2L, 2L, 1L, 2L, 2L, 1L, 1L, 2L, 2L, 2L, 1L, 
#>     1L, 2L, 2L, 2L, 1L, 2L, 1L, 2L, 2L, 1L, 2L, 2L, 2L, 2L, 2L, 
#>     1L, 1L, 1L, 2L, 1L, 1L, 1L, 2L, 1L, 1L, 2L, 1L, 1L, 2L, 2L, 
#>     1L, 2L, 2L, 1L, 1L, 1L, 2L, 2L, 2L, 2L, 2L, 1L, 2L, 2L, 1L, 
#>     2L, 1L, 1L, 2L, 2L, 1L, 2L, 1L, 2L, 2L, 1L, 1L, 2L, 2L, 2L, 
#>     2L, 1L, 1L, 1L, 1L, 2L, 1L, 2L, 2L, 1L, 2L, 2L, 2L, 2L, 1L, 
#>     2L, 2L, 2L, 1L, 2L, 2L, 2L, 2L, 1L, 1L, 2L, 2L, 2L, 1L, 2L, 
#>     2L, 1L, 2L, 2L, 2L, 1L, 1L, 2L, 1L, 2L, 1L, 1L, 1L, 2L, 1L, 
#>     1L, 2L, 2L, 2L, 1L, 1L, 2L, 1L, 2L, 1L, 1L, 2L, 1L, 1L, 2L, 
#>     2L, 2L, 1L, 1L, 1L, 1L, 1L, 1L, 2L, 1L, 1L, 2L, 2L, 2L, 1L, 
#>     1L, 2L, 2L, 2L, 1L, 1L, 2L, 2L, 2L, 1L, 2L, 1L, 1L, 2L, 1L, 
#>     1L, 2L, 2L, 1L, 2L, 2L, 1L, 1L, 2L, 2L, 2L, 2L, 1L, 1L, 2L, 
#>     1L, 1L, 1L, 2L, 2L, 1L, 2L, 2L, 2L, 1L, 2L, 2L, 2L, 2L, 2L, 
#>     2L, 2L, 1L, 2L, 2L, 1L, 1L, 2L, 2L, 2L, 1L, 1L, 1L, 1L, 1L, 
#>     1L, 2L, 1L, 1L, 2L, 1L, 2L, 2L, 1L, 2L, 2L, 1L, 2L, 2L, 2L
#>     ), levels = c("0", "1"), class = "factor"), CVD = structure(c(2L, 
#>     1L, 1L, 1L, 1L, 2L, 1L, 1L, 2L, 1L, 1L, 1L, 1L, 1L, 2L, 1L, 
#>     2L, 1L, 1L, 1L, 2L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 2L, 1L, 
#>     1L, 1L, 2L, 2L, 1L, 1L, 2L, 1L, 2L, 2L, 1L, 1L, 1L, 1L, 2L, 
#>     1L, 1L, 2L, 1L, 2L, 2L, 1L, 2L, 1L, 2L, 1L, 1L, 1L, 2L, 2L, 
#>     2L, 1L, 2L, 1L, 2L, 1L, 1L, 1L, 1L, 1L, 2L, 1L, 2L, 1L, 1L, 
#>     1L, 1L, 1L, 2L, 2L, 1L, 2L, 1L, 1L, 2L, 1L, 1L, 1L, 2L, 1L, 
#>     1L, 2L, 1L, 1L, 1L, 1L, 2L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 
#>     1L, 1L, 2L, 2L, 1L, 2L, 2L, 1L, 1L, 1L, 1L, 1L, 1L, 2L, 1L, 
#>     2L, 2L, 1L, 1L, 1L, 2L, 2L, 2L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 
#>     1L, 1L, 2L, 1L, 1L, 1L, 2L, 1L, 1L, 1L, 1L, 2L, 1L, 1L, 1L, 
#>     1L, 1L, 1L, 2L, 1L, 1L, 2L, 1L, 2L, 1L, 1L, 1L, 1L, 1L, 1L, 
#>     1L, 1L, 1L, 2L, 1L, 2L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 
#>     1L, 1L, 1L, 2L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 
#>     1L, 1L, 2L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 
#>     1L, 2L, 1L, 2L, 1L, 1L, 1L, 1L, 1L, 2L, 1L, 1L, 1L, 1L, 2L, 
#>     1L, 1L, 1L, 1L, 1L, 2L, 1L, 2L, 1L, 1L, 2L, 1L, 2L, 1L, 1L, 
#>     1L, 2L, 2L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 2L, 1L, 1L, 1L, 
#>     2L, 1L, 2L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 2L, 1L, 1L, 
#>     1L, 1L, 1L, 1L, 2L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 2L, 1L, 
#>     2L, 1L, 2L, 1L, 1L, 1L, 2L, 1L, 1L, 1L, 1L, 2L, 1L, 1L, 1L, 
#>     2L, 1L, 1L, 1L, 2L, 2L, 2L, 1L, 1L, 1L, 1L, 1L, 2L, 1L, 1L, 
#>     1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 2L, 2L, 1L, 2L, 1L, 1L, 1L, 
#>     2L, 1L, 1L, 1L, 2L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 2L, 1L, 1L, 
#>     1L, 1L, 2L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 2L, 1L, 2L, 
#>     1L, 1L, 1L, 2L, 1L, 2L, 2L, 2L, 1L, 1L, 1L, 1L, 2L, 1L, 1L, 
#>     2L, 1L, 1L, 1L, 2L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 2L, 1L, 1L, 
#>     2L, 1L, 1L, 1L, 2L, 2L, 1L, 1L, 1L, 1L, 2L, 1L, 1L, 1L, 1L, 
#>     1L, 1L, 1L, 1L, 1L, 1L, 1L, 2L, 1L, 2L, 1L, 1L, 1L, 2L, 2L, 
#>     1L, 1L, 2L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 2L, 1L, 2L, 2L, 
#>     2L, 2L, 1L, 1L, 1L, 1L, 1L, 2L, 1L, 1L, 1L, 1L, 2L, 1L, 1L, 
#>     2L, 1L, 2L, 2L, 1L, 1L, 1L, 2L, 2L, 1L, 2L, 2L, 2L, 1L, 2L, 
#>     2L, 1L, 1L, 1L, 2L, 1L, 1L, 1L, 2L, 1L, 1L, 1L, 2L, 1L, 1L, 
#>     2L, 1L, 1L, 1L, 2L, 2L, 1L, 1L, 1L, 1L, 2L, 1L, 2L, 1L, 1L, 
#>     1L, 1L, 1L, 2L, 2L, 2L, 2L, 2L, 2L, 1L, 1L, 1L, 1L, 2L, 1L, 
#>     2L, 1L, 2L, 1L, 1L, 1L, 1L, 2L, 1L, 2L, 2L, 1L, 2L, 1L, 1L, 
#>     1L, 2L, 1L, 2L, 1L, 1L, 2L, 2L, 2L, 1L, 1L, 1L, 1L, 1L, 1L, 
#>     1L, 1L, 1L, 2L, 1L, 2L, 1L, 1L, 1L, 2L, 1L, 1L, 1L, 2L, 1L, 
#>     2L, 2L, 1L, 2L, 1L, 1L, 1L, 2L, 2L, 2L, 1L, 2L, 2L, 1L, 1L, 
#>     1L, 2L, 2L, 1L, 1L, 2L, 2L, 1L, 1L, 1L, 2L, 1L, 1L, 1L, 1L, 
#>     2L, 2L, 1L, 1L, 1L, 2L, 1L, 2L, 2L, 1L, 1L, 1L, 1L, 2L, 2L, 
#>     2L, 2L, 1L, 2L, 1L, 1L, 2L, 1L, 2L, 2L, 2L, 1L, 1L, 1L, 1L, 
#>     1L, 2L, 1L, 2L, 2L, 1L, 1L, 2L, 1L, 2L, 2L, 1L, 2L, 2L, 1L, 
#>     1L, 1L, 1L, 2L, 1L, 1L, 1L, 1L, 1L, 2L, 1L, 1L, 1L, 2L, 1L, 
#>     1L, 2L, 2L, 1L, 1L, 1L, 1L, 2L, 1L, 1L, 1L, 2L, 1L, 1L, 1L, 
#>     1L, 2L, 1L, 2L, 1L, 1L, 1L, 2L, 2L, 2L, 1L, 2L, 1L, 1L, 2L, 
#>     1L, 1L, 1L, 2L, 1L, 1L, 1L, 1L, 1L, 1L, 2L, 2L, 2L, 1L, 1L, 
#>     1L, 1L, 2L, 2L, 1L, 2L, 2L, 1L, 1L, 1L, 2L, 2L, 1L, 2L, 1L, 
#>     1L, 2L, 2L, 1L, 2L, 1L, 1L, 1L, 2L, 2L, 2L, 2L, 1L, 2L, 2L, 
#>     1L, 2L, 1L, 2L, 2L, 1L, 1L, 1L, 2L, 2L, 1L, 1L, 1L, 1L, 1L, 
#>     1L, 1L, 1L, 1L, 1L, 1L, 2L, 2L, 2L, 1L, 1L, 2L, 1L, 1L, 2L, 
#>     2L, 1L, 1L, 1L, 2L, 1L, 1L, 2L, 2L, 2L, 2L, 1L, 2L, 2L, 1L, 
#>     2L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 2L, 1L, 1L, 2L, 1L, 1L, 1L, 
#>     1L, 2L, 2L, 1L, 1L, 2L, 2L, 2L, 1L, 1L, 2L, 1L, 2L, 2L, 1L, 
#>     1L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 1L, 1L, 1L, 2L, 1L, 1L, 
#>     1L, 1L, 1L, 1L, 2L, 1L, 2L, 2L, 2L, 1L, 2L, 1L, 1L, 1L, 1L, 
#>     1L, 2L, 1L, 1L, 1L, 2L, 2L, 2L, 1L, 2L, 1L, 1L, 2L, 2L, 1L, 
#>     2L, 1L, 2L, 2L, 2L, 1L, 2L, 1L, 2L, 2L, 1L, 1L, 1L, 1L, 1L, 
#>     1L, 2L, 2L, 1L, 2L, 2L, 2L, 2L, 1L, 1L, 2L, 1L, 2L, 2L, 2L, 
#>     1L, 1L, 1L, 1L, 1L, 2L, 1L, 1L, 1L, 2L, 1L, 1L, 2L, 1L, 1L, 
#>     2L, 1L, 1L, 2L, 2L, 2L, 1L, 1L, 1L, 1L, 2L, 2L, 2L, 1L, 1L, 
#>     2L, 1L, 2L, 1L, 2L, 1L, 1L, 1L, 1L, 2L, 2L, 1L, 2L, 1L, 1L, 
#>     1L, 1L, 2L, 1L, 1L, 2L, 2L, 1L, 1L, 2L, 1L, 2L, 2L, 1L, 1L, 
#>     1L, 1L, 2L, 1L, 2L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 2L, 
#>     2L, 1L, 1L, 1L, 2L, 2L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 2L, 1L, 
#>     2L, 2L, 1L, 1L, 1L, 2L, 1L, 2L, 1L, 1L, 2L, 1L, 1L, 1L, 1L, 
#>     1L, 1L, 1L, 2L, 1L, 1L, 1L, 1L, 1L, 1L, 2L, 1L, 1L, 1L, 2L, 
#>     1L, 1L, 1L, 2L, 1L, 2L, 1L, 2L, 1L), levels = c("0", "1"), class = "factor")), row.names = c(NA, 
#> -1000L), class = "data.frame"))
#> 
#> Coefficients:
#> (Intercept)        x.Age    x.Gender1  
#>    -1.98523      0.02005      1.44400  
#> 
#> Degrees of Freedom: 999 Total (i.e. Null);  997 Residual
#> Null Deviance:       1378 
#> Residual Deviance: 1252  AIC: NA

# 估计的PS值
eps <- m.out$distance

# 模拟数据的真实PS值
z<-(x.Age-45)/15-(x.Age-45)^2/100+2*x.Gender

tps <- exp(z)/(1+exp(z)) # The true PS
tps_comp <- tps[complete.cases(data)]

smoke_comp <- Smoke[complete.cases(data)] %>% as.factor()

df <- tibble(
    True = tps_comp,
    Estimate = eps,  # 估计如此糟糕(就像 sin 函数一样),是因为我们通过二次方程生成 PS
    smoke= smoke_comp
)
df
#> # A tibble: 1,000 × 3
#>        True Estimate smoke
#>       <dbl>    <dbl> <fct>
#>  1  5.10e-1    0.276 1    
#>  2  5.21e-1    0.272 0    
#>  3  2.59e-2    0.197 0    
#>  4  1.76e-2    0.194 0    
#>  5  1.33e-9    0.127 0    
#>  6  3.83e-1    0.297 1    
#>  7  2.64e-1    0.310 0    
#>  8  4.28e-1    0.242 0    
#>  9 NA          0.396 0    
#> 10  5.23e-1    0.261 1    
#> # ℹ 990 more rows

ggplot(df, aes(True, Estimate, colour = smoke))+
    geom_point()+
    geom_abline(intercept = 0,slope = 1,color = "#990000",
                linetype= "dashed")

Show the code


m.out2 <- matchit(formula = Smoke ~ x.Age + I(x.Age^2) + x.Gender, data = data)

eps2 <- m.out2$distance
df <- tibble(
    True = tps_comp,
    Estimate = eps2,  # 估计如此糟糕(就像 sin 函数一样),是因为我们通过二次方程生成 PS
    smoke= smoke_comp
)
ggplot(df, aes(True, Estimate, colour = smoke))+
    geom_point()+
    geom_abline(intercept = 0,slope = 1,color = "#990000",
                linetype= "dashed")

Show the code

m.out2$model
#> 
#> Call:  glm(formula = Smoke ~ x.Age + I(x.Age^2) + x.Gender, family = structure(list(
#>     family = "quasibinomial", link = "logit", linkfun = function (mu) 
#>     .Call(C_logit_link, mu), linkinv = function (eta) 
#>     .Call(C_logit_linkinv, eta), variance = function (mu) 
#>     mu * (1 - mu), dev.resids = function (y, mu, wt) 
#>     .Call(C_binomial_dev_resids, y, mu, wt), aic = function (y, 
#>         n, mu, wt, dev) 
#>     NA, mu.eta = function (eta) 
#>     .Call(C_logit_mu_eta, eta), initialize = {
#>         if (NCOL(y) == 1) {
#>             if (is.factor(y)) 
#>                 y <- y != levels(y)[1L]
#>             n <- rep.int(1, nobs)
#>             y[weights == 0] <- 0
#>             if (any(y < 0 | y > 1)) 
#>                 stop("y values must be 0 <= y <= 1")
#>             mustart <- (weights * y + 0.5)/(weights + 1)
#>             m <- weights * y
#>             if ("quasibinomial" == "binomial" && any(abs(m - 
#>                 round(m)) > 0.001)) 
#>                 warning(gettextf("non-integer #successes in a %s glm!", 
#>                   "quasibinomial"), domain = NA)
#>         }
#>         else if (NCOL(y) == 2) {
#>             if ("quasibinomial" == "binomial" && any(abs(y - 
#>                 round(y)) > 0.001)) 
#>                 warning(gettextf("non-integer counts in a %s glm!", 
#>                   "quasibinomial"), domain = NA)
#>             n <- (y1 <- y[, 1L]) + y[, 2L]
#>             y <- y1/n
#>             if (any(n0 <- n == 0)) 
#>                 y[n0] <- 0
#>             weights <- weights * n
#>             mustart <- (n * y + 0.5)/(n + 1)
#>         }
#>         else stop(gettextf("for the '%s' family, y must be a vector of 0 and 1's\nor a 2 column matrix where col 1 is no. successes and col 2 is no. failures", 
#>             "quasibinomial"), domain = NA)
#>     }, validmu = function (mu) 
#>     all(is.finite(mu)) && all(0 < mu & mu < 1), valideta = function (eta) 
#>     TRUE, dispersion = NA_real_), class = "family"), data = structure(list(
#>     x.Age = c(51, 50, 29, 28, 3, 56, 59, 42, 78, 47, 32, 37, 
#>     63, 37, 43, 72, 71, 1, 37, 46, 78, 61, 50, 44, 58, 48, 37, 
#>     59, 45, 47, 37, 34, 61, 78, 51, 45, 41, 51, 37, 52, 59, 37, 
#>     40, 34, 27, 49, 45, 45, 55, 52, 42, 51, 35, 52, 47, 47, 42, 
#>     25, 36, 54, 74, 49, 21, 93, 59, 51, 59, 42, 46, 48, 57, 78, 
#>     37, 51, 31, 34, 18, 37, 46, 49, 54, 45, 78, 48, 30, 78, 17, 
#>     61, 73, 53, 22, 16, 52, 64, 42, 33, 50, 55, 37, 35, 19, 30, 
#>     36, 51, 56, 37, 40, 37, 49, 49, 40, 67, 60, 37, 49, 33, 27, 
#>     26, 43, 78, 40, 51, 78, 37, 26, 48, 56, 78, 78, 31, 47, 36, 
#>     44, 43, 29, 44, 71, 37, 85, 26, 45, 44, 78, 15, 52, 41, 28, 
#>     48, 46, 37, 40, 37, 45, 23, 51, 45, 28, 46, 35, 55, 54, 37, 
#>     37, 45, 16, 64, 50, 32, 17, 58, 64, 49, 37, 37, 37, 37, 37, 
#>     27, 61, 45, 41, 58, 29, 19, 81, 42, 59, 34, 26, 58, 63, 27, 
#>     56, 58, 37, 54, 29, 37, 51, 55, 34, 37, 39, 37, 37, 26, 62, 
#>     61, 44, 37, 53, 41, 78, 34, 52, 37, 30, 37, 28, 37, 53, 24, 
#>     43, 37, 33, 51, 62, 52, 37, 45, 53, 52, 20, 70, 6, 57, 52, 
#>     55, 51, 70, 46, 45, 78, 69, 47, 41, 15, 67, 37, 33, 45, 36, 
#>     68, 28, 26, 37, 64, 47, 51, 57, 37, 20, 42, 53, 43, 37, 22, 
#>     44, 60, 40, 37, 27, 36, 37, 9, 79, 37, 48, 32, 25, 47, 3, 
#>     56, 33, 61, 39, 51, 60, 58, 13, 2, 36, 64, 42, 50, 35, 37, 
#>     78, 37, 36, 59, 42, 63, 68, 67, 80, 78, 58, 47, 37, 46, 45, 
#>     48, 51, 44, 38, 42, 27, 37, 53, 47, 37, 41, 57, 55, 46, 15, 
#>     51, 59, 71, 40, 57, 38, 23, 37, 73, 46, 37, 39, 37, 25, 34, 
#>     30, 47, 56, 28, 48, 43, 47, 35, 32, 43, 53, 39, 40, 43, 35, 
#>     37, 70, 35, 51, 7, 27, 43, 55, 44, 84, 43, 51, 37, 60, 39, 
#>     48, 51, 45, 52, 78, 37, 30, 29, 73, 43, 35, 37, 28, 54, 21, 
#>     27, 63, 36, 55, 52, 45, 41, 45, 59, 72, 45, 45, 37, 33.5, 
#>     50, 61, 41, 41, 41, 48, 11, 34, 42, 41, 41, 40, 50, 42, 43, 
#>     61, 35, 25, 41, 59, 17, 46, 50, 51, 41, 27, 41, 41, 37, 53, 
#>     36, 57, 30, 60, 50, 42, 47, 32, 40, 35, 46, 33.5, 68, 54, 
#>     41, 36, 29, 67, 37, 36, 59, 40, 50, 50, 41, 46, 33, 50, 45, 
#>     65, 50, 50, 64, 41, 63, 53, 21, 41, 40, 43, 33.5, 41, 37, 
#>     50, 33, 43, 16, 68, 41, 31, 55, 42, 42, 42, 55, 66, 44, 33.5, 
#>     43, 31, 68, 38, 51, 22, 45, 41, 41, 45, 79, 63, 45, 63, 47, 
#>     68, 41, 39, 5, 21, 60, 16, 67, 46, 54, 35, 41, 43, 46, 45, 
#>     44, 47, 53, 38, 43, 32, 25, 23, 51, 57, 50, 69, 63, 56, 53, 
#>     40, 19, 44, 39, 41, 41, 36, 33, 50, 43, 78, 33.5, 39, 43, 
#>     59, 41, 55, 41, 41, 26, 54, 38, 70, 60, 42, 48, 24, 33.5, 
#>     36, 47, 50, 68, 41, 54, 82, 33, 41, 54, 51, 59, 25, 33.5, 
#>     82, 52, 26, 22, 35, 74, 41, 41, 53, 29, 51, 53, 33.5, 40, 
#>     14, 40, 63, 50, 48, 45, 38, 22, 59, 65, 49, 78, 50, 25, 50, 
#>     60, 37, 50, 42, 45, 52, 50, 29, 33.5, 32, 58, 52, 68, 43, 
#>     52, 69, 32, 47, 50, 28, 50, 46, 18, 50, 50, 41, 33.5, 20, 
#>     61, 41, 40, 13, 28, 49, 27, 53, 62, 33.5, 37, 51, 40, 64, 
#>     82, 57, 45, 22, 33.5, 33.5, 72, 42, 55, 30, 39, 21, 40, 34, 
#>     33.5, 50, 11, 50, 21, 52, 34, 49, 48, 44, 25, 52, 26, 43, 
#>     73, 61, 34, 35, 67, 18, 44, 42, 43, 25, 41, 47, 60, 50, 42, 
#>     32, 27, 26, 56, 54, 24, 50, 44, 20, 41, 47, 47, 39, 38, 56, 
#>     40, 36, 60, 59, 28, 79, 35, 29, 47, 101, 69, 50, 63, 54, 
#>     56, 78, 39, 70, 37, 49, 50, 31, 37, 41, 55, 69, 40, 44, 37, 
#>     33, 31, 67, 24, 41, 41, 68, 37, 50, 51, 67, 42, 41, 50, 44, 
#>     31, 81, 52, 33.5, 29, 32, 51, 33, 33.5, 39, 71, 57, 57, 63, 
#>     62, 65, 34, 55, 34, 2, 18, 57, 43, 63, 57, 59, 35, 41, 50, 
#>     28, 46, 37, 33.5, 40, 52, 11, 33, 50, 50, 56, 48, 39, 50, 
#>     33, 50, 73, 39, 26, 48, 54, 69, 51, 50, 45, 57, 56, 48, 27, 
#>     33.5, 51, 23, 33.5, 66, 39, 40, 23, 45, 26, 84, 54, 49, 64, 
#>     47, 47, 13, 66, 20, 40, 41, 39, 50, 41, 69, 48, 59, 35, 51, 
#>     33.5, 35, 47, 54, 30, 56, 34, 41, 50, 76, 33.5, 47, 41, 44, 
#>     58, 59, 44, 33.5, 31, 40, 43, 56, 50, 33.5, 54, 58, 52, 50, 
#>     34, 41, 51, 38, 69, 53, 41, 31, 40, 34, 33.5, 49, 34, 41, 
#>     22, 43, 54, 51, 42, 57, 48, 40, 65, 33, 27, 50, 69, 77, 31, 
#>     45, 60, 20, 59, 50, 50, 32, 33.5, 55, 55, 56, 26, 67, 46, 
#>     33.5, 40, 33, 30, 41, 57, 73, 20, 40, 26, 33.5, 49, 33.5, 
#>     34, 65, 50, 41, 37, 66, 41, 50, 50, 30, 33, 41, 49, 48, 37, 
#>     64, 32, 42, 48, 22, 33.5, 35, 34, 24, 41, 51, 86, 33.5, 41, 
#>     35, 55, 50, 57, 8, 41, 41, 61, 23, 43, 54, 27, 50, 58, 31, 
#>     46, 43, 52, 41, 51, 43, 41, 50, 58, 39, 41, 33.5, 35, 23, 
#>     42, 50, 19, 33, 32, 64, 25, 36, 63, 20, 24, 34, 82, 36, 41, 
#>     33.5, 51, 43, 70, 39, 57, 31), x.Gender = structure(c(1L, 
#>     1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 
#>     1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 
#>     1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 
#>     1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 
#>     1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 
#>     1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 
#>     1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 
#>     1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 
#>     1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 
#>     1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 
#>     1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 
#>     1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 
#>     1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 
#>     1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 
#>     1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 
#>     1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 
#>     1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 
#>     1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 
#>     1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 
#>     1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 
#>     1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 
#>     1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 
#>     1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 
#>     1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 
#>     1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 
#>     1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 
#>     1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 2L, 2L, 2L, 2L, 2L, 2L, 
#>     2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 
#>     2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 
#>     2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 
#>     2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 
#>     2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 
#>     2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 
#>     2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 
#>     2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 
#>     2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 
#>     2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 
#>     2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 
#>     2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 
#>     2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 
#>     2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 
#>     2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 
#>     2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 
#>     2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 
#>     2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 
#>     2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 
#>     2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 
#>     2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 
#>     2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 
#>     2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 
#>     2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 
#>     2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 
#>     2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 
#>     2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 
#>     2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 
#>     2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 
#>     2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 
#>     2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 
#>     2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 
#>     2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 
#>     2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 
#>     2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 
#>     2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 
#>     2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 
#>     2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 
#>     2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 
#>     2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L), levels = c("0", "1"), class = "factor"), 
#>     Smoke = structure(c(2L, 1L, 1L, 1L, 1L, 2L, 1L, 1L, 1L, 2L, 
#>     1L, 1L, 1L, 1L, 2L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 
#>     1L, 1L, 1L, 2L, 2L, 1L, 1L, 1L, 1L, 2L, 2L, 1L, 2L, 1L, 2L, 
#>     2L, 1L, 1L, 1L, 1L, 2L, 2L, 2L, 2L, 1L, 2L, 2L, 1L, 2L, 1L, 
#>     2L, 2L, 1L, 1L, 2L, 1L, 2L, 1L, 1L, 1L, 2L, 1L, 1L, 1L, 2L, 
#>     1L, 1L, 1L, 2L, 1L, 1L, 1L, 1L, 2L, 2L, 2L, 1L, 1L, 1L, 1L, 
#>     1L, 1L, 1L, 1L, 2L, 1L, 1L, 2L, 1L, 2L, 1L, 1L, 2L, 1L, 1L, 
#>     1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 2L, 2L, 1L, 1L, 2L, 1L, 1L, 
#>     1L, 1L, 1L, 1L, 1L, 1L, 2L, 1L, 1L, 1L, 1L, 2L, 1L, 1L, 1L, 
#>     1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 2L, 2L, 1L, 1L, 1L, 
#>     2L, 1L, 2L, 2L, 1L, 1L, 1L, 2L, 1L, 2L, 2L, 1L, 2L, 1L, 2L, 
#>     1L, 1L, 1L, 2L, 1L, 1L, 1L, 1L, 1L, 2L, 1L, 2L, 1L, 1L, 1L, 
#>     1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 
#>     1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 2L, 1L, 1L, 1L, 1L, 1L, 1L, 
#>     1L, 1L, 1L, 2L, 1L, 1L, 2L, 1L, 1L, 2L, 1L, 1L, 1L, 1L, 1L, 
#>     2L, 1L, 1L, 1L, 2L, 2L, 1L, 1L, 1L, 1L, 1L, 2L, 1L, 1L, 1L, 
#>     1L, 2L, 1L, 2L, 1L, 1L, 1L, 1L, 1L, 2L, 1L, 1L, 1L, 1L, 1L, 
#>     2L, 1L, 1L, 1L, 1L, 1L, 1L, 2L, 2L, 1L, 1L, 1L, 1L, 1L, 1L, 
#>     1L, 1L, 2L, 2L, 1L, 1L, 2L, 2L, 1L, 1L, 1L, 1L, 2L, 1L, 1L, 
#>     1L, 1L, 1L, 1L, 2L, 1L, 2L, 1L, 2L, 1L, 1L, 2L, 2L, 1L, 1L, 
#>     1L, 1L, 1L, 1L, 1L, 1L, 2L, 1L, 1L, 1L, 1L, 1L, 2L, 2L, 1L, 
#>     2L, 2L, 1L, 2L, 2L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 2L, 1L, 2L, 
#>     2L, 1L, 2L, 1L, 1L, 1L, 2L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 
#>     1L, 2L, 1L, 2L, 1L, 1L, 1L, 2L, 2L, 1L, 1L, 1L, 1L, 2L, 1L, 
#>     1L, 1L, 1L, 1L, 1L, 2L, 1L, 1L, 2L, 2L, 1L, 1L, 2L, 2L, 1L, 
#>     1L, 1L, 1L, 2L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 
#>     1L, 1L, 1L, 2L, 1L, 1L, 2L, 1L, 1L, 2L, 2L, 1L, 2L, 2L, 1L, 
#>     1L, 2L, 1L, 2L, 2L, 2L, 2L, 1L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 
#>     2L, 1L, 2L, 1L, 2L, 2L, 1L, 2L, 2L, 1L, 2L, 1L, 2L, 2L, 1L, 
#>     1L, 2L, 2L, 1L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 1L, 1L, 1L, 
#>     2L, 1L, 2L, 1L, 2L, 2L, 2L, 2L, 2L, 2L, 1L, 2L, 1L, 2L, 2L, 
#>     1L, 2L, 2L, 2L, 2L, 2L, 2L, 1L, 2L, 1L, 2L, 1L, 2L, 1L, 2L, 
#>     1L, 2L, 1L, 1L, 2L, 2L, 2L, 2L, 2L, 1L, 2L, 2L, 2L, 1L, 2L, 
#>     1L, 2L, 2L, 2L, 1L, 1L, 2L, 2L, 2L, 1L, 2L, 2L, 2L, 2L, 1L, 
#>     2L, 2L, 1L, 1L, 2L, 1L, 2L, 2L, 2L, 1L, 2L, 2L, 2L, 2L, 2L, 
#>     2L, 2L, 2L, 2L, 1L, 1L, 1L, 2L, 1L, 2L, 1L, 1L, 2L, 2L, 2L, 
#>     1L, 2L, 1L, 2L, 2L, 1L, 2L, 2L, 2L, 1L, 1L, 2L, 2L, 1L, 2L, 
#>     2L, 2L, 2L, 1L, 2L, 2L, 1L, 2L, 2L, 2L, 1L, 1L, 1L, 2L, 2L, 
#>     1L, 2L, 2L, 1L, 1L, 2L, 1L, 2L, 2L, 1L, 1L, 1L, 2L, 1L, 1L, 
#>     1L, 1L, 2L, 2L, 1L, 2L, 2L, 2L, 1L, 2L, 1L, 2L, 1L, 2L, 2L, 
#>     2L, 2L, 1L, 1L, 2L, 2L, 1L, 2L, 1L, 2L, 1L, 2L, 2L, 2L, 2L, 
#>     2L, 2L, 2L, 1L, 1L, 1L, 1L, 1L, 1L, 2L, 1L, 1L, 2L, 2L, 2L, 
#>     2L, 2L, 1L, 2L, 2L, 2L, 1L, 1L, 1L, 2L, 2L, 1L, 1L, 2L, 1L, 
#>     2L, 1L, 1L, 2L, 2L, 2L, 1L, 1L, 2L, 2L, 1L, 1L, 1L, 1L, 2L, 
#>     1L, 1L, 2L, 1L, 2L, 1L, 1L, 2L, 1L, 2L, 1L, 1L, 2L, 2L, 2L, 
#>     2L, 1L, 2L, 1L, 2L, 1L, 1L, 2L, 2L, 1L, 1L, 2L, 2L, 1L, 1L, 
#>     2L, 2L, 2L, 2L, 2L, 2L, 1L, 1L, 2L, 2L, 1L, 2L, 2L, 1L, 2L, 
#>     2L, 2L, 2L, 1L, 2L, 2L, 2L, 2L, 2L, 1L, 1L, 2L, 1L, 2L, 1L, 
#>     1L, 2L, 2L, 1L, 2L, 1L, 2L, 1L, 1L, 2L, 2L, 1L, 2L, 2L, 2L, 
#>     1L, 1L, 2L, 1L, 2L, 1L, 1L, 1L, 2L, 2L, 1L, 2L, 2L, 2L, 1L, 
#>     2L, 2L, 2L, 2L, 1L, 1L, 2L, 1L, 2L, 1L, 2L, 1L, 1L, 2L, 1L, 
#>     2L, 2L, 1L, 2L, 1L, 2L, 2L, 2L, 1L, 1L, 1L, 2L, 1L, 1L, 2L, 
#>     2L, 2L, 2L, 1L, 2L, 2L, 1L, 2L, 2L, 1L, 1L, 2L, 2L, 2L, 1L, 
#>     1L, 2L, 2L, 2L, 1L, 2L, 1L, 2L, 2L, 1L, 2L, 2L, 2L, 2L, 2L, 
#>     1L, 1L, 1L, 2L, 1L, 1L, 1L, 2L, 1L, 1L, 2L, 1L, 1L, 2L, 2L, 
#>     1L, 2L, 2L, 1L, 1L, 1L, 2L, 2L, 2L, 2L, 2L, 1L, 2L, 2L, 1L, 
#>     2L, 1L, 1L, 2L, 2L, 1L, 2L, 1L, 2L, 2L, 1L, 1L, 2L, 2L, 2L, 
#>     2L, 1L, 1L, 1L, 1L, 2L, 1L, 2L, 2L, 1L, 2L, 2L, 2L, 2L, 1L, 
#>     2L, 2L, 2L, 1L, 2L, 2L, 2L, 2L, 1L, 1L, 2L, 2L, 2L, 1L, 2L, 
#>     2L, 1L, 2L, 2L, 2L, 1L, 1L, 2L, 1L, 2L, 1L, 1L, 1L, 2L, 1L, 
#>     1L, 2L, 2L, 2L, 1L, 1L, 2L, 1L, 2L, 1L, 1L, 2L, 1L, 1L, 2L, 
#>     2L, 2L, 1L, 1L, 1L, 1L, 1L, 1L, 2L, 1L, 1L, 2L, 2L, 2L, 1L, 
#>     1L, 2L, 2L, 2L, 1L, 1L, 2L, 2L, 2L, 1L, 2L, 1L, 1L, 2L, 1L, 
#>     1L, 2L, 2L, 1L, 2L, 2L, 1L, 1L, 2L, 2L, 2L, 2L, 1L, 1L, 2L, 
#>     1L, 1L, 1L, 2L, 2L, 1L, 2L, 2L, 2L, 1L, 2L, 2L, 2L, 2L, 2L, 
#>     2L, 2L, 1L, 2L, 2L, 1L, 1L, 2L, 2L, 2L, 1L, 1L, 1L, 1L, 1L, 
#>     1L, 2L, 1L, 1L, 2L, 1L, 2L, 2L, 1L, 2L, 2L, 1L, 2L, 2L, 2L
#>     ), levels = c("0", "1"), class = "factor"), CVD = structure(c(2L, 
#>     1L, 1L, 1L, 1L, 2L, 1L, 1L, 2L, 1L, 1L, 1L, 1L, 1L, 2L, 1L, 
#>     2L, 1L, 1L, 1L, 2L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 2L, 1L, 
#>     1L, 1L, 2L, 2L, 1L, 1L, 2L, 1L, 2L, 2L, 1L, 1L, 1L, 1L, 2L, 
#>     1L, 1L, 2L, 1L, 2L, 2L, 1L, 2L, 1L, 2L, 1L, 1L, 1L, 2L, 2L, 
#>     2L, 1L, 2L, 1L, 2L, 1L, 1L, 1L, 1L, 1L, 2L, 1L, 2L, 1L, 1L, 
#>     1L, 1L, 1L, 2L, 2L, 1L, 2L, 1L, 1L, 2L, 1L, 1L, 1L, 2L, 1L, 
#>     1L, 2L, 1L, 1L, 1L, 1L, 2L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 
#>     1L, 1L, 2L, 2L, 1L, 2L, 2L, 1L, 1L, 1L, 1L, 1L, 1L, 2L, 1L, 
#>     2L, 2L, 1L, 1L, 1L, 2L, 2L, 2L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 
#>     1L, 1L, 2L, 1L, 1L, 1L, 2L, 1L, 1L, 1L, 1L, 2L, 1L, 1L, 1L, 
#>     1L, 1L, 1L, 2L, 1L, 1L, 2L, 1L, 2L, 1L, 1L, 1L, 1L, 1L, 1L, 
#>     1L, 1L, 1L, 2L, 1L, 2L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 
#>     1L, 1L, 1L, 2L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 
#>     1L, 1L, 2L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 
#>     1L, 2L, 1L, 2L, 1L, 1L, 1L, 1L, 1L, 2L, 1L, 1L, 1L, 1L, 2L, 
#>     1L, 1L, 1L, 1L, 1L, 2L, 1L, 2L, 1L, 1L, 2L, 1L, 2L, 1L, 1L, 
#>     1L, 2L, 2L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 2L, 1L, 1L, 1L, 
#>     2L, 1L, 2L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 2L, 1L, 1L, 
#>     1L, 1L, 1L, 1L, 2L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 2L, 1L, 
#>     2L, 1L, 2L, 1L, 1L, 1L, 2L, 1L, 1L, 1L, 1L, 2L, 1L, 1L, 1L, 
#>     2L, 1L, 1L, 1L, 2L, 2L, 2L, 1L, 1L, 1L, 1L, 1L, 2L, 1L, 1L, 
#>     1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 2L, 2L, 1L, 2L, 1L, 1L, 1L, 
#>     2L, 1L, 1L, 1L, 2L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 2L, 1L, 1L, 
#>     1L, 1L, 2L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 2L, 1L, 2L, 
#>     1L, 1L, 1L, 2L, 1L, 2L, 2L, 2L, 1L, 1L, 1L, 1L, 2L, 1L, 1L, 
#>     2L, 1L, 1L, 1L, 2L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 2L, 1L, 1L, 
#>     2L, 1L, 1L, 1L, 2L, 2L, 1L, 1L, 1L, 1L, 2L, 1L, 1L, 1L, 1L, 
#>     1L, 1L, 1L, 1L, 1L, 1L, 1L, 2L, 1L, 2L, 1L, 1L, 1L, 2L, 2L, 
#>     1L, 1L, 2L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 2L, 1L, 2L, 2L, 
#>     2L, 2L, 1L, 1L, 1L, 1L, 1L, 2L, 1L, 1L, 1L, 1L, 2L, 1L, 1L, 
#>     2L, 1L, 2L, 2L, 1L, 1L, 1L, 2L, 2L, 1L, 2L, 2L, 2L, 1L, 2L, 
#>     2L, 1L, 1L, 1L, 2L, 1L, 1L, 1L, 2L, 1L, 1L, 1L, 2L, 1L, 1L, 
#>     2L, 1L, 1L, 1L, 2L, 2L, 1L, 1L, 1L, 1L, 2L, 1L, 2L, 1L, 1L, 
#>     1L, 1L, 1L, 2L, 2L, 2L, 2L, 2L, 2L, 1L, 1L, 1L, 1L, 2L, 1L, 
#>     2L, 1L, 2L, 1L, 1L, 1L, 1L, 2L, 1L, 2L, 2L, 1L, 2L, 1L, 1L, 
#>     1L, 2L, 1L, 2L, 1L, 1L, 2L, 2L, 2L, 1L, 1L, 1L, 1L, 1L, 1L, 
#>     1L, 1L, 1L, 2L, 1L, 2L, 1L, 1L, 1L, 2L, 1L, 1L, 1L, 2L, 1L, 
#>     2L, 2L, 1L, 2L, 1L, 1L, 1L, 2L, 2L, 2L, 1L, 2L, 2L, 1L, 1L, 
#>     1L, 2L, 2L, 1L, 1L, 2L, 2L, 1L, 1L, 1L, 2L, 1L, 1L, 1L, 1L, 
#>     2L, 2L, 1L, 1L, 1L, 2L, 1L, 2L, 2L, 1L, 1L, 1L, 1L, 2L, 2L, 
#>     2L, 2L, 1L, 2L, 1L, 1L, 2L, 1L, 2L, 2L, 2L, 1L, 1L, 1L, 1L, 
#>     1L, 2L, 1L, 2L, 2L, 1L, 1L, 2L, 1L, 2L, 2L, 1L, 2L, 2L, 1L, 
#>     1L, 1L, 1L, 2L, 1L, 1L, 1L, 1L, 1L, 2L, 1L, 1L, 1L, 2L, 1L, 
#>     1L, 2L, 2L, 1L, 1L, 1L, 1L, 2L, 1L, 1L, 1L, 2L, 1L, 1L, 1L, 
#>     1L, 2L, 1L, 2L, 1L, 1L, 1L, 2L, 2L, 2L, 1L, 2L, 1L, 1L, 2L, 
#>     1L, 1L, 1L, 2L, 1L, 1L, 1L, 1L, 1L, 1L, 2L, 2L, 2L, 1L, 1L, 
#>     1L, 1L, 2L, 2L, 1L, 2L, 2L, 1L, 1L, 1L, 2L, 2L, 1L, 2L, 1L, 
#>     1L, 2L, 2L, 1L, 2L, 1L, 1L, 1L, 2L, 2L, 2L, 2L, 1L, 2L, 2L, 
#>     1L, 2L, 1L, 2L, 2L, 1L, 1L, 1L, 2L, 2L, 1L, 1L, 1L, 1L, 1L, 
#>     1L, 1L, 1L, 1L, 1L, 1L, 2L, 2L, 2L, 1L, 1L, 2L, 1L, 1L, 2L, 
#>     2L, 1L, 1L, 1L, 2L, 1L, 1L, 2L, 2L, 2L, 2L, 1L, 2L, 2L, 1L, 
#>     2L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 2L, 1L, 1L, 2L, 1L, 1L, 1L, 
#>     1L, 2L, 2L, 1L, 1L, 2L, 2L, 2L, 1L, 1L, 2L, 1L, 2L, 2L, 1L, 
#>     1L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 1L, 1L, 1L, 2L, 1L, 1L, 
#>     1L, 1L, 1L, 1L, 2L, 1L, 2L, 2L, 2L, 1L, 2L, 1L, 1L, 1L, 1L, 
#>     1L, 2L, 1L, 1L, 1L, 2L, 2L, 2L, 1L, 2L, 1L, 1L, 2L, 2L, 1L, 
#>     2L, 1L, 2L, 2L, 2L, 1L, 2L, 1L, 2L, 2L, 1L, 1L, 1L, 1L, 1L, 
#>     1L, 2L, 2L, 1L, 2L, 2L, 2L, 2L, 1L, 1L, 2L, 1L, 2L, 2L, 2L, 
#>     1L, 1L, 1L, 1L, 1L, 2L, 1L, 1L, 1L, 2L, 1L, 1L, 2L, 1L, 1L, 
#>     2L, 1L, 1L, 2L, 2L, 2L, 1L, 1L, 1L, 1L, 2L, 2L, 2L, 1L, 1L, 
#>     2L, 1L, 2L, 1L, 2L, 1L, 1L, 1L, 1L, 2L, 2L, 1L, 2L, 1L, 1L, 
#>     1L, 1L, 2L, 1L, 1L, 2L, 2L, 1L, 1L, 2L, 1L, 2L, 2L, 1L, 1L, 
#>     1L, 1L, 2L, 1L, 2L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 2L, 
#>     2L, 1L, 1L, 1L, 2L, 2L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 2L, 1L, 
#>     2L, 2L, 1L, 1L, 1L, 2L, 1L, 2L, 1L, 1L, 2L, 1L, 1L, 1L, 1L, 
#>     1L, 1L, 1L, 2L, 1L, 1L, 1L, 1L, 1L, 1L, 2L, 1L, 1L, 1L, 2L, 
#>     1L, 1L, 1L, 2L, 1L, 2L, 1L, 2L, 1L), levels = c("0", "1"), class = "factor")), row.names = c(NA, 
#> -1000L), class = "data.frame"))
#> 
#> Coefficients:
#> (Intercept)        x.Age   I(x.Age^2)    x.Gender1  
#>   -28.99771      1.19555     -0.01223      2.12637  
#> 
#> Degrees of Freedom: 999 Total (i.e. Null);  996 Residual
#> Null Deviance:       1378 
#> Residual Deviance: 819.7     AIC: NA

1.2.2 分类和回归树

Show the code
########################## ##########
m.out_rpart <- matchit(formula = Smoke ~ x.Age + x.Gender, data = data,
                distance = "rpart")

tibble(True = tps_comp,
       Estimate = m.out_rpart$distance,
       smoke = smoke_comp) %>%
    ggplot(aes(True, Estimate, colour = smoke)) +
    geom_point() +
    geom_abline(
        intercept = 0,
        slope = 1,
        color = "#990000",
        linetype = "dashed"
    )

Show the code

m.out_rpart2 <- matchit(formula = Smoke ~ x.Age + I(x.Age^2) + x.Gender, data = data,
                       distance = "rpart")

tibble(True = tps_comp,
       Estimate = m.out_rpart2$distance,
       smoke = smoke_comp) %>%
    ggplot(aes(True, Estimate, colour = smoke)) +
    geom_point() +
    geom_abline(
        intercept = 0,
        slope = 1,
        color = "#990000",
        linetype = "dashed"
    )

1.2.3 随机森林

Show the code
############################# #################

library(randomForest)
set.seed(123)
rf_psm <- randomForest(formula =  Smoke ~ x.Age + I(x.Age^2) + x.Gender, data =data)

rf_psm
#> 
#> Call:
#>  randomForest(formula = Smoke ~ x.Age + I(x.Age^2) + x.Gender,      data = data) 
#>                Type of random forest: classification
#>                      Number of trees: 500
#> No. of variables tried at each split: 1
#> 
#>         OOB estimate of  error rate: 18.7%
#> Confusion matrix:
#>     0   1 class.error
#> 0 445 101   0.1849817
#> 1  86 368   0.1894273


tibble(True = tps_comp,
       Estimate = rf_psm$votes[,2],
       smoke = smoke_comp) %>%
    ggplot(aes(True, Estimate, colour = smoke)) +
    geom_point() +
    geom_abline(
        intercept = 0,
        slope = 1,
        color = "#990000",
        linetype = "dashed"
    )

1.3 距离匹配算法 method

a caliper width of 0.2 of the standard deviation (SD) of the logit of PS be used

Show the code
#######  最近邻匹配 (NNM)      ##############

logit_knn_ratio <- matchit(formula = Smoke ~ x.Age + x.Gender, data = data,
                 method = "nearest", replace = T, ratio = 3)

logit_knn_ratio$match.matrix %>% head()
#>    [,1]  [,2]  [,3] 
#> 1  "104" "295" "167"
#> 6  "105" "193" "283"
#> 10 "55"  "131" "281"
#> 15 "119" "134" "223"
#> 29 "82"  "180" "230"
#> 30 "55"  "131" "281"

logit_knn_ratio$discarded %>% sum() # 记录是否有丢弃的患者
#> [1] 0




##########    最佳匹配 (OM)         #################




###########      遗传匹配 (GM) 是 PSM的扩展     ################
# 进化搜索算法
# 当除 PS 之外的其他协变量的所有权重都设置为 0 时,GM 将等效于 PSM。










#################  full matchin  #############3

如果使用最佳匹配,则必须安装 optmatch 包,而基因匹配需要 rgenound 包。

1.4 平衡诊断

1.4.1 SMD 和 VR

比较治疗组和对照组中协变量的标准化均数差 (SMD) 和方差比 (VR)

  1. SMD 值为 <0.1 ,至少<0.25。

  2. VR 是治疗组和对照组的方差比。接近 1.0 的 VR 表示协变量平衡,而 VR <0.5 或 >2.0 被认为“太极端”

Show the code
summary(m.out, standardize = T)
#> 
#> Call:
#> matchit(formula = Smoke ~ x.Age + x.Gender, data = data)
#> 
#> Summary of Balance for All Data:
#>           Means Treated Means Control Std. Mean Diff. Var. Ratio eCDF Mean
#> distance         0.5199        0.3992          0.8545     0.6324    0.1981
#> x.Age           46.6476       43.0769          0.4638     0.1743    0.1277
#> x.Gender0        0.2269        0.5440         -0.7571          .    0.3171
#> x.Gender1        0.7731        0.4560          0.7571          .    0.3171
#>           eCDF Max
#> distance    0.5027
#> x.Age       0.4217
#> x.Gender0   0.3171
#> x.Gender1   0.3171
#> 
#> Summary of Balance for Matched Data:
#>           Means Treated Means Control Std. Mean Diff. Var. Ratio eCDF Mean
#> distance         0.5199        0.4402          0.5646     0.7389    0.1388
#> x.Age           46.6476       46.3062          0.0443     0.1857    0.1108
#> x.Gender0        0.2269        0.4537         -0.5417          .    0.2269
#> x.Gender1        0.7731        0.5463          0.5417          .    0.2269
#>           eCDF Max Std. Pair Dist.
#> distance    0.4581          0.7250
#> x.Age       0.3282          2.2455
#> x.Gender0   0.2269          0.5417
#> x.Gender1   0.2269          0.5417
#> 
#> Sample Sizes:
#>           Control Treated
#> All           546     454
#> Matched       454     454
#> Unmatched      92       0
#> Discarded       0       0
#  summary 命令不加选择地计算离散变量和连续变量的 SMD,从而导致离散变量的偏差(被视为连续变量以检查摘要中的smd)




after_match <- match.data(m.out)

# 由 CreateTableOne 函数汇总的匹配数据的基线协变量。
tbl2 <- CreateTableOne(vars = c("x.Age","x.Gender","CVD"),
                       data = after_match,
                       factorVars = c("x.Gender","CVD"),
                       strata = "Smoke",smd = T)
# 此函数仅考虑匹配数据,并且均值差值由匹配数据 标准化(除以 SD)。匹配后,匹配数据中的标准差可能会更小,因此 SMD 可能比匹配前更大,尽管平均差值减小
print(tbl2, showAllLevels = T, smd = T)
#>                    Stratified by Smoke
#>                     level 0             1             p      test SMD   
#>   n                         454           454                           
#>   x.Age (mean (SD))       46.31 (17.87) 46.65 (7.70)   0.709       0.025
#>   x.Gender (%)      0       206 (45.4)    103 (22.7)  <0.001       0.493
#>                     1       248 (54.6)    351 (77.3)                    
#>   CVD (%)           0       379 (83.5)    204 (44.9)  <0.001       0.878
#>                     1        75 (16.5)    250 (55.1)

更好的方法是使用原始 SD,它由 cobalt 包中的 bal.tab 函数实现

Show the code
if(!require(cobalt)) install.packages("cobalt")

bal.tab(m.out, thresholds = c(m = 0.1, v = 2))
#> Balance Measures
#>              Type Diff.Adj        M.Threshold V.Ratio.Adj      V.Threshold
#> distance Distance   0.5646                         0.7389     Balanced, <2
#> x.Age     Contin.   0.0443     Balanced, <0.1      0.1857 Not Balanced, >2
#> x.Gender   Binary   0.2269 Not Balanced, >0.1           .                 
#> 
#> Balance tally for mean differences
#>                    count
#> Balanced, <0.1         1
#> Not Balanced, >0.1     1
#> 
#> Variable with the greatest mean difference
#>  Variable Diff.Adj        M.Threshold
#>  x.Gender   0.2269 Not Balanced, >0.1
#> 
#> Balance tally for variance ratios
#>                  count
#> Balanced, <2         1
#> Not Balanced, >2     1
#> 
#> Variable with the greatest variance ratio
#>  Variable V.Ratio.Adj      V.Threshold
#>     x.Age      0.1857 Not Balanced, >2
#> 
#> Sample sizes
#>           Control Treated
#> All           546     454
#> Matched       454     454
#> Unmatched      92       0

1.4.2 显著性检验

有人批评 P 值的增加可能是由于 PSM 后研究人群的样本量减少 。应该使用解释匹配设计的统计数据,例如用于连续结果的配对 t 检验和 1:1 匹配设计中用于二分结果的 McNemar 检验 。这种做法很少在文献中实施或明确说明。

Show the code
head(after_match)
#>   x.Age x.Gender Smoke CVD  distance weights subclass
#> 1    51        0     1   1 0.2763829       1        1
#> 2    50        0     0   0 0.2723902       1        3
#> 6    56        0     1   1 0.2968789       1      220
#> 7    59        0     0   0 0.3095888       1      247
#> 8    42        0     0   0 0.2417770       1       65
#> 9    78        0     0   1 0.3962738       1      355

Weights 列是为每个样本提供的权重。这些权重是通过精心设计的规则计算的,以确保匹配的治疗组和对照组的权重相似。可以看出,1 名参与者(编号 7)被多次匹配,导出的数据中的权重为 0.187 。在这种情况下,可以通过使用weigts 包中带有 wtd.t.test() 函数的加权 t 检验来执行带有权重的统计

Show the code
if(!require(weights, quietly = T)) install.packages("weights")
after_match %>% DT::datatable()
Show the code
attach(after_match)
age_trt <- after_match[Smoke== 1, "x.Age"]
weight_trt <- after_match[Smoke== 1, "weights"]

age_ctrl <- after_match[Smoke== 0, "x.Age"]
weight_ctrl <- after_match[Smoke== 0, "weights"]


wtd.t.test(x = age_ctrl, y = age_trt,
        weight = weight_ctrl, weighty = weight_trt)
#> $test
#> [1] "Two Sample Weighted T-Test (Welch)"
#> 
#> $coefficients
#>     t.value          df     p.value 
#>  -0.1176369 848.0506954   0.9063832 
#> 
#> $additional
#> Difference     Mean.x     Mean.y   Std. Err 
#> -0.1085573 46.4285714 46.5371287  0.9228169
detach(after_match)

t 检验,则应对协变量值的顺序进行排序

Show the code
match_trt <- data[rownames(m.out$match.matrix),]

matx <- m.out$match.matrix

dim(matx) <- c(dim(matx)[1]* dim(matx)[2],1)

match_ctrl <- data[matx, ]

t.test(match_ctrl$x.Age, match_trt$x.Age, paired = F)
#> 
#>  Welch Two Sample t-test
#> 
#> data:  match_ctrl$x.Age and match_trt$x.Age
#> t = -0.3739, df = 615.61, p-value = 0.7086
#> alternative hypothesis: true difference in means is not equal to 0
#> 95 percent confidence interval:
#>  -2.134594  1.451775
#> sample estimates:
#> mean of x mean of y 
#>  46.30617  46.64758

Kolmogorov-Smirnov (KS) 统计

cobalt 包中的函数 bal.tab 还提供 KS 检验,该检验比较处理组和对照组之间协变量的分布。唯一的要求是设置 ks.threshold 参数。

1.4.3 分布可视化

直方图、分位数-分位数图、累积分布函数 ,love plots

Show the code
plot(m.out)

Show the code
plot(m.out, type= "jitter")

#> To identify the units, use first mouse button; to stop, use second.
plot(m.out, type= "hist")

Show the code


bal.plot(m.out,var.name = "x.Age", which = "both",
         grid = T)

Show the code
bal.plot(m.out,var.name = "x.Gender", which = "both",
         grid = T)

Show the code
bal.plot(m.out,var.name = "x.Age", which = "both",
         grid = T, type = "ecdf")

Show the code


love.plot(bal.tab(m.out, thresholds = c(m = 0.1, v = 2)),
          grid = T, stars = "raw", 
          abs = F,stats = "mean.diffs"
          )

1.4.4 拟合优度指标

c-statistics, or area under the receiver operating characteristic curve (AUROC)

。但是,这些统计数据没有提供关于是否正确指定 PS 型号的信息

1.4.5 如果协变量在匹配后仍然不平衡,该怎么办?

  1. 更改公式 formula,甚至模型 distance , 机器学习

  2. 更改匹配方法 method,更改更小的卡尺 caliper = 0.1pop.size=100 的 GM 

  3. 将 PSM 与精确匹配方法相结合 exact=c('x.Gender') , 不推荐

  4. 增加样本量

  5. PSM 后基线协变量的残差可以通过匹配、分层、回归调整等常规方法处理

1.5 非随机试验中的二分类治疗效果

在 Rubin 提出的反事实框架下,对于每个个体,在治疗分配之前有两种潜在结果(在治疗或控制条件下)。一旦患者接受治疗或对照,研究人员就会观察到两种结果中的一种,但另一种结果缺失。在某种程度上,观察性研究中的数据分析就像缺失结果的插值。

Abadie 和 Imbens 提出了一个适合匹配的治疗效果估计量。在匹配数据中,所有协变量在处理组和对照组之间具有相似的分布。这意味着匹配的患者被认为是相似的。使用匹配的对照组的平均结果对治疗组的缺失结果进行插值,反之亦然。假设接受治疗的患者具有平均结果 (A)。在估计 PS 并进行匹配后,我们将对照条件下接受治疗的患者的结局与匹配患者的结局进行插值。如果他们没有接受治疗,他们对结果的期望应该是 B。因此,我们在匹配数据上比较两个平均结果 (A vs. B),以估计对接受治疗的平均治疗效果。

the average treatment effect (ATE) and the average treatment effect on the treated (ATT).

1.5.1 ATT

Zelig 包估计 Monte Carlo 模拟的因果效应,但Zelig 包不再维护更新

Translating Zelig to clarify

Show the code
library("clarify")
after_match <- match.data(m.out)

fit <- glm(CVD ~ Smoke + x.Age + x.Gender, data = after_match,
           family = binomial(link = "logit"),weights = after_match$weights )


set.seed(123)
sim_coefs <- clarify::sim(fit)
sim_coefs
#> A `clarify_sim` object
#>  - 4 coefficients, 1000 simulated values
#>  - sampled distribution: multivariate normal
#>  - original fitting function call:
#> 
#> glm(formula = CVD ~ Smoke + x.Age + x.Gender, family = binomial(link = "logit"), 
#>     data = after_match, weights = after_match$weights)

est <- sim_ame(sim_coefs, var = "Smoke", subset = Smoke == 1,
               contrast = "rr" , verbose = F)   # "diff" ,sr, ar

est
#> A `clarify_est` object (from `sim_ame()`)
#>  - Average adjusted predictions for `Smoke`
#>  - 1000 simulated values
#>  - 3 quantities estimated:                    
#>  E[Y(0)]  0.01111425
#>  E[Y(1)]  0.55066079
#>  RR      49.54547289
summary(est)
#>         Estimate    2.5 %   97.5 %
#> E[Y(0)]  0.01111  0.00722  0.01750
#> E[Y(1)]  0.55066  0.52059  0.57988
#> RR      49.54547 31.86784 76.39909

# 绘制 ATT
plot(est)

Show the code
ATT_func <- function(fit) {
  d <- subset(data, Smoke == 1)
  d$treat <- 1
  p1 <- mean(predict(fit, newdata = d, type = "response"))
  d$treat <- 0
  p0 <- mean(predict(fit, newdata = d, type = "response"))
  c(`E[Y(0)]` = p0, `E[Y(1)]` = p1, `RR` = p1 / p0)
}

sim_est <- sim_apply(sim = sim_coefs, ATT_func )

sim_est
#> A `clarify_est` object (from `sim_apply()`)
#>  - 1000 simulated values
#>  - 3 quantities estimated:                  
#>  E[Y(0)] 0.5506608
#>  E[Y(1)] 0.5506608
#>  RR      1.0000000

summary(sim_est)
#>         Estimate 2.5 % 97.5 %
#> E[Y(0)]    0.551 0.521  0.580
#> E[Y(1)]    0.551 0.521  0.580
#> RR         1.000 1.000  1.000

1.5.2 敏感性分析

1.5.2.1 Rosenbaum 的原始敏感性分析

常用函数 binarysens 和 psens。前者用于二元结果,后者用于连续或顺序结果。

Show the code
library(rbounds)

1.5.2.2 E 值

Show the code
if(!require(EValue)) install.packages("EValue")

evalue(est = RR(0.8), lo = .7, hi = .9)
#>             point lower    upper
#> RR       0.800000   0.7 0.900000
#> E-values 1.809017    NA 1.462475

1.5.2.3 倾向得分校准 (PSC)

Show the code
mdl1 <- glm(Smoke ~ x.Age,family = binomial, data = data[1:300,])
PS_raw <- predict(mdl1, newdata = data[1:300,],type = "response")

mdl2 <- glm(Smoke ~ x.Age,family = binomial, data = data[301:1000,])
PS2 <- predict(mdl2, newdata = data[301:1000,],type = "response")

mdl3 <- glm(Smoke ~ x.Age + x.Gender,family = binomial, data = data[301:1000,])
PS3 <- predict(mdl3, newdata = data[301:1000,],type = "response")

data_reg <- data.frame(PS2,PS3)

mdl_calibrated <- lm(PS3 ~ PS2,data = data_reg)

ps_calibrated <- predict(mdl_calibrated, x = PS_raw)

1.6 多分类或连续治疗

1.7 时间依赖性协变量