1 ROC

Code

library(tidymodels)
#> ── Attaching packages ────────────────────────────────────── tidymodels 1.2.0 ──
#> ✔ broom        1.0.6     ✔ rsample      1.2.1
#> ✔ dials        1.2.1     ✔ tune         1.2.1
#> ✔ infer        1.0.7     ✔ workflows    1.1.4
#> ✔ modeldata    1.4.0     ✔ workflowsets 1.1.0
#> ✔ parsnip      1.2.1     ✔ yardstick    1.3.1
#> ✔ recipes      1.1.0
data(two_class_example,package = "yardstick")
tibble(two_class_example)
#> # A tibble: 500 × 4
#>    truth   Class1   Class2 predicted
#>    <fct>    <dbl>    <dbl> <fct>    
#>  1 Class2 0.00359 0.996    Class2   
#>  2 Class1 0.679   0.321    Class1   
#>  3 Class2 0.111   0.889    Class2   
#>  4 Class1 0.735   0.265    Class1   
#>  5 Class2 0.0162  0.984    Class2   
#>  6 Class1 0.999   0.000725 Class1   
#>  7 Class1 0.999   0.000799 Class1   
#>  8 Class1 0.812   0.188    Class1   
#>  9 Class2 0.457   0.543    Class2   
#> 10 Class2 0.0976  0.902    Class2   
#> # ℹ 490 more rows

1.1 混淆矩阵

Code

conf_mat(two_class_example, truth = truth, estimate = predicted)
#>           Truth
#> Prediction Class1 Class2
#>     Class1    227     50
#>     Class2     31    192

sensitivity(two_class_example,truth,predicted)
#> # A tibble: 1 × 3
#>   .metric     .estimator .estimate
#>   <chr>       <chr>          <dbl>
#> 1 sensitivity binary         0.880

1.2 准确性

Code

accuracy(two_class_example, truth, predicted)
#> # A tibble: 1 × 3
#>   .metric  .estimator .estimate
#>   <chr>    <chr>          <dbl>
#> 1 accuracy binary         0.838

1.3 ROC

Code

two_class_curve <- roc_curve(two_class_example, truth, Class1)
two_class_curve
#> # A tibble: 502 × 3
#>    .threshold specificity sensitivity
#>         <dbl>       <dbl>       <dbl>
#>  1 -Inf           0                 1
#>  2    1.79e-7     0                 1
#>  3    4.50e-6     0.00413           1
#>  4    5.81e-6     0.00826           1
#>  5    5.92e-6     0.0124            1
#>  6    1.22e-5     0.0165            1
#>  7    1.40e-5     0.0207            1
#>  8    1.43e-5     0.0248            1
#>  9    2.38e-5     0.0289            1
#> 10    3.30e-5     0.0331            1
#> # ℹ 492 more rows


ggplot(two_class_curve,aes(1-specificity,sensitivity))+
    geom_path()+
    geom_abline(lty=3)+
    coord_equal()+
    theme_bw()

1.4 AUC

Code

auc1 <- roc_auc(two_class_example, truth, Class1,event_level = "first")
auc2 <- roc_auc(two_class_example, truth, Class1,event_level = "second")

autoplot(two_class_curve)+
    annotate("text",x=0.5,y=0.25,label=str_glue("AUC={round(auc1$.estimate,3)}"))

1.5 pROC

Code

library(pROC)

Code

set.seed(10)
df <- tibble(
    group = sample(c("1","0"),size = 100,replace = T),
    value = rnorm(100,10,3)
)

roc <- roc(df$group,df$value, smooth = F,ci=T,auc=T)
#> Setting levels: control = 0, case = 1
#> Setting direction: controls < cases

roc
#> 
#> Call:
#> roc.default(response = df$group, predictor = df$value, smooth = F,     auc = T, ci = T)
#> 
#> Data: df$value in 47 controls (df$group 0) < 53 cases (df$group 1).
#> Area under the curve: 0.5319
#> 95% CI: 0.4159-0.6479 (DeLong)

# 计算最佳截断值
cutoff <- coords(roc,"best")
cutoff
#>   threshold specificity sensitivity
#> 1  7.834545   0.3404255   0.8301887

cutoff_text <- paste0(round(cutoff$threshold,3),"(",round(cutoff$specificity,3),",",round(cutoff$sensitivity,3),")")
cutoff_text
#> [1] "7.835(0.34,0.83)"


# 计算AUC

auc <- auc(roc)[1]
auc_low <- ci(roc,of="auc")[1]
auc_up <- ci(roc,of="auc")[3]


# 计算置信区间

ci <- ci.se(roc,specificities=seq(0,1,0.01))

df_ci <- ci[1:101,1:3]
df_ci <-  as.data.frame(df_ci)

df_ci <- tibble(
    x=rownames(df_ci) |> as.numeric(),
    df_ci,
)

# 绘图

ggroc(roc,
      color="red",size=1,legacy.axes = T)+
    theme_classic()+
    scale_x_continuous(expand = expansion(mult = c(0,0)))+
    scale_y_continuous(expand = expansion(mult = c(0,0)))+
    # 对角线
    geom_abline(slope = 1,linetype=3)+
    # 置信区间
    geom_ribbon(
        mapping = aes(x=1-x,ymin=`2.5%`,ymax=`97.5%`),
        data = df_ci,
        fill="lightblue",alpha=0.3,
    )+
    # 截断值
    geom_point(
        aes(x=1-specificity,y=sensitivity),
        data = cutoff,
        size=2)+
    # 截断值文字注释
    geom_text(
        aes(x=1-specificity,y=sensitivity,label=cutoff_text),
        data = cutoff,
        vjust=-1,
    )
#> Scale for x is already present.
#> Adding another scale for x, which will replace the existing scale.

Code


df <- two_class_example
roc_obj <- roc(response = df$truth, predictor = df$Class1,ci=TRUE,auc=TRUE)
#> Setting levels: control = Class1, case = Class2
#> Setting direction: controls > cases
roc_obj
#> 
#> Call:
#> roc.default(response = df$truth, predictor = df$Class1, auc = TRUE,     ci = TRUE)
#> 
#> Data: df$Class1 in 258 controls (df$truth Class1) > 242 cases (df$truth Class2).
#> Area under the curve: 0.9393
#> 95% CI: 0.9203-0.9584 (DeLong)

# roc_obj$specificities
# roc_obj$sensitivities
# roc_obj$thresholds
ci(roc_obj)
#> 95% CI: 0.9203-0.9584 (DeLong)
auc(roc_obj)
#> Area under the curve: 0.9393


# 最佳截断值（Youden's index）
best_cutoff <- coords(roc_obj, x = "best",input="threshold", 
                      ret=c("threshold","specificity","sensitivity","ppv","npv"),
                      best.method="youden")
best_cutoff
#>           threshold specificity sensitivity       ppv       npv
#> threshold 0.7585932   0.8062016   0.9214876 0.8168498 0.9162996


coords(roc_obj, x = "best",input="threshold", 
                      ret="all",
                      best.method="youden")
#>           threshold specificity sensitivity accuracy  tn  tp fn fp       npv
#> threshold 0.7585932   0.8062016   0.9214876    0.862 208 223 19 50 0.9162996
#>                 ppv       fdr       fpr       tpr       tnr       fnr
#> threshold 0.8168498 0.1831502 0.1937984 0.9214876 0.8062016 0.0785124
#>           1-specificity 1-sensitivity 1-accuracy      1-npv     1-ppv precision
#> threshold     0.1937984     0.0785124      0.138 0.08370044 0.1831502 0.8168498
#>              recall   youden closest.topleft
#> threshold 0.9214876 1.727689      0.04372204

Code

plot(roc_obj, print.thres = "best", 
     print.thres.pattern = "Best cutoff: %.5f", main = "ROC Curve")

Code

plot(specificity + sensitivity ~ threshold,
     coords(roc_obj, "all", transpose = FALSE),
     type = "l", log="x",
     subset = is.finite(threshold))

Code

plot(roc_obj, col = "blue",main = "Multiple ROC Curves", print.thres = "best", print.thres.pattern = "Optimal threshold: %.5f")
lines(roc_obj, col = "red",lwd=2,lty=3)
legend("bottomright", legend = c("Model 1", "Model 2"), col = c("blue", "red"), lwd = 2)