1 ROC

Code
library(tidymodels)
#> Warning: package 'tidymodels' was built under R version 4.5.2
#> ── Attaching packages ────────────────────────────────────── tidymodels 1.4.1 ──
#> ✔ broom        1.0.10     ✔ recipes      1.3.1 
#> ✔ dials        1.4.2      ✔ rsample      1.3.1 
#> ✔ dplyr        1.1.4      ✔ tailor       0.1.0 
#> ✔ ggplot2      4.0.1      ✔ tidyr        1.3.1 
#> ✔ infer        1.0.9      ✔ tune         2.0.1 
#> ✔ modeldata    1.5.1      ✔ workflows    1.3.0 
#> ✔ parsnip      1.3.3      ✔ workflowsets 1.1.1 
#> ✔ purrr        1.2.0      ✔ yardstick    1.3.2
#> Warning: package 'broom' was built under R version 4.5.2
#> Warning: package 'dials' was built under R version 4.5.2
#> Warning: package 'modeldata' was built under R version 4.5.2
#> Warning: package 'parsnip' was built under R version 4.5.2
#> Warning: package 'purrr' was built under R version 4.5.2
#> Warning: package 'rsample' was built under R version 4.5.2
#> Warning: package 'tailor' was built under R version 4.5.2
#> Warning: package 'tune' was built under R version 4.5.2
#> Warning: package 'workflows' was built under R version 4.5.2
#> ── Conflicts ───────────────────────────────────────── tidymodels_conflicts() ──
#> ✖ purrr::discard() masks scales::discard()
#> ✖ dplyr::filter()  masks stats::filter()
#> ✖ dplyr::lag()     masks stats::lag()
#> ✖ recipes::step()  masks stats::step()
library(tidyverse)
#> Warning: package 'stringr' was built under R version 4.5.2
#> Warning: package 'forcats' was built under R version 4.5.2
#> ── Attaching core tidyverse packages ──────────────────────── tidyverse 2.0.0 ──
#> ✔ forcats   1.0.1     ✔ stringr   1.6.0
#> ✔ lubridate 1.9.4     ✔ tibble    3.3.0
#> ✔ readr     2.1.6
#> ── Conflicts ────────────────────────────────────────── tidyverse_conflicts() ──
#> ✖ readr::col_factor() masks scales::col_factor()
#> ✖ purrr::discard()    masks scales::discard()
#> ✖ dplyr::filter()     masks stats::filter()
#> ✖ stringr::fixed()    masks recipes::fixed()
#> ✖ dplyr::lag()        masks stats::lag()
#> ✖ readr::spec()       masks yardstick::spec()
#> ℹ Use the conflicted package (<http://conflicted.r-lib.org/>) to force all conflicts to become errors
data(two_class_example,package = "yardstick")
tibble(two_class_example)
#> # A tibble: 500 × 4
#>    truth   Class1   Class2 predicted
#>    <fct>    <dbl>    <dbl> <fct>    
#>  1 Class2 0.00359 0.996    Class2   
#>  2 Class1 0.679   0.321    Class1   
#>  3 Class2 0.111   0.889    Class2   
#>  4 Class1 0.735   0.265    Class1   
#>  5 Class2 0.0162  0.984    Class2   
#>  6 Class1 0.999   0.000725 Class1   
#>  7 Class1 0.999   0.000799 Class1   
#>  8 Class1 0.812   0.188    Class1   
#>  9 Class2 0.457   0.543    Class2   
#> 10 Class2 0.0976  0.902    Class2   
#> # ℹ 490 more rows

1.1 混淆矩阵

Code
conf_mat(two_class_example, truth = truth, estimate = predicted)
#>           Truth
#> Prediction Class1 Class2
#>     Class1    227     50
#>     Class2     31    192

sensitivity(two_class_example,truth,predicted)
#> # A tibble: 1 × 3
#>   .metric     .estimator .estimate
#>   <chr>       <chr>          <dbl>
#> 1 sensitivity binary         0.880

1.2 准确性

Code
accuracy(two_class_example, truth, predicted)
#> # A tibble: 1 × 3
#>   .metric  .estimator .estimate
#>   <chr>    <chr>          <dbl>
#> 1 accuracy binary         0.838

1.3 ggplot2::geom_path()

Code
two_class_curve <- roc_curve(two_class_example, truth, Class1)
two_class_curve
#> # A tibble: 502 × 3
#>    .threshold specificity sensitivity
#>         <dbl>       <dbl>       <dbl>
#>  1 -Inf           0                 1
#>  2    1.79e-7     0                 1
#>  3    4.50e-6     0.00413           1
#>  4    5.81e-6     0.00826           1
#>  5    5.92e-6     0.0124            1
#>  6    1.22e-5     0.0165            1
#>  7    1.40e-5     0.0207            1
#>  8    1.43e-5     0.0248            1
#>  9    2.38e-5     0.0289            1
#> 10    3.30e-5     0.0331            1
#> # ℹ 492 more rows


ggplot(two_class_curve,aes(1-specificity,sensitivity))+
    geom_path()+
    geom_abline(lty=3)+
    coord_equal()+
    theme_bw()

1.4 yardstick::roc_auc()

Code
auc1 <- roc_auc(two_class_example, truth, Class1,event_level = "first")
auc2 <- roc_auc(two_class_example, truth, Class1,event_level = "second")

autoplot(two_class_curve)+
    annotate("text",x=0.5,y=0.25,label=str_glue("AUC={round(auc1$.estimate,3)}"))

1.5 pROC

Code
library(pROC)
#> Warning: package 'pROC' was built under R version 4.5.2
#> Type 'citation("pROC")' for a citation.
#> 
#> Attaching package: 'pROC'
#> The following objects are masked from 'package:stats':
#> 
#>     cov, smooth, var
Code
set.seed(10)
df <- tibble(
    group = sample(c("1","0"),size = 100,replace = T),
    value = rnorm(100,10,3)
)

roc <- roc(df$group,df$value, smooth = F,ci=T,auc=T)
#> Setting levels: control = 0, case = 1
#> Setting direction: controls < cases

roc
#> 
#> Call:
#> roc.default(response = df$group, predictor = df$value, smooth = F,     auc = T, ci = T)
#> 
#> Data: df$value in 47 controls (df$group 0) < 53 cases (df$group 1).
#> Area under the curve: 0.5319
#> 95% CI: 0.4159-0.6479 (DeLong)

# 计算最佳截断值
cutoff <- coords(roc,"best")
cutoff
#>   threshold specificity sensitivity
#> 1  7.834545   0.3404255   0.8301887

cutoff_text <- paste0(round(cutoff$threshold,3),"(",round(cutoff$specificity,3),",",round(cutoff$sensitivity,3),")")
cutoff_text
#> [1] "7.835(0.34,0.83)"


# 计算AUC

auc <- auc(roc)[1]
auc_low <- ci(roc,of="auc")[1]
auc_up <- ci(roc,of="auc")[3]


# 计算置信区间

ci <- ci.se(roc,specificities=seq(0,1,0.01))

df_ci <- ci[1:101,1:3]
df_ci <-  as.data.frame(df_ci)

df_ci <- tibble(
    x=rownames(df_ci) |> as.numeric(),
    df_ci,
)

# 绘图

ggroc(roc,
      color="red",size=1,legacy.axes = T)+
    theme_classic()+
    scale_x_continuous(expand = expansion(mult = c(0,0)))+
    scale_y_continuous(expand = expansion(mult = c(0,0)))+
    # 对角线
    geom_abline(slope = 1,linetype=3)+
    # 置信区间
    geom_ribbon(
        mapping = aes(x=1-x,ymin=`2.5%`,ymax=`97.5%`),
        data = df_ci,
        fill="lightblue",alpha=0.3,
    )+
    # 截断值
    geom_point(
        aes(x=1-specificity,y=sensitivity),
        data = cutoff,
        size=2)+
    # 截断值文字注释
    geom_text(
        aes(x=1-specificity,y=sensitivity,label=cutoff_text),
        data = cutoff,
        vjust=-1,
    )
#> Warning: Using `size` aesthetic for lines was deprecated in ggplot2 3.4.0.
#> ℹ Please use `linewidth` instead.
#> ℹ The deprecated feature was likely used in the pROC package.
#>   Please report the issue at <https://github.com/xrobin/pROC/issues>.
#> Scale for x is already present.
#> Adding another scale for x, which will replace the existing scale.

Code

df <- two_class_example
roc_obj <- roc(response = df$truth, predictor = df$Class1,ci=TRUE,auc=TRUE)
#> Setting levels: control = Class1, case = Class2
#> Setting direction: controls > cases
roc_obj
#> 
#> Call:
#> roc.default(response = df$truth, predictor = df$Class1, auc = TRUE,     ci = TRUE)
#> 
#> Data: df$Class1 in 258 controls (df$truth Class1) > 242 cases (df$truth Class2).
#> Area under the curve: 0.9393
#> 95% CI: 0.9203-0.9584 (DeLong)

# roc_obj$specificities
# roc_obj$sensitivities
# roc_obj$thresholds
ci(roc_obj)
#> 95% CI: 0.9203-0.9584 (DeLong)
auc(roc_obj)
#> Area under the curve: 0.9393


# 最佳截断值(Youden's index)
best_cutoff <- coords(roc_obj, x = "best",input="threshold", 
                      ret=c("threshold","specificity","sensitivity","ppv","npv"),
                      best.method="youden")
best_cutoff
#>           threshold specificity sensitivity       ppv       npv
#> threshold 0.7585932   0.8062016   0.9214876 0.8168498 0.9162996


coords(roc_obj, x = "best",input="threshold", 
                      ret="all",
                      best.method="youden")
#>           threshold specificity sensitivity accuracy  tn  tp fn fp       npv
#> threshold 0.7585932   0.8062016   0.9214876    0.862 208 223 19 50 0.9162996
#>                 ppv       fdr       fpr       tpr       tnr       fnr
#> threshold 0.8168498 0.1831502 0.1937984 0.9214876 0.8062016 0.0785124
#>           1-specificity 1-sensitivity 1-accuracy      1-npv     1-ppv precision
#> threshold     0.1937984     0.0785124      0.138 0.08370044 0.1831502 0.8168498
#>              recall   lr_pos     lr_neg   youden closest.topleft
#> threshold 0.9214876 4.754876 0.09738557 1.727689      0.04372204
Code
plot(roc_obj, print.thres = "best", 
     print.thres.pattern = "Best cutoff: %.5f", main = "ROC Curve")

Code
plot(specificity + sensitivity ~ threshold,
     coords(roc_obj, "all", transpose = FALSE),
     type = "l", log="x",
     subset = is.finite(threshold))

Code
plot(roc_obj, col = "blue",main = "Multiple ROC Curves", print.thres = "best", print.thres.pattern = "Optimal threshold: %.5f")
lines(roc_obj, col = "red",lwd=2,lty=3)
legend("bottomright", legend = c("Model 1", "Model 2"), col = c("blue", "red"), lwd = 2)

Back to top