1 生存曲线

1.1 Kaplan-Meier curve

Code
library(tidyverse)
#> Warning: package 'purrr' was built under R version 4.5.2
#> Warning: package 'stringr' was built under R version 4.5.2
#> Warning: package 'forcats' was built under R version 4.5.2
#> ── Attaching core tidyverse packages ──────────────────────── tidyverse 2.0.0 ──
#> ✔ dplyr     1.1.4     ✔ readr     2.1.6
#> ✔ forcats   1.0.1     ✔ stringr   1.6.0
#> ✔ ggplot2   4.0.1     ✔ tibble    3.3.0
#> ✔ lubridate 1.9.4     ✔ tidyr     1.3.1
#> ✔ purrr     1.2.0     
#> ── Conflicts ────────────────────────────────────────── tidyverse_conflicts() ──
#> ✖ dplyr::filter() masks stats::filter()
#> ✖ dplyr::lag()    masks stats::lag()
#> ℹ Use the conflicted package (<http://conflicted.r-lib.org/>) to force all conflicts to become errors
library(readxl)
library(survival)
library(ggsci)
#> Warning: package 'ggsci' was built under R version 4.5.2
library(survminer)
#> Warning: package 'survminer' was built under R version 4.5.2
#> Loading required package: ggpubr
#> Warning: package 'ggpubr' was built under R version 4.5.2
#> 
#> Attaching package: 'survminer'
#> 
#> The following object is masked from 'package:survival':
#> 
#>     myeloma
library(ggsurvfit)
#> Warning: package 'ggsurvfit' was built under R version 4.5.2
Code
### 数据 三列 “group”分组列, “time”生存时间列,“status”事件状态列
d<-read_excel("data/01source.xlsx",sheet=1,range = "A19:E75") |> 
    rename(
        LR_DPVB=`LR-DPVB`
    )  |> 
    pivot_longer(
        cols = CON:LR_DPVB,
        names_to = "treatment",
        values_to = "status"
    ) |> drop_na() |> 
    mutate(
        treatment=factor(treatment,levels=c('CON','LDRT','DPVB','LR_DPVB'))
    )
# 加载包
d
#> # A tibble: 56 × 3
#>     Days treatment status
#>    <dbl> <fct>      <dbl>
#>  1    11 CON            1
#>  2    13 CON            1
#>  3    14 CON            1
#>  4    14 CON            1
#>  5    15 CON            1
#>  6    17 CON            1
#>  7    17 CON            1
#>  8    17 CON            1
#>  9    20 CON            1
#> 10    20 CON            1
#> # ℹ 46 more rows
# 使用 Kaplan-Meier 方法拟合生存曲线
km_fit<-surv_fit(Surv(Days,status)~treatment,data=d)
summary(km_fit)
#> Call: survfit(formula = Surv(Days, status) ~ treatment, data = structure(list(
#>     Days = c(11, 13, 14, 14, 15, 17, 17, 17, 20, 20, 21, 21, 
#>     25, 27, 13, 13, 15, 16, 18, 19, 19, 20, 20, 20, 24, 25, 27, 
#>     30, 20, 23, 27, 28, 30, 32, 38, 39, 45, 50, 50, 50, 50, 50, 
#>     30, 40, 50, 50, 50, 50, 50, 50, 50, 50, 50, 50, 50, 50), 
#>     treatment = structure(c(1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 
#>     1L, 1L, 1L, 1L, 1L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 
#>     2L, 2L, 2L, 2L, 3L, 3L, 3L, 3L, 3L, 3L, 3L, 3L, 3L, 3L, 3L, 
#>     3L, 3L, 3L, 4L, 4L, 4L, 4L, 4L, 4L, 4L, 4L, 4L, 4L, 4L, 4L, 
#>     4L, 4L), levels = c("CON", "LDRT", "DPVB", "LR_DPVB"), class = "factor"), 
#>     status = c(1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 
#>     1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 
#>     1, 1, 0, 0, 0, 0, 0, 1, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 
#>     0, 0)), row.names = c(NA, -56L), class = c("tbl_df", "tbl", 
#> "data.frame")))
#> 
#>                 treatment=CON 
#>  time n.risk n.event survival std.err lower 95% CI upper 95% CI
#>    11     14       1   0.9286  0.0688       0.8030        1.000
#>    13     13       1   0.8571  0.0935       0.6921        1.000
#>    14     12       2   0.7143  0.1207       0.5129        0.995
#>    15     10       1   0.6429  0.1281       0.4351        0.950
#>    17      9       3   0.4286  0.1323       0.2341        0.785
#>    20      6       2   0.2857  0.1207       0.1248        0.654
#>    21      4       2   0.1429  0.0935       0.0396        0.515
#>    25      2       1   0.0714  0.0688       0.0108        0.472
#>    27      1       1   0.0000     NaN           NA           NA
#> 
#>                 treatment=LDRT 
#>  time n.risk n.event survival std.err lower 95% CI upper 95% CI
#>    13     14       2   0.8571  0.0935       0.6921        1.000
#>    15     12       1   0.7857  0.1097       0.5977        1.000
#>    16     11       1   0.7143  0.1207       0.5129        0.995
#>    18     10       1   0.6429  0.1281       0.4351        0.950
#>    19      9       2   0.5000  0.1336       0.2961        0.844
#>    20      7       3   0.2857  0.1207       0.1248        0.654
#>    24      4       1   0.2143  0.1097       0.0786        0.584
#>    25      3       1   0.1429  0.0935       0.0396        0.515
#>    27      2       1   0.0714  0.0688       0.0108        0.472
#>    30      1       1   0.0000     NaN           NA           NA
#> 
#>                 treatment=DPVB 
#>  time n.risk n.event survival std.err lower 95% CI upper 95% CI
#>    20     14       1    0.929  0.0688        0.803        1.000
#>    23     13       1    0.857  0.0935        0.692        1.000
#>    27     12       1    0.786  0.1097        0.598        1.000
#>    28     11       1    0.714  0.1207        0.513        0.995
#>    30     10       1    0.643  0.1281        0.435        0.950
#>    32      9       1    0.571  0.1323        0.363        0.899
#>    38      8       1    0.500  0.1336        0.296        0.844
#>    39      7       1    0.429  0.1323        0.234        0.785
#>    45      6       1    0.357  0.1281        0.177        0.721
#> 
#>                 treatment=LR_DPVB 
#>  time n.risk n.event survival std.err lower 95% CI upper 95% CI
#>    30     14       1    0.929  0.0688        0.803            1
#>    40     13       1    0.857  0.0935        0.692            1

1.2 plot()

Code
plot(km_fit,
     lty = 1,
     col = c("gray","blue","orange3","darkred"),
     lwd = 2,
)

1.3 survminer::ggsurvplot()

Code
# 使用 ggplot2 和 survminer 绘制生存曲线图

p <- ggsurvplot(
    fit  = km_fit,
    data = d,
    fun = "pct",  #event',累计死亡比列,'cumhaz':cumulative hazard累计风险
    
    palette = "jco",
    linetype = c(1,5,4,6),
    surv.median.line = 'none',
    
    conf.int = F,

    
    pval = TRUE,# "log rank p=0.0053"
    log.rank.weights='1',  
    pval.method = TRUE,
    pval.coord=c(40,75),
    pval.method.coord=c(32,75),
    
    risk.table = "abs_pct", # "nrisk_cumcensor" and "nrisk_cumevents"
    risk.table.col = "treatment",
    risk.table.height=0.3,
    tables.theme = theme_cleantable(),
    ncensor.plot=F,
  
    
    axes.offset=F,  #(0,0)pos
    title = "Hepa1-6 tumor",
    xlab="Days",
    ylab="Percent survival (%)",
    
    legend.title = "",
    legend = c(0.2,0.35),
    legend.labs=c('CON','LDRT','DPVB','LR_DPVB'),
    
    ggtheme = theme_survminer(),
)
#> Warning: Using `size` aesthetic for lines was deprecated in ggplot2 3.4.0.
#> ℹ Please use `linewidth` instead.
#> ℹ The deprecated feature was likely used in the ggpubr package.
#>   Please report the issue at <https://github.com/kassambara/ggpubr/issues>.
p

Code
fit<- survfit(Surv(time, status) ~ sex, data = lung)

# Basic survival curves
ggsurvplot(fit, data = lung)

Code

# Customized survival curves
p <- ggsurvplot(fit, data = lung,
           surv.median.line = "hv", # Add medians survival
           
           # Change legends: title & labels
           legend.title = "Sex",
           legend.labs = c("Male", "Female"),
           # Add p-value and tervals
           pval = TRUE,
           
           conf.int = TRUE,
           # Add risk table
           risk.table = TRUE,
           tables.height = 0.2,
           tables.theme = theme_cleantable(),
           
           # Color palettes. Use custom color: c("#E7B800", "#2E9FDF"),
           # or brewer color (e.g.: "Dark2"), or ggsci color (e.g.: "jco")
           palette = c("#E7B800", "#2E9FDF"),
           ggtheme = theme_bw() # Change ggplot2 theme
)
Code
p <- ggsurvplot(fit, data = lung,
                surv.median.line = "hv", # Add medians survival
                
                # Change legends: title & labels
                legend.title = "Sex",
                legend.labs = c("Male", "Female"),
                # Add p-value and tervals
                pval = F,    ##不默认显示P
                conf.int = TRUE,
                # Add risk table
                risk.table = TRUE,
                tables.height = 0.2,
                tables.theme = theme_cleantable(),
                
                # Color palettes. Use custom color: c("#E7B800", "#2E9FDF"),
                # or brewer color (e.g.: "Dark2"), or ggsci color (e.g.: "jco")
                palette = c("#E7B800", "#2E9FDF"),
                ggtheme = theme_bw() # Change ggplot2 theme
)
##计算公式
data.survdiff <- survdiff(Surv(time, status) ~ sex,data = lung)
p.val = 1 - pchisq(data.survdiff$chisq, length(data.survdiff$n) - 1)
HR = (data.survdiff$obs[2]/data.survdiff$exp[2])/(data.survdiff$obs[1]/data.survdiff$exp[1])
up95 = exp(log(HR) + qnorm(0.975)*sqrt(1/data.survdiff$exp[2]+1/data.survdiff$exp[1]))
low95 = exp(log(HR) - qnorm(0.975)*sqrt(1/data.survdiff$exp[2]+1/data.survdiff$exp[1]))
ci <- paste0(sprintf("%.3f",HR)," [",sprintf("%.3f",low95),", ",sprintf("%.3f",up95),"]")

p$plot <- p$plot + 
  annotate("text", x = 0, y = 0.05, 
                            label = paste0("P value = ",sprintf("%.3f",p.val),"\n HR (95% CI) = ",ci),   ###添加P和HR 95%CI
                            size = 5, color = "black", hjust = 0)+
  theme(text = element_text(size = 15))
p$plot

1.4 ggsurvfit

Code
km_fit
#> Call: survfit(formula = Surv(Days, status) ~ treatment, data = structure(list(
#>     Days = c(11, 13, 14, 14, 15, 17, 17, 17, 20, 20, 21, 21, 
#>     25, 27, 13, 13, 15, 16, 18, 19, 19, 20, 20, 20, 24, 25, 27, 
#>     30, 20, 23, 27, 28, 30, 32, 38, 39, 45, 50, 50, 50, 50, 50, 
#>     30, 40, 50, 50, 50, 50, 50, 50, 50, 50, 50, 50, 50, 50), 
#>     treatment = structure(c(1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 
#>     1L, 1L, 1L, 1L, 1L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 
#>     2L, 2L, 2L, 2L, 3L, 3L, 3L, 3L, 3L, 3L, 3L, 3L, 3L, 3L, 3L, 
#>     3L, 3L, 3L, 4L, 4L, 4L, 4L, 4L, 4L, 4L, 4L, 4L, 4L, 4L, 4L, 
#>     4L, 4L), levels = c("CON", "LDRT", "DPVB", "LR_DPVB"), class = "factor"), 
#>     status = c(1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 
#>     1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 
#>     1, 1, 0, 0, 0, 0, 0, 1, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 
#>     0, 0)), row.names = c(NA, -56L), class = c("tbl_df", "tbl", 
#> "data.frame")))
#> 
#>                    n events median 0.95LCL 0.95UCL
#> treatment=CON     14     14   17.0      15      25
#> treatment=LDRT    14     14   19.5      18      27
#> treatment=DPVB    14      9   38.5      30      NA
#> treatment=LR_DPVB 14      2     NA      NA      NA


km_fit |> tidy_survfit() |> 
    ggplot(aes(time,estimate,min=conf.low,ymax=conf.low,
               color=strata,fill=strata))+
    scale_color_jco()+
    geom_step()

Code

km_fit |> ggsurvfit()+
    add_confidence_interval()+
    # 添加删失点
    add_censor_mark()  +
    add_quantile()+
    add_pvalue("annotation",size=3,x=5,y=0.25)+
    #add_pvalue(caption = "Log-rank {p.value}")+
    add_risktable(size = 3,risktable_height = 0.3)+
    add_legend_title("treatment")
#> ! `add_pvalue()` works with objects created with `survfit2()` or
#>   `tidycmprsk::cuminc()`.
#> ℹ `add_pvalue()` has been ignored.

Back to top