1 gtsummary

https://www.danieldsjoberg.com/gtsummary/index.html

Code
Code
# Attach gtsummary and preview the first rows of the `trial` example data.
library(gtsummary)
head(trial)
#> # A tibble: 6 × 8
#>   trt      age marker stage grade response death ttdeath
#>   <chr>  <dbl>  <dbl> <fct> <fct>    <int> <int>   <dbl>
#> 1 Drug A    23  0.16  T1    II           0     0    24  
#> 2 Drug B     9  1.11  T2    I            1     0    24  
#> 3 Drug A    31  0.277 T1    II           0     0    24  
#> 4 Drug A    NA  2.07  T3    III          1     1    17.6
#> 5 Drug A    51  2.77  T4    III          1     1    16.4
#> 6 Drug B    39  0.613 T4    I            0     1    15.6
# Load `trial` into the workspace explicitly.
data("trial")

数据框中的每个变量都可以分配一个 "label" 属性

Code
# Attach a "label" attribute to the treatment column, then read it back.
attr(trial$trt, "label") <- "治疗"
attr(trial$trt, "label")
#> [1] "治疗"
  • Label attributes from the data set are automatically printed.

  • Missing values are listed as “Unknown” in the table.

argument 描述
label= 指定表格中打印的变量标签
type= 指定变量类型(例如 continuous、categorical 等)
statistic= 更改显示的汇总统计数据
digits= 汇总统计数据将四舍五入的位数
missing= 是否显示包含缺失观测值数的行
missing_text= 缺失值行的文本标签
sort= 按频率更改分类级别的排序
percent= 打印列、行或单元格百分比
include= 要包含在汇总表中的变量列表

1.1 基线特征表

1.1.1 描述分布

Code

# Baseline table by treatment arm: mean ± SD for continuous variables and
# n / N (%) for categorical ones. `response` and `death` are dropped first.
trial2 <- trial |>
  select(-response, -death)

trial2 |>
  tbl_summary(
    by = trt,
    label = list(
      age ~ "Patient age",
      grade ~ "Tumor Grade"
    ),
    statistic = list(
      # alternative for continuous variables: "{median} ({p25},{p75})"
      all_continuous() ~ "{mean}± {sd} ",
      all_categorical() ~ "{n} / {N} ({p}%)"
    ),
    # optional per-variable rounding:
    # digits = list(starts_with("age") ~ 3, grade ~ 2),
    missing_text = "(NA)"
  )

Characteristic

Drug A
N = 98

1

Drug B
N = 102

1
Patient age 47± 15 47± 14
    (NA) 7 4
Marker Level (ng/mL) 1.02± 0.89 0.82± 0.83
    (NA) 6 4
T Stage

    T1 28 / 98 (29%) 25 / 102 (25%)
    T2 25 / 98 (26%) 29 / 102 (28%)
    T3 22 / 98 (22%) 21 / 102 (21%)
    T4 23 / 98 (23%) 27 / 102 (26%)
Tumor Grade

    I 35 / 98 (36%) 33 / 102 (32%)
    II 32 / 98 (33%) 36 / 102 (35%)
    III 31 / 98 (32%) 33 / 102 (32%)
Months to Death/Censor 20.2± 5.0 19.0± 5.5
1

Mean± SD ; n / N (%)

Code
# Summary table by treatment arm with an overall column, a per-variable N
# column, and a customised header, spanning header, footnote and caption.
trial2 |>
  tbl_summary(by = trt) |>
  add_overall() |> # add a column summarising all rows together
  add_n() |>
  # relabel the table: label column header, spanner over the two arm
  # columns, footnote on the statistic columns, and caption
  modify_header(label ~ "**变量**") |>
  modify_spanning_header(c("stat_1", "stat_2") ~ "**药物**") |>
  modify_footnote(all_stat_cols() ~ "Median (25%, 75%) or Frequency (%)") |>
  modify_caption("**Table 1. Patient Characteristics**") |>
  bold_labels()
Table 1. Patient Characteristics

变量

N

Overall
N = 200

1

药物

Drug A
N = 98

1

Drug B
N = 102

1
Age 189 47 (38, 57) 46 (37, 60) 48 (39, 56)
    Unknown
11 7 4
Marker Level (ng/mL) 190 0.64 (0.22, 1.41) 0.84 (0.23, 1.60) 0.52 (0.18, 1.21)
    Unknown
10 6 4
T Stage 200


    T1
53 (27%) 28 (29%) 25 (25%)
    T2
54 (27%) 25 (26%) 29 (28%)
    T3
43 (22%) 22 (22%) 21 (21%)
    T4
50 (25%) 23 (23%) 27 (26%)
Grade 200


    I
68 (34%) 35 (36%) 33 (32%)
    II
68 (34%) 32 (33%) 36 (35%)
    III
64 (32%) 31 (32%) 33 (32%)
Months to Death/Censor 200 22.4 (15.9, 24.0) 23.5 (17.4, 24.0) 21.2 (14.5, 24.0)
1

Median (25%, 75%) or Frequency (%)

函数 描述
add_p() 将 p 值添加到输出中,比较各组的值
add_overall() 添加包含总体汇总统计数据的列
add_n() 为每个变量添加一个包含 N(或缺失数)的列
add_difference() 为两组之间的差值、置信区间和 p 值添加列
add_stat_label() 为每行中显示的汇总统计数据添加标签
add_stat() 泛型函数,用于添加具有用户定义值的列
add_q() 添加一列 q 值以控制多重比较
modify_header() 更新列标题
modify_footnote() 更新列脚注
modify_spanning_header() 更新跨列标题
modify_caption() 更新表格标题/说明
bold_labels() 粗体变量标签
bold_levels() 粗体变量水平
italicize_labels() 将变量标签斜体化
italicize_levels() 斜体变量级别
bold_p() 粗体显著性 p 值

1.1.2 统计检验

1.1.2.1 p值

Code
# Add p-values comparing the two arms: t-test for continuous variables and
# chi-squared test without continuity correction for categorical ones;
# p-values are formatted with three digits.
trial2 |>
  tbl_summary(by = trt) |>
  add_p(
    pvalue_fun = label_style_pvalue(digits = 3),
    test = list(
      all_continuous() ~ "t.test",
      all_categorical() ~ "chisq.test.no.correct"
    )
  )

Characteristic

Drug A
N = 98

1

Drug B
N = 102

1

p-value

2
Age 46 (37, 60) 48 (39, 56) 0.834
    Unknown 7 4
Marker Level (ng/mL) 0.84 (0.23, 1.60) 0.52 (0.18, 1.21) 0.116
    Unknown 6 4
T Stage

0.866
    T1 28 (29%) 25 (25%)
    T2 25 (26%) 29 (28%)
    T3 22 (22%) 21 (21%)
    T4 23 (23%) 27 (26%)
Grade

0.871
    I 35 (36%) 33 (32%)
    II 32 (33%) 36 (35%)
    III 31 (32%) 33 (32%)
Months to Death/Censor 23.5 (17.4, 24.0) 21.2 (14.5, 24.0) 0.108
1

Median (Q1, Q3); n (%)

2

Welch Two Sample t-test; Pearson’s Chi-squared test

Code
# ?tests
Code
# Base R Wilcoxon rank-sum test of age between the two treatment arms.
wilcox.test(age~trt,data = trial2)
#> 
#>  Wilcoxon rank sum test with continuity correction
#> 
#> data:  age by trt
#> W = 4323, p-value = 0.7183
#> alternative hypothesis: true location shift is not equal to 0
# Pearson chi-squared test on the grade-by-treatment contingency table.
table(trial2$grade,trial2$trt)|> chisq.test()
#> 
#>  Pearson's Chi-squared test
#> 
#> data:  table(trial2$grade, trial2$trt)
#> X-squared = 0.27673, df = 2, p-value = 0.8708

1.1.2.2 统计量

Code
#' t-test statistic and starred p-value for use with add_stat()
#'
#' Runs `t.test()` of `data[[variable]]` against `data[[by]]` (coerced to a
#' factor) and formats the result as text.
#'
#' @param data Data frame holding both columns.
#' @param variable Name of the column to test, as a string.
#' @param by Name of the grouping column, as a string.
#' @param ... Accepted but not used.
#' @return The tidied test result reduced to two character columns:
#'   `statistic` (three decimals) and `p.value` (three decimals followed by
#'   "(ns)", "(*)", "(**)" or "(***)").
my_ttest3 <- function(data, variable, by, ...) {
  fit <- t.test(data[[variable]] ~ as.factor(data[[by]]))
  tidied <- broom::tidy(fit)

  tidied |>
    mutate(
      statistic = sprintf("%.3f", statistic),
      # thresholds are checked from largest to smallest; first match wins
      p.value = case_when(
        p.value >= 0.05 ~ sprintf("%.3f (ns)", p.value),
        p.value >= 0.01 ~ sprintf("%.3f (*)", p.value),
        p.value >= 0.001 ~ sprintf("%.3f (**)", p.value),
        p.value < 0.001 ~ sprintf("%.3f (***)", p.value)
      )
    ) |>
    select(statistic, p.value)
}

# Summarise age and marker by treatment arm, then append the two columns
# returned by my_ttest3() and give them bold headers.
trial |>
  tbl_summary(
    include = c(trt, age, marker),
    by = trt,
    missing = "ifany"
  ) |>
  add_stat(fns = everything() ~ my_ttest3) |>
  modify_header(statistic = "**t-statistic**", p.value = "**p-value**")

Characteristic

Drug A
N = 98

1

Drug B
N = 102

1

t-statistic

p-value

Age 46 (37, 60) 48 (39, 56) -0.209 0.834 (ns)
    Unknown 7 4

Marker Level (ng/mL) 0.84 (0.23, 1.60) 0.52 (0.18, 1.21) 1.578 0.116 (ns)
    Unknown 6 4

1

Median (Q1, Q3)

1.1.2.3 置信区间

Code
#' Mean and confidence limits from a one-sample t-test
#'
#' @param data Data frame holding the column.
#' @param variable Name of the column to summarise, as a string.
#' @param ... Accepted but not used.
#' @return A tibble with columns `mean`, `conf.low` and `conf.high`, taken
#'   from the estimate and confidence interval of `t.test()`.
mean_ci <- function(data, variable, ...) {
  fit <- t.test(data[[variable]])
  limits <- fit$conf.int

  dplyr::tibble(
    mean = fit$estimate,
    conf.low = limits[1],
    conf.high = limits[2]
  )
}

# Custom summary of marker and ttdeath by treatment arm, showing the values
# returned by mean_ci() as "mean [low; high]", with an overall column
# placed last and a matching footnote.
trial |>
  tbl_custom_summary(
    by = trt,
    include = c(marker, ttdeath),
    stat_fns = ~ mean_ci,
    statistic = ~ "{mean} [{conf.low}; {conf.high}]"
  ) |>
  add_overall(last = TRUE) |>
  modify_footnote(all_stat_cols() ~ "mean [95% CI]")

Characteristic

Drug A
N = 98

1

Drug B
N = 102

1

Overall
N = 200

1
Marker Level (ng/mL) 1.02 [0.83; 1.20] 0.82 [0.65; 0.99] 0.92 [0.79; 1.04]
    Unknown 6 4 10
Months to Death/Censor 20.2 [19.2; 21.2] 19.0 [18.0; 20.1] 19.6 [18.9; 20.4]
1

mean [95% CI]

1.2 主题

Code
# Activate the JAMA journal theme, then build a summary table with p-values.
theme_gtsummary_journal(journal = "jama")
#> Setting theme "JAMA"

trial |>
  select(trt, age, grade) |>
  tbl_summary(by = trt) |>
  add_p()

Characteristic

Drug A
N = 98

Drug B
N = 102

p-value

1
Age, Median (IQR) 46 (37 – 60) 48 (39 – 56) 0.72
    Unknown 7 4
Grade, n (%)

0.87
    I 35 (36) 33 (32)
    II 32 (33) 36 (35)
    III 31 (32) 33 (32)
1

Wilcoxon rank sum test; Pearson’s Chi-squared test

Code


# Clear all previously set gtsummary themes.
reset_gtsummary_theme()

1.3 模型结果

Code
# Convert the built-in Titanic contingency table into a data frame.
data(Titanic)
df <- as.data.frame(Titanic)

# gtsummary supplies tbl_regression() and the table helpers used below.
library(gtsummary)

# Logistic regression for survival.
# NOTE(review): `Freq` holds the cell counts of the aggregated table, so here
# it enters as a predictor and each of the 32 rows counts as one observation;
# `weights = Freq` may be what is intended — TODO confirm the model.
model <- glm(
  Survived ~ Age + Class + Sex + Freq,
  data = df,
  family = binomial
)

# Regression table with global p-values, bold variable labels and
# italicised factor levels.
model |>
  tbl_regression() |>
  add_global_p() |>
  bold_labels() |>
  italicize_levels()

Characteristic

log(OR)

1

95% CI

1

p-value

Age

0.5
    Child
    Adult 0.62 -1.0, 2.4
Class

>0.9
    1st
    2nd -0.03 -2.0, 2.0
    3rd 0.25 -1.8, 2.4
    Crew 0.27 -1.8, 2.4
Sex

0.6
    Male
    Female -0.37 -1.9, 1.1
Freq -0.01 -0.02, 0.00 0.2
1

OR = Odds Ratio, CI = Confidence Interval

Code



# Report `model` with the intercept row, 90% confidence intervals, model-fit
# statistics as a source note, global p-values and q-values.
model |>
  tbl_regression(conf.level = 0.9, intercept = TRUE) |>
  add_glance_source_note() |>
  add_global_p() |>
  add_q()

Characteristic

log(OR)

1

90% CI

1

p-value

q-value

2
(Intercept) 0.10 -1.4, 1.6 >0.9 >0.9
Age

0.5 >0.9
    Child

    Adult 0.62 -0.78, 2.1

Class

>0.9 >0.9
    1st

    2nd -0.03 -1.7, 1.7

    3rd 0.25 -1.5, 2.0

    Crew 0.27 -1.5, 2.0

Sex

0.6 >0.9
    Male

    Female -0.37 -1.7, 0.89

Freq -0.01 -0.02, 0.00 0.2 0.9

Null deviance = 44.4; Null df = 31; Log-likelihood = -21.3; AIC = 56.5; BIC = 66.8; Deviance = 42.5; Residual df = 25; No. Obs. = 32

1

OR = Odds Ratio, CI = Confidence Interval

2

False discovery rate correction for multiple testing

1.4 比较模型结果

Code
# Fit a logistic model for response and a Cox model for time to death on the
# same predictors, then show both regression tables side by side.
data(trial)
library(survival)

model_reglog <-
  glm(response ~ trt + grade, family = binomial, data = trial) |>
  tbl_regression()
model_cox <-
  coxph(Surv(ttdeath, death) ~ trt + grade, data = trial) |>
  tbl_regression()

tbl_merge(
  tbls = list(model_reglog, model_cox),
  tab_spanner = c("**Tumor Response**", "**Time to Death**")
)

Characteristic

Tumor Response

Time to Death

log(OR)

1

95% CI

1

p-value

log(HR)

1

95% CI

1

p-value

Chemotherapy Treatment





    Drug A

    Drug B 0.19 -0.41, 0.81 0.5 0.22 -0.15, 0.59 0.2
Grade





    I

    II -0.06 -0.82, 0.68 0.9 0.25 -0.22, 0.72 0.3
    III 0.08 -0.66, 0.82 0.8 0.52 0.07, 0.98 0.024
1

OR = Odds Ratio, CI = Confidence Interval, HR = Hazard Ratio

gt

Code
library(gt)

# Move the car names from row names into a column, order by cylinder count
# and mpg, keep the first three rows of each cylinder group, and render the
# result as a gt table.
tab <- mtcars |>
  rownames_to_column() |>
  arrange(factor(cyl), mpg) |>
  group_by(cyl) |>
  slice_head(n = 3) |>
  gt()
tab
mpg disp hp drat wt qsec vs am gear carb
4
Volvo 142E 21.4 121.0 109 4.11 2.780 18.60 1 1 4 2
Toyota Corona 21.5 120.1 97 3.70 2.465 20.01 1 0 3 1
Datsun 710 22.8 108.0 93 3.85 2.320 18.61 1 1 4 1
6
Merc 280C 17.8 167.6 123 3.92 3.440 18.90 1 0 4 4
Valiant 18.1 225.0 105 2.76 3.460 20.22 1 0 3 1
Merc 280 19.2 167.6 123 3.92 3.440 18.30 1 0 4 4
8
Cadillac Fleetwood 10.4 472.0 205 2.93 5.250 17.98 0 0 3 4
Lincoln Continental 10.4 460.0 215 3.00 5.424 17.82 0 0 3 4
Camaro Z28 13.3 350.0 245 3.73 3.840 15.41 0 0 3 4
Code
# Group the columns under two spanner labels: "Performance" and "Specs".
tab <- tab |>
  tab_spanner(
    columns = c(mpg, disp, hp, drat, wt, qsec),
    label = "Performance"
  ) |>
  tab_spanner(
    columns = c(vs, am, gear, carb),
    label = "Specs"
  )
tab
Performance Specs
mpg disp hp drat wt qsec vs am gear carb
4
Volvo 142E 21.4 121.0 109 4.11 2.780 18.60 1 1 4 2
Toyota Corona 21.5 120.1 97 3.70 2.465 20.01 1 0 3 1
Datsun 710 22.8 108.0 93 3.85 2.320 18.61 1 1 4 1
6
Merc 280C 17.8 167.6 123 3.92 3.440 18.90 1 0 4 4
Valiant 18.1 225.0 105 2.76 3.460 20.22 1 0 3 1
Merc 280 19.2 167.6 123 3.92 3.440 18.30 1 0 4 4
8
Cadillac Fleetwood 10.4 472.0 205 2.93 5.250 17.98 0 0 3 4
Lincoln Continental 10.4 460.0 215 3.00 5.424 17.82 0 0 3 4
Camaro Z28 13.3 350.0 245 3.73 3.840 15.41 0 0 3 4
Code
# Add a title (Markdown, so **mtcars** renders bold) and a plain subtitle.
tab <- tab_header(
  tab,
  title = md("The Cars of **mtcars**"),
  subtitle = "These are some fine automobiles"
)
tab

The Cars of mtcars

These are some fine automobiles
Performance Specs
mpg disp hp drat wt qsec vs am gear carb
4
Volvo 142E 21.4 121.0 109 4.11 2.780 18.60 1 1 4 2
Toyota Corona 21.5 120.1 97 3.70 2.465 20.01 1 0 3 1
Datsun 710 22.8 108.0 93 3.85 2.320 18.61 1 1 4 1
6
Merc 280C 17.8 167.6 123 3.92 3.440 18.90 1 0 4 4
Valiant 18.1 225.0 105 2.76 3.460 20.22 1 0 3 1
Merc 280 19.2 167.6 123 3.92 3.440 18.30 1 0 4 4
8
Cadillac Fleetwood 10.4 472.0 205 2.93 5.250 17.98 0 0 3 4
Lincoln Continental 10.4 460.0 215 3.00 5.424 17.82 0 0 3 4
Camaro Z28 13.3 350.0 245 3.73 3.840 15.41 0 0 3 4
Back to top