Code
# Install gtsummary on first use; the name passed to install.packages() must
# match the package name exactly.
if (!requireNamespace("gtsummary", quietly = TRUE)) {
  install.packages("gtsummary")
}
https://www.danieldsjoberg.com/gtsummary/index.html
# Install gtsummary if it is not available yet, then attach it.
if (!requireNamespace("gtsummary", quietly = TRUE)) {
  install.packages("gtsummary")
}
library(gtsummary)

# Preview the first rows of the `trial` data set used in the examples below.
head(trial)
#> # A tibble: 6 × 8
#> trt age marker stage grade response death ttdeath
#> <chr> <dbl> <dbl> <fct> <fct> <int> <int> <dbl>
#> 1 Drug A 23 0.16 T1 II 0 0 24
#> 2 Drug B 9 1.11 T2 I 1 0 24
#> 3 Drug A 31 0.277 T1 II 0 0 24
#> 4 Drug A NA 2.07 T3 III 1 1 17.6
#> 5 Drug A 51 2.77 T4 III 1 1 16.4
#> 6 Drug B 39 0.613 T4 I 0 1 15.6
# Load the `trial` data set into the workspace.
data(trial)
数据框中的每个变量都可以分配一个 "label" 属性。
Label attributes from the data set are automatically printed.
Missing values are listed as “Unknown” in the table.
argument | 描述 |
---|---|
label= |
指定表格中打印的变量标签 |
type= |
指定变量类型(例如 continuous、categorical 等) |
statistic= |
更改显示的汇总统计数据 |
digits= |
汇总统计数据将四舍五入的位数 |
missing= |
是否显示包含缺失观测值数的行 |
missing_text= |
缺失值计数行的文本标签 |
sort= |
按频率更改分类级别的排序 |
percent= |
打印列、行或单元格百分比 |
include= |
要包含在汇总表中的变量列表 |
# Drop the `response` and `death` columns before summarising.
trial2 <- select(trial, -response, -death)

# Summary table split by `trt`: mean and SD for continuous variables,
# "n / N (%)" for categorical ones, custom labels for two variables, and
# "(NA)" as the label of the missing-value rows.
tbl_summary(
  trial2,
  by = trt,
  statistic = list(
    # alternative for continuous variables: "{median} ({p25},{p75})"
    all_continuous() ~ "{mean}± {sd} ",
    all_categorical() ~ "{n} / {N} ({p}%)"
  ),
  # Optional rounding, left disabled:
  # digits = list(starts_with("age") ~ 3, grade ~ 2),
  label = list(
    age ~ "Patient age",
    grade ~ "Tumor Grade"
  ),
  missing_text = "(NA)"
)
Characteristic |
Drug A |
Drug B |
---|---|---|
Patient age | 47± 15 | 47± 14 |
(NA) | 7 | 4 |
Marker Level (ng/mL) | 1.02± 0.89 | 0.82± 0.83 |
(NA) | 6 | 4 |
T Stage | ||
T1 | 28 / 98 (29%) | 25 / 102 (25%) |
T2 | 25 / 98 (26%) | 29 / 102 (28%) |
T3 | 22 / 98 (22%) | 21 / 102 (21%) |
T4 | 23 / 98 (23%) | 27 / 102 (26%) |
Tumor Grade | ||
I | 35 / 98 (36%) | 33 / 102 (32%) |
II | 32 / 98 (33%) | 36 / 102 (35%) |
III | 31 / 98 (32%) | 33 / 102 (32%) |
Months to Death/Censor | 20.2± 5.0 | 19.0± 5.5 |
1
Mean± SD ; n / N (%) |
# Summary by `trt` with an overall column and a per-variable N column,
# followed by changes to the header, spanning header, footnote and caption.
trial2 |>
  tbl_summary(by = trt) |>
  add_overall() |> # add a column with overall summary statistics
  add_n() |>
  # relabel the table decorations
  modify_header(label ~ "**变量**") |>
  modify_spanning_header(c("stat_1", "stat_2") ~ "**药物**") |>
  modify_footnote(all_stat_cols() ~ "Median (25%, 75%) or Frequency (%)") |>
  modify_caption("**Table 1. Patient Characteristics**") |>
  bold_labels()
变量 |
N |
Overall |
药物 |
|
---|---|---|---|---|
Drug A |
Drug B |
|||
Age | 189 | 47 (38, 57) | 46 (37, 60) | 48 (39, 56) |
Unknown | 11 | 7 | 4 | |
Marker Level (ng/mL) | 190 | 0.64 (0.22, 1.41) | 0.84 (0.23, 1.60) | 0.52 (0.18, 1.21) |
Unknown | 10 | 6 | 4 | |
T Stage | 200 | |||
T1 | 53 (27%) | 28 (29%) | 25 (25%) | |
T2 | 54 (27%) | 25 (26%) | 29 (28%) | |
T3 | 43 (22%) | 22 (22%) | 21 (21%) | |
T4 | 50 (25%) | 23 (23%) | 27 (26%) | |
Grade | 200 | |||
I | 68 (34%) | 35 (36%) | 33 (32%) | |
II | 68 (34%) | 32 (33%) | 36 (35%) | |
III | 64 (32%) | 31 (32%) | 33 (32%) | |
Months to Death/Censor | 200 | 22.4 (15.9, 24.0) | 23.5 (17.4, 24.0) | 21.2 (14.5, 24.0) |
1
Median (25%, 75%) or Frequency (%) |
函数 | 描述 |
---|---|
add_p() |
将 p 值添加到输出中,比较各组的值 |
add_overall() |
添加包含总体汇总统计数据的列 |
add_n() |
为每个变量添加一列,显示非缺失观测数 N(或缺失观测数) |
add_difference() |
为两组之间的差值、置信区间和 p 值添加列 |
add_stat_label() |
为每行中显示的汇总统计数据添加标签 |
add_stat() |
泛型函数,用于添加具有用户定义值的列 |
add_q() |
添加一列 q 值,用于多重比较校正 |
modify_header() |
更新列标题 |
modify_footnote() |
更新列脚注 |
modify_spanning_header() |
更新跨列标题(spanning header) |
modify_caption() |
更新表格标题(caption) |
bold_labels() |
将变量标签加粗 |
bold_levels() |
将变量水平(levels)加粗 |
italicize_labels() |
将变量标签斜体化 |
italicize_levels() |
将变量水平(levels)设为斜体 |
bold_p() |
将显著的 p 值加粗 |
# Compare the `trt` groups: t-test for continuous variables and a chi-squared
# test without continuity correction for categorical ones; p-values are
# formatted with three digits.
trial2 |>
  tbl_summary(by = trt) |>
  add_p(
    test = list(
      all_continuous() ~ "t.test",
      all_categorical() ~ "chisq.test.no.correct"
    ),
    pvalue_fun = label_style_pvalue(digits = 3)
  )
Characteristic |
Drug A |
Drug B |
p-value 2 |
---|---|---|---|
Age | 46 (37, 60) | 48 (39, 56) | 0.834 |
Unknown | 7 | 4 | |
Marker Level (ng/mL) | 0.84 (0.23, 1.60) | 0.52 (0.18, 1.21) | 0.116 |
Unknown | 6 | 4 | |
T Stage | 0.866 | ||
T1 | 28 (29%) | 25 (25%) | |
T2 | 25 (26%) | 29 (28%) | |
T3 | 22 (22%) | 21 (21%) | |
T4 | 23 (23%) | 27 (26%) | |
Grade | 0.871 | ||
I | 35 (36%) | 33 (32%) | |
II | 32 (33%) | 36 (35%) | |
III | 31 (32%) | 33 (32%) | |
Months to Death/Censor | 23.5 (17.4, 24.0) | 21.2 (14.5, 24.0) | 0.108 |
1
Median (Q1, Q3); n (%) |
|||
2
Welch Two Sample t-test; Pearson’s Chi-squared test |
# Help topic: ?tests
# Stand-alone Wilcoxon rank sum test of `age` between the `trt` groups.
wilcox.test(age ~ trt, data = trial2)
#>
#> Wilcoxon rank sum test with continuity correction
#>
#> data: age by trt
#> W = 4323, p-value = 0.7183
#> alternative hypothesis: true location shift is not equal to 0
# Pearson chi-squared test on the grade-by-trt contingency table.
chisq.test(table(trial2$grade, trial2$trt))
#>
#> Pearson's Chi-squared test
#>
#> data: table(trial2$grade, trial2$trt)
#> X-squared = 0.27673, df = 2, p-value = 0.8708
#' Two-sample t-test formatted for `add_stat()`
#'
#' Runs `t.test()` of `data[[variable]]` against the grouping column
#' `data[[by]]` and returns the test statistic and a significance-annotated
#' p-value, both as text with three decimals.
#'
#' @param data A data frame containing both columns.
#' @param variable Name (string) of the column to test.
#' @param by Name (string) of the grouping column; it is coerced to a factor.
#' @param ... Ignored; absorbs any further arguments supplied by the caller.
#' @return A one-row tibble with character columns `statistic` and `p.value`.
#'   The p-value is suffixed with "(ns)" for p >= 0.05, "(*)" for p >= 0.01,
#'   "(**)" for p >= 0.001 and "(***)" below that.
my_ttest3 <- function(data, variable, by, ...) {
  fit <- t.test(data[[variable]] ~ as.factor(data[[by]]))
  p <- fit$p.value

  # dplyr is referenced by namespace so the function does not depend on the
  # package having been attached by the calling script.
  dplyr::tibble(
    statistic = sprintf("%.3f", unname(fit$statistic)),
    p.value = dplyr::case_when(
      p >= 0.05 ~ sprintf("%.3f (ns)", p),
      p >= 0.01 ~ sprintf("%.3f (*)", p),
      p >= 0.001 ~ sprintf("%.3f (**)", p),
      p < 0.001 ~ sprintf("%.3f (***)", p)
    )
  )
}
# Summarise `age` and `marker` by `trt`, append the columns returned by
# my_ttest3() for every variable, and give those columns bold headers.
tbl_summary(
  trial,
  by = trt,
  include = c(trt, age, marker),
  missing = "ifany"
) |>
  add_stat(fns = everything() ~ my_ttest3) |>
  modify_header(statistic = "**t-statistic**", p.value = "**p-value**")
Characteristic |
Drug A |
Drug B |
t-statistic |
p-value |
---|---|---|---|---|
Age | 46 (37, 60) | 48 (39, 56) | -0.209 | 0.834 (ns) |
Unknown | 7 | 4 | ||
Marker Level (ng/mL) | 0.84 (0.23, 1.60) | 0.52 (0.18, 1.21) | 1.578 | 0.116 (ns) |
Unknown | 6 | 4 | ||
1
Median (Q1, Q3) |
#' Mean of a column with its t-based confidence interval
#'
#' @param data A data frame.
#' @param variable Name (string) of the column to summarise.
#' @param ... Ignored.
#' @return A one-row tibble with columns `mean`, `conf.low` and `conf.high`,
#'   taken from a one-sample `t.test()` run with its default settings.
mean_ci <- function(data, variable, ...) {
  fit <- t.test(data[[variable]])
  interval <- fit$conf.int
  dplyr::tibble(
    mean = fit$estimate,
    conf.low = interval[1],
    conf.high = interval[2]
  )
}
# Custom summary of `marker` and `ttdeath` by `trt`: every cell shows the
# values returned by mean_ci(); an overall column is appended last and the
# footnote of the statistic columns is replaced.
tbl_custom_summary(
  trial,
  include = c("marker", "ttdeath"),
  by = "trt",
  stat_fns = ~mean_ci,
  statistic = ~"{mean} [{conf.low}; {conf.high}]"
) |>
  add_overall(last = TRUE) |>
  modify_footnote(all_stat_cols() ~ "mean [95% CI]")
Characteristic |
Drug A |
Drug B |
Overall |
---|---|---|---|
Marker Level (ng/mL) | 1.02 [0.83; 1.20] | 0.82 [0.65; 0.99] | 0.92 [0.79; 1.04] |
Unknown | 6 | 4 | 10 |
Months to Death/Censor | 20.2 [19.2; 21.2] | 19.0 [18.0; 20.1] | 19.6 [18.9; 20.4] |
1
mean [95% CI] |
# Switch gtsummary to the "jama" journal theme.
theme_gtsummary_journal("jama")
#> Setting theme "JAMA"
# Summary of `age` and `grade` by `trt` with p-values from the default tests,
# rendered under the currently active theme.
trial |>
  select(trt, age, grade) |>
  tbl_summary(by = trt) |>
  add_p()
Characteristic |
Drug A |
Drug B |
p-value 1 |
---|---|---|---|
Age, Median (IQR) | 46 (37 – 60) | 48 (39 – 56) | 0.72 |
Unknown | 7 | 4 | |
Grade, n (%) | 0.87 | ||
I | 35 (36) | 33 (32) | |
II | 32 (33) | 36 (35) | |
III | 31 (32) | 33 (32) | |
1
Wilcoxon rank sum test; Pearson’s Chi-squared test |
# Clear any previously set gtsummary theme
reset_gtsummary_theme()
# Convert the Titanic table into a data frame; the result carries a `Freq`
# column, which the model formula below uses.
data(Titanic)
df <- as.data.frame(Titanic)

# gtsummary supplies tbl_regression() and the formatting helpers used below.
library(gtsummary)

# Binomial GLM of `Survived` on `Age`, `Class`, `Sex` and `Freq`.
# NOTE(review): `Freq` enters as a predictor rather than as a weight, so each
# row of `df` counts as one observation -- confirm this is the intended model.
model <- glm(
  formula = Survived ~ Age + Class + Sex + Freq,
  family = binomial,
  data = df
)

# Regression table with global p-values, bold variable labels and italic
# level names.
model |>
  tbl_regression() |>
  add_global_p() |>
  bold_labels() |>
  italicize_levels()
Characteristic |
log(OR) 1 |
95% CI 1 |
p-value |
---|---|---|---|
Age | 0.5 | ||
Child | — | — | |
Adult | 0.62 | -1.0, 2.4 | |
Class | >0.9 | ||
1st | — | — | |
2nd | -0.03 | -2.0, 2.0 | |
3rd | 0.25 | -1.8, 2.4 | |
Crew | 0.27 | -1.8, 2.4 | |
Sex | 0.6 | ||
Male | — | — | |
Female | -0.37 | -1.9, 1.1 | |
Freq | -0.01 | -0.02, 0.00 | 0.2 |
1
OR = Odds Ratio, CI = Confidence Interval |
# Same model, now showing the intercept and 90% confidence intervals, with
# model fit statistics as a source note, global p-values and q-values.
model |>
  tbl_regression(intercept = TRUE, conf.level = 0.9) |>
  add_glance_source_note() |>
  add_global_p() |>
  add_q()
Characteristic |
log(OR) |
90% CI |
p-value |
q-value |
---|---|---|---|---|
(Intercept) | 0.10 | -1.4, 1.6 | >0.9 | >0.9 |
Age | 0.5 | >0.9 | ||
Child | — | — | ||
Adult | 0.62 | -0.78, 2.1 | ||
Class | >0.9 | >0.9 | ||
1st | — | — | ||
2nd | -0.03 | -1.7, 1.7 | ||
3rd | 0.25 | -1.5, 2.0 | ||
Crew | 0.27 | -1.5, 2.0 | ||
Sex | 0.6 | >0.9 | ||
Male | — | — | ||
Female | -0.37 | -1.7, 0.89 | ||
Freq | -0.01 | -0.02, 0.00 | 0.2 | 0.9 |
Null deviance = 44.4; Null df = 31; Log-likelihood = -21.3; AIC = 56.5; BIC = 66.8; Deviance = 42.5; Residual df = 25; No. Obs. = 32 |
||||
1 OR = Odds Ratio, CI = Confidence Interval |
||||
2 False discovery rate correction for multiple testing |
data(trial)
library(survival)

# Logistic regression of `response` on `trt` and `grade`, as a table.
model_reglog <-
  glm(response ~ trt + grade, data = trial, family = binomial) |>
  tbl_regression()

# Cox model of time to death on `trt` and `grade`, as a table.
model_cox <-
  coxph(Surv(ttdeath, death) ~ trt + grade, data = trial) |>
  tbl_regression()

# Put both regression tables side by side, each under its own spanning header.
tbl_merge(
  list(model_reglog, model_cox),
  tab_spanner = c("**Tumor Response**", "**Time to Death**")
)
Characteristic |
Tumor Response |
Time to Death |
||||
---|---|---|---|---|---|---|
log(OR) 1 |
95% CI 1 |
p-value |
log(HR) 1 |
95% CI 1 |
p-value |
|
Chemotherapy Treatment | ||||||
Drug A | — | — | — | — | ||
Drug B | 0.19 | -0.41, 0.81 | 0.5 | 0.22 | -0.15, 0.59 | 0.2 |
Grade | ||||||
I | — | — | — | — | ||
II | -0.06 | -0.82, 0.68 | 0.9 | 0.25 | -0.22, 0.72 | 0.3 |
III | 0.08 | -0.66, 0.82 | 0.8 | 0.52 | 0.07, 0.98 | 0.024 |
1
OR = Odds Ratio, CI = Confidence Interval, HR = Hazard Ratio |
mpg | disp | hp | drat | wt | qsec | vs | am | gear | carb | |
---|---|---|---|---|---|---|---|---|---|---|
4 | ||||||||||
Volvo 142E | 21.4 | 121.0 | 109 | 4.11 | 2.780 | 18.60 | 1 | 1 | 4 | 2 |
Toyota Corona | 21.5 | 120.1 | 97 | 3.70 | 2.465 | 20.01 | 1 | 0 | 3 | 1 |
Datsun 710 | 22.8 | 108.0 | 93 | 3.85 | 2.320 | 18.61 | 1 | 1 | 4 | 1 |
6 | ||||||||||
Merc 280C | 17.8 | 167.6 | 123 | 3.92 | 3.440 | 18.90 | 1 | 0 | 4 | 4 |
Valiant | 18.1 | 225.0 | 105 | 2.76 | 3.460 | 20.22 | 1 | 0 | 3 | 1 |
Merc 280 | 19.2 | 167.6 | 123 | 3.92 | 3.440 | 18.30 | 1 | 0 | 4 | 4 |
8 | ||||||||||
Cadillac Fleetwood | 10.4 | 472.0 | 205 | 2.93 | 5.250 | 17.98 | 0 | 0 | 3 | 4 |
Lincoln Continental | 10.4 | 460.0 | 215 | 3.00 | 5.424 | 17.82 | 0 | 0 | 3 | 4 |
Camaro Z28 | 13.3 | 350.0 | 245 | 3.73 | 3.840 | 15.41 | 0 | 0 | 3 | 4 |
# Group the columns of `tab` under two spanner labels, then print the table.
tab <- tab |>
  tab_spanner(
    label = "Performance",
    columns = c(mpg, disp, hp, drat, wt, qsec)
  ) |>
  tab_spanner(
    label = "Specs",
    columns = c(vs, am, gear, carb)
  )
tab
Performance | Specs | |||||||||
---|---|---|---|---|---|---|---|---|---|---|
mpg | disp | hp | drat | wt | qsec | vs | am | gear | carb | |
4 | ||||||||||
Volvo 142E | 21.4 | 121.0 | 109 | 4.11 | 2.780 | 18.60 | 1 | 1 | 4 | 2 |
Toyota Corona | 21.5 | 120.1 | 97 | 3.70 | 2.465 | 20.01 | 1 | 0 | 3 | 1 |
Datsun 710 | 22.8 | 108.0 | 93 | 3.85 | 2.320 | 18.61 | 1 | 1 | 4 | 1 |
6 | ||||||||||
Merc 280C | 17.8 | 167.6 | 123 | 3.92 | 3.440 | 18.90 | 1 | 0 | 4 | 4 |
Valiant | 18.1 | 225.0 | 105 | 2.76 | 3.460 | 20.22 | 1 | 0 | 3 | 1 |
Merc 280 | 19.2 | 167.6 | 123 | 3.92 | 3.440 | 18.30 | 1 | 0 | 4 | 4 |
8 | ||||||||||
Cadillac Fleetwood | 10.4 | 472.0 | 205 | 2.93 | 5.250 | 17.98 | 0 | 0 | 3 | 4 |
Lincoln Continental | 10.4 | 460.0 | 215 | 3.00 | 5.424 | 17.82 | 0 | 0 | 3 | 4 |
Camaro Z28 | 13.3 | 350.0 | 245 | 3.73 | 3.840 | 15.41 | 0 | 0 | 3 | 4 |
# Add a Markdown-formatted title and a plain subtitle, then print the table.
tab <- tab_header(
  tab,
  title = md("The Cars of **mtcars**"),
  subtitle = "These are some fine automobiles"
)
tab
The Cars of mtcars |
||||||||||
---|---|---|---|---|---|---|---|---|---|---|
These are some fine automobiles | ||||||||||
Performance | Specs | |||||||||
mpg | disp | hp | drat | wt | qsec | vs | am | gear | carb | |
4 | ||||||||||
Volvo 142E | 21.4 | 121.0 | 109 | 4.11 | 2.780 | 18.60 | 1 | 1 | 4 | 2 |
Toyota Corona | 21.5 | 120.1 | 97 | 3.70 | 2.465 | 20.01 | 1 | 0 | 3 | 1 |
Datsun 710 | 22.8 | 108.0 | 93 | 3.85 | 2.320 | 18.61 | 1 | 1 | 4 | 1 |
6 | ||||||||||
Merc 280C | 17.8 | 167.6 | 123 | 3.92 | 3.440 | 18.90 | 1 | 0 | 4 | 4 |
Valiant | 18.1 | 225.0 | 105 | 2.76 | 3.460 | 20.22 | 1 | 0 | 3 | 1 |
Merc 280 | 19.2 | 167.6 | 123 | 3.92 | 3.440 | 18.30 | 1 | 0 | 4 | 4 |
8 | ||||||||||
Cadillac Fleetwood | 10.4 | 472.0 | 205 | 2.93 | 5.250 | 17.98 | 0 | 0 | 3 | 4 |
Lincoln Continental | 10.4 | 460.0 | 215 | 3.00 | 5.424 | 17.82 | 0 | 0 | 3 | 4 |
Camaro Z28 | 13.3 | 350.0 | 245 | 3.73 | 3.840 | 15.41 | 0 | 0 | 3 | 4 |