7 base 函数

7.1 赋值运算符

Assignment operator in R	Description
`<-`	Left assignment
=	Left assignment (not recommended) and argument assignment
`->`	Right assignment
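`<<-`	Left superassignment: assigns to a variable in an enclosing (parent) environment (for advanced users)
`->>`	Right superassignment: assigns to a variable in an enclosing (parent) environment (for advanced users)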

7.2 其他运算符

Miscellaneous operator in R	Description
$	Named list or dataframe column subset
:	Sequence generator
::	Accessing exported functions of packages. It is not usually needed once the package is loaded
:::	Accessing internal functions of packages
~	Model formulae
@	Accessing slots in S4 classes (Advanced)

7.3 逻辑索引

Show the code

1:length(mtcars$mpg)
#>  [1]  1  2  3  4  5  6  7  8  9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25
#> [26] 26 27 28 29 30 31 32
seq_along(mtcars$mpg)
#>  [1]  1  2  3  4  5  6  7  8  9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25
#> [26] 26 27 28 29 30 31 32

x <- c(1,3,6,4,0,7)
# 返回值为TRUE的索引
which(x>2)
#> [1] 2 3 4 6
which.max(x)
#> [1] 6
which.min(x)
#> [1] 5

# 返回值
x[x>2]
#> [1] 3 6 4 7
x[which(x>2)]
#> [1] 3 6 4 7

# 包含
# match(x, table, nomatch = NA_integer_, incomparables = NULL)
# x %in% table
1:10 %in% c(1,3,5,9)
#>  [1]  TRUE FALSE  TRUE FALSE  TRUE FALSE FALSE FALSE  TRUE FALSE

match(1:10, c(1,3,5,9))
#>  [1]  1 NA  2 NA  3 NA NA NA  4 NA

all(colnames(mpg)==colnames(mpg))
#> [1] TRUE
any(colnames(mpg)==colnames(mtcars))
#> [1] FALSE


x <- c(9:20, 1:5, 3:7, 0:8)
duplicated(x)
#>  [1] FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE
#> [13] FALSE FALSE FALSE FALSE FALSE  TRUE  TRUE  TRUE FALSE FALSE FALSE  TRUE
#> [25]  TRUE  TRUE  TRUE  TRUE  TRUE  TRUE FALSE

xu <- x[!duplicated(x)]
xu
#>  [1]  9 10 11 12 13 14 15 16 17 18 19 20  1  2  3  4  5  6  7  0  8

7.4 子集运算

[，$，[[

S4 对象： @ slot

7.4.1 `[ ]`

Show the code

x <- c(2.1, 4.2, 3.3, 5.4)

x[c(3, 1)]
#> [1] 3.3 2.1

x[-c(3, 1)]
#> [1] 4.2 5.4

x[c(TRUE, TRUE, FALSE, FALSE)]
#> [1] 2.1 4.2
x[x > 3]
#> [1] 4.2 3.3 5.4

x[c(TRUE, FALSE)] # recycling rules 循环
#> [1] 2.1 3.3

x[]
#> [1] 2.1 4.2 3.3 5.4

x[0]
#> numeric(0)

7.4.2 `$`, `[[ ]]`

$ 是一个简写运算符，x$y 大致相当于 x[["y"]]；不同之处在于 $ 会对名称进行从左到右的部分匹配（partial matching），而 [[ 默认要求精确匹配。

Show the code

x <- list(abc = 1)
x$a
#> [1] 1

x[["a"]]
#> NULL

options(warnPartialMatchDollar = TRUE)
x$a
#> [1] 1

7.5 生成数值序列

Show the code

seq(from=1,to=30,by=3)  # 生成一个序列
#>  [1]  1  4  7 10 13 16 19 22 25 28
seq(from=1,to=30,length=10) 
#>  [1]  1.000000  4.222222  7.444444 10.666667 13.888889 17.111111 20.333333
#>  [8] 23.555556 26.777778 30.000000

sequence(30,by=3)
#>  [1]  1  4  7 10 13 16 19 22 25 28 31 34 37 40 43 46 49 52 55 58 61 64 67 70 73
#> [26] 76 79 82 85 88


rep(x = c("A",1,"B",2),times = 3)           #重复序列
#>  [1] "A" "1" "B" "2" "A" "1" "B" "2" "A" "1" "B" "2"
rep(x = c("A",1,"B",2),times = c(1,2,3,4))
#>  [1] "A" "1" "1" "B" "B" "B" "2" "2" "2" "2"
rep(x = c("A",1,"B",2),each = 3,times = 2)
#>  [1] "A" "A" "A" "1" "1" "1" "B" "B" "B" "2" "2" "2" "A" "A" "A" "1" "1" "1" "B"
#> [20] "B" "B" "2" "2" "2"

7.6 排序

Show the code

x <- c("b", "c", "a")
order(x)
#> [1] 3 1 2
x[order(x)]
#> [1] "a" "b" "c"

Show the code

sort()
dplyr::arrange()

# 分配秩次
rank()

7.7 拆分

Show the code

set.seed(3)

df <- CO2[sample(1:nrow(CO2), 10), ]
head(df)

	Plant	Type	Treatment	conc	uptake
5	Qn1	Quebec	nonchilled	500	35.3
58	Mn3	Mississippi	nonchilled	175	19.4
12	Qn2	Quebec	nonchilled	500	40.6
36	Qc3	Quebec	chilled	95	15.1
83	Mc3	Mississippi	chilled	675	18.9
8	Qn2	Quebec	nonchilled	95	13.6

Show the code

dfs <- split(df, f = list(df$Type, df$Treatment))
dfs
#> $Quebec.nonchilled
#>    Plant   Type  Treatment conc uptake
#> 5    Qn1 Quebec nonchilled  500   35.3
#> 12   Qn2 Quebec nonchilled  500   40.6
#> 8    Qn2 Quebec nonchilled   95   13.6
#> 20   Qn3 Quebec nonchilled  675   43.9
#> 
#> $Mississippi.nonchilled
#>    Plant        Type  Treatment conc uptake
#> 58   Mn3 Mississippi nonchilled  175   19.4
#> 55   Mn2 Mississippi nonchilled  675   31.1
#> 
#> $Quebec.chilled
#>    Plant   Type Treatment conc uptake
#> 36   Qc3 Quebec   chilled   95   15.1
#> 40   Qc3 Quebec   chilled  500   38.9
#> 
#> $Mississippi.chilled
#>    Plant        Type Treatment conc uptake
#> 83   Mc3 Mississippi   chilled  675   18.9
#> 74   Mc2 Mississippi   chilled  350   13.0

7.8 合并

Show the code

set.seed(61)

employee_id <- 1:10
employee_name <- c("Andrew", "Susan", "John", "Joe", "Jack",
                   "Jacob", "Mary", "Kate", "Jacqueline", "Ivy")
employee_salary <- round(rnorm(10, mean = 1500, sd = 200))
employee_age <- round(rnorm(10, mean = 50, sd = 8))
employee_position <- c("CTO", "CFO", "Administrative", rep("Technician", 7))


df_1 <- data.frame(id = employee_id[1:8], name = employee_name[1:8],
                   month_salary = employee_salary[1:8])
df_2 <- data.frame(id = employee_id[-5], name = employee_name[-5],
                   age = employee_age[-5], position = employee_position[-5])

df_1

id	name	month_salary
1	Andrew	1424
2	Susan	1425
3	John	1156
4	Joe	1570
5	Jack	1223
6	Jacob	1462
7	Mary	1641
8	Kate	1603

Show the code

df_2

id	name	age	position
1	Andrew	40	CTO
2	Susan	38	CFO
3	John	54	Administrative
4	Joe	66	Technician
6	Jacob	38	Technician
7	Mary	53	Technician
8	Kate	56	Technician
9	Jacqueline	55	Technician
10	Ivy	43	Technician

7.8.1 连接

Show the code

# 内连接
merge(x = df_1, y = df_2, by = c("id", "name"))

id	name	month_salary	age	position
1	Andrew	1424	40	CTO
2	Susan	1425	38	CFO
3	John	1156	54	Administrative
4	Joe	1570	66	Technician
6	Jacob	1462	38	Technician
7	Mary	1641	53	Technician
8	Kate	1603	56	Technician

Show the code


# 全连接
merge(x = df_1, y = df_2, all = TRUE)

id	name	month_salary	age	position
1	Andrew	1424	40	CTO
2	Susan	1425	38	CFO
3	John	1156	54	Administrative
4	Joe	1570	66	Technician
5	Jack	1223	NA	NA
6	Jacob	1462	38	Technician
7	Mary	1641	53	Technician
8	Kate	1603	56	Technician
9	Jacqueline	NA	55	Technician
10	Ivy	NA	43	Technician

Show the code


# 左连接
merge(x = df_1, y = df_2, all.x = TRUE)

id	name	month_salary	age	position
1	Andrew	1424	40	CTO
2	Susan	1425	38	CFO
3	John	1156	54	Administrative
4	Joe	1570	66	Technician
5	Jack	1223	NA	NA
6	Jacob	1462	38	Technician
7	Mary	1641	53	Technician
8	Kate	1603	56	Technician

Show the code


# 右连接
merge(x = df_1, y = df_2, all.y = TRUE)

id	name	month_salary	age	position
1	Andrew	1424	40	CTO
2	Susan	1425	38	CFO
3	John	1156	54	Administrative
4	Joe	1570	66	Technician
6	Jacob	1462	38	Technician
7	Mary	1641	53	Technician
8	Kate	1603	56	Technician
9	Jacqueline	NA	55	Technician
10	Ivy	NA	43	Technician

Show the code


# 交叉连接
Merged <- merge(x = df_1, y = df_2, by = NULL)
head(Merged)

id.x	name.x	month_salary	id.y	name.y	age	position
1	Andrew	1424	1	Andrew	40	CTO
2	Susan	1425	1	Andrew	40	CTO
3	John	1156	1	Andrew	40	CTO
4	Joe	1570	1	Andrew	40	CTO
5	Jack	1223	1	Andrew	40	CTO
6	Jacob	1462	1	Andrew	40	CTO

7.8.2 按行名合并

Show the code

df1 <- data.frame(var = c("one", "two", "three", "four", "five"),
                  data = c(1, 5, 1, 6, 8))
rownames(df1) <- c("A", "B", "C", "D", "E")
df1

	var	data
A	one	1
B	two	5
C	three	1
D	four	6
E	five	8

Show the code


df2 <- data.frame(var = c("three", "one", "eight", "two", "nine"),
                  data = c(1, 5, 1, 6, 8))
rownames(df2) <- c("E", "A", "B", "D", "C")
df2

	var	data
E	three	1
A	one	5
B	eight	1
D	two	6
C	nine	8

Show the code

merge(df1, df2, by = 0, all = TRUE)

Row.names	var.x	data.x	var.y	data.y
A	one	1	one	5
B	two	5	eight	1
C	three	1	nine	8
D	four	6	two	6
E	five	8	three	1

Show the code

merge(df1, df2, by = "row.names", all = TRUE) # Equivalent

Row.names	var.x	data.x	var.y	data.y
A	one	1	one	5
B	two	5	eight	1
C	three	1	nine	8
D	four	6	two	6
E	five	8	three	1

7.9 AsIs `I()`

I() 函数来自 base 包。它会在对象的类属性前加上 "AsIs" 类，指示 R 在进行操作（例如在 data.frame() 中）时应该将该对象"按原样"（as is）处理，不对它进行任何转换。

Show the code


summary(mtcars$mpg) |> as_tibble()
#> Error in dimnames(x) <- dnx: 'dimnames' applied to non-array

lapply(summary(mtcars$mpg), I) 
#> $Min.
#> [1] 10.4
#> 
#> $`1st Qu.`
#> [1] 15.425
#> 
#> $Median
#> [1] 19.2
#> 
#> $Mean
#> [1] 20.09062
#> 
#> $`3rd Qu.`
#> [1] 22.8
#> 
#> $Max.
#> [1] 33.9
lapply(summary(mtcars$mpg), I) |> as_tibble()

Min.	1st Qu.	Median	Mean	3rd Qu.	Max.
10.4	15.425	19.2	20.090625	22.8	33.9

Show the code


sapply(summary(mtcars$mpg), I, simplify = F) 
#> $Min.
#> [1] 10.4
#> 
#> $`1st Qu.`
#> [1] 15.425
#> 
#> $Median
#> [1] 19.2
#> 
#> $Mean
#> [1] 20.09062
#> 
#> $`3rd Qu.`
#> [1] 22.8
#> 
#> $Max.
#> [1] 33.9
sapply(summary(mtcars$mpg), I, simplify = F) |> as_tibble()

Min.	1st Qu.	Median	Mean	3rd Qu.	Max.
10.4	15.425	19.2	20.090625	22.8	33.9

7.10 数学函数

Show the code

x<-c(25,-4,3.66,3.42,-5.99)
abs(x)                       #绝对值
#> [1] 25.00  4.00  3.66  3.42  5.99
sqrt(x)                      #平方根
#> [1] 5.000000      NaN 1.913113 1.849324      NaN
ceiling(x)                   #向上取整
#> [1] 25 -4  4  4 -5
floor(x)                     #向下取整
#> [1] 25 -4  3  3 -6
trunc(x)                     #整数部分
#> [1] 25 -4  3  3 -5
round(x,digits = 2)          #四舍五入，保留2位小数
#> [1] 25.00 -4.00  3.66  3.42 -5.99
signif(x,digits = 2)         #四舍五入，保留2有效数字
#> [1] 25.0 -4.0  3.7  3.4 -6.0
log(x,base=4)                #对x取以base为底的对数
#> [1] 2.3219281       NaN 0.9359218 0.8869982       NaN
log(x)                          #自然对数
#> [1] 3.218876      NaN 1.297463 1.229641      NaN
log10(x)                        #常用对数
#> [1] 1.3979400       NaN 0.5634811 0.5340261       NaN
exp(x)                       #e指数函数
#> [1] 7.200490e+10 1.831564e-02 3.886134e+01 3.056942e+01 2.503664e-03

7.11 统计函数

Show the code

mean(x,trim = 0.05,na.rm = TRUE)     #算术平均值
#> [1] 4.418
median(x)                            #中位数
#> [1] 3.42
sd(x)                                #标准差
#> [1] 12.29206
var(x)                               #方差
#> [1] 151.0946
mad(x)                               #绝对中位差
#> [1] 11.00089
quantile(x,probs = c(0,0.25,0.5,0.75,1))     #分位数
#>    0%   25%   50%   75%  100% 
#> -5.99 -4.00  3.42  3.66 25.00
range(x)                             # 值域
#> [1] -5.99 25.00
scale(x,center = TRUE,scale = TRUE)  #标准化（均值为0、标准差为1）
#>             [,1]
#> [1,]  1.67441487
#> [2,] -0.68483259
#> [3,] -0.06166585
#> [4,] -0.08119065
#> [5,] -0.84672578
#> attr(,"scaled:center")
#> [1] 4.418
#> attr(,"scaled:scale")
#> [1] 12.29206
scale(x,center = TRUE,scale =FALSE)  #中心化：减去均值
#>         [,1]
#> [1,]  20.582
#> [2,]  -8.418
#> [3,]  -0.758
#> [4,]  -0.998
#> [5,] -10.408
#> attr(,"scaled:center")
#> [1] 4.418


x <- c(1,3,5,7,9,11,13)
# Find the "previous" (lag()) or "next" (lead()) values in a vector
dplyr::lag(x,n=2)                      # n阶滞后
#> [1] NA NA  1  3  5  7  9
dplyr::lead(x,n=2)                     # n阶前移
#> [1]  5  7  9 11 13 NA NA

# 滞后差分  lag阶滞后  difference阶差分
diff(x,lag = 1,difference=1)     # 隔0个值后位减前位，进行1次
#> [1] 2 2 2 2 2 2
diff(x,lag = 1,difference=2)     ## 隔0个值后位减前位，进行2次
#> [1] 0 0 0 0 0
diff(x,lag = 1,difference=3)     # 隔0个值后位减前位，进行3次
#> [1] 0 0 0 0

diff(x,lag = 2,difference=1)  # 隔1个值后位减前位，进行1次
#> [1] 4 4 4 4 4
diff(x,lag = 2,difference=2) ## 隔1个值后位减前位，进行2次
#> [1] 0 0 0

7.12 字符串函数

Show the code


# 匹配
data <- data.frame(
  name = c("Alice", "Bob", "Carol", "Dave", "Eve"),
  description = c("Software developer", "Data analyst", "UX designer", "Project manager", "Data scientist")
)
data

name	description
Alice	Software developer
Bob	Data analyst
Carol	UX designer
Dave	Project manager
Eve	Data scientist

Show the code


data$has_data_analyst <- str_detect(data$description, "Data analyst")
print(data)
#>    name        description has_data_analyst
#> 1 Alice Software developer            FALSE
#> 2   Bob       Data analyst             TRUE
#> 3 Carol        UX designer            FALSE
#> 4  Dave    Project manager            FALSE
#> 5   Eve     Data scientist            FALSE

data$has_data_grepl <- grepl("Data", data$description)
print(data)
#>    name        description has_data_analyst has_data_grepl
#> 1 Alice Software developer            FALSE          FALSE
#> 2   Bob       Data analyst             TRUE           TRUE
#> 3 Carol        UX designer            FALSE          FALSE
#> 4  Dave    Project manager            FALSE          FALSE
#> 5   Eve     Data scientist            FALSE           TRUE

Show the code

#子串
substr(x="qwertyyuio",start = 2,stop=4)   
#> [1] "wer"
str_sub(string = "qwertyyuio",start = 2,end = 4)
#> [1] "wer"
#查找替换
sub(pattern = " ",replacement = ".",
    x="hello world hello !",ignore.case = FALSE,fixed = FALSE)  
#> [1] "hello.world hello !"

str_replace_all("hello world hello !"," ",replacement = ".")
#> [1] "hello.world.hello.!"
# 查找,返回下标
grep(pattern = "v",x=c("a","v","D","A","f","J"),ignore.case = FALSE,fixed = FALSE)   
#> [1] 2

 # 分隔，\\转义字符
strsplit(x="a.fa.fag",split = "\\.",fixed = FALSE) 
#> [[1]]
#> [1] "a"   "fa"  "fag"


#连接
paste("x",c("a","b"),sep="",collapse = "?")  
#> [1] "xa?xb"
paste0("x",c("A","B"),collapse="?")
#> [1] "xA?xB"

cat("hello","BOb","\b\n","\bIsn\' R","\t","GREAT?\n",sep = " " )
#> hello BOb 
#>  Isn' R      GREAT?

#
toupper("abc")              #大写转换
#> [1] "ABC"
tolower("aaAGEErg")         #小写转换
#> [1] "aaageerg"

7.13 打印函数 `sprintf()`

Show the code

a <- "string"
sprintf("This is where a %s goes.", a)
#> [1] "This is where a string goes."

x <- 8
sprintf("Regular:%04d", x)
#> [1] "Regular:0008"

sprintf("%f", pi)         # "3.141593"
#> [1] "3.141593"
sprintf("%.3f", pi)       # "3.142"
#> [1] "3.142"
sprintf("%1.0f", pi)      # "3"
#> [1] "3"
sprintf("%5.1f", pi)      # "  3.1"
#> [1] "  3.1"
sprintf("%05.1f", pi)     # "003.1"
#> [1] "003.1"
sprintf("%+f", pi)        # "+3.141593"
#> [1] "+3.141593"
sprintf("% f", pi)        # " 3.141593"
#> [1] " 3.141593"
sprintf("%-10f", pi)      # "3.141593  "   (left justified)
#> [1] "3.141593  "
sprintf("%e", pi)         #"3.141593e+00"
#> [1] "3.141593e+00"
sprintf("%E", pi)         # "3.141593E+00"
#> [1] "3.141593E+00"
sprintf("%g", pi)         # "3.14159"
#> [1] "3.14159"
sprintf("%g",   1e6 * pi) # "3.14159e+06"  (exponential)
#> [1] "3.14159e+06"
sprintf("%.9g", 1e6 * pi) # "3141592.65"   ("fixed")
#> [1] "3141592.65"
sprintf("%G", 1e-6 * pi)  # "3.14159E-06"
#> [1] "3.14159E-06"


x <- "string"
sprintf("Substitute in multiple strings: %s %s", x, "string2")
#> [1] "Substitute in multiple strings: string string2"

sprintf("A single percent sign here： %%")
#> [1] "A single percent sign here： %"

7.14 apply函数簇

7.14.1 `apply()`

Show the code

apply(X = mtcars, MARGIN = 2, FUN = mean)
#>        mpg        cyl       disp         hp       drat         wt       qsec 
#>  20.090625   6.187500 230.721875 146.687500   3.596563   3.217250  17.848750 
#>         vs         am       gear       carb 
#>   0.437500   0.406250   3.687500   2.812500

7.14.2 `aggregate()`

Show the code

aggregate(x = mtcars,
          by = list(am=mtcars$am,cyl=mtcars$cyl),
          FUN = mean)

am	cyl	mpg	cyl	disp	hp	drat	wt	qsec	vs	am	gear	carb
0	4	22.90000	4	135.8667	84.66667	3.770000	2.935000	20.97000	1.000	0	3.666667	1.666667
1	4	28.07500	4	93.6125	81.87500	4.183750	2.042250	18.45000	0.875	1	4.250000	1.500000
0	6	19.12500	6	204.5500	115.25000	3.420000	3.388750	19.21500	1.000	0	3.500000	2.500000
1	6	20.56667	6	155.0000	131.66667	3.806667	2.755000	16.32667	0.000	1	4.333333	4.666667
0	8	15.05000	8	357.6167	194.16667	3.120833	4.104083	17.14250	0.000	0	3.000000	3.083333
1	8	15.40000	8	326.0000	299.50000	3.880000	3.370000	14.55000	0.000	1	5.000000	6.000000

Show the code

aggregate(.~am+cyl,
          data = mtcars,
          FUN = mean)

am	cyl	mpg	disp	hp	drat	wt	qsec	vs	gear	carb
0	4	22.90000	135.8667	84.66667	3.770000	2.935000	20.97000	1.000	3.666667	1.666667
1	4	28.07500	93.6125	81.87500	4.183750	2.042250	18.45000	0.875	4.250000	1.500000
0	6	19.12500	204.5500	115.25000	3.420000	3.388750	19.21500	1.000	3.500000	2.500000
1	6	20.56667	155.0000	131.66667	3.806667	2.755000	16.32667	0.000	4.333333	4.666667
0	8	15.05000	357.6167	194.16667	3.120833	4.104083	17.14250	0.000	3.000000	3.083333
1	8	15.40000	326.0000	299.50000	3.880000	3.370000	14.55000	0.000	5.000000	6.000000

Show the code

aggregate(mpg~am+cyl,
          data = mtcars,
          FUN = summary)

am	cyl	mpg
0	4	21.50000
1	4	21.40000
0	6	17.80000
1	6	19.70000
0	8	10.40000
1	8	15.00000

7.14.3 `by()`, `tapply()`

by()是应用于数据框的tapply的面向对象包装器。

Show the code

by(data = mtcars[,1:3],
   INDICES = list(cyl=mtcars$cyl),
   FUN = summary)
#> cyl: 4
#>       mpg             cyl         disp       
#>  Min.   :21.40   Min.   :4   Min.   : 71.10  
#>  1st Qu.:22.80   1st Qu.:4   1st Qu.: 78.85  
#>  Median :26.00   Median :4   Median :108.00  
#>  Mean   :26.66   Mean   :4   Mean   :105.14  
#>  3rd Qu.:30.40   3rd Qu.:4   3rd Qu.:120.65  
#>  Max.   :33.90   Max.   :4   Max.   :146.70  
#> ------------------------------------------------------------ 
#> cyl: 6
#>       mpg             cyl         disp      
#>  Min.   :17.80   Min.   :6   Min.   :145.0  
#>  1st Qu.:18.65   1st Qu.:6   1st Qu.:160.0  
#>  Median :19.70   Median :6   Median :167.6  
#>  Mean   :19.74   Mean   :6   Mean   :183.3  
#>  3rd Qu.:21.00   3rd Qu.:6   3rd Qu.:196.3  
#>  Max.   :21.40   Max.   :6   Max.   :258.0  
#> ------------------------------------------------------------ 
#> cyl: 8
#>       mpg             cyl         disp      
#>  Min.   :10.40   Min.   :8   Min.   :275.8  
#>  1st Qu.:14.40   1st Qu.:8   1st Qu.:301.8  
#>  Median :15.20   Median :8   Median :350.5  
#>  Mean   :15.10   Mean   :8   Mean   :353.1  
#>  3rd Qu.:16.25   3rd Qu.:8   3rd Qu.:390.0  
#>  Max.   :19.20   Max.   :8   Max.   :472.0

by(data = mtcars[,1:3],
   INDICES = factor(mtcars$cyl),
   FUN = function(x) lm( disp ~ mpg, data = x))
#> factor(mtcars$cyl): 4
#> 
#> Call:
#> lm(formula = disp ~ mpg, data = x)
#> 
#> Coefficients:
#> (Intercept)          mpg  
#>     233.067       -4.798  
#> 
#> ------------------------------------------------------------ 
#> factor(mtcars$cyl): 6
#> 
#> Call:
#> lm(formula = disp ~ mpg, data = x)
#> 
#> Coefficients:
#> (Intercept)          mpg  
#>     125.122        2.947  
#> 
#> ------------------------------------------------------------ 
#> factor(mtcars$cyl): 8
#> 
#> Call:
#> lm(formula = disp ~ mpg, data = x)
#> 
#> Coefficients:
#> (Intercept)          mpg  
#>      560.87       -13.76

tapply(X = mtcars[,1:3],
       INDEX = list(cyl=mtcars$cyl),
       FUN = summary)
#> $`4`
#>       mpg             cyl         disp       
#>  Min.   :21.40   Min.   :4   Min.   : 71.10  
#>  1st Qu.:22.80   1st Qu.:4   1st Qu.: 78.85  
#>  Median :26.00   Median :4   Median :108.00  
#>  Mean   :26.66   Mean   :4   Mean   :105.14  
#>  3rd Qu.:30.40   3rd Qu.:4   3rd Qu.:120.65  
#>  Max.   :33.90   Max.   :4   Max.   :146.70  
#> 
#> $`6`
#>       mpg             cyl         disp      
#>  Min.   :17.80   Min.   :6   Min.   :145.0  
#>  1st Qu.:18.65   1st Qu.:6   1st Qu.:160.0  
#>  Median :19.70   Median :6   Median :167.6  
#>  Mean   :19.74   Mean   :6   Mean   :183.3  
#>  3rd Qu.:21.00   3rd Qu.:6   3rd Qu.:196.3  
#>  Max.   :21.40   Max.   :6   Max.   :258.0  
#> 
#> $`8`
#>       mpg             cyl         disp      
#>  Min.   :10.40   Min.   :8   Min.   :275.8  
#>  1st Qu.:14.40   1st Qu.:8   1st Qu.:301.8  
#>  Median :15.20   Median :8   Median :350.5  
#>  Mean   :15.10   Mean   :8   Mean   :353.1  
#>  3rd Qu.:16.25   3rd Qu.:8   3rd Qu.:390.0  
#>  Max.   :19.20   Max.   :8   Max.   :472.0

7.14.4 `lapply()`, `sapply()`

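lapply() returns a list of the same length as X, each element of which is the result of applying FUN to the corresponding element of X.

sapply() is a user-friendly version and wrapper of lapply() that by default returns a vector or matrix when the result can be simplified.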

Show the code

x <- list(a = 1:10, beta = exp(-3:3), logic = c(TRUE,FALSE,FALSE,TRUE))
lapply(X = x,FUN = quantile)
#> $a
#>    0%   25%   50%   75%  100% 
#>  1.00  3.25  5.50  7.75 10.00 
#> 
#> $beta
#>          0%         25%         50%         75%        100% 
#>  0.04978707  0.25160736  1.00000000  5.05366896 20.08553692 
#> 
#> $logic
#>   0%  25%  50%  75% 100% 
#>  0.0  0.0  0.5  1.0  1.0

sapply(x, quantile)
#>          a        beta logic
#> 0%    1.00  0.04978707   0.0
#> 25%   3.25  0.25160736   0.0
#> 50%   5.50  1.00000000   0.5
#> 75%   7.75  5.05366896   1.0
#> 100% 10.00 20.08553692   1.0

7.14.5 `vapply`

Show the code

by_cyl <- split(mtcars, mtcars$cyl)
models <- lapply(by_cyl, function(data) lm(mpg ~ wt, data = data))
models
#> $`4`
#> 
#> Call:
#> lm(formula = mpg ~ wt, data = data)
#> 
#> Coefficients:
#> (Intercept)           wt  
#>      39.571       -5.647  
#> 
#> 
#> $`6`
#> 
#> Call:
#> lm(formula = mpg ~ wt, data = data)
#> 
#> Coefficients:
#> (Intercept)           wt  
#>       28.41        -2.78  
#> 
#> 
#> $`8`
#> 
#> Call:
#> lm(formula = mpg ~ wt, data = data)
#> 
#> Coefficients:
#> (Intercept)           wt  
#>      23.868       -2.192
vapply(models, function(x) coef(x)[[2]], double(1))
#>         4         6         8 
#> -5.647025 -2.780106 -2.192438

7.15 矩阵运算

Show the code

X <- matrix(data = 1:9,ncol = 3,nrow = 3)
X
#>      [,1] [,2] [,3]
#> [1,]    1    4    7
#> [2,]    2    5    8
#> [3,]    3    6    9

diag(X)
#> [1] 1 5 9
det(X)
#> [1] 0
eigen(X)
#> eigen() decomposition
#> $values
#> [1]  1.611684e+01 -1.116844e+00 -5.700691e-16
#> 
#> $vectors
#>            [,1]       [,2]       [,3]
#> [1,] -0.4645473 -0.8829060  0.4082483
#> [2,] -0.5707955 -0.2395204 -0.8164966
#> [3,] -0.6770438  0.4038651  0.4082483

X
#>      [,1] [,2] [,3]
#> [1,]    1    4    7
#> [2,]    2    5    8
#> [3,]    3    6    9
# 协方差矩阵
cov(X)
#>      [,1] [,2] [,3]
#> [1,]    1    1    1
#> [2,]    1    1    1
#> [3,]    1    1    1

# 相关系数矩阵
cor(X)
#>      [,1] [,2] [,3]
#> [1,]    1    1    1
#> [2,]    1    1    1
#> [3,]    1    1    1

7.16 统计分布函数

函数名形如 [dpqr] + 分布缩写（例如正态分布的 dnorm、pnorm、qnorm、rnorm），其中 d 为密度函数，p 为分布函数，q 为分位数函数，r 为随机数生成函数。

Show the code

### 正态分布
dnorm(3,0,2)  #N(0,4)在 3 处的密度值
#> [1] 0.0647588
pnorm(1:3,0,2)#N(0,4)在1，2，3处的分布概率值
#> [1] 0.6914625 0.8413447 0.9331928
qnorm(1-0.025,0,1)# N(0,1)的上0.025分位数
#> [1] 1.959964
rnorm(5,3,3)  # 生成5个服从N(3,9)的随机数
#> [1] 3.401818 2.115231 1.348325 5.882787 3.584287


### 二项分布
1-sum(dbinom(0:1,400,0.02))# 命中率为0.02，独立射击400次，至少击中2次的概率
#> [1] 0.9971655
ggplot()

Show the code




### 多元正态分布
mean<-c(230.7,146.7,3.6)
sigma<-matrix(c(15360.8,6721.2,-47.1,
                6721.2,4700.9,-16.5,
                -47.1,16.5,0.3),nrow = 3,ncol = 3)
library(MASS)
multi <- mvrnorm(500,mean,sigma)
head(multi)
#>          [,1]     [,2]     [,3]
#> [1,] 365.6210 131.9249 3.193131
#> [2,] 277.0565 215.3377 3.732176
#> [3,] 188.6413  64.2639 4.161602
#> [4,] 180.2383 113.7082 3.760537
#> [5,] 327.8288 211.1226 3.160674
#> [6,] 383.8595 220.1269 2.838524

7.1 赋值运算符

7.2 其他运算符

7.3 逻辑索引

7.4 子集运算

7.4.1 [ ]

7.4.2 $, [[ ]]

7.5 生成数值序列

7.6 排序

7.7 拆分

7.8 合并

7.8.1 连接

7.8.2 按行名合并

7.9 AsIs I()

7.10 数学函数

7.11 统计函数

7.12 字符串函数

7.13 打印函数 sprintf()

7.14 apply函数簇

7.14.1 apply()

7.14.2 aggregate()

7.14.3 by(), tapply()

7.14.4 lapply(), sapply()

7.14.5 vapply

7.15 矩阵运算

7.16 统计分布函数

7.4.1 `[ ]`

7.4.2 `$`, `[[ ]]`

7.9 AsIs `I()`

7.13 打印函数 `sprintf()`

7.14.1 `apply()`

7.14.2 `aggregate()`

7.14.3 `by()`, `tapply()`

7.14.4 `lapply()`, `sapply()`

7.14.5 `vapply`