1  ExpressionSet

1.1 组件

1.1.1 表达矩阵 Assay data

包含实际的表达数据。通常是一个矩阵,其中行代表基因,列代表样本。数据可以是原始计数值、归一化的表达值等。

Code
# Locate the example data directory shipped with the Biobase package.
# `mustWork = TRUE` makes a missing package/directory fail right here with a
# clear error, instead of returning "" and letting read.table() fail later on
# a meaningless path.
dataDirectory <- system.file("extdata", package = "Biobase", mustWork = TRUE)
exprsFile <- file.path(dataDirectory, "exprsData.txt")

# Read the tab-delimited table (header row, first column used as row names)
# and convert it to a matrix.
# NOTE: this variable has the same name as the `exprs()` accessor called later
# in this file. Later code refers to the variable by this name, so it is kept;
# calls such as `exprs(x)` still resolve to the function because R skips
# non-function objects when looking up a name in call position.
exprs <- as.matrix(read.table(exprsFile, header = TRUE, sep = "\t",
                              row.names = 1, as.is = TRUE))
class(exprs)
#> [1] "matrix" "array"
dim(exprs)
#> [1] 500  26
colnames(exprs)
#>  [1] "A" "B" "C" "D" "E" "F" "G" "H" "I" "J" "K" "L" "M" "N" "O" "P" "Q" "R" "S"
#> [20] "T" "U" "V" "W" "X" "Y" "Z"
# First six rows of the first five columns.
head(exprs[, 1:5])
#>                        A         B        C        D        E
#> AFFX-MurIL2_at  192.7420  85.75330 176.7570 135.5750 64.49390
#> AFFX-MurIL10_at  97.1370 126.19600  77.9216  93.3713 24.39860
#> AFFX-MurIL4_at   45.8192   8.83135  33.0632  28.7072  5.94492
#> AFFX-MurFAS_at   22.5445   3.60093  14.6883  12.3397 36.86630
#> AFFX-BioB-5_at   96.7875  30.43800  46.1271  70.9319 56.17440
#> AFFX-BioB-M_at   89.0730  25.84610  57.2033  69.9766 49.58220

1.1.2 列数据

包含样本的数据,样本为细胞或病人。这个部分存储有关样本的信息,比如样本的处理条件、时间点等。通常以 AnnotatedDataFrame 的形式存储。

Code
# Read the sample-level (phenotype) table: tab-delimited, header row, first
# column used as row names.
# NOTE: the variable `pData` has the same name as the `pData()` accessor called
# later in this file; later code refers to the variable by this name, so it is
# kept.
pDataFile <- file.path(dataDirectory, "pData.txt")
pData <- read.table(pDataFile, row.names = 1, header = TRUE, sep = "\t")

dim(pData)
#> [1] 26  3
colnames(pData)
#> [1] "gender" "type"   "score"
# vapply() pins the result to exactly one class string per column, so the
# return type cannot change with the input the way sapply()'s can.
vapply(pData, class, character(1))
#>      gender        type       score 
#> "character" "character"   "numeric"

rownames(pData)
#>  [1] "A" "B" "C" "D" "E" "F" "G" "H" "I" "J" "K" "L" "M" "N" "O" "P" "Q" "R" "S"
#> [20] "T" "U" "V" "W" "X" "Y" "Z"

summary(pData)
#>     gender              type               score       
#>  Length:26          Length:26          Min.   :0.1000  
#>  Class :character   Class :character   1st Qu.:0.3275  
#>  Mode  :character   Mode  :character   Median :0.4150  
#>                                        Mean   :0.5369  
#>                                        3rd Qu.:0.7650  
#>                                        Max.   :0.9800

# Check that the row names of the phenotype table match the column names of
# the expression matrix, in the same order. identical() compares the two
# vectors as a whole, so a length mismatch yields FALSE instead of an
# element-wise comparison with recycling.
identical(rownames(pData), colnames(exprs))
#> [1] TRUE

元数据

Code
# One human-readable description per phenotype column. The row names are the
# phenotype column names ("gender", "type", "score").
metadata <- data.frame(
  labelDescription = c(
    "Patient gender",
    "Case/control status",
    "Tumor progress on XYZ scale"
  ),
  row.names = c("gender", "type", "score")
)
metadata
#>                   labelDescription
#> gender              Patient gender
#> type           Case/control status
#> score  Tumor progress on XYZ scale

Biobase 提供了 AnnotatedDataFrame 类，用于保存表型数据及其元数据。

Code
library(Biobase)

# Bundle the phenotype table with its per-column descriptions in a single
# AnnotatedDataFrame object.
phenoData <- new(
  "AnnotatedDataFrame",
  data = pData,
  varMetadata = metadata
)
phenoData
#> An object of class 'AnnotatedDataFrame'
#>   rowNames: A B ... Z (26 total)
#>   varLabels: gender type score
#>   varMetadata: labelDescription

切片

Code
# Extract the underlying data frame and show its first rows.
phenoData |>
  pData() |>
  head()
#>   gender    type score
#> A Female Control  0.75
#> B   Male    Case  0.40
#> C   Male Control  0.73
#> D   Male    Case  0.42
#> E Female    Case  0.93
#> F   Male Control  0.22

# Subsetting by sample name and variable name returns another
# AnnotatedDataFrame.
phenoData[c("A", "Z"), "gender"]
#> An object of class 'AnnotatedDataFrame'
#>   rowNames: A Z
#>   varLabels: gender
#>   varMetadata: labelDescription
# Keep only the samples whose score exceeds 0.8, then extract the data frame.
phenoData[phenoData$score > 0.8, ] |>
  pData()
#>   gender    type score
#> E Female    Case  0.93
#> G   Male    Case  0.96
#> X   Male Control  0.98
#> Y Female    Case  0.94

1.1.3 行数据

包含基因的元数据。这个部分存储有关基因的信息,比如基因的注释、位置等。通常以 AnnotatedDataFrame 的形式存储。

annotation：AnnotationDbi 包为元数据包提供了基本的数据操作。

Code
# Annotation identifier string; the same value is supplied as the `annotation`
# argument when the full ExpressionSet is constructed later in this file.
annotation <- "hgu95av2"

1.1.4 实验元数据

包含实验的元数据,比如实验设计、数据处理流程等。

MIAME object

Code
# Describe the experiment in a MIAME object: experimenter, lab, contact,
# title, abstract and URL, plus a free-form `other` list holding a note.
experimentData <- new(
  "MIAME",
  name = "Pierre Fermat",
  lab = "Francis Galton Lab",
  contact = "pfermat@lab.not.exist",
  title = "Smoking-Cancer Experiment",
  abstract = "An example ExpressionSet",
  url = "www.lab.not.exist",
  other = list(notes = "Created from text files")
)
experimentData
#> Experiment data
#>   Experimenter name: Pierre Fermat 
#>   Laboratory: Francis Galton Lab 
#>   Contact information: pfermat@lab.not.exist 
#>   Title: Smoking-Cancer Experiment 
#>   URL: www.lab.not.exist 
#>   PMIDs:  
#> 
#>   Abstract: A 3 word abstract is available. Use 'abstract' method.
#>   notes:
#>    notes:     
#>       Created from text files

1.2 构造ExpressionSet

Code
# help("ExpressionSet-class")

# Minimal ExpressionSet: only the expression matrix is supplied, so the
# printed phenoData, featureData and Annotation entries are empty.
miniSet <- ExpressionSet(assayData = exprs)
miniSet
#> ExpressionSet (storageMode: lockedEnvironment)
#> assayData: 500 features, 26 samples 
#>   element names: exprs 
#> protocolData: none
#> phenoData: none
#> featureData: none
#> experimentData: use 'experimentData(object)'
#> Annotation:

# Full ExpressionSet assembled from the components built earlier in this file.
# The `annotation` variable defined earlier ("hgu95av2") is passed here rather
# than repeating the string literal, so the value is written in one place only.
exampleSet <- ExpressionSet(
  assayData = exprs,
  phenoData = phenoData,
  experimentData = experimentData,
  annotation = annotation
)

exampleSet
#> ExpressionSet (storageMode: lockedEnvironment)
#> assayData: 500 features, 26 samples 
#>   element names: exprs 
#> protocolData: none
#> phenoData
#>   sampleNames: A B ... Z (26 total)
#>   varLabels: gender type score
#>   varMetadata: labelDescription
#> featureData: none
#> experimentData: use 'experimentData(object)'
#> Annotation: hgu95av2

1.2.1 访问

Code
# Pull the expression matrix out of the ExpressionSet and show its first six
# rows (all 26 sample columns; the printout wraps across several panels).
exampleSet |>
  exprs() |>
  head(n = 6L)
#>                        A         B        C        D        E       F        G
#> AFFX-MurIL2_at  192.7420  85.75330 176.7570 135.5750 64.49390 76.3569 160.5050
#> AFFX-MurIL10_at  97.1370 126.19600  77.9216  93.3713 24.39860 85.5088  98.9086
#> AFFX-MurIL4_at   45.8192   8.83135  33.0632  28.7072  5.94492 28.2925  30.9694
#> AFFX-MurFAS_at   22.5445   3.60093  14.6883  12.3397 36.86630 11.2568  23.0034
#> AFFX-BioB-5_at   96.7875  30.43800  46.1271  70.9319 56.17440 42.6756  86.5156
#> AFFX-BioB-M_at   89.0730  25.84610  57.2033  69.9766 49.58220 26.1262  75.0083
#>                       H       I         J        K       L       M       N
#> AFFX-MurIL2_at  65.9631 56.9039 135.60800 63.44320 78.2126 83.0943 89.3372
#> AFFX-MurIL10_at 81.6932 97.8015  90.48380 70.57330 94.5418 75.3455 68.5827
#> AFFX-MurIL4_at  14.7923 14.2399  34.48740 20.35210 14.1554 20.6251 15.9231
#> AFFX-MurFAS_at  16.2134 12.0375   4.54978  8.51782 27.2852 10.1616 20.2488
#> AFFX-BioB-5_at  30.7927 19.7183  46.35200 39.13260 41.7698 80.2197 36.4903
#> AFFX-BioB-M_at  42.3352 41.1207  91.53070 39.91360 49.8397 63.4794 24.7007
#>                       O       P        Q        R         S       T         U
#> AFFX-MurIL2_at  91.0615 95.9377 179.8450 152.4670 180.83400 85.4146 157.98900
#> AFFX-MurIL10_at 87.4050 84.4581  87.6806 108.0320 134.26300 91.4031  -8.68811
#> AFFX-MurIL4_at  20.1579 27.8139  32.7911  33.5292  19.81720 20.4190  26.87200
#> AFFX-MurFAS_at  15.7849 14.3276  15.9488  14.6753  -7.91911 12.8875  11.91860
#> AFFX-BioB-5_at  36.4021 35.3054  58.6239 114.0620  93.44020 22.5168  48.64620
#> AFFX-BioB-M_at  47.4641 47.3578  58.1331 104.1220 115.83100 58.1224  73.42210
#>                        V       W         X       Y         Z
#> AFFX-MurIL2_at  146.8000 93.8829 103.85500 64.4340 175.61500
#> AFFX-MurIL10_at  85.0212 79.2998  71.65520 64.2369  78.70680
#> AFFX-MurIL4_at   31.1488 22.3420  19.01350 12.1686  17.37800
#> AFFX-MurFAS_at   12.8324 11.1390   7.55564 19.9849   8.96849
#> AFFX-BioB-5_at   90.2215 42.0053  57.57380 44.8216  61.70440
#> AFFX-BioB-M_at   64.6066 40.3068  41.82090 46.1087  49.41220

# Sample-level (phenotype) data frame stored in the ExpressionSet.
pData(exampleSet)
#>   gender    type score
#> A Female Control  0.75
#> B   Male    Case  0.40
#> C   Male Control  0.73
#> D   Male    Case  0.42
#> E Female    Case  0.93
#> F   Male Control  0.22
#> G   Male    Case  0.96
#> H   Male    Case  0.79
#> I Female    Case  0.37
#> J   Male Control  0.63
#> K   Male    Case  0.26
#> L Female Control  0.36
#> M   Male    Case  0.41
#> N   Male    Case  0.80
#> O Female    Case  0.10
#> P Female Control  0.41
#> Q Female    Case  0.16
#> R   Male Control  0.72
#> S   Male    Case  0.17
#> T Female    Case  0.74
#> U   Male Control  0.35
#> V Female Control  0.77
#> W   Male Control  0.27
#> X   Male Control  0.98
#> Y Female    Case  0.94
#> Z Female    Case  0.32

# Feature-level data. No featureData was supplied when exampleSet was built,
# so the result has one row per feature and zero columns.
fData(exampleSet)
#> data frame with 0 columns and 500 rows
# Experiment description (the MIAME object) attached to the set.
experimentData(exampleSet)
#> Experiment data
#>   Experimenter name: Pierre Fermat 
#>   Laboratory: Francis Galton Lab 
#>   Contact information: pfermat@lab.not.exist 
#>   Title: Smoking-Cancer Experiment 
#>   URL: www.lab.not.exist 
#>   PMIDs:  
#> 
#>   Abstract: A 3 word abstract is available. Use 'abstract' method.
#>   notes:
#>    notes:     
#>       Created from text files

1.2.2 切片

Code
# `$` on the ExpressionSet returns a phenotype column as a plain vector, which
# can be indexed and compared like any other vector.
exampleSet$gender[1:5]
#> [1] "Female" "Male"   "Male"   "Male"   "Female"
exampleSet$gender[1:5] == "Female"
#> [1]  TRUE FALSE FALSE FALSE  TRUE

# Names of the features (rows), samples (columns) and phenotype variables.
featureNames(exampleSet)[1:5]
#> [1] "AFFX-MurIL2_at"  "AFFX-MurIL10_at" "AFFX-MurIL4_at"  "AFFX-MurFAS_at" 
#> [5] "AFFX-BioB-5_at"
sampleNames(exampleSet)[1:5]
#> [1] "A" "B" "C" "D" "E"
varLabels(exampleSet)
#> [1] "gender" "type"   "score"
# Rows 1-6 and columns 1-5 of the expression matrix.
exprs(exampleSet)[1:6, 1:5]
#>                        A         B        C        D        E
#> AFFX-MurIL2_at  192.7420  85.75330 176.7570 135.5750 64.49390
#> AFFX-MurIL10_at  97.1370 126.19600  77.9216  93.3713 24.39860
#> AFFX-MurIL4_at   45.8192   8.83135  33.0632  28.7072  5.94492
#> AFFX-MurFAS_at   22.5445   3.60093  14.6883  12.3397 36.86630
#> AFFX-BioB-5_at   96.7875  30.43800  46.1271  70.9319 56.17440
#> AFFX-BioB-M_at   89.0730  25.84610  57.2033  69.9766 49.58220