Code
library(Biobase)
conflicts_prefer(GenomicRanges::setdiff)

4.1 组件

4.1.1 Assay data

包含实际的表达数据。通常是一个矩阵,其中行代表基因,列代表样本。数据可以是原始计数值、归一化的表达值等。

Code
# Locate the "extdata" directory installed with Biobase and build the path to
# the tab-delimited example expression table.
dataDirectory <- system.file("extdata", package = "Biobase")
exprsFile <- file.path(dataDirectory, "exprsData.txt")

# Import the table (header row -> column names, first column -> row names)
# and coerce the resulting data frame to a matrix.
exprs <- as.matrix(
  read.table(
    exprsFile,
    sep = "\t",
    header = TRUE,
    row.names = 1,
    as.is = TRUE
  )
)
# Sanity-check the import: the object should be a plain matrix.
class(exprs)
#> [1] "matrix" "array"
# Number of rows and columns of the matrix.
dim(exprs)
#> [1] 500  26
# Column names of the matrix (A..Z).
colnames(exprs)
#>  [1] "A" "B" "C" "D" "E" "F" "G" "H" "I" "J" "K" "L" "M" "N" "O" "P" "Q" "R" "S"
#> [20] "T" "U" "V" "W" "X" "Y" "Z"
# Preview the first rows, restricted to the first five columns.
head(exprs[,1:5])
#>                        A         B        C        D        E
#> AFFX-MurIL2_at  192.7420  85.75330 176.7570 135.5750 64.49390
#> AFFX-MurIL10_at  97.1370 126.19600  77.9216  93.3713 24.39860
#> AFFX-MurIL4_at   45.8192   8.83135  33.0632  28.7072  5.94492
#> AFFX-MurFAS_at   22.5445   3.60093  14.6883  12.3397 36.86630
#> AFFX-BioB-5_at   96.7875  30.43800  46.1271  70.9319 56.17440
#> AFFX-BioB-M_at   89.0730  25.84610  57.2033  69.9766 49.58220

4.1.2 样本元数据

包含样本的元数据。这个部分存储有关样本的信息,比如样本的处理条件、时间点等。通常以 AnnotatedDataFrame 的形式存储。

Code
# Read the tab-delimited phenotype table that sits in the same directory as
# the expression data; its first column supplies the row names.
pDataFile <- file.path(dataDirectory, "pData.txt")
pData <- read.table(
  pDataFile,
  sep = "\t",
  header = TRUE,
  row.names = 1
)

# Size of the phenotype table (rows x columns).
dim(pData)
#> [1] 26  3
# Names of the phenotype variables.
colnames(pData)
#> [1] "gender" "type"   "score"
# Storage class of each phenotype variable.
sapply(pData, class)
#>      gender        type       score 
#> "character" "character"   "numeric"

# Row names of the phenotype table; these are compared with the column names
# of the expression matrix further down.
rownames(pData)
#>  [1] "A" "B" "C" "D" "E" "F" "G" "H" "I" "J" "K" "L" "M" "N" "O" "P" "Q" "R" "S"
#> [20] "T" "U" "V" "W" "X" "Y" "Z"

# Per-column summary of the phenotype table.
summary(pData)
#>     gender              type               score       
#>  Length:26          Length:26          Min.   :0.1000  
#>  Class :character   Class :character   1st Qu.:0.3275  
#>  Mode  :character   Mode  :character   Median :0.4150  
#>                                        Mean   :0.5369  
#>                                        3rd Qu.:0.7650  
#>                                        Max.   :0.9800

# The row names of the phenotype table must match the column names of the
# expression matrix, in the same order. identical() compares the two name
# vectors as whole objects, so a length or order mismatch yields a plain FALSE
# instead of relying on element-wise `==`, which recycles the shorter vector.
identical(rownames(pData), colnames(exprs))
#> [1] TRUE

变量元数据:对 pData 中每一列(变量)含义的说明

Code
# One human-readable description per phenotype variable; the row names tie
# each description to the pData column of the same name.
metadata <- data.frame(
  labelDescription = c(
    "Patient gender",
    "Case/control status",
    "Tumor progress on XYZ scale"
  ),
  row.names = c("gender", "type", "score")
)
metadata
labelDescription
gender Patient gender
type Case/control status
score Tumor progress on XYZ scale

Biobase 提供了 AnnotatedDataFrame 类,用来保存表型数据及其变量元数据。

Code
# Bundle the phenotype table with its per-variable descriptions in a single
# AnnotatedDataFrame object.
phenoData <- new(
  "AnnotatedDataFrame",
  data = pData,
  varMetadata = metadata
)
phenoData
#> An object of class 'AnnotatedDataFrame'
#>   rowNames: A B ... Z (26 total)
#>   varLabels: gender type score
#>   varMetadata: labelDescription

切片

Code
# pData() pulls the sample table back out of the AnnotatedDataFrame; head()
# limits the printout to its first rows.
head(pData(phenoData))
gender type score
A Female Control 0.75
B Male Case 0.40
C Male Control 0.73
D Male Case 0.42
E Female Case 0.93
F Male Control 0.22
Code

# [rows, columns] subsetting returns another AnnotatedDataFrame: here samples
# "A" and "Z" with only the "gender" variable.
phenoData[c("A","Z"),"gender"]
#> An object of class 'AnnotatedDataFrame'
#>   rowNames: A Z
#>   varLabels: gender
#>   varMetadata: labelDescription
# `$` reaches a phenotype column directly, so it can build a logical row
# filter; pData() then shows the sample rows whose score exceeds 0.8.
pData(phenoData[phenoData$score>0.8,])
gender type score
E Female Case 0.93
G Male Case 0.96
X Male Control 0.98
Y Female Case 0.94

4.1.3 基因元数据及其注释

包含基因的元数据。这个部分存储有关基因的信息,比如基因的注释、位置等。通常以 AnnotatedDataFrame 的形式存储。

`annotate` 和 `AnnotationDbi` 包为这些注释(元数据)包提供了基本的数据操作工具。

Code
# Annotation identifier for this data set; the ExpressionSet built later in
# this file is given this same identifier.
annotation <- "hgu95av2"

4.1.4 实验元数据

包含实验的元数据,比如实验设计、数据处理流程等。

MIAME 对象

Code
# Describe the experiment (experimenter, lab, contact, title, abstract, URL)
# in a MIAME object; free-form extras go into the `other` list.
experimentData <- new(
  "MIAME",
  name = "Pierre Fermat",
  lab = "Francis Galton Lab",
  contact = "pfermat@lab.not.exist",
  title = "Smoking-Cancer Experiment",
  abstract = "An example ExpressionSet",
  url = "www.lab.not.exist",
  other = list(notes = "Created from text files")
)
experimentData
#> Experiment data
#>   Experimenter name: Pierre Fermat 
#>   Laboratory: Francis Galton Lab 
#>   Contact information: pfermat@lab.not.exist 
#>   Title: Smoking-Cancer Experiment 
#>   URL: www.lab.not.exist 
#>   PMIDs:  
#> 
#>   Abstract: A 3 word abstract is available. Use 'abstract' method.
#>   notes:
#>    notes:     
#>       Created from text files

4.2 构造ExpressionSet

Code
# Reference for the class and its constructor: help("ExpressionSet-class")
# Minimal ExpressionSet: only the expression matrix is supplied.
miniSet <- ExpressionSet(assayData=exprs)
miniSet
#> ExpressionSet (storageMode: lockedEnvironment)
#> assayData: 500 features, 26 samples 
#>   element names: exprs 
#> protocolData: none
#> phenoData: none
#> featureData: none
#> experimentData: use 'experimentData(object)'
#> Annotation:

# Assemble the full ExpressionSet from the pieces built above: expression
# matrix, sample annotation, and experiment description. The annotation
# identifier is taken from the `annotation` variable defined earlier rather
# than repeating the "hgu95av2" literal, so the value is stated in one place.
exampleSet <- ExpressionSet(
  assayData = exprs,
  phenoData = phenoData,
  experimentData = experimentData,
  annotation = annotation
)

exampleSet
#> ExpressionSet (storageMode: lockedEnvironment)
#> assayData: 500 features, 26 samples 
#>   element names: exprs 
#> protocolData: none
#> phenoData
#>   sampleNames: A B ... Z (26 total)
#>   varLabels: gender type score
#>   varMetadata: labelDescription
#> featureData: none
#> experimentData: use 'experimentData(object)'
#> Annotation: hgu95av2

4.2.1 访问

Code
# Preview the first rows of the expression matrix. Written as a plain nested
# call because `%>%` is not part of base R and no package providing it is
# attached in the visible part of this file.
head(exprs(exampleSet))
#>                        A         B        C        D        E       F        G
#> AFFX-MurIL2_at  192.7420  85.75330 176.7570 135.5750 64.49390 76.3569 160.5050
#> AFFX-MurIL10_at  97.1370 126.19600  77.9216  93.3713 24.39860 85.5088  98.9086
#> AFFX-MurIL4_at   45.8192   8.83135  33.0632  28.7072  5.94492 28.2925  30.9694
#> AFFX-MurFAS_at   22.5445   3.60093  14.6883  12.3397 36.86630 11.2568  23.0034
#> AFFX-BioB-5_at   96.7875  30.43800  46.1271  70.9319 56.17440 42.6756  86.5156
#> AFFX-BioB-M_at   89.0730  25.84610  57.2033  69.9766 49.58220 26.1262  75.0083
#>                       H       I         J        K       L       M       N
#> AFFX-MurIL2_at  65.9631 56.9039 135.60800 63.44320 78.2126 83.0943 89.3372
#> AFFX-MurIL10_at 81.6932 97.8015  90.48380 70.57330 94.5418 75.3455 68.5827
#> AFFX-MurIL4_at  14.7923 14.2399  34.48740 20.35210 14.1554 20.6251 15.9231
#> AFFX-MurFAS_at  16.2134 12.0375   4.54978  8.51782 27.2852 10.1616 20.2488
#> AFFX-BioB-5_at  30.7927 19.7183  46.35200 39.13260 41.7698 80.2197 36.4903
#> AFFX-BioB-M_at  42.3352 41.1207  91.53070 39.91360 49.8397 63.4794 24.7007
#>                       O       P        Q        R         S       T         U
#> AFFX-MurIL2_at  91.0615 95.9377 179.8450 152.4670 180.83400 85.4146 157.98900
#> AFFX-MurIL10_at 87.4050 84.4581  87.6806 108.0320 134.26300 91.4031  -8.68811
#> AFFX-MurIL4_at  20.1579 27.8139  32.7911  33.5292  19.81720 20.4190  26.87200
#> AFFX-MurFAS_at  15.7849 14.3276  15.9488  14.6753  -7.91911 12.8875  11.91860
#> AFFX-BioB-5_at  36.4021 35.3054  58.6239 114.0620  93.44020 22.5168  48.64620
#> AFFX-BioB-M_at  47.4641 47.3578  58.1331 104.1220 115.83100 58.1224  73.42210
#>                        V       W         X       Y         Z
#> AFFX-MurIL2_at  146.8000 93.8829 103.85500 64.4340 175.61500
#> AFFX-MurIL10_at  85.0212 79.2998  71.65520 64.2369  78.70680
#> AFFX-MurIL4_at   31.1488 22.3420  19.01350 12.1686  17.37800
#> AFFX-MurFAS_at   12.8324 11.1390   7.55564 19.9849   8.96849
#> AFFX-BioB-5_at   90.2215 42.0053  57.57380 44.8216  61.70440
#> AFFX-BioB-M_at   64.6066 40.3068  41.82090 46.1087  49.41220

# Print the complete sample (phenotype) table carried by the ExpressionSet.
pData(exampleSet)
gender type score
A Female Control 0.75
B Male Case 0.40
C Male Control 0.73
D Male Case 0.42
E Female Case 0.93
F Male Control 0.22
G Male Case 0.96
H Male Case 0.79
I Female Case 0.37
J Male Control 0.63
K Male Case 0.26
L Female Control 0.36
M Male Case 0.41
N Male Case 0.80
O Female Case 0.10
P Female Control 0.41
Q Female Case 0.16
R Male Control 0.72
S Male Case 0.17
T Female Case 0.74
U Male Control 0.35
V Female Control 0.77
W Male Control 0.27
X Male Control 0.98
Y Female Case 0.94
Z Female Case 0.32
Code

# No featureData was supplied when exampleSet was built ("featureData: none"),
# so the printout consists of the feature names only, with no annotation
# columns.
fData(exampleSet)
AFFX-MurIL2_at
AFFX-MurIL10_at
AFFX-MurIL4_at
AFFX-MurFAS_at
AFFX-BioB-5_at
AFFX-BioB-M_at
AFFX-BioB-3_at
AFFX-BioC-5_at
AFFX-BioC-3_at
AFFX-BioDn-5_at
AFFX-BioDn-3_at
AFFX-CreX-5_at
AFFX-CreX-3_at
AFFX-BioB-5_st
AFFX-BioB-M_st
AFFX-BioB-3_st
AFFX-BioC-5_st
AFFX-BioC-3_st
AFFX-BioDn-5_st
AFFX-BioDn-3_st
AFFX-CreX-5_st
AFFX-CreX-3_st
AFFX-hum_alu_at
AFFX-DapX-5_at
AFFX-DapX-M_at
AFFX-DapX-3_at
AFFX-LysX-5_at
AFFX-LysX-M_at
AFFX-LysX-3_at
AFFX-PheX-5_at
AFFX-PheX-M_at
AFFX-PheX-3_at
AFFX-ThrX-5_at
AFFX-ThrX-M_at
AFFX-ThrX-3_at
AFFX-TrpnX-5_at
AFFX-TrpnX-M_at
AFFX-TrpnX-3_at
AFFX-HUMISGF3A/M97935_5_at
AFFX-HUMISGF3A/M97935_MA_at
AFFX-HUMISGF3A/M97935_MB_at
AFFX-HUMISGF3A/M97935_3_at
AFFX-HUMRGE/M10098_5_at
AFFX-HUMRGE/M10098_M_at
AFFX-HUMRGE/M10098_3_at
AFFX-HUMGAPDH/M33197_5_at
AFFX-HUMGAPDH/M33197_M_at
AFFX-HUMGAPDH/M33197_3_at
AFFX-HSAC07/X00351_5_at
AFFX-HSAC07/X00351_M_at
AFFX-HSAC07/X00351_3_at
AFFX-HUMTFRR/M11507_5_at
AFFX-HUMTFRR/M11507_M_at
AFFX-HUMTFRR/M11507_3_at
AFFX-M27830_5_at
AFFX-M27830_M_at
AFFX-M27830_3_at
AFFX-HSAC07/X00351_3_st
AFFX-HUMGAPDH/M33197_5_st
AFFX-HUMGAPDH/M33197_M_st
AFFX-HUMGAPDH/M33197_3_st
AFFX-HSAC07/X00351_5_st
AFFX-HSAC07/X00351_M_st
AFFX-YEL002c/WBP1_at
AFFX-YEL018w/_at
AFFX-YEL024w/RIP1_at
AFFX-YEL021w/URA3_at
31307_at
31308_at
31309_r_at
31310_at
31311_at
31312_at
31313_at
31314_at
31315_at
31316_at
31317_r_at
31318_at
31319_at
31320_at
31321_at
31322_at
31323_r_at
31324_at
31325_at
31326_at
31327_at
31328_at
31329_at
31330_at
31331_at
31332_at
31333_at
31334_at
31335_at
31336_at
31337_at
31338_at
31339_at
31340_at
31341_at
31342_at
31343_at
31344_at
31345_at
31346_at
31347_at
31348_at
31349_at
31350_at
31351_at
31352_at
31353_f_at
31354_r_at
31355_at
31356_at
31357_at
31358_at
31359_at
31360_at
31361_at
31362_at
31363_at
31364_i_at
31365_f_at
31366_at
31367_at
31368_at
31369_at
31370_at
31371_at
31372_at
31373_at
31374_at
31375_at
31376_at
31377_r_at
31378_at
31379_at
31380_at
31381_at
31382_f_at
31383_at
31384_at
31385_at
31386_at
31387_at
31388_at
31389_at
31390_at
31391_at
31392_r_at
31393_r_at
31394_at
31395_i_at
31396_r_at
31397_at
31398_at
31399_at
31400_at
31401_r_at
31402_at
31403_at
31404_at
31405_at
31406_at
31407_at
31408_at
31409_at
31410_at
31411_at
31412_at
31413_at
31414_at
31415_at
31416_at
31417_at
31418_at
31419_r_at
31420_at
31421_at
31422_at
31423_at
31424_at
31425_g_at
31426_at
31427_at
31428_at
31429_at
31430_at
31431_at
31432_g_at
31433_at
31434_at
31435_at
31436_s_at
31437_r_at
31438_s_at
31439_f_at
31440_at
31441_at
31442_at
31443_at
31444_s_at
31445_at
31446_s_at
31447_at
31448_s_at
31449_at
31450_s_at
31451_at
31452_at
31453_s_at
31454_f_at
31455_r_at
31456_at
31457_at
31458_at
31459_i_at
31460_f_at
31461_at
31462_f_at
31463_s_at
31464_at
31465_g_at
31466_at
31467_at
31468_f_at
31469_s_at
31470_at
31471_at
31472_s_at
31473_s_at
31474_r_at
31475_at
31476_g_at
31477_at
31478_at
31479_f_at
31480_f_at
31481_s_at
31482_at
31483_g_at
31484_at
31485_at
31486_s_at
31487_at
31488_s_at
31489_at
31490_at
31491_s_at
31492_at
31493_s_at
31494_at
31495_at
31496_g_at
31497_at
31498_f_at
31499_s_at
31500_at
31501_at
31502_at
31503_at
31504_at
31505_at
31506_s_at
31507_at
31508_at
31509_at
31510_s_at
31511_at
31512_at
31513_at
31514_at
31515_at
31516_f_at
31517_f_at
31518_i_at
31519_f_at
31520_at
31521_f_at
31522_f_at
31523_f_at
31524_f_at
31525_s_at
31526_f_at
31527_at
31528_f_at
31529_at
31530_at
31531_g_at
31532_at
31533_s_at
31534_at
31535_i_at
31536_at
31537_at
31538_at
31539_r_at
31540_at
31541_at
31542_at
31543_at
31544_at
31545_at
31546_at
31547_at
31548_at
31549_at
31550_at
31551_at
31552_at
31553_at
31554_at
31555_at
31556_at
31557_at
31558_at
31559_at
31560_at
31561_at
31562_at
31563_at
31564_at
31565_at
31566_at
31567_at
31568_at
31569_at
31570_at
31571_at
31572_at
31573_at
31574_i_at
31575_f_at
31576_at
31577_at
31578_at
31579_at
31580_at
31581_at
31582_at
31583_at
31584_at
31585_at
31586_f_at
31587_at
31588_at
31589_at
31590_g_at
31591_s_at
31592_at
31593_at
31594_at
31595_at
31596_f_at
31597_r_at
31598_s_at
31599_f_at
31600_s_at
31601_s_at
31602_at
31603_at
31604_at
31605_at
31606_at
31607_at
31608_g_at
31609_s_at
31610_at
31611_s_at
31612_at
31613_at
31614_at
31615_i_at
31616_r_at
31617_at
31618_at
31619_at
31620_at
31621_s_at
31622_f_at
31623_f_at
31624_at
31625_at
31626_i_at
31627_f_at
31628_at
31629_at
31630_at
31631_f_at
31632_at
31633_g_at
31634_at
31635_g_at
31636_s_at
31637_s_at
31638_at
31639_f_at
31640_r_at
31641_s_at
31642_at
31643_at
31644_at
31645_at
31646_at
31647_at
31648_at
31649_at
31650_g_at
31651_at
31652_at
31653_at
31654_at
31655_at
31656_at
31657_at
31658_at
31659_at
31660_at
31661_at
31662_at
31663_at
31664_at
31665_s_at
31666_f_at
31667_r_at
31668_f_at
31669_s_at
31670_s_at
31671_at
31672_g_at
31673_s_at
31674_s_at
31675_s_at
31676_at
31677_at
31678_at
31679_at
31680_at
31681_at
31682_s_at
31683_at
31684_at
31685_at
31686_at
31687_f_at
31688_at
31689_at
31690_at
31691_g_at
31692_at
31693_f_at
31694_at
31695_g_at
31696_at
31697_s_at
31698_at
31699_at
31700_at
31701_r_at
31702_at
31703_at
31704_at
31705_at
31706_at
31707_at
31708_at
31709_at
31710_at
31711_at
31712_at
31713_s_at
31714_at
31715_at
31716_at
31717_at
31718_at
31719_at
31720_s_at
31721_at
31722_at
31723_at
31724_at
31725_s_at
31726_at
31727_at
31728_at
31729_at
31730_at
31731_at
31732_at
31733_at
31734_at
31735_at
31736_at
31737_at
31738_at
31739_at
Code
# Retrieve the experiment description stored in the ExpressionSet. A variable
# named `experimentData` also exists in this file; R skips non-function
# objects when resolving a call, so the accessor function is what runs here.
experimentData(exampleSet)
#> Experiment data
#>   Experimenter name: Pierre Fermat 
#>   Laboratory: Francis Galton Lab 
#>   Contact information: pfermat@lab.not.exist 
#>   Title: Smoking-Cancer Experiment 
#>   URL: www.lab.not.exist 
#>   PMIDs:  
#> 
#>   Abstract: A 3 word abstract is available. Use 'abstract' method.
#>   notes:
#>    notes:     
#>       Created from text files

4.2.2 切片

Code
# `$` on an ExpressionSet reaches a phenotype variable directly.
exampleSet$gender[1:5]
#> [1] "Female" "Male"   "Male"   "Male"   "Female"
# Element-wise comparison gives a logical vector usable as a sample filter.
exampleSet$gender[1:5] == "Female"
#> [1]  TRUE FALSE FALSE FALSE  TRUE

# Feature names, sample names, and phenotype variable names.
featureNames(exampleSet)[1:5]
#> [1] "AFFX-MurIL2_at"  "AFFX-MurIL10_at" "AFFX-MurIL4_at"  "AFFX-MurFAS_at" 
#> [5] "AFFX-BioB-5_at"
sampleNames(exampleSet)[1:5]
#> [1] "A" "B" "C" "D" "E"
varLabels(exampleSet)
#> [1] "gender" "type"   "score"
# Matrix-style indexing on the extracted expression values: first six
# features, first five samples.
exprs(exampleSet)[1:6,1:5]
#>                        A         B        C        D        E
#> AFFX-MurIL2_at  192.7420  85.75330 176.7570 135.5750 64.49390
#> AFFX-MurIL10_at  97.1370 126.19600  77.9216  93.3713 24.39860
#> AFFX-MurIL4_at   45.8192   8.83135  33.0632  28.7072  5.94492
#> AFFX-MurFAS_at   22.5445   3.60093  14.6883  12.3397 36.86630
#> AFFX-BioB-5_at   96.7875  30.43800  46.1271  70.9319 56.17440
#> AFFX-BioB-M_at   89.0730  25.84610  57.2033  69.9766 49.58220