BioMart
访问Ensembl ID
查找智人（Homo sapiens）基因组
https://bioconductor.org/packages/release/bioc/vignettes/biomaRt/inst/doc/accessing_ensembl.html#introduction
Code
# BiocManager::install("biomaRt")
library(biomaRt)
# List the BioMart databases that Ensembl currently exposes.
listEnsembl()
#> biomart version
#> 1 genes Ensembl Genes 115
#> 2 mouse_strains Mouse strains 115
#> 3 snps Ensembl Variation 115
#> 4 regulation Ensembl Regulation 115

# Connect to the Ensembl "genes" mart and select the human gene dataset.
ensembl <- useEnsembl(
  dataset = "hsapiens_gene_ensembl",
  biomart = "genes"
)
ensembl
#> Object of class 'Mart':
#> Using the ENSEMBL_MART_ENSEMBL BioMart database
#> Using the hsapiens_gene_ensembl dataset

# Query gene-level annotation. No filters are supplied, so the request is
# not restricted to a subset of genes.
genes <- getBM(
  mart = ensembl,
  attributes = c(
    "ensembl_gene_id",
    "hgnc_symbol",
    "chromosome_name",
    "start_position",
    "end_position",
    "gene_biotype"
  )
)

# Preview the first rows as an interactive table.
DT::datatable(head(genes))
Code
# Query transcript-level annotation (transcript ID, parent gene ID and
# transcript start/end coordinates) from the mart connected above.
transcripts <- getBM(
  mart = ensembl,
  attributes = c(
    "ensembl_transcript_id",
    "ensembl_gene_id",
    "transcript_start",
    "transcript_end"
  )
)

# Preview the first rows as an interactive table.
DT::datatable(head(transcripts))
GRCh38（Genome Reference Consortium Human Build 38）
Code
# Human gene BioMart, requested through the Asia mirror.
ensembl <- useEnsembl(
  mirror = "asia",
  dataset = "hsapiens_gene_ensembl",
  biomart = "ensembl"
)
映射
Code
# List the marts available on the default BioMart host.
listMarts()
#> biomart version
#> 1 ENSEMBL_MART_ENSEMBL Ensembl Genes 115
#> 2 ENSEMBL_MART_MOUSE Mouse strains 115
#> 3 ENSEMBL_MART_SNP Ensembl Variation 115
#> 4 ENSEMBL_MART_FUNCGEN Ensembl Regulation 115

# Connect to the Ensembl gene mart and select the human gene dataset.
mart <- useMart(
  dataset = "hsapiens_gene_ensembl",
  biomart = "ENSEMBL_MART_ENSEMBL"
)

# Browse every attribute that can be requested from this dataset.
DT::datatable(listAttributes(mart))
Code
# Entrez (NCBI) gene IDs to translate into Ensembl gene IDs and gene names.
entrez_id <- c("1017", "1018", "1019")

# Map the Entrez IDs through the `mart` connection created earlier.
# `filters` names the field to restrict on; `values` supplies the IDs to keep.
# (The stray empty argument after `values` in the original call is removed.)
getBM(
  attributes = c("entrezgene_id", "ensembl_gene_id", "external_gene_name"),
  filters = "entrezgene_id",
  values = entrez_id,
  mart = mart
)
#> entrezgene_id ensembl_gene_id external_gene_name
#> 1 1017 ENSG00000123374 CDK2
#> 2 1018 ENSG00000250506 CDK3
#> 3 1019 ENSG00000135446 CDK4
使用 Ensembl 的存档版本
Code
# List the archived Ensembl releases and their archive URLs. The wide table
# wraps when printed, so `current_release` appears as a second section.
listEnsemblArchives()
#> name date url version
#> 1 Ensembl GRCh37 Feb 2014 https://grch37.ensembl.org GRCh37
#> 2 Ensembl 115 Sep 2025 https://sep2025.archive.ensembl.org 115
#> 3 Ensembl 114 May 2025 https://may2025.archive.ensembl.org 114
#> 4 Ensembl 113 Oct 2024 https://oct2024.archive.ensembl.org 113
#> 5 Ensembl 112 May 2024 https://may2024.archive.ensembl.org 112
#> 6 Ensembl 111 Jan 2024 https://jan2024.archive.ensembl.org 111
#> 7 Ensembl 110 Jul 2023 https://jul2023.archive.ensembl.org 110
#> 8 Ensembl 109 Feb 2023 https://feb2023.archive.ensembl.org 109
#> 9 Ensembl 108 Oct 2022 https://oct2022.archive.ensembl.org 108
#> 10 Ensembl 107 Jul 2022 https://jul2022.archive.ensembl.org 107
#> 11 Ensembl 106 Apr 2022 https://apr2022.archive.ensembl.org 106
#> 12 Ensembl 105 Dec 2021 https://dec2021.archive.ensembl.org 105
#> 13 Ensembl 104 May 2021 https://may2021.archive.ensembl.org 104
#> 14 Ensembl 103 Feb 2021 https://feb2021.archive.ensembl.org 103
#> 15 Ensembl 102 Nov 2020 https://nov2020.archive.ensembl.org 102
#> 16 Ensembl 101 Aug 2020 https://aug2020.archive.ensembl.org 101
#> 17 Ensembl 100 Apr 2020 https://apr2020.archive.ensembl.org 100
#> 18 Ensembl 80 May 2015 https://may2015.archive.ensembl.org 80
#> 19 Ensembl 77 Oct 2014 https://oct2014.archive.ensembl.org 77
#> 20 Ensembl 75 Feb 2014 https://feb2014.archive.ensembl.org 75
#> 21 Ensembl 54 May 2009 https://may2009.archive.ensembl.org 54
#> current_release
#> 1
#> 2 *
#> 3
#> 4
#> 5
#> 6
#> 7
#> 8
#> 9
#> 10
#> 11
#> 12
#> 13
#> 14
#> 15
#> 16
#> 17
#> 18
#> 19
#> 20
#> 21

# List the marts that were available in archived release 112.
listEnsembl(version = 112)
#> biomart version
#> 1 genes Ensembl Genes 112
#> 2 mouse_strains Mouse strains 112
#> 3 snps Ensembl Variation 112
#> 4 regulation Ensembl Regulation 112

# Connect to the human gene dataset of archived release 112.
# `mirror` is intentionally not passed here: biomaRt does not combine `mirror`
# with `version` (it warns, ignores the mirror and uses the main Ensembl
# site), so the original `mirror = "asia"` had no effect beyond the warning.
ensembl_112 <- useEnsembl(
  biomart = "genes",
  dataset = "hsapiens_gene_ensembl",
  version = 112
)
使用 Ensembl 基因组
Code
# List the marts offered by Ensembl Genomes (protists, fungi, metazoa, plants).
listEnsemblGenomes()
#> biomart version
#> 1 protists_mart Ensembl Protists Genes 62
#> 2 protists_variations Ensembl Protists Variations 62
#> 3 fungi_mart Ensembl Fungi Genes 62
#> 4 fungi_variations Ensembl Fungi Variations 62
#> 5 metazoa_mart Ensembl Metazoa Genes 62
#> 6 metazoa_variations Ensembl Metazoa Variations 62
#> 7 plants_mart Ensembl Plants Genes 62
#> 8 plants_variations Ensembl Plants Variations 62

# Connect to the plants gene mart without choosing a dataset yet.
ensembl_plants <- useEnsemblGenomes(biomart = "plants_mart")

# Search the plant datasets for entries matching "Arabidopsis".
searchDatasets(pattern = "Arabidopsis", mart = ensembl_plants)
#> dataset description version
#> 6 ahalleri_eg_gene Arabidopsis halleri genes (Ahal2.2) Ahal2.2
#> 10 alyrata_eg_gene Arabidopsis lyrata genes (v.1.0) v.1.0
#> 15 athaliana_eg_gene Arabidopsis thaliana genes (TAIR10) TAIR10

# Connect directly to the Arabidopsis thaliana gene dataset.
ensembl_arabidopsis <- useEnsemblGenomes(
  dataset = "athaliana_eg_gene",
  biomart = "plants_mart"
)