# List every annotation set that annotatr provides out of the box.
possible_annotations <- builtin_annotations()
# Keep only the hg19 gene-model annotations (names containing "hg19_genes").
hg19_annots <- grep("hg19_genes", possible_annotations, value = TRUE)
# WHY DID WE PICK hg19?
# Let's keep CDS, 5' UTR, intron, 3' UTR and intergenic. They are selected by
# name rather than by position so the choice does not silently change if
# builtin_annotations() returns its entries in a different order.
# NOTE(review): the order below is assumed to match the former positional
# pick hg19_annots[c(3, 4, 7, 10, 11)] -- confirm against builtin_annotations().
my_hg19_annots <- paste0(
  "hg19_genes_",
  c("cds", "5UTRs", "introns", "3UTRs", "intergenic")
)
# Fail early, before the expensive build step, if a requested set is missing.
stopifnot(all(my_hg19_annots %in% hg19_annots))
# Build the annotation database.
annotations <- build_annotations(genome = "hg19", annotations = my_hg19_annots)
# Print a summary of the resulting annotation object.
annotations
GRanges object with 1446460 ranges and 5 metadata columns:
seqnames ranges strand | id tx_id
<Rle> <IRanges> <Rle> | <character> <character>
[1] chr1 12190-12227 + | CDS:1 uc010nxq.1
[2] chr1 12595-12721 + | CDS:2 uc010nxq.1
[3] chr1 13403-13639 + | CDS:3 uc010nxq.1
[4] chr1 69091-70008 + | CDS:4 uc001aal.1
[5] chr1 324343-324345 + | CDS:5 uc009vjk.2
... ... ... ... . ... ...
[1446456] chrUn_gl000246 1-38154 * | intergenic:17023 <NA>
[1446457] chrUn_gl000247 1-5786 * | intergenic:17024 <NA>
[1446458] chrUn_gl000247 10817-36422 * | intergenic:17025 <NA>
[1446459] chrUn_gl000248 1-39786 * | intergenic:17026 <NA>
[1446460] chrUn_gl000249 1-38502 * | intergenic:17027 <NA>
gene_id symbol type
<character> <character> <character>
[1] 100287102 DDX11L1 hg19_genes_cds
[2] 100287102 DDX11L1 hg19_genes_cds
[3] 100287102 DDX11L1 hg19_genes_cds
[4] 79501 OR4F5 hg19_genes_cds
[5] 100133331 <NA> hg19_genes_cds
... ... ... ...
[1446456] <NA> <NA> hg19_genes_intergenic
[1446457] <NA> <NA> hg19_genes_intergenic
[1446458] <NA> <NA> hg19_genes_intergenic
[1446459] <NA> <NA> hg19_genes_intergenic
[1446460] <NA> <NA> hg19_genes_intergenic
-------
seqinfo: 93 sequences (1 circular) from hg19 genome