ChIPseeker
ChIPseeker copied to clipboard
bug fix #193
Add a `columns` parameter to `annotatePeak()` to fix #193.
The `columns` parameter lets users request specific columns from the annotation database.
As reported in #193, users cannot get Ensembl gene IDs even when an Ensembl annotation database (here `EnsDb.Hsapiens.v86`) is passed via `annoDb`:
library(TxDb.Hsapiens.UCSC.hg19.knownGene)
library(EnsDb.Hsapiens.v86)
files <- getSampleFiles()
txdb <- TxDb.Hsapiens.UCSC.hg19.knownGene
Anno <- annotatePeak(files[[4]], tssRegion=c(-3000, 3000),
TxDb=TxDb.Hsapiens.UCSC.hg19.knownGene,
annoDb="EnsDb.Hsapiens.v86")
> Anno@anno
GRanges object with 1331 ranges and 13 metadata columns:
seqnames ranges strand | V4 V5
<Rle> <IRanges> <Rle> | <character> <numeric>
[1] chr1 815093-817883 * | MACS_peak_1 295.76
[2] chr1 1243288-1244338 * | MACS_peak_2 63.19
[3] chr1 2979977-2981228 * | MACS_peak_3 100.16
[4] chr1 3566182-3567876 * | MACS_peak_4 558.89
[5] chr1 3816546-3818111 * | MACS_peak_5 57.57
... ... ... ... . ... ...
[1327] chrX 135244783-135245821 * | MACS_peak_1327 55.54
[1328] chrX 139171964-139173506 * | MACS_peak_1328 270.19
[1329] chrX 139583954-139586126 * | MACS_peak_1329 918.73
[1330] chrX 139592002-139593238 * | MACS_peak_1330 210.88
[1331] chrY 13845134-13845777 * | MACS_peak_1331 58.39
annotation geneChr geneStart geneEnd geneLength
<character> <integer> <integer> <integer> <integer>
[1] Promoter (2-3kb) 1 803451 812182 8732
[2] Promoter (<=1kb) 1 1243994 1247057 3064
[3] Promoter (<=1kb) 1 2976181 2980350 4170
[4] Promoter (<=1kb) 1 3547331 3566671 19341
[5] Promoter (<=1kb) 1 3816968 3832011 15044
... ... ... ... ... ...
[1327] Intron (uc010nrz.2/2.. 23 135251455 135293518 42064
[1328] Promoter (<=1kb) 23 139173826 139175070 1245
[1329] Promoter (1-2kb) 23 139585152 139587225 2074
[1330] Distal Intergenic 23 139585152 139587225 2074
[1331] Distal Intergenic 24 14517915 14533389 15475
geneStrand geneId transcriptId distanceToTSS SYMBOL GENENAME
<integer> <character> <character> <numeric> <character> <character>
[1] 2 284593 uc001abt.4 -2911 FAM41C FAM41C
[2] 1 126789 uc001aed.3 0 PUSL1 PUSL1
[3] 2 440556 uc001aka.3 0 LINC00982 LINC00982
[4] 2 49856 uc001ako.3 0 WRAP73 WRAP73
[5] 1 100133612 uc001alg.3 0 LINC01134 LINC01134
... ... ... ... ... ... ...
[1327] 1 2273 uc004ezn.2 -5634 FHL1 FHL1
[1328] 1 389895 uc031tkm.1 -320 <NA> <NA>
[1329] 2 6658 uc004fbd.1 1099 SOX3 SOX3
[1330] 2 6658 uc004fbd.1 -4777 SOX3 SOX3
[1331] 2 352887 uc022cji.1 687612 <NA> <NA>
-------
seqinfo: 24 sequences from hg19 genome
After adding the `columns` parameter, users can request specific columns from the database:
Anno1 <- annotatePeak(files[[4]], tssRegion=c(-3000, 3000),
TxDb=TxDb.Hsapiens.UCSC.hg19.knownGene,
annoDb="EnsDb.Hsapiens.v86",
columns=c("ENTREZID", "GENEID", "SYMBOL", "GENENAME"))
> Anno1@anno
GRanges object with 1331 ranges and 14 metadata columns:
seqnames ranges strand | V4 V5
<Rle> <IRanges> <Rle> | <character> <numeric>
[1] chr1 815093-817883 * | MACS_peak_1 295.76
[2] chr1 1243288-1244338 * | MACS_peak_2 63.19
[3] chr1 2979977-2981228 * | MACS_peak_3 100.16
[4] chr1 3566182-3567876 * | MACS_peak_4 558.89
[5] chr1 3816546-3818111 * | MACS_peak_5 57.57
... ... ... ... . ... ...
[1327] chrX 135244783-135245821 * | MACS_peak_1327 55.54
[1328] chrX 139171964-139173506 * | MACS_peak_1328 270.19
[1329] chrX 139583954-139586126 * | MACS_peak_1329 918.73
[1330] chrX 139592002-139593238 * | MACS_peak_1330 210.88
[1331] chrY 13845134-13845777 * | MACS_peak_1331 58.39
annotation geneChr geneStart geneEnd geneLength
<character> <integer> <integer> <integer> <integer>
[1] Promoter (2-3kb) 1 803451 812182 8732
[2] Promoter (<=1kb) 1 1243994 1247057 3064
[3] Promoter (<=1kb) 1 2976181 2980350 4170
[4] Promoter (<=1kb) 1 3547331 3566671 19341
[5] Promoter (<=1kb) 1 3816968 3832011 15044
... ... ... ... ... ...
[1327] Intron (uc010nrz.2/2.. 23 135251455 135293518 42064
[1328] Promoter (<=1kb) 23 139173826 139175070 1245
[1329] Promoter (1-2kb) 23 139585152 139587225 2074
[1330] Distal Intergenic 23 139585152 139587225 2074
[1331] Distal Intergenic 24 14517915 14533389 15475
geneStrand geneId transcriptId distanceToTSS GENEID
<integer> <character> <character> <numeric> <character>
[1] 2 284593 uc001abt.4 -2911 ENSG00000230368
[2] 1 126789 uc001aed.3 0 ENSG00000169972
[3] 2 440556 uc001aka.3 0 ENSG00000177133
[4] 2 49856 uc001ako.3 0 ENSG00000116213
[5] 1 100133612 uc001alg.3 0 ENSG00000236423
... ... ... ... ... ...
[1327] 1 2273 uc004ezn.2 -5634 ENSG00000022267
[1328] 1 389895 uc031tkm.1 -320 <NA>
[1329] 2 6658 uc004fbd.1 1099 ENSG00000134595
[1330] 2 6658 uc004fbd.1 -4777 ENSG00000134595
[1331] 2 352887 uc022cji.1 687612 <NA>
SYMBOL GENENAME
<character> <character>
[1] FAM41C FAM41C
[2] PUSL1 PUSL1
[3] LINC00982 LINC00982
[4] WRAP73 WRAP73
[5] LINC01134 LINC01134
... ... ...
[1327] FHL1 FHL1
[1328] <NA> <NA>
[1329] SOX3 SOX3
[1330] SOX3 SOX3
[1331] <NA> <NA>
-------
seqinfo: 24 sequences from hg19 genome
Here we can see the Ensembl gene IDs in the new `GENEID` column.