clusterProfiler
clusterProfiler copied to clipboard
Allow simplify() to work on enricher() results when using the GO collection from MSigDB
So I'm trying to run enricher() on both the MSigDB Hallmark set and the C5 (GO) set, by doing the following:
# Hallmark (H) term-to-gene table: one row per gene set / Entrez gene pair.
m_t2g <- dplyr::distinct(
  msigdbr::msigdbr(species = "Homo sapiens", category = "H"),
  gs_name, entrez_gene
)

# Same table for the full C5 collection.
go_m_t2g <- dplyr::distinct(
  msigdbr::msigdbr(species = "Homo sapiens", category = "C5"),
  gs_name, entrez_gene
)

# Stack both collections into a single TERM2GENE table and test the names of
# enrich_ready against it.
m_t2g <- bind_rows(m_t2g, go_m_t2g)
enriched <- enricher(names(enrich_ready), TERM2GENE = m_t2g)
However, this leads to a lot of redundant GO terms in the enrichResult object. While simplify() can be applied to enrichGO() results, it cannot be applied to enricher() results, even when the gene sets come from the GO ontology. Would it be possible to add this functionality?
Alternatively, do you know of any way to filter out the redundancy in the msigdbr C5 (GO) selection beforehand?
So I think I got it working with a workaround:
# Hallmark (H) term-to-gene table: one row per gene set / Entrez gene pair.
m_t2g <- msigdbr::msigdbr(species = "Homo sapiens", category = "H") %>%
  dplyr::select(gs_name, entrez_gene) %>%
  dplyr::distinct(gs_name, entrez_gene)

# Full C5 table with the HPO sub-collection removed. It is loaded once and
# kept with all of its columns because gs_subcat and gs_exact_source are
# needed later to map enricher() rows back to GO ontologies and GO IDs.
go_jointable <- msigdbr::msigdbr(species = "Homo sapiens", category = "C5") %>%
  dplyr::filter(gs_subcat != "HPO")

# Term-to-gene table for the GO sets, derived from the table loaded above
# instead of querying msigdbr a second time.
go_m_t2g <- go_jointable %>%
  dplyr::select(gs_name, entrez_gene) %>%
  dplyr::distinct(gs_name, entrez_gene)

# Combined Hallmark + GO TERM2GENE table.
m_t2g <- bind_rows(m_t2g, go_m_t2g)
then
# Run the enrichment against the combined Hallmark + GO TERM2GENE table.
enriched <- enricher(names(enrich_ready), TERM2GENE = m_t2g)

# Attach the MSigDB metadata of the matching gene set to every result row.
# Rows whose ID has no match in go_jointable (the Hallmark sets) get NA in
# the three added columns.
enricher_result <- enriched@result
enricher_result <- enricher_result %>%
  mutate(
    go_jointable[
      match(enricher_result$ID, go_jointable$gs_name),
      c("gs_name", "gs_subcat", "gs_exact_source")
    ]
  )

# Hallmark rows: no GO metadata matched, so drop the helper columns again.
hallmark_result <- filter(enricher_result, is.na(gs_name)) %>%
  select(!c("gs_name", "gs_subcat", "gs_exact_source"))

# GO rows: take the ontology from the part of gs_subcat after the colon and
# replace the MSigDB set name in ID with the value of gs_exact_source.
go_result <- filter(enricher_result, !is.na(gs_name))
go_result <- mutate(
  go_result,
  ONTOLOGY = str_split(go_result$gs_subcat, ":", simplify = TRUE)[, 2]
)
go_result$ID <- go_result$gs_exact_source
go_result <- go_result %>%
  select(c(
    "ONTOLOGY", "ID", "Description", "GeneRatio", "BgRatio",
    "pvalue", "p.adjust", "qvalue", "geneID", "Count"
  ))
rownames(go_result) <- go_result$ID

go_result_bp <- filter(go_result, ONTOLOGY == "BP")
go_result_mf <- filter(go_result, ONTOLOGY == "MF")
go_result_cc <- filter(go_result, ONTOLOGY == "CC")

enriched@keytype <- "ENTREZID"
enriched@organism <- "Homo sapiens"

# Run simplify() on the GO terms of one ontology and return the reduced table.
# simplify() takes the ontology from the object's @ontology slot, so the slot
# must match the terms placed in @result; the original left it at "BP" for the
# MF and CC passes as well. `template` is modified as a local copy only.
simplify_go_terms <- function(template, go_terms, ont) {
  if (nrow(go_terms) == 0) {
    # Nothing to simplify; skip the similarity step for an empty table.
    return(go_terms)
  }
  template@ontology <- ont
  template@result <- go_terms
  simplify(template)@result
}

simplified_bp <- simplify_go_terms(enriched, go_result_bp, "BP")
simplified_mf <- simplify_go_terms(enriched, go_result_mf, "MF")
simplified_cc <- simplify_go_terms(enriched, go_result_cc, "CC")

# Recombine the three ontologies and restore the enricher()-style ID (the set
# description) so the rows line up with the Hallmark rows.
total_simplified <- bind_rows(simplified_bp, simplified_mf, simplified_cc) %>%
  select(!ONTOLOGY) %>%
  mutate(ID = Description)
rownames(total_simplified) <- total_simplified$ID
total_simplified <- bind_rows(total_simplified, hallmark_result)

# Store the merged table back in the enrichResult, ordered by adjusted p-value.
# The merged table mixes Hallmark sets with all three GO ontologies, so the
# @ontology slot of `enriched` is left as enricher() set it.
enriched@result <- total_simplified[order(total_simplified$p.adjust), ]