datahub
datahub copied to clipboard
IMPACT Genes Differ Between Institutional Instances and Panel IDs
The genes included in an IMPACT panel differ depending on whether you pull panel ID 'MSK-IMPACT***' or 'IMPACT***', and also on whether you are connected to the public cBioPortal, the internal MSK cBioPortal, or the internal GENIE cBioPortal.
The differences seem mostly attributable to older gene aliases being used in the internal versions. Additionally, they are difficult to reconcile systematically in analyses because gene alias lookup is not bi-directional.
Are there plans to reconcile these? Which is the accepted version? From what I can tell, the external cBioPortal (panel ID 'IMPACT***') seems to use the newer aliases, but I'm not sure.
Thanks for your work on cBioPortal!
library(cbioportalR)
# MSK Database -------------------------------------------------------
# Connect to https://cbioportal.mskcc.org/
# Replace the placeholder string with your own MSK access token. It is quoted
# so that the script parses as valid R before the substitution is made.
Sys.setenv(CBIOPORTAL_TOKEN = "<your-msk-token>")
set_cbioportal_db("msk")
#> ✔ You are successfully connected!
#> ✔ base_url for this R session is now set to "cbioportal.mskcc.org/api"

# Pull the same 410-gene panel under both panel ID spellings from the
# internal MSK instance.
internal_msk <- get_gene_panel(panel_id = "MSK-IMPACT410")
internal_no_msk <- get_gene_panel(panel_id = "IMPACT410")

# Symbols present only under the "MSK-IMPACT410" ID
setdiff(internal_msk$hugoGeneSymbol, internal_no_msk$hugoGeneSymbol)
#> [1] "PAK6"

# Symbols present only under the "IMPACT410" ID
setdiff(internal_no_msk$hugoGeneSymbol, internal_msk$hugoGeneSymbol)
#> [1] "CDKN2AP14ARF" "CDKN2AP16INK4A" "PAK5"

# These appear to be aliases of each other, but the alias API is not
# bi-directional (related to another issue filed), so they are not easy to
# reconcile: looking up PAK6 returns PAK5 as an alias, whereas looking up
# PAK5 does not return PAK6.
get_alias("PAK5")
#> # A tibble: 2 × 2
#> hugo_symbol alias
#> <chr> <chr>
#> 1 PAK5 PAK5
#> 2 PAK5 PAK7
get_alias("PAK6")
#> # A tibble: 1 × 2
#> hugo_symbol alias
#> <chr> <chr>
#> 1 PAK6 PAK5
# Public Database -------------------------------------------------------
# Switch this session over to the public instance: https://www.cbioportal.org/
set_cbioportal_db("public")
#> ✔ You are successfully connected!
#> ✔ base_url for this R session is now set to "www.cbioportal.org/api"

# Fetch the panel under the "IMPACT410" ID from the public instance so it can
# be compared with the same ID pulled from the internal MSK instance.
external_no_msk <- get_gene_panel(panel_id = "IMPACT410")

# Symbols listed by the public instance but not by the internal one
setdiff(
  external_no_msk[["hugoGeneSymbol"]],
  internal_no_msk[["hugoGeneSymbol"]]
)
#> [1] "H1-2" "H2BC5" "H3-3A" "H3-3B" "H3-4" "H3-5" "H3C1" "H3C10" "H3C11"
#> [10] "H3C12" "H3C13" "H3C14" "H3C2" "H3C3" "H3C4" "H3C6" "H3C7" "H3C8"

# Symbols listed by the internal instance but not by the public one
setdiff(
  internal_no_msk[["hugoGeneSymbol"]],
  external_no_msk[["hugoGeneSymbol"]]
)
#> [1] "CDKN2AP14ARF" "CDKN2AP16INK4A" "H3F3A" "H3F3B"
#> [5] "H3F3C" "HIST1H1C" "HIST1H2BD" "HIST1H3A"
#> [9] "HIST1H3B" "HIST1H3C" "HIST1H3D" "HIST1H3E"
#> [13] "HIST1H3F" "HIST1H3G" "HIST1H3H" "HIST1H3I"
#> [17] "HIST1H3J" "HIST2H3C" "HIST2H3D" "HIST3H3"
# Genie Database -------------------------------------------------------
# Connect to https://genie.cbioportal.org/
# Replace the placeholder string with your own GENIE access token. It is
# quoted so that the script parses as valid R before the substitution is made.
Sys.setenv(CBIOPORTAL_TOKEN = "<your-genie-token>")
set_cbioportal_db("genie.cbioportal.org/api")
#> ✔ You are successfully connected!
#> ✔ base_url for this R session is now set to "genie.cbioportal.org/api"
genie_msk <- get_gene_panel(panel_id = "MSK-IMPACT410")

# GENIE vs. internal MSK, both under the "MSK-IMPACT410" ID:
# the only difference is PAK5 (GENIE) vs. PAK6 (internal MSK).
setdiff(genie_msk$hugoGeneSymbol, internal_msk$hugoGeneSymbol)
#> [1] "PAK5"
setdiff(internal_msk$hugoGeneSymbol, genie_msk$hugoGeneSymbol)
#> [1] "PAK6"

# GENIE "MSK-IMPACT410" vs. internal MSK "IMPACT410":
# the internal panel additionally lists the two CDKN2A* entries.
setdiff(genie_msk$hugoGeneSymbol, internal_no_msk$hugoGeneSymbol)
#> character(0)
setdiff(internal_no_msk$hugoGeneSymbol, genie_msk$hugoGeneSymbol)
#> [1] "CDKN2AP14ARF" "CDKN2AP16INK4A"

# GENIE "MSK-IMPACT410" vs. public "IMPACT410":
# 18 symbols differ in each direction (HIST*/H3F3* on the GENIE side,
# H1-*/H2BC*/H3C*/H3-* on the public side) -- presumably older vs. newer
# names for the same genes; confirm against the alias lookup.
setdiff(genie_msk$hugoGeneSymbol, external_no_msk$hugoGeneSymbol)
#> [1] "H3F3A" "H3F3B" "H3F3C" "HIST1H1C" "HIST1H2BD" "HIST1H3A"
#> [7] "HIST1H3B" "HIST1H3C" "HIST1H3D" "HIST1H3E" "HIST1H3F" "HIST1H3G"
#> [13] "HIST1H3H" "HIST1H3I" "HIST1H3J" "HIST2H3C" "HIST2H3D" "HIST3H3"
setdiff(external_no_msk$hugoGeneSymbol, genie_msk$hugoGeneSymbol)
#> [1] "H1-2" "H2BC5" "H3-3A" "H3-3B" "H3-4" "H3-5" "H3C1" "H3C10" "H3C11"
#> [10] "H3C12" "H3C13" "H3C14" "H3C2" "H3C3" "H3C4" "H3C6" "H3C7" "H3C8"