datahub icon indicating copy to clipboard operation
datahub copied to clipboard

IMPACT Genes Differ Between Institutional Instances and Panel IDs

Open karissawhiting opened this issue 1 year ago • 0 comments

Included IMPACT genes differ depending on whether you pull panel ID 'MSK-IMPACT***' or 'IMPACT***' and also whether you are connected to public cBioPortal, internal MSK cBioPortal or internal GENIE cBioPortal.

The differences seem mostly attributable to older gene aliases being used in the internal versions. Additionally, this is difficult to reconcile in a systematic way in analyses because gene alias lookup is not bi-directional.

Are there plans to reconcile these? Which is the accepted version? From what I can tell, the external cBioPortal (panel ID 'IMPACT***') seems to use the newer aliases but I'm not sure.

Thanks for your work on cBioPortal!

library(cbioportalR)

# MSK Database -------------------------------------------------------

# Connect to https://cbioportal.mskcc.org/
# NOTE: `<your-msk-token>` is a placeholder, not valid R syntax; replace it
# with your own token as a quoted string before running this line.
Sys.setenv(CBIOPORTAL_TOKEN= <your-msk-token>)
set_cbioportal_db("msk")
#> ✔ You are successfully connected!
#> ✔ base_url for this R session is now set to "cbioportal.mskcc.org/api"

# Fetch the panel from the internal MSK instance under both panel ID
# spellings so that the two gene lists can be compared.
internal_msk <- get_gene_panel(panel_id = "MSK-IMPACT410")
internal_no_msk <- get_gene_panel(panel_id = "IMPACT410")

# Symbols listed only under "MSK-IMPACT410"
setdiff(internal_msk$hugoGeneSymbol, internal_no_msk$hugoGeneSymbol)
#> [1] "PAK6"
# Symbols listed only under "IMPACT410"
setdiff(internal_no_msk$hugoGeneSymbol, internal_msk$hugoGeneSymbol)
#> [1] "CDKN2AP14ARF"   "CDKN2AP16INK4A" "PAK5"

# These appear to be aliases of each other, but the alias API is not bi-directional (related to another issue filed), so they are not easy to reconcile.
# In the output below, looking up PAK6 lists PAK5 as an alias, whereas looking
# up PAK5 lists only PAK5 and PAK7 (PAK6 is not returned).
get_alias("PAK5")
#> # A tibble: 2 × 2
#>   hugo_symbol alias
#>   <chr>       <chr>
#> 1 PAK5        PAK5 
#> 2 PAK5        PAK7
get_alias("PAK6")
#> # A tibble: 1 × 2
#>   hugo_symbol alias
#>   <chr>       <chr>
#> 1 PAK6        PAK5


# Public Database -------------------------------------------------------

# Connect to https://www.cbioportal.org/
set_cbioportal_db("public")
#> ✔ You are successfully connected!
#> ✔ base_url for this R session is now set to "www.cbioportal.org/api"

# Same panel ID as `internal_no_msk`, but fetched from the public instance.
external_no_msk <- get_gene_panel(panel_id = "IMPACT410")

# Symbols returned only by the public instance for "IMPACT410"
setdiff(external_no_msk$hugoGeneSymbol, internal_no_msk$hugoGeneSymbol)
#>  [1] "H1-2"  "H2BC5" "H3-3A" "H3-3B" "H3-4"  "H3-5"  "H3C1"  "H3C10" "H3C11"
#> [10] "H3C12" "H3C13" "H3C14" "H3C2"  "H3C3"  "H3C4"  "H3C6"  "H3C7"  "H3C8"
# Symbols returned only by the internal MSK instance for "IMPACT410"
# NOTE(review): the HIST*/H3F3* names here presumably are older symbols for
# the histone genes listed above -- confirm against the alias lookup.
setdiff(internal_no_msk$hugoGeneSymbol, external_no_msk$hugoGeneSymbol)
#>  [1] "CDKN2AP14ARF"   "CDKN2AP16INK4A" "H3F3A"          "H3F3B"         
#>  [5] "H3F3C"          "HIST1H1C"       "HIST1H2BD"      "HIST1H3A"      
#>  [9] "HIST1H3B"       "HIST1H3C"       "HIST1H3D"       "HIST1H3E"      
#> [13] "HIST1H3F"       "HIST1H3G"       "HIST1H3H"       "HIST1H3I"      
#> [17] "HIST1H3J"       "HIST2H3C"       "HIST2H3D"       "HIST3H3"

# Genie Database -------------------------------------------------------

# Connect to https://genie.cbioportal.org/
# NOTE: `<your-genie-token>` is a placeholder, not valid R syntax; replace it
# with your own token as a quoted string before running this line.
Sys.setenv(CBIOPORTAL_TOKEN= <your-genie-token>)
# Here the database is given as an API URL rather than a short name.
set_cbioportal_db("genie.cbioportal.org/api")
#> ✔ You are successfully connected!
#> ✔ base_url for this R session is now set to "genie.cbioportal.org/api"

genie_msk <- get_gene_panel(panel_id = "MSK-IMPACT410")

# GENIE "MSK-IMPACT410" vs. internal MSK "MSK-IMPACT410":
# each side has one symbol the other lacks (PAK5 vs. PAK6).
setdiff(genie_msk$hugoGeneSymbol, internal_msk$hugoGeneSymbol)
#> [1] "PAK5"
setdiff(internal_msk$hugoGeneSymbol, genie_msk$hugoGeneSymbol)
#> [1] "PAK6"

# GENIE "MSK-IMPACT410" vs. internal MSK "IMPACT410":
# GENIE has no extra symbols; the internal panel has two extra CDKN2A entries.
setdiff(genie_msk$hugoGeneSymbol, internal_no_msk$hugoGeneSymbol)
#> character(0)
setdiff(internal_no_msk$hugoGeneSymbol, genie_msk$hugoGeneSymbol)
#> [1] "CDKN2AP14ARF"   "CDKN2AP16INK4A"

# GENIE "MSK-IMPACT410" vs. public "IMPACT410":
# the two differ only in the histone gene symbols listed below.
setdiff(genie_msk$hugoGeneSymbol, external_no_msk$hugoGeneSymbol)
#>  [1] "H3F3A"     "H3F3B"     "H3F3C"     "HIST1H1C"  "HIST1H2BD" "HIST1H3A" 
#>  [7] "HIST1H3B"  "HIST1H3C"  "HIST1H3D"  "HIST1H3E"  "HIST1H3F"  "HIST1H3G" 
#> [13] "HIST1H3H"  "HIST1H3I"  "HIST1H3J"  "HIST2H3C"  "HIST2H3D"  "HIST3H3"
setdiff(external_no_msk$hugoGeneSymbol, genie_msk$hugoGeneSymbol)
#>  [1] "H1-2"  "H2BC5" "H3-3A" "H3-3B" "H3-4"  "H3-5"  "H3C1"  "H3C10" "H3C11"
#> [10] "H3C12" "H3C13" "H3C14" "H3C2"  "H3C3"  "H3C4"  "H3C6"  "H3C7"  "H3C8"

karissawhiting avatar Jun 01 '23 20:06 karissawhiting