clusterProfiler
clusterProfiler copied to clipboard
Add ssGSEA into clusterProfiler
I have used GSVA and GenePattern or GenePattern for ssGSEA, GSVA didn't satisfy all need, it cannot compute the normalized enrichment scores using permutations, and ssGSEA in Gene Pattern isn't convenient in R, I have wrote some code based on GSVA, GenePattern and clusterProfiler, but it's too slow and inconvenient. Could u bring simily function with more efficient in clusterProfiler?
my slowly code is below(much based on clusterProfiler)
### AUR for area.under.RES, KS for Kolmogorov-Smirnov
gsea <- function(gene_list, gene_set, weight, statistic = c("AUR", "KS")){
###################################################################
## geneList ##
## ##
## 1. Rank order the N genes in D to form L = { g_1, ... , g_N} ##
## according to the correlation, r(g_j)=r_j, ##
## of their expression profiles with C. ##
## ##
###################################################################
###################################################################
## exponent ##
## ##
## An weight p to control the weight of the step. ##
## When p = 0, Enrichment Score ( ES(S) ) reduces to ##
## the standard Kolmogorov-Smirnov statistic. ##
## When p = 1, we are weighting the genes in S ##
## by their correlation with C normalized ##
## by the sum of the correlations over all of the genes in S. ##
## ##
###################################################################
## genes defined in gene_set should appear in gene_list
## this is a must, see https://github.com/GuangchuangYu/DOSE/issues/23
gene_set <- intersect(gene_set, names(gene_list))
N <- length(gene_list)
Nh <- length(gene_set)
Phit <- Pmiss <- numeric(N)
hits <- names(gene_list) %in% gene_set ## logical
gene_list <- scale(gene_list, center = TRUE, scale = TRUE)
Phit[hits] <- abs(gene_list[hits])^weight
NR <- sum(Phit)
Phit <- cumsum(Phit/NR)
Pmiss[!hits] <- 1/(N-Nh)
Pmiss <- cumsum(Pmiss)
runningES <- Phit - Pmiss
if (statistic == "KS") {
## ES is the maximum deviation from zero of Phit-Pmiss
max.ES <- max(runningES)
min.ES <- min(runningES)
if( abs(max.ES) > abs(min.ES) ) {
ES <- max.ES
} else {
ES <- min.ES
}
}
if (statistic == "AUR") {
# calculates the area under RES by adding up areas of individual
# triangles + rectangles
miss_auc <- 0.5 * ( Pmiss - c(0, Pmiss[-N]) ) + c(0, Pmiss[-N])
hit_auc <- 0.5 * ( Phit - c(0, Phit[-N]) ) + c(0, Phit[-N])
ES <- sum(hit_auc) - sum(miss_auc)
}
df <- data.frame(x=seq_along(runningES),
runningScore=runningES,
position = hits
)
df$gene = names(gene_list)
res <- list(ES=ES, runningES = df)
return(res)
}
perm_gene_set <- function(gene_list) {
perm.idx <- sample.int(length(gene_list), replace = FALSE)
perm_gene_set <- gene_list
names(perm_gene_set) <- names(gene_list)[perm.idx]
return(perm_gene_set)
}
perm_gsea_es <- function(gene_list, gene_set, weight, statistic) {
gene_list <- perm_gene_set(gene_list)
res <- gsea(
gene_list = gene_list,
gene_set = gene_set,
weight = weight,
statistic = statistic
)$ES
return(res)
}
project_to_geneset <- function(data_matrix, gene_set_list, weight, statistic, NES, nPerm, BPPARAM, verbose){
gene_name <- rownames(data_matrix)
message("running ssGSEA ", "using ", statistic, sep = "")
es_res <- BiocParallel::bplapply(1:ncol(data_matrix), function(j) {
ssgsea_data <- data_matrix[, j, drop = TRUE]
names(ssgsea_data) <- gene_name
ssgsea_data <- sort(ssgsea_data, decreasing = TRUE)
if (verbose) {
message_nes <- ""
if (NES) message_nes <- ", NES and p-values"
message(
"calculating ES", message_nes, " for ",
j, "th sample ", colnames(data_matrix)[j]
)
}
enrich_score <- lapply(gene_set_list, function(gene_set){
es <- gsea(
gene_list = ssgsea_data,
gene_set = gene_set,
weight = weight,
statistic = statistic
)$ES
if (NES) {
perm_scores <- lapply(1:nPerm, function(i) {
perm_gsea_es(
gene_list = ssgsea_data,
gene_set = gene_set,
weight = weight,
statistic = statistic
)
})
perm_scores <- unlist(perm_scores)
pos_m <- mean(perm_scores[perm_scores > 0])
neg_m <- abs(mean(perm_scores[perm_scores < 0]))
if (sign(es) == 1 || sign(es) == 0) m <- pos_m
if (sign(es) == -1) m <- neg_m
NES <- es/m
if( is.na(NES) ) {
pvals <- NA
} else if ( NES >= 0 ) {
pvals <- (sum(perm_scores >= NES) +1) / (sum(perm_scores >= 0)+1)
} else { # NES < 0
pvals <- (sum(perm_scores <= NES) +1) / (sum(perm_scores < 0)+1)
}
return(list(ES = es, NES = NES, P.values = pvals))
}
return(list(ES = es))
})
return(enrich_score)
}, BPPARAM = BPPARAM)
names(es_res) <- colnames(data_matrix)
ES <- lapply(es_res, function(smp) {
sample_es_vector <- lapply(smp, function(gene_set) gene_set$ES)
return(unlist(sample_es_vector, use.names = TRUE))
})
ES <- do.call("cbind", ES)
res <- list(ES = ES)
if (NES) {
NES <- lapply(es_res, function(smp) {
sample_nes_vector <- lapply(smp, function(gene_set) gene_set$NES)
return(unlist(sample_nes_vector, use.names = TRUE))
})
NES <- do.call("cbind", NES)
P.values <- lapply(es_res, function(smp) {
sample_pva_vector <- lapply(smp, function(gene_set) {
return(gene_set$P.values)
})
return(unlist(sample_pva_vector, use.names = TRUE))
})
P.values <- do.call("cbind", P.values)
res$NES <- NES
res$P.values <- P.values
}
return(res)
} # end of project_to_geneset
```
Y叔,习惯用clusterProfiler做富集!希望也能单样本!!
单样本我也希望能够做, @huerqiang
谢谢Y叔,谢谢您 @huerqiang