Error: colours encodes as numbers must be positive
I am new to computational work. I keep running into this specific error when trying to use any of the plot functions, such as PlotDMEsVolcano: Error: colours encodes as numbers must be positive. However, each of my modules are named after positive numbers and therefore their assigned color numbers are also positive. I have followed the vignettes exactly with the only changes being that I used my own dataset. I had a look online and found an answer that said "The issue is that you did not map on aesthetics but instead pass vectors to arguments. When doing so you have to pass color names or codes or a positive number to the color argument". Not sure if this is what the actual issue is and if so, then the function would need to be altered? Is there any advice that you would suggest as mapping to aesthetics didn't solve the error?
Can you please provide the code that you ran?
Yes of course! This is the entirety of my code up until the plot:
single-cell analysis package
library(Seurat)
plotting and data science packages
library(tidyverse) library(cowplot) library(patchwork)
co-expression network analysis packages:
library(WGCNA) library(hdWGCNA)
using the cowplot theme for ggplot
theme_set(theme_cowplot())
set random seed for reproducibility
set.seed(12345)
#load data Smith_microglia <- readRDS("~/WGCNA_Smith/microglia_cluster.rds")
library(readr) ##filter for protein coding genes background <- read_tsv("~/scFlow_Workshop/refs/ensembl_mappings.tsv") background_genes <- background$external_gene_name[which(background$gene_biotype=='protein_coding')]
Smith_microglia <- Smith_microglia[which(rownames(Smith_microglia) %in% background_genes),]
#Find variable genes in Smith data ######################################################################################### Smith_microglia <- NormalizeData(Smith_microglia) #Check percentage mitochondrial genes Smith_microglia[["percent.mt"]] <- PercentageFeatureSet(Smith_microglia, pattern = "^MT-") #Set cell identities of the tsne projection Idents(Smith_microglia) <- Smith_microglia$cluster_celltype
Smith_microglia <- FindVariableFeatures(Smith_microglia, selection.method = "vst", nfeatures = 2000) genes.keep <- VariableFeatures(Smith_microglia)
#seuratObj2 <- subset(Smith_microglia, subset = genes.keep) ???
#subset.seurat <- Smith_microglia@assays$originalexp@data[genes.keep, ]
#subset.seurat <- as.data.frame(GetAssayData(Smith_microglia, assay='originalexp', slot='data')[genes.keep,]) #subset.seurat <- CreateSeuratObject(subset.seurat, #metadata = metadata) #metadata <- [email protected]
#setup seurat object microglia <- SetupForWGCNA( Smith_microglia, gene_select = "variable", # the gene selection approach wgcna_name = "microglia" # the name of the hdWGCNA experiment )
construct metacells in each group
microglia <- MetacellsByGroups( seurat_obj = microglia, group.by = c("individual", "cluster_celltype"), # specify the columns in [email protected] to group by k = 40, # nearest-neighbors parameter max_shared = 10, # maximum number of shared cells between two metacells ident.group = 'cluster_celltype', reduction = "tSNE_Liger"# set the Idents of the metacell seurat object )
normalize metacell expression matrix:
microglia <- NormalizeMetacells(microglia)
#Optional: Process the metacell seurat object #metacell_obj <- GetMetacellObject(microglia)
#metacell_obj <- NormalizeMetacells(metacell_obj) #metacell_obj <- ScaleMetacells(metacell_obj, features=VariableFeatures(metacell_obj)) #metacell_obj <- RunPCAMetacells(metacell_obj, features=VariableFeatures(metacell_obj)) #metacell_obj <- RunHarmonyMetacells(metacell_obj, group.by.vars='Sample') #metacell_obj <- RunUMAPMetacells(metacell_obj, reduction='harmony', dims=1:15)
#p1 <- DimPlotMetacells(seurat_obj, group.by='cell_type') + umap_theme() + ggtitle("Cell Type") #p2 <- DimPlotMetacells(seurat_obj, group.by='Sample') + umap_theme() + ggtitle("Sample")
#p1 | p2
#co-expression network analysis
microglia <- SetDatExpr( microglia, group_name = "Micro", # the name of the group of interest in the group.by column group.by='cluster_celltype', # the metadata column containing the cell type info. This same column should have also been used in MetacellsByGroups assay = 'originalexp', # using RNA assay slot = 'data' # using normalized data )
#Test Soft Powers
Test different soft powers:
microglia<- TestSoftPowers( microglia, networkType = 'signed' # you can also use "unsigned" or "signed hybrid" )
plot the results:
plot_list <- PlotSoftPowers(microglia)
assemble with patchwork
wrap_plots(plot_list, ncol=2)
#power table is stored in seurat object and can be accessed at any time. power_table <- GetPowerTable(microglia) head(power_table)
#Construct co-expression network
construct co-expression network:
microglia <- ConstructNetwork( microglia, soft_power=6, setDatExpr=FALSE, tom_name = 'Micro', randomSeed = 12345, corType = "pearson", deepSplit=2, TOMType = "signed", minModuleSize = 10, reassignThreshold = 0, mergeCutHeight = 0.4, detectCutHeight = 0.995, numericLabels = TRUE, pamRespectsDendro = FALSE, saveTOMs = FALSE, verbose = 3, networkType="signed")# name of the topoligical overlap matrix written to disk
PlotDendrogram(microglia, main='Microglia hdWGCNA Dendrogram')
#optional to inspect TOM #TOM <- GetTOM(seurat_obj)
#Module Eigengenes and Connectivity #Harmony batch correction is applied
need to run ScaleData first or else harmony throws an error:
microglia <- ScaleData(microglia, features=VariableFeatures(microglia))
compute all MEs in the full single-cell dataset
microglia <- ModuleEigengenes( microglia, group.by.vars="individual" )
#Stores MEs as a matrix cell x module.
harmonized module eigengenes:
hMEs <- GetMEs(microglia)
module eigengenes:
MEs <- GetMEs(microglia, harmonized=FALSE)
#Compute module connectivity
often want to focus on the “hub genes”, those which are highly connected within each module.
#To determine the eigengene-based connectivity, also known as kME, of each gene. hdWGCNA includes the ModuleConnectivity to compute the kME values in the full single-cell dataset, rather than the metacell dataset. #This function essentially computes pairwise correlations between genes and module eigengenes
compute eigengene-based connectivity (kME):
microglia <- ModuleConnectivity( microglia, group.by = 'cluster_celltype', group_name = 'Micro' )
rename the modules
Also option to change colours of modules
microglia <- ResetModuleNames( microglia, new_name = "Micro-M" )
plot genes ranked by kME for each module
p <- PlotKMEs(microglia, ncol=5)
p
#Get module assignment table
get the module assignment table:
modules <- GetModules(microglia)
show the first 6 columns:
head(modules[,1:6])
get hub genes
#sorted by kME value hub_df <- GetHubGenes(microglia, n_hubs = 10)
head(hub_df)
#Extract top hub of each module hub <- hub_df %>% group_by(module) %>% filter(kME == max(kME)) %>% ungroup()
#save progress saveRDS(microglia, file='hdWGCNA_object.rds')
#Compute hub gene signature scores #Gene scoring analysis is a popular method in single-cell transcriptomics for computing a score for the overall signature of a set of genes. #Seurat implements their own gene scoring technique using the AddModuleScore function, but there are also alternative approaches such as UCell. #hdWGCNA includes the function ModuleExprScore to compute gene scores for a give number of genes for each module, using either the Seurat or UCell algorithm. #Gene scoring is an alternative way of summarizing the expression of a module from computing the module eigengene.
compute gene scoring for the top 25 hub genes by kME for each module
with Seurat method
microglia <- ModuleExprScore( microglia, n_genes = 25, method='Seurat' )
compute gene scoring for the top 25 hub genes by kME for each module
with UCell method
library(UCell) microglia <- ModuleExprScore( microglia, n_genes = 25, method='UCell' )
#Basic vsualisations
make a featureplot of hMEs for each module
plot_list1 <- ModuleFeaturePlot( microglia, features='hMEs', # plot the hMEs order=TRUE, reduction = "tSNE_Liger"# order so the points with highest hMEs are on top )
stitch together with patchwork
wrap_plots(plot_list1, ncol=6)
make a featureplot of hub scores for each module
plot_list2 <- ModuleFeaturePlot( microglia, features='scores', # plot the hub gene scores order='shuffle', # order so cells are shuffled ucell = TRUE, reduction = "tSNE_Liger"# depending on Seurat vs UCell for gene scoring )
stitch together with patchwork
wrap_plots(plot_list2, ncol=6)
#Module Correlations #hdWGCNA includes the ModuleCorrelogram function to visualize the correlation between each module based on their hMEs, MEs, or hub gene scores using the R package corrplot.
plot module correlagram
ModuleCorrelogram(microglia)
#Seurat plotting functions!!! #The base Seurat plotting functions are also great for visualizing hdWGCNA outputs. Here we demonstrate plotting hMEs using DotPlot and VlnPlot. The key to using Seurat’s plotting functions to visualize the hdWGCNA data is to add it into the Seurat object’s @meta.data slot:
get hMEs from seurat object
MEs <- GetMEs(microglia, harmonized=TRUE) mods <- colnames(MEs); mods <- mods[mods != 'grey']
add hMEs to Seurat meta-data:
[email protected] <- cbind([email protected], MEs)
#Seurat's Dotplot function
plot with Seurat's DotPlot function
dotp <- DotPlot(microglia, features=mods, group.by = 'cluster_celltype')
flip the x/y axes, rotate the axis labels, and change color scheme:
dotp <- dotp +
coord_flip() +
RotatedAxis() + scale_color_gradient2(high='red', mid='grey95', low='blue') +
plot output
dotp
Plot INH-M4 hME using Seurat VlnPlot function
p <- VlnPlot( microglia, features = '1', group.by = 'cluster_celltype', pt.size = 0 # don't show actual data points )
add box-and-whisker plots on top:
p <- p + geom_boxplot(width=.25, fill='white')
change axis labels and remove legend:
p <- p + xlab('') + ylab('hME') + NoLegend()
plot output
p
######################################################################################### #Network Visualisation
Individual module networks
ModuleNetworkPlot(microglia)
Error in col2rgb(col, alpha = TRUE) : numerical color values must be positive
################################################################################# #DME Analysis #split by diagnosis
#DONT #Add 0 and 1 for diagnosis [email protected] <- [email protected] %>% mutate_all(., as.character) %>% mutate(AD = diagnosis, Control = diagnosis, Male = sex, Female = sex) %>% mutate_at(vars("AD"), ~ ifelse(. == "AD", 1, 0)) %>% mutate_at(vars("Control"), ~ ifelse(. == "Control", 1, 0)) %>% mutate_at(vars("Male"), ~ ifelse(. == "M", 1, 0)) %>% mutate_at(vars("Female"), ~ ifelse(. == "F", 1, 0)) %>% dplyr::select(-c(diagnosis, sex, orig.ident, barcode, manifest, sequence_id, individual, brain_region, diagnosis_brain_region, individual_brain_region, is_empty_drop, is_singlet, cluster_celltype, Allan2019_ML1, Allan2019_ML2, Zeisel2018_ML4, Zeisel2018_ML5, allan_celltype, allan_layer, allan_cluster_gene_1, allan_cluster_2, cluster_celltype_pipeline, metacell_grouping)) %>% mutate_all(as.numeric)
group1 <- [email protected] %>% subset(AD == 1) %>% rownames group2 <- [email protected] %>% subset(Control == 1) %>% rownames
head(group1)
DMEs <- FindDMEs( microglia, barcodes1 = group1, barcodes2 = group2, test.use='wilcox', wgcna_name='microglia' )
head(DMEs)
PlotDMEsVolcano( microglia, DMEs, wgcna_name = 'microglia' )
Error: colours encodes as numbers must be positive