seurat
seurat copied to clipboard
CellCycleScoring gives error when subsetting object and then applying SCTransform
I am subsetting cells from a big Seurat object
t.cells.filt = subset(x = all.cells, cells = t.cell.names.to.recluster)
dim(t.cells.filt)
[1] 16116 753
object.list = SplitObject(object = t.cells.filt, split.by = "orig.ident")
object.list = lapply(X = object.list, FUN = SCTransform, vars.to.regress = "percent.mito", method = "glmGamPoi", conserve.memory = TRUE, vst.flavor = "v2", verbose = TRUE)
features = SelectIntegrationFeatures(object.list = object.list, nfeatures = 3000)
object = MergeSeuratObjects(object.list)
VariableFeatures(object = object) = features
object = RunPCA(object, nfeatures.print = 10, verbose = TRUE, reduction.name = "sct.pca")
cell.cycle.genes = readLines(con = "path/to/cell_cycle_genes.txt")
s.features = cell.cycle.genes[1:49]
g2m.features = cell.cycle.genes[50:105]
> s.features
[1] "Mcm5" "Pcna" "Tyms" "Fen1" "Mchm2" "Mcm2" "Mcm7" "Mcm4" "Rrm1" "Ung" "Gins2" "Mcm6" "Cdca7" "Tnfsf13b" "Dtl" "Prim1"
[17] "Uhrf1" "Cenpu" "Hells" "Rfc2" "Polr1b" "Rpa2" "Nasp" "Rad51ap1" "Gmnn" "Wdr76" "Slbp" "Ccne2" "Ubr7" "Pold3" "Msh2" "Atad2"
[33] "Rad51" "Rrm2" "Cdc45" "Cdc6" "Exo1" "Tipin" "Dscc1" "Blm" "Casp8ap2" "Usp1" "Clspn" "Pola1" "Chaf1b" "Mrpl36" "Brip1" "E2f8"
[49] "Hmgb2"
> g2m.features
[1] "Cdk1" "Nusap1" "Ube2c" "Birc5" "Dazl" "Tpx2" "Top2a" "Ndc80" "Cks2" "Nuf2" "Cks1b" "Mki67" "Tmpo" "Cenpf" "Tacc3" "Pimreg" "Smc4" "Ccnb2"
[19] "Ckap2l" "Ckap2" "Aurkb" "Bub1" "Kif11" "Anp32e" "Tubb4b" "Gtse1" "Kif20b" "Hjurp" "Cdca3" "Notch1" "Jpt1" "Cdc20" "Ttk" "Ttbk2" "Cdc25c" "Kif2c"
[37] "Rangap1" "Ncapd2" "Dlgap5" "Cdca2" "Cdca8" "Ect2" "Kif23" "Hmmr" "Aurka" "Psrc1" "Anln" "Lbr" "Ckap5" "Cenpe" "Ctcf" "Nek2" "G2e3" "Gas2l3"
[55] "Cbx5" "Cenpa"
object = CellCycleScoring(object = object, s.features = s.features, g2m.features = g2m.features, set.ident = TRUE)
I get the following error -
Warning: The following features are not present in the object: Mchm2, Ung, Tnfsf13b, Cenpu, Polr1b, Wdr76, Ccne2, Cdc6, Exo1, Blm, Brip1, E2f8, not searching for symbol synonyms
Warning: The following features are not present in the object: Dazl, Nuf2, Pimreg, Bub1, Gtse1, Kif20b, Ttk, Cdc25c, Kif2c, Dlgap5, Ect2, Psrc1, Nek2, Gas2l3, not searching for symbol synonyms
Error in `cut_number()`:
! Insufficient data values to produce 24 bins.
Run `rlang::last_error()` to see where the error occurred.
I looked at #1227, but setting min.cells = 1 is irrelevant here, since I am subsetting an existing object rather than creating a new one.
Also, most of the cell cycle genes exist within the object, so I do not understand why I am getting this error -
> s.features %in% rownames(object[["SCT"]][])
[1] TRUE TRUE TRUE TRUE FALSE TRUE TRUE TRUE TRUE FALSE TRUE TRUE TRUE FALSE TRUE TRUE TRUE FALSE TRUE TRUE FALSE TRUE TRUE TRUE TRUE FALSE TRUE FALSE TRUE TRUE
[31] TRUE TRUE TRUE TRUE TRUE FALSE FALSE TRUE TRUE FALSE TRUE TRUE TRUE TRUE TRUE TRUE FALSE FALSE TRUE
> g2m.features %in% rownames(object[["SCT"]][])
[1] TRUE TRUE TRUE TRUE FALSE TRUE TRUE TRUE TRUE FALSE TRUE TRUE TRUE TRUE TRUE FALSE TRUE TRUE TRUE TRUE TRUE FALSE TRUE TRUE TRUE FALSE FALSE TRUE TRUE TRUE
[31] TRUE TRUE FALSE TRUE FALSE FALSE TRUE TRUE FALSE TRUE TRUE FALSE TRUE TRUE TRUE FALSE TRUE TRUE TRUE TRUE TRUE FALSE TRUE FALSE TRUE TRUE
When I use a different subset of cells, everything works well, but with a third subset of cells I get a different error -
Warning: The following features are not present in the object: Mchm2, Ung, Dtl, Prim1, Cenpu, Polr1b, Rad51ap1, Wdr76, Ccne2, Cdc45, Cdc6, Exo1, Dscc1, Clspn, Chaf1b, Brip1, E2f8, not searching for symbol synonyms
Warning: The following features are not present in the object: Dazl, Ndc80, Nuf2, Pimreg, Ckap2, Bub1, Gtse1, Kif20b, Ttk, Ttbk2, Cdc25c, Kif2c, Ncapd2, Dlgap5, Cdca2, Hmmr, Aurka, Psrc1, Nek2, not searching for symbol synonyms
Error in sample.int(length(x), size, replace, prob) :
cannot take a sample larger than the population when 'replace = FALSE'
All subsets are of the same size (~750 cells)
Any help would be much appreciated
Gil
Hi,
Could you try running CellCycleScoring before you subset out your cells of interest? As the other issues mention, this function does not work well when many of the genes in your object have very low counts. This might be happening here because your subsetted object contains only a small number of cells.
I moved the CellCycleScoring step to an early stage after I filter the cells and it works correctly as expected. Then when I subset the cells the values remain. Thanks.