seurat ScaleData " 'qr' and 'y' must have the same number of rows"

I get the following error when trying to scale my data, and I do not know why, since I copied this code from another R workbook where it worked for me:

Code:

Removing individual object data

# Strip the derived data from one Seurat object so it can be re-processed.
#
# Applies, in the same order the original per-object statements used:
#   1. DietSeurat() on the object,
#   2. removal of the "integrated" assay, then of the "SCT" assay,
#   3. replacement of the "data" slot with a sparse matrix that has no stored
#      entries but carries the object's dimensions and dimnames.
#
# @param obj A Seurat object.
# @return The modified Seurat object.
strip_object_data <- function(obj) {
  obj <- DietSeurat(obj)
  obj[["integrated"]] <- NULL
  obj[["SCT"]] <- NULL

  # sparseMatrix() with empty i/j yields a pattern matrix; coerce it to
  # dgCMatrix before handing it to SetAssayData().
  empty_matrix <- sparseMatrix(dims = c(nrow(obj), ncol(obj)), i = {}, j = {})
  empty_matrix <- as(empty_matrix, "dgCMatrix")
  dimnames(empty_matrix) <- dimnames(obj)

  SetAssayData(obj, slot = "data", new.data = empty_matrix)
}

# NOTE(review): the DietSeurat() line of the original paste was truncated and
# only the h9* calls survived; the h6* objects are assumed to have been
# treated the same way -- confirm against the original notebook.
h6_RQC <- strip_object_data(h6_RQC)
h6crispr_RQC <- strip_object_data(h6crispr_RQC)
h6het1_RQC <- strip_object_data(h6het1_RQC)
h6het2_RQC <- strip_object_data(h6het2_RQC)
h9_RQC <- strip_object_data(h9_RQC)
h9crispr_RQC <- strip_object_data(h9crispr_RQC)
h9het1_RQC <- strip_object_data(h9het1_RQC)
h9het2_RQC <- strip_object_data(h9het2_RQC)

Creating new integrated data:

# Collect the eight per-sample objects; positions matter because the
# integration step below refers to references by list index.
sample.list <- list(h6_RQC,
                    h6crispr_RQC,
                    h6het1_RQC,
                    h6het2_RQC,
                    h9_RQC,
                    h9crispr_RQC,
                    h9het1_RQC,
                    h9het2_RQC)

# Per-sample preparation: SCTransform each object (regressing out percent.mt),
# pick 5000 integration features, prepare the list for SCT integration, and
# run a 100-component PCA on those features for every object.
sample.list <- lapply(X = sample.list, FUN = SCTransform, vars.to.regress = "percent.mt")
features <- SelectIntegrationFeatures(object.list = sample.list, nfeatures = 5000)
sample.list <- PrepSCTIntegration(object.list = sample.list, anchor.features = features)
sample.list <- lapply(X = sample.list, FUN = RunPCA, features = features, npcs = 100)

# Find anchors with reduction = "rpca", using list elements 1 and 5
# (h6_RQC and h9_RQC) as references.
anchors_trial <- FindIntegrationAnchors(object.list = sample.list, reference = c(1, 5), scale = FALSE, normalization.method = "SCT", reduction = "rpca", dims = 1:100, anchor.features = features, k.anchor = 25)

# Integrate, then compute PCA (100 components) and UMAP (first 75 dims) on the
# integrated object.
RNA_Integ <- IntegrateData(anchorset = anchors_trial, normalization.method = "SCT", dims = 1:100)
RNA_Integ <- RunPCA(RNA_Integ, npcs = 100)
RNA_Integ <- RunUMAP(RNA_Integ, dims = 1:75)

# Neighbour graph and clustering; the call on the next line only prints the
# current default assay as a manual check.
DefaultAssay(object = RNA_Integ) # make sure it is integrated not SCT or RNA
RNA_Integ <- FindNeighbors(RNA_Integ, dims = 1:75)#, n.trees = 100, k.param = 25) #uncomment for finer clustering
RNA_Integ <- FindClusters(RNA_Integ, method = "igraph", random.seed = 10)#,  resolution = 2

# Switch to the RNA assay, normalise, select 5000 variable features, and scale
# every gene while regressing out percent.mt and Cell_Line.
# The final ScaleData() call is the one reported to fail with
# "'qr' and 'y' must have the same number of rows".
DefaultAssay(RNA_Integ) <- "RNA"
RNA_Integ <- NormalizeData(RNA_Integ)
RNA_Integ <- FindVariableFeatures(RNA_Integ, nfeatures = 5000)
all.genes <- rownames(RNA_Integ)
RNA_Integ <- ScaleData(RNA_Integ, features = all.genes, vars.to.regress = c("percent.mt", "Cell_Line"), scale.max = 100, block.size = 2000)

On scale data, I get the error:

Regressing out percent.mt, Cell_Line
  |                                                                                                                                       |   0%Error in qr.resid(qr = qr, y = data.expr[x, ]) : 
  'qr' and 'y' must have the same number of rows

I saw Issue #4552, and I have no NA or NULL counts:

> sum(is.null(RNA_Integ$nCount_RNA))
[1] 0
> sum(is.na(RNA_Integ$nCount_RNA))
[1] 0
> sum(is.null(RNA_Integ$nFeature_RNA))
[1] 0
> sum(is.na(RNA_Integ$nFeature_RNA))
[1] 0

The issue comes up regardless of which metadata variables I try to regress out.

Traceback

Error in qr.resid(qr = qr, y = data.expr[x, ]) : 'qr' and 'y' must have the same number of rows
11.
stop("'qr' and 'y' must have the same number of rows")
10.
qr.resid(qr = qr, y = data.expr[x, ])
9.
RegressOutMatrix(data.expr = object[, split.cells[[x]], drop = FALSE], latent.data = latent.data[split.cells[[x]], , drop = FALSE], features.regress = features, model.use = model.use, use.umi = use.umi, verbose = verbose)
8.
FUN(X[[i]], ...)
7.
lapply(X = names(x = split.cells), FUN = function(x) { if (verbose && length(x = split.cells) > 1) { message("Regressing out variables from split ", x) } ...
6.
ScaleData.default(object = ldata, features = features, vars.to.regress = vars.to.regress, latent.data = latent.data, split.by = split.by, model.use = model.use, use.umi = use.umi, do.scale = do.scale, do.center = do.center, scale.max = scale.max, block.size = block.size, min.cells.to.block = min.cells.to.block, ...
5.
ScaleData(object = ldata, features = features, vars.to.regress = vars.to.regress, latent.data = latent.data, split.by = split.by, model.use = model.use, use.umi = use.umi, do.scale = do.scale, do.center = do.center, scale.max = scale.max, block.size = block.size, min.cells.to.block = min.cells.to.block, ...
4.
ScaleData.StdAssay(object = object[[assay]], features = features, vars.to.regress = vars.to.regress, latent.data = latent.data, split.by = split.by, model.use = model.use, use.umi = use.umi, do.scale = do.scale, do.center = do.center, scale.max = scale.max, ...
3.
ScaleData(object = object[[assay]], features = features, vars.to.regress = vars.to.regress, latent.data = latent.data, split.by = split.by, model.use = model.use, use.umi = use.umi, do.scale = do.scale, do.center = do.center, scale.max = scale.max, block.size = block.size, min.cells.to.block = min.cells.to.block, ...
2.
ScaleData.Seurat(RNA_Integ, features = all.genes, vars.to.regress = c("percent.mt", "Cell_Line"), scale.max = 100, block.size = 2000)
1.
ScaleData(RNA_Integ, features = all.genes, vars.to.regress = c("percent.mt", "Cell_Line"), scale.max = 100, block.size = 2000)

Mar 18 '24 13:03 AAA-3

You might have zero counts. Check

which(RNA_Integ$nCount_RNA == 0)

Apr 15 '24 16:04 gvogler

The issue is indeed most likely that some cells (or features) have 0 counts for any of the integrated features. We also typically do not recommend regressing out metadata categories like Cell_Line; does removing it from `vars.to.regress` fix your issue?

Closing this for the moment as I expect this will solve the issue here.

Jun 24 '24 14:06 rsatija

seurat seurat copied to clipboard

ScaleData " 'qr' and 'y' must have the same number of rows"

seurat
seurat copied to clipboard