CATALYST
CATALYST copied to clipboard
Normalizing runs together and Debarcoding separately
We have seven runs of an experiment. We want each run to be debarcoded separately (so they are essentially replicates of each other) but normalized together (so they are comparable).
It works when we normalize and debarcode together; however, if we write the data to .fcs files after normalization and read them back in, the debarcoding gives an error message - 'Error in assignPrelim(sce, mat) : Couldn't match masses extracted from channel names and debarcoding scheme.'
Case 1 - The pipeline works in this case
# NORMALIZATION ----
# Normalize with normCytof() using beads = "dvs" and k = 50, applied to the
# "counts" and "exprs" assays.
# apply normalization; keep raw data (overwrite = FALSE)
# NOTE(review): `sce` is not created in this snippet; presumably it comes from
# the same read.flowSet() + prepData() steps shown in Case 2 -- confirm.
res <- normCytof(sce, beads = "dvs", k = 50,
assays = c("counts", "exprs"), overwrite = FALSE)
# check number & percentage of bead / removed events
# (n = number of columns of the input object; ns = number of columns of
# res$beads and res$removed)
n <- ncol(sce); ns <- c(ncol(res$beads), ncol(res$removed))
# check.names = FALSE keeps "#" and "%" as literal column names
data.frame(
check.names = FALSE,
"#" = c(ns[1], ns[2]),
"%" = 100*c(ns[1]/n, ns[2]/n),
row.names = c("beads", "removed"))
# extract data excluding beads & doublets,
# and including normalized intensities
sce <- res$data
# list the assays available on the normalized object
assayNames(sce)
# DEBARCODING ----
# read in the debarcoding sample key
# (check.names = FALSE keeps the CSV column headers unmodified)
key <- read.csv('Run_4_barcode.csv', check.names = FALSE)
# The debarcoding scheme should be a binary table with sample IDs as rows and numeric barcode masses as column names:
# move the first column into the row names, drop that column, convert to matrix
rownames(key) <- key[[1]]
key[[1]] <- NULL
mat <- as.matrix(key)
# print the scheme for visual inspection
mat
# debarcode: assignPrelim() is run on the in-memory object returned by the
# normalization step (no FCS write/read round trip in this case)
sce <- assignPrelim(sce, mat)
Case 2 - Here we get an error during debarcoding
### Normalisation as per normal
# Read in list of filenames
# (the spreadsheet must provide a `filename` column; it is used below)
f <- read_excel('Input_4-7_raw.xlsx')
f$filename
# PREPROCESSING ----
# Create SingleCellExperiment with raw data:
# read the listed files as a flowSet with transformation = FALSE and
# truncate_max_range = FALSE, then pass the flowSet to prepData()
raw_data <- read.flowSet(f$filename, transformation = FALSE, truncate_max_range = FALSE)
sce <- prepData(raw_data)
# NORMALIZATION ----
# Normalize all runs together with normCytof() using beads = "dvs" and k = 50,
# applied to the "counts" and "exprs" assays.
# apply normalization; keep raw data (overwrite = FALSE)
# NOTE(review): because the raw assays are kept, check the assayNames() output
# below to see which assay holds the normalized values -- confirm before export.
res <- normCytof(sce, beads = "dvs", k = 50,
assays = c("counts", "exprs"), overwrite = FALSE)
# check number & percentage of bead / removed events
# (n = number of columns of the input object; ns = number of columns of
# res$beads and res$removed)
n <- ncol(sce); ns <- c(ncol(res$beads), ncol(res$removed))
# check.names = FALSE keeps "#" and "%" as literal column names
data.frame(
check.names = FALSE,
"#" = c(ns[1], ns[2]),
"%" = 100*c(ns[1]/n, ns[2]/n),
row.names = c("beads", "removed"))
# extract data excluding beads & doublets,
# and including normalized intensities
sce <- res$data
# list the assays available on the normalized object
assayNames(sce)
### Here we write the output to .fcs
####
# convert to 'flowSet' with one frame per sample
# NOTE(review): no assay is specified in this call -- confirm that the exported
# values are the normalized ones and not the raw data kept by the
# overwrite = FALSE normalization call.
# NOTE(review): this call prints a channel-renaming table (e.g. CD45 -> CD45-1,
# EQ -> EQ-1), so the exported channel names are presumably target names rather
# than metal/mass names -- a likely reason assignPrelim() cannot extract masses
# from channel names after re-import; confirm.
fs <- sce2fcs(sce, split_by = "sample_id")
# sanity check: the number of rows in each 'flowFrame' equals the number of
# cells per sample_id in the SCE
all(c(fsApply(fs, nrow)) == table(sce$sample_id))
fs
# get sample identifiers
ids <- fsApply(fs, identifier)
# write one file per sample; the file name is "normalised_" followed by the
# identifier, and no file extension is appended here
for (id in ids) {
ff <- fs[[id]] # subset 'flowFrame'
fn <- sprintf("normalised_%s", id) # specify output name that includes ID
fn <- file.path("C:/Users/ammas/OneDrive/Desktop/Projects/Duneia - CYTOF/Runs_normalised", fn) # construct output path
write.FCS(ff, fn) # write frame to FCS
}
### Here we read in the .fcs again
# Set input working directory
# (all relative paths used after this point, including the spreadsheet and the
# barcode CSV, resolve against this folder)
setwd("C:/Users/ammas/OneDrive/Desktop/Projects/Duneia - CYTOF/Runs_normalised")
# Read in list of filenames
# (the spreadsheet must provide a `filename` column; it is used below)
f <- read_excel('Input_Run_4-7.xlsx')
f$filename
# PREPROCESSING ----
# Create SingleCellExperiment from the files listed in the spreadsheet
# NOTE(review): the variable is still named raw_data, but presumably the files
# listed here are the normalised files written in the previous step -- confirm.
raw_data <- read.flowSet(f$filename, transformation = FALSE, truncate_max_range = FALSE)
sce <- prepData(raw_data)
### Then we debarcode
# DEBARCODING ----
# read in the debarcoding sample key
# (check.names = FALSE keeps the CSV column headers unmodified)
key <- read.csv('Run_4_barcode.csv', check.names = FALSE)
key
# The debarcoding scheme should be a binary table with sample IDs as rows and numeric barcode masses as column names:
# move the first column into the row names, drop that column, convert to matrix
rownames(key) <- key[[1]]
key[[1]] <- NULL
mat <- as.matrix(key)
# print the scheme for visual inspection
mat
##
# debarcode -- this is the call that fails in Case 2 with
# "Couldn't match masses extracted from channel names and debarcoding scheme."
# NOTE(review): the scheme is built exactly as in Case 1, so the difference is
# presumably in the channel names of the re-imported `sce` -- confirm.
sce <- assignPrelim(sce, mat)
In this case the debarcoding does not work, giving the error message.
Error in assignPrelim(sce, mat) :
Couldn't match masses extracted from channel names and debarcoding scheme.
I am wondering if it is because of the way we saved the output of sce2fcs to .fcs files and re-read them.
When we use the sce2fcs function, we get the message:
fs <- sce2fcs(sce, split_by = "sample_id")
orig_channel_name new_channel_name
$P7N CD45 CD45-1
$P9N CD45 CD45-2
$P11N CD45 CD45-3
$P12N CD45 CD45-4
$P13N CD45 CD45-5
$P21N EQ EQ-1
$P23N EQ EQ-2
$P58N EQ EQ-3
orig_channel_name new_channel_name
$P7N CD45 CD45-1
$P9N CD45 CD45-2
$P11N CD45 CD45-3
$P12N CD45 CD45-4
$P13N CD45 CD45-5
$P21N EQ EQ-1
$P23N EQ EQ-2
$P58N EQ EQ-3
orig_channel_name new_channel_name
$P7N CD45 CD45-1
$P9N CD45 CD45-2
$P11N CD45 CD45-3
$P12N CD45 CD45-4
$P13N CD45 CD45-5
$P21N EQ EQ-1
$P23N EQ EQ-2
$P58N EQ EQ-3
orig_channel_name new_channel_name
$P7N CD45 CD45-1
$P9N CD45 CD45-2
$P11N CD45 CD45-3
$P12N CD45 CD45-4
$P13N CD45 CD45-5
$P21N EQ EQ-1
$P23N EQ EQ-2
$P58N EQ EQ-3
orig_channel_name new_channel_name
$P7N CD45 CD45-1
$P9N CD45 CD45-2
$P11N CD45 CD45-3
$P12N CD45 CD45-4
$P13N CD45 CD45-5
$P21N EQ EQ-1
$P23N EQ EQ-2
$P58N EQ EQ-3
orig_channel_name new_channel_name
$P7N CD45 CD45-1
$P9N CD45 CD45-2
$P11N CD45 CD45-3
$P12N CD45 CD45-4
$P13N CD45 CD45-5
$P21N EQ EQ-1
$P23N EQ EQ-2
$P58N EQ EQ-3
orig_channel_name new_channel_name
$P7N CD45 CD45-1
$P9N CD45 CD45-2
$P11N CD45 CD45-3
$P12N CD45 CD45-4
$P13N CD45 CD45-5
$P21N EQ EQ-1
$P23N EQ EQ-2
$P58N EQ EQ-3
orig_channel_name new_channel_name
$P7N CD45 CD45-1
$P9N CD45 CD45-2
$P11N CD45 CD45-3
$P12N CD45 CD45-4
$P13N CD45 CD45-5
$P21N EQ EQ-1
$P23N EQ EQ-2
$P58N EQ EQ-3
orig_channel_name new_channel_name
$P7N CD45 CD45-1
$P9N CD45 CD45-2
$P11N CD45 CD45-3
$P12N CD45 CD45-4
$P13N CD45 CD45-5
$P21N EQ EQ-1
$P23N EQ EQ-2
$P58N EQ EQ-3
There were 50 or more warnings (use warnings() to see the first 50)
And when we read the flowSet back in, we get the message:
> sce <- prepData(raw_data)
Not all samples contain information on their acquisition time; ignoring argument 'by_time'. Samples will be kept in their original order.
This message only appears when reading in the normalised .fcs files, not the original ones.
Thank you so much for your help.
I think this can be fixed if we can prevent the sce2fcs from renaming channels, though I'm not sure how to do this.
fs <- sce2fcs(sce, split_by = "sample_id")
orig_channel_name new_channel_name
$P7N CD45 CD45-1
$P9N CD45 CD45-2
$P11N CD45 CD45-3
$P12N CD45 CD45-4
$P13N CD45 CD45-5
$P21N EQ EQ-1
$P23N EQ EQ-2
$P58N EQ EQ-3