CATALYST icon indicating copy to clipboard operation
CATALYST copied to clipboard

Normalizing runs together and Debarcoding separately

Open ammasakshay opened this issue 4 years ago • 1 comments
trafficstars

We have seven runs of an experiment. We want each run to be debarcoded separately (so they are essentially replicates of each other) but normalized together (so they are comparable).

It works when we normalize and debarcode together; however, if we write the data to .fcs files after normalization, the debarcoding gives an error message - 'Error in assignPrelim(sce, mat) : Couldn't match masses extracted from channel names and debarcoding scheme.'

Case 1 - The pipeline works in this case

# Normalization ----
# Run the normalization; the raw data is kept alongside the result.
res <- normCytof(
  sce,
  beads = "dvs",
  k = 50,
  assays = c("counts", "exprs"),
  overwrite = FALSE
)

# Report the number and percentage of bead events and of removed events.
n <- ncol(sce)
ns <- c(ncol(res$beads), ncol(res$removed))
data.frame(
  "#" = ns,
  "%" = 100 * (ns / n),
  row.names = c("beads", "removed"),
  check.names = FALSE
)

# Continue with the data that excludes beads & doublets
# and includes the normalized intensities.
sce <- res$data
assayNames(sce)
# Debarcoding ----
# Read in the debarcoding sample key. The debarcoding scheme should be a
# binary table with sample IDs as rows and numeric barcode masses as column
# names.
key <- read.csv("Run_4_barcode.csv", check.names = FALSE)

# Use the first column as row names, drop it from the table, and convert
# what is left into a matrix.
rownames(key) <- key[[1]]
key[[1]] <- NULL
mat <- as.matrix(key)
mat

# Debarcode using the scheme matrix.
sce <- assignPrelim(sce, mat)

Case 2 - Here we get an error during debarcoding

# Normalisation as per normal ----
# Load the spreadsheet listing the input file names and show them.
f <- read_excel("Input_4-7_raw.xlsx")
f$filename

# Preprocessing ----
# Read the listed files into a flowSet, then create the
# SingleCellExperiment from the raw data.
raw_data <- read.flowSet(
  f$filename,
  transformation = FALSE,
  truncate_max_range = FALSE
)
sce <- prepData(raw_data)

# Normalization ----
# Run the normalization; the raw data is kept alongside the result.
res <- normCytof(
  sce,
  beads = "dvs",
  k = 50,
  assays = c("counts", "exprs"),
  overwrite = FALSE
)

# Report the number and percentage of bead events and of removed events.
n <- ncol(sce)
ns <- c(ncol(res$beads), ncol(res$removed))
data.frame(
  "#" = ns,
  "%" = 100 * (ns / n),
  row.names = c("beads", "removed"),
  check.names = FALSE
)

# Continue with the data that excludes beads & doublets
# and includes the normalized intensities.
sce <- res$data
assayNames(sce)

### Here we write the output to .fcs
####
# Convert to a 'flowSet' with one frame per sample.
# normCytof() was run above with overwrite = FALSE, so the "counts" assay
# still holds the raw data and the normalized values live in separate assays
# ("normcounts" / "normexprs"). sce2fcs() exports the "counts" assay unless
# told otherwise, so the normalized assay is requested explicitly here;
# without it the "normalised_*" files would contain un-normalized data.
fs <- sce2fcs(sce, split_by = "sample_id", assay = "normcounts")
# Sanity check: the number of cells per sample
# equals the number of cells in each 'flowFrame'.
all(c(fsApply(fs, nrow)) == table(sce$sample_id))
fs

# Output folder for the normalized files (defined once, outside the loop).
out_dir <- "C:/Users/ammas/OneDrive/Desktop/Projects/Duneia - CYTOF/Runs_normalised"

# Write one FCS file per sample identifier.
ids <- fsApply(fs, identifier)
for (id in ids) {
  ff <- fs[[id]]                     # subset 'flowFrame'
  fn <- sprintf("normalised_%s", id) # output name that includes the ID
  fn <- file.path(out_dir, fn)       # construct output path
  write.FCS(ff, fn)                  # write frame to FCS
}

# Here we read in the .fcs again ----

# Switch the working directory to the folder with the normalised files;
# the relative file names used from here on are resolved against it.
setwd("C:/Users/ammas/OneDrive/Desktop/Projects/Duneia - CYTOF/Runs_normalised")

# Load the spreadsheet listing the file names and show them.
f <- read_excel("Input_Run_4-7.xlsx")
f$filename

# Preprocessing ----
# Read the listed files into a flowSet, then create the
# SingleCellExperiment from them.
raw_data <- read.flowSet(
  f$filename,
  transformation = FALSE,
  truncate_max_range = FALSE
)
sce <- prepData(raw_data)

# Then we debarcode ----
# Read in the debarcoding sample key and show it as loaded.
key <- read.csv("Run_4_barcode.csv", check.names = FALSE)
key

# The debarcoding scheme should be a binary table with sample IDs as rows
# and numeric barcode masses as column names: use the first column as row
# names, drop it from the table, and convert what is left into a matrix.
rownames(key) <- key[[1]]
key[[1]] <- NULL
mat <- as.matrix(key)
mat

# Debarcode using the scheme matrix.
sce <- assignPrelim(sce, mat)

In this case the debarcoding does not work, giving the error message.

Error in assignPrelim(sce, mat) : 
  Couldn't match masses extracted from channel names and debarcoding scheme.

I am wondering if it is because of the way we saved the files with sce2fcs and re-read them in.

When we use the sce2fcs function we get the message

fs <- sce2fcs(sce, split_by = "sample_id")

      orig_channel_name new_channel_name
$P7N               CD45           CD45-1
$P9N               CD45           CD45-2
$P11N              CD45           CD45-3
$P12N              CD45           CD45-4
$P13N              CD45           CD45-5
$P21N                EQ             EQ-1
$P23N                EQ             EQ-2
$P58N                EQ             EQ-3
      orig_channel_name new_channel_name
$P7N               CD45           CD45-1
$P9N               CD45           CD45-2
$P11N              CD45           CD45-3
$P12N              CD45           CD45-4
$P13N              CD45           CD45-5
$P21N                EQ             EQ-1
$P23N                EQ             EQ-2
$P58N                EQ             EQ-3
      orig_channel_name new_channel_name
$P7N               CD45           CD45-1
$P9N               CD45           CD45-2
$P11N              CD45           CD45-3
$P12N              CD45           CD45-4
$P13N              CD45           CD45-5
$P21N                EQ             EQ-1
$P23N                EQ             EQ-2
$P58N                EQ             EQ-3
      orig_channel_name new_channel_name
$P7N               CD45           CD45-1
$P9N               CD45           CD45-2
$P11N              CD45           CD45-3
$P12N              CD45           CD45-4
$P13N              CD45           CD45-5
$P21N                EQ             EQ-1
$P23N                EQ             EQ-2
$P58N                EQ             EQ-3
      orig_channel_name new_channel_name
$P7N               CD45           CD45-1
$P9N               CD45           CD45-2
$P11N              CD45           CD45-3
$P12N              CD45           CD45-4
$P13N              CD45           CD45-5
$P21N                EQ             EQ-1
$P23N                EQ             EQ-2
$P58N                EQ             EQ-3
      orig_channel_name new_channel_name
$P7N               CD45           CD45-1
$P9N               CD45           CD45-2
$P11N              CD45           CD45-3
$P12N              CD45           CD45-4
$P13N              CD45           CD45-5
$P21N                EQ             EQ-1
$P23N                EQ             EQ-2
$P58N                EQ             EQ-3
      orig_channel_name new_channel_name
$P7N               CD45           CD45-1
$P9N               CD45           CD45-2
$P11N              CD45           CD45-3
$P12N              CD45           CD45-4
$P13N              CD45           CD45-5
$P21N                EQ             EQ-1
$P23N                EQ             EQ-2
$P58N                EQ             EQ-3
      orig_channel_name new_channel_name
$P7N               CD45           CD45-1
$P9N               CD45           CD45-2
$P11N              CD45           CD45-3
$P12N              CD45           CD45-4
$P13N              CD45           CD45-5
$P21N                EQ             EQ-1
$P23N                EQ             EQ-2
$P58N                EQ             EQ-3
      orig_channel_name new_channel_name
$P7N               CD45           CD45-1
$P9N               CD45           CD45-2
$P11N              CD45           CD45-3
$P12N              CD45           CD45-4
$P13N              CD45           CD45-5
$P21N                EQ             EQ-1
$P23N                EQ             EQ-2
$P58N                EQ             EQ-3
There were 50 or more warnings (use warnings() to see the first 50)

And when we re-input the flowset we get the message

> sce <- prepData(raw_data)
Not all samples contain information on their acquisition time; ignoring argument 'by_time'. Samples will be kept in their original order.

This message only appears when reading in the normalised .fcs files, NOT the original ones.

Thank you so much for your help.

ammasakshay avatar Mar 27 '21 19:03 ammasakshay

I think this can be fixed if we can prevent the sce2fcs from renaming channels, though I'm not sure how to do this.

fs <- sce2fcs(sce, split_by = "sample_id")

      orig_channel_name new_channel_name
$P7N               CD45           CD45-1
$P9N               CD45           CD45-2
$P11N              CD45           CD45-3
$P12N              CD45           CD45-4
$P13N              CD45           CD45-5
$P21N                EQ             EQ-1
$P23N                EQ             EQ-2
$P58N                EQ             EQ-3

ammasakshay avatar Mar 28 '21 01:03 ammasakshay