sanbomics_scripts
sanbomics_scripts copied to clipboard
No fragment file present when extracting gene activity
Hello, I am new to bioinformatics and in R and from wet lab background. I am analyzing three single cell multiomics (RNA + ATAC) datasets from 10x genomics. When I merge the three datasets, the fragment files for each datasets were present. But after integrating with harmony when I try to extract gene activity the error comes as no fragment file present. Here I am attaching full code.
##Create seurat object reading .h5 file and meta file
counts_APL1 <- Read10X_h5(filename = "path/to/filtered_feature_bc_matrix.h5")
meta_APL1 <- read.csv(
file = 'path/to/per_barcode_metrics.csv',
header = TRUE,
row.names = 1)
chrom_assay <- CreateChromatinAssay(
counts = counts_APL1$Peaks,
sep = c(":", "-"),
genome = 'hg38',
fragments = 'path/to/atac_fragments.tsv.gz',
min.cells = 3,
min.features = 200
)
data_APL1 <- CreateSeuratObject(
counts = chrom_assay,
assay = "peaks",
meta.data = meta_APL1
)
data_APL1[[]]
annotations <- GetGRangesFromEnsDb(ensdb = EnsDb.Hsapiens.v86)
seqlevelsStyle(annotations) <- 'UCSC'
Annotation(data_APL1) <- annotations
data_APL21<- NucleosomeSignal(object = data_APL1) #fragment ratio 147-294: <147
data_APL2 <- TSSEnrichment(object = data_APL1, fast = FALSE)
##Here I get the error. I cant find any blacklist_region_fragments nor the pct_reads_in_peaks. Even the peak_region_fragments is placed in metadata in name of atac_peak_region_fragments.
So proceeded without these two parameters in QC
#data_APL1$blacklist_ratio <- data_APL1$blacklist_region_fragments / data$peak_region_fragments
#data_APL1[[]]
#data$pct_reads_in_peaks <- data$peak_region_fragments / data$passed_filters * 100
VlnPlot(
object = data_APL1,
features = c('nCount_peaks',
'nucleosome_signal', 'TSS.enrichment'),
pt.size = 0.1,
ncol = 5
)
data_APL1 <- subset(
x = data_APL1,
subset = atac_peak_region_fragments > 1000 &
atac_peak_region_fragments < 100000 &
nucleosome_signal < 4 &
TSS.enrichment > 1)
##Created 3 SeuratObjects from 3 three samples
#add sample information for merging samples
Control$dataset <- "Control"
APL1$dataset <- "APL1"
APL2$dataset <- "APL2"
merged_atac <- merge(Control, y = c(APL1, APL2), add.cell.ids = c("Control", "APL1", "APL2"), project = "Integrated_ATAC" )
[email protected]
merged_atac <- FindTopFeatures(merged_atac, min.cutoff = 'q0')
merged_atac <- RunTFIDF(merged_atac)
merged_atac <- RunSVD(merged_atac)
merged_atac
#An object of class Seurat 181982 features across 3848 samples within 1 assay Active assay: peaks (181982 features, 181982 variable features) 1 dimensional reduction calculated: lsi
merged_atac <- RunUMAP(object = merged_atac, reduction = 'lsi', dims = 2:30)
merged_atac <- FindNeighbors(object = merged_atac, reduction = 'lsi', dims = 2:30)
merged_atac <- FindClusters(object = merged_atac, verbose = FALSE, algorithm = 3, resolution = .4)
DimPlot(object = merged_atac, label = TRUE) + NoLegend()
DimPlot(object = merged_atac, label = TRUE, group.by = "dataset") + NoLegend()
#Batch correction using Harmony
integrated_atac_harmony <- RunHarmony(object = merged_atac, group.by.vars = 'dataset', reduction = 'lsi', assay.use = 'peaks', project.dim = FALSE)
integrated_atac_harmony <- RunUMAP(integrated_atac_harmony, dims = 2:10, reduction = 'harmony')
DimPlot(integrated_atac_harmony, group.by = 'dataset', pt.size = 0.5)
# Do UMAP and clustering using ** Harmony embeddings instead of PCA **
integrated_atac_harmony <- integrated_atac_harmony %>%
RunUMAP(reduction = 'harmony', dims = 2:10) %>%
FindNeighbors(reduction = "harmony", dims = 2:10) %>%
FindClusters(resolution = 0.3)
# visualize
DimPlot(integrated_atac_harmony, reduction = 'umap', group.by = 'dataset')
DimPlot(integrated_atac_harmony, reduction = 'umap')
gene.activities <- GeneActivity(integrated_atac_harmony)
##Here I get the error ##Error in GeneActivity(integrated_atac_harmony) : No fragment information found for requested assay
Although the fragment files were present when I created SeuratObject. Could you please help me to find this issue and better suggestion to improve the pipeline for further integration with merged scRNA dataset.