Single-cell ATAC-seq analysis with Signac (R/Seurat) and ArchR. Process 10X Genomics scATAC data, perform QC, dimensionality reduction, clustering, peak calling, and motif activity scoring with chromVAR. Use when analyzing single-cell ATAC-seq data.
Reference examples tested with: MACS2 2.2+, scanpy 1.10+
Before using code patterns, verify installed versions match. If versions differ:
packageVersion('<pkg>') then ?function_name to verify parametersIf code throws ImportError, AttributeError, or TypeError, introspect the installed package and adapt the example to match the actual API rather than retrying.
"Analyze my single-cell ATAC-seq data" → Process peak-barcode matrices, perform QC/filtering, reduce dimensions with LSI, cluster cells, call peaks per cluster, and score motif activity.
Signac::CreateChromatinAssay() → RunTFIDF() → FindTopFeatures() → RunSVD()ArchR::createArrowFiles() for large datasetsAnalyze single-cell chromatin accessibility data to identify cell types and regulatory elements.
| Tool | Ecosystem | Strengths |
|---|---|---|
| Signac | Seurat | Integration with scRNA-seq, familiar API |
| ArchR | Standalone | Memory efficient, comprehensive |
| chromVAR | Bioconductor | TF motif deviation scoring |
| SnapATAC2 | Python | Fast, scalable |
Goal: Process scATAC-seq data through QC, normalization, dimensionality reduction, and clustering to identify cell types by chromatin accessibility.
Approach: Create a ChromatinAssay from a peak-barcode matrix with fragment files, compute QC metrics (TSS enrichment, nucleosome signal), normalize with TF-IDF, reduce dimensions with LSI (SVD), then cluster and annotate using gene activity scores.
install.packages('Signac')
BiocManager::install(c('EnsDb.Hsapiens.v86', 'biovizBase', 'motifmatchr', 'chromVAR', 'JASPAR2020', 'TFBSTools'))
library(Signac)
library(Seurat)
library(EnsDb.Hsapiens.v86)
counts <- Read10X_h5('filtered_peak_bc_matrix.h5')
metadata <- read.csv('singlecell.csv', header = TRUE, row.names = 1)
chrom_assay <- CreateChromatinAssay(
counts = counts,
sep = c(':', '-'),
genome = 'hg38',
fragments = 'fragments.tsv.gz',
min.cells = 10,
min.features = 200
)
obj <- CreateSeuratObject(counts = chrom_assay, assay = 'peaks', meta.data = metadata)
annotations <- GetGRangesFromEnsDb(ensdb = EnsDb.Hsapiens.v86)
seqlevelsStyle(annotations) <- 'UCSC'
Annotation(obj) <- annotations
obj <- NucleosomeSignal(obj)
obj <- TSSEnrichment(obj, fast = FALSE)
obj$pct_reads_in_peaks <- obj$peak_region_fragments / obj$passed_filters * 100
obj$blacklist_ratio <- obj$blacklist_region_fragments / obj$peak_region_fragments
VlnPlot(obj, features = c('pct_reads_in_peaks', 'peak_region_fragments', 'TSS.enrichment', 'blacklist_ratio', 'nucleosome_signal'), pt.size = 0.1, ncol = 5)
obj <- subset(obj,
peak_region_fragments > 3000 &
peak_region_fragments < 20000 &
pct_reads_in_peaks > 15 &
blacklist_ratio < 0.05 &
nucleosome_signal < 4 &
TSS.enrichment > 2
)
obj <- RunTFIDF(obj)
obj <- FindTopFeatures(obj, min.cutoff = 'q0')
obj <- RunSVD(obj)
DepthCor(obj) # Check correlation with sequencing depth
obj <- RunUMAP(obj, reduction = 'lsi', dims = 2:30)
obj <- FindNeighbors(obj, reduction = 'lsi', dims = 2:30)
obj <- FindClusters(obj, algorithm = 3, resolution = 0.5)
DimPlot(obj, label = TRUE) + NoLegend()
gene_activities <- GeneActivity(obj)
obj[['RNA']] <- CreateAssayObject(counts = gene_activities)
obj <- NormalizeData(obj, assay = 'RNA', normalization.method = 'LogNormalize', scale.factor = median(obj$nCount_RNA))
DefaultAssay(obj) <- 'RNA'
FeaturePlot(obj, features = c('CD34', 'MS4A1', 'CD3D', 'CD14'), pt.size = 0.1, max.cutoff = 'q95')
peaks <- CallPeaks(obj, group.by = 'seurat_clusters', macs2.path = '/path/to/macs2')
# Quantify peaks
peak_counts <- FeatureMatrix(fragments = Fragments(obj), features = peaks, cells = colnames(obj))
obj[['peaks_called']] <- CreateChromatinAssay(counts = peak_counts, fragments = Fragments(obj), annotation = Annotation(obj))
DefaultAssay(obj) <- 'peaks'
da_peaks <- FindMarkers(obj, ident.1 = 'cluster1', ident.2 = 'cluster2', test.use = 'LR', latent.vars = 'peak_region_fragments')
# Top differentially accessible peaks
head(da_peaks[order(da_peaks$avg_log2FC, decreasing = TRUE), ], 10)
library(JASPAR2020)
library(TFBSTools)
library(motifmatchr)
pfm <- getMatrixSet(JASPAR2020, opts = list(collection = 'CORE', tax_group = 'vertebrates', all_versions = FALSE))
obj <- AddMotifs(obj, genome = BSgenome.Hsapiens.UCSC.hg38, pfm = pfm)
obj <- RunChromVAR(obj, genome = BSgenome.Hsapiens.UCSC.hg38)
DefaultAssay(obj) <- 'chromvar'
FeaturePlot(obj, features = 'MA0139.1', min.cutoff = 'q10', max.cutoff = 'q90') # CTCF
# Differential motif activity
differential_activity <- FindMarkers(obj, ident.1 = 'cluster1', ident.2 = 'cluster2', only.pos = TRUE, mean.fxn = rowMeans, fc.name = 'avg_diff')
CoveragePlot(obj, region = 'chr1-1000000-1050000', group.by = 'seurat_clusters', annotation = TRUE)
# Gene track
CoveragePlot(obj, region = 'MS4A1', group.by = 'seurat_clusters', extend.upstream = 10000, extend.downstream = 5000)
transfer_anchors <- FindTransferAnchors(reference = rna_obj, query = obj, features = VariableFeatures(rna_obj), reference.assay = 'RNA', query.assay = 'RNA', reduction = 'cca')
predicted_labels <- TransferData(anchorset = transfer_anchors, refdata = rna_obj$celltype, weight.reduction = obj[['lsi']], dims = 2:30)
obj <- AddMetaData(obj, metadata = predicted_labels)