Performs fast Gene Set Enrichment Analysis (GSEA) on single-cell data using the fgsea R package. Identifies enriched biological pathways by ranking genes based on differential expression between cell groups. Generates enrichment scores, significance metrics, and publication-ready visualizations.
# Reference configuration for the ScFGSEA process, listing the available options.
[ScFGSEA]
cache = true
[ScFGSEA.in]
# Process that supplies the input object.
srtobj = ["SeuratClustering"] # or "ScRepCombiningExpression"
[ScFGSEA.envs]
# Core parameters
ncores = 1 # Number of cores used in parallel
assay = "RNA" # Assay to use
subset = "seurat_clusters %in% c('c1', 'c2')" # Expression selecting the cells to analyze
# Grouping parameters
group_by = "seurat_clusters" # Metadata column holding the groups to compare
ident_1 = "c1" # First group
ident_2 = "c2" # Second group (optional; when omitted, all other cells are used)
each = "seurat_clusters" # Column used to split the analysis into multiple cases
# Gene set database
gmtfile = "KEGG_2021_Human" # Default
# Ranking method
method = "s2n" # Signal-to-noise ratio (default)
# fgsea parameters
minsize = 10 # Minimum gene set size
maxsize = 100 # Maximum gene set size
top = 20 # Number of top pathways to plot; a value < 1 is used as a padj threshold
eps = 0.0 # P-value boundary
# Visualization
# NOTE(review): alleach_plots presumably defines plots spanning all `each` cases - confirm.
[ScFGSEA.envs.alleach_plots.Heatmap]
plot_type = "heatmap"
group_by = "Diagnosis"
Available gene set databases for gmtfile: "MSigDB_Hallmark_2020", "KEGG_2021_Human", "Reactome_Pathways_2024", "BioCarta_2016", "WikiPathways_2024_Human", "GO_Biological_Process_2025", "GO_Cellular_Component_2025", "GO_Molecular_Function_2025"
Custom GMT file:
gmtfile = "/path/to/custom.gmt"
Format: name<tab>description<tab>gene1,gene2,...
Ranking methods (method):
- "s2n"/"signal_to_noise": Signal-to-noise ratio (default)
- "abs_s2n"/"abs_signal_to_noise": Absolute signal-to-noise
- "t_test": Student's t-test
- "ratio_of_classes": Fold change (natural scale)
- "diff_of_classes": Difference of means
- "log2_ratio_of_classes": Log2 fold change (recommended for log-scale RNA-seq)
[ScFGSEA]
# Basic example: input from SeuratClustering, comparing cluster c1 against cluster c2.
[ScFGSEA.in]
srtobj = ["SeuratClustering"]
[ScFGSEA.envs]
group_by = "seurat_clusters"
ident_1 = "c1"
ident_2 = "c2"
# Example: Disease vs Control (Diagnosis column), split by seurat_clusters,
# against the MSigDB Hallmark gene sets; the top 20 pathways are plotted.
[ScFGSEA.envs]
gmtfile = "MSigDB_Hallmark_2020"
group_by = "Diagnosis"
ident_1 = "Disease"
ident_2 = "Control"
each = "seurat_clusters"
method = "s2n"
top = 20
# Example: Treated vs Untreated against KEGG, keeping gene sets of 15-200 genes
# and ranking genes by log2 ratio of classes.
[ScFGSEA.envs]
gmtfile = "KEGG_2021_Human"
group_by = "Treatment"
ident_1 = "Treated"
ident_2 = "Untreated"
minsize = 15
maxsize = 200
method = "log2_ratio_of_classes"
# Example: Colitis vs Control against GO Biological Process, keeping gene sets
# of 10-500 genes and selecting pathways by adjusted p-value instead of a fixed count.
[ScFGSEA.envs]
gmtfile = "GO_Biological_Process_2025"
group_by = "Diagnosis"
ident_1 = "Colitis"
ident_2 = "Control"
minsize = 10
maxsize = 500
top = 0.05 # A value < 1 is a padj threshold: padj < 0.05
# Example: gene sets read from a custom GMT file path rather than a named database.
[ScFGSEA.envs]
gmtfile = "/data/gmt/MSigDB_C7_Immunologic_Signatures.gmt"
group_by = "tissue_type"
ident_1 = "Inflamed"
ident_2 = "Normal"
minsize = 5
maxsize = 150
# Example: two named cases running the same Disease vs Control comparison
# against different gene set databases.
# NOTE(review): group_by is not set in either case; presumably it is inherited
# from [ScFGSEA.envs] - confirm.
[ScFGSEA.envs.cases.Hallmark]
gmtfile = "MSigDB_Hallmark_2020"
ident_1 = "Disease"
ident_2 = "Control"
[ScFGSEA.envs.cases.KEGG]
gmtfile = "KEGG_2021_Human"
ident_1 = "Disease"
ident_2 = "Control"
# Example: input from ScRepCombiningExpression, comparing expanded_clone
# against rest in the cdr3_clonotype_cluster column with Hallmark gene sets.
[ScFGSEA.in]
srtobj = ["ScRepCombiningExpression"]
[ScFGSEA.envs]
group_by = "cdr3_clonotype_cluster"
ident_1 = "expanded_clone"
ident_2 = "rest"
gmtfile = "MSigDB_Hallmark_2020"
# NOTE(review): other examples pass a full filter expression to subset
# (e.g. "seurat_clusters %in% c('c1', 'c2')"); confirm that "CD4" alone is valid.
subset = "CD4"
# Example: cluster c1 vs cluster c2 against the MSigDB Hallmark gene sets.
[ScFGSEA.envs]
gmtfile = "MSigDB_Hallmark_2020"
group_by = "seurat_clusters"
ident_1 = "c1"
ident_2 = "c2"
# Example: Disease vs Control, split by seurat_clusters, against KEGG.
[ScFGSEA.envs]
group_by = "Diagnosis"
ident_1 = "Disease"
ident_2 = "Control"
each = "seurat_clusters"
gmtfile = "KEGG_2021_Human"
# Example: rank genes by log2 fold change (log2_ratio_of_classes) with Hallmark gene sets.
[ScFGSEA.envs]
method = "log2_ratio_of_classes"
gmtfile = "MSigDB_Hallmark_2020"
# Example: Reactome gene sets restricted to 20-150 genes.
[ScFGSEA.envs]
minsize = 20
maxsize = 150
gmtfile = "Reactome_Pathways_2024"
# Example: strict significance filter on Hallmark gene sets.
[ScFGSEA.envs]
top = 0.01 # A value < 1 is a padj threshold: padj < 0.01 only
gmtfile = "MSigDB_Hallmark_2020"
# Example: High vs Low (Metabolic_State column) against a custom GMT file.
[ScFGSEA.envs]
gmtfile = "/data/gmt/KEGG_Metabolism.gmt"
group_by = "Metabolic_State"
ident_1 = "High"
ident_2 = "Low"
Input (srtobj): SeuratClustering or ScRepCombiningExpression
CellTypeAnnotation, pathway visualization
- gmtfile: Valid enrichit name or GMT path
- group_by: Valid metadata column
- ident_1/ident_2: Values must exist in group_by
- minsize: ≥ 1, maxsize: > minsize
- top: > 0 (values < 1 are used as a padj threshold)
- method: Valid fgsea ranking method
[ScFGSEA.envs]
# Relaxed settings that admit more gene sets and a looser significance cut-off.
minsize = 5 # Allow smaller pathways
maxsize = 500 # Allow larger pathways
top = 0.1 # Looser padj threshold
gmtfile = "GO_Biological_Process_2025" # Database with more gene sets
Causes: insufficient cells, gene name mismatch, restrictive thresholds.
Solutions:
# Example: gene set size bounds of 10-200 combined with a cell filter.
[ScFGSEA.envs]
minsize = 10
maxsize = 200
# NOTE(review): group_by_count is not defined anywhere in the visible text;
# confirm it is an available metadata column before relying on this filter.
subset = "group_by_count > 10"
# Example: narrower gene set size range, more cores, and a cell subset.
# NOTE(review): presumably intended to reduce run time on large datasets - confirm.
[ScFGSEA.envs]
minsize = 20
maxsize = 100
gmtfile = "MSigDB_Hallmark_2020"
ncores = 8 # Run on 8 cores in parallel
subset = "seurat_clusters %in% c('c1', 'c2')" # Restrict to clusters c1 and c2
Cause: human (GENE) vs mouse (Gene) gene symbol capitalization, or different ID types.
Solutions:
- Inspect the gene names with rownames(seurat_object)
- Use log2_ratio_of_classes for log-scale RNA-seq data
- Set minsize/maxsize based on database and research question
- Use the each parameter for multiple subgroup comparisons
- Use top < 1 for p-value-based filtering
- Increase ncores for large datasets