Name: Bio Pathway Gsea
Author: FreedomIntelligence

Buscar habilidades.../

Bio Pathway Gsea | Skills Pool

library(clusterProfiler)
library(org.Hs.eg.db)

# Load the differential-expression table. The code below reads its
# `gene_id` and `log2FoldChange` columns.
de_results <- read.csv('de_results.csv')

# Ranked list for GSEA: log2 fold changes named by gene ID, ordered from the
# largest value to the smallest (GSEA requires a decreasing ranking).
# sort() drops NA values by default, so genes without a fold change are
# removed at this step.
gene_list <- sort(
    setNames(de_results$log2FoldChange, de_results$gene_id),
    decreasing = TRUE
)

# Convert symbols to Entrez IDs.
# The names of `gene_list` are passed as gene symbols (fromType = 'SYMBOL').
# bitr() can return more than one Entrez ID for a symbol, and symbols it
# cannot map are absent from its result.
gene_ids <- bitr(names(gene_list), fromType = 'SYMBOL', toType = 'ENTREZID', OrgDb = org.Hs.eg.db)

# Create ranked list with Entrez IDs: keep only the genes that were mapped,
# then rename each one with the first Entrez ID listed for its symbol.
gene_list_entrez <- gene_list[names(gene_list) %in% gene_ids$SYMBOL]
names(gene_list_entrez) <- gene_ids$ENTREZID[match(names(gene_list_entrez), gene_ids$SYMBOL)]

# Restore the decreasing order, then keep a single entry per Entrez ID.
# Duplicated names make the ranking ambiguous for GSEA; because the vector is
# already sorted, the entry kept for each ID is the one with the highest score.
gene_list_entrez <- sort(gene_list_entrez, decreasing = TRUE)
gene_list_entrez <- gene_list_entrez[!duplicated(names(gene_list_entrez))]

# Alternative ranking statistics.
# Each option below overwrites `gene_list`, so run only the one you want.
# `gene_list_entrez` is derived from `gene_list`, so redo the Entrez
# conversion step afterwards for the new ranking to take effect.

# Option 1: signed p-value (recommended for detecting both up and down).
# P-values are clamped at the smallest positive double so that a p-value of
# exactly 0 does not turn into -log10(0) = Inf, which would put non-finite
# scores in the ranked list. NA p-values stay NA and are dropped by sort().
gene_list <- -log10(pmax(de_results$pvalue, .Machine$double.xmin)) *
    sign(de_results$log2FoldChange)
names(gene_list) <- de_results$gene_id
gene_list <- sort(gene_list, decreasing = TRUE)

# Option 2: Wald statistic (the `stat` column produced by DESeq2).
gene_list <- de_results$stat
names(gene_list) <- de_results$gene_id
gene_list <- sort(gene_list, decreasing = TRUE)

# GSEA against Gene Ontology terms, using the Entrez-named ranked list.
gse_go <- gseGO(
    geneList = gene_list_entrez,
    ont = 'BP',                     # one of BP, MF, CC, or ALL
    OrgDb = org.Hs.eg.db,
    # Gene-set size limits
    minGSSize = 10,
    maxGSSize = 500,
    # Significance filtering
    pAdjustMethod = 'BH',
    pvalueCutoff = 0.05,
    verbose = FALSE
)

# Translate the Entrez IDs in the result back to gene symbols.
gse_go <- setReadable(gse_go, keyType = 'ENTREZID', OrgDb = org.Hs.eg.db)

# GSEA against KEGG pathways for human ('hsa'), using the Entrez-named
# ranked list.
gse_kegg <- gseKEGG(
    geneList = gene_list_entrez,
    organism = 'hsa',
    # Gene-set size limits
    minGSSize = 10,
    maxGSSize = 500,
    # Significance filtering
    pvalueCutoff = 0.05,
    verbose = FALSE
)

# Translate the Entrez IDs in the result back to gene symbols.
gse_kegg <- setReadable(gse_kegg, keyType = 'ENTREZID', OrgDb = org.Hs.eg.db)

# Load custom gene sets from a GMT (Gene Matrix Transposed) file.
# NOTE(review): the identifiers in the GMT file must be the same type as the
# names of `gene_list_entrez` (Entrez IDs) -- confirm for this file.
gene_sets <- read.gmt('msigdb_hallmarks.gmt')

# Generic GSEA against the user-supplied term-to-gene mapping.
gse_custom <- GSEA(
    geneList = gene_list_entrez,
    TERM2GENE = gene_sets,
    pvalueCutoff = 0.05,
    minGSSize = 10,
    maxGSSize = 500
)

# MSigDB gene sets are provided by the msigdbr package.
library(msigdbr)

# Fetch the human Hallmark collection ('H') and reduce it to the two columns
# GSEA() needs: gene-set name and Entrez gene ID.
# NOTE(review): newer msigdbr releases appear to rename `category` to
# `collection` and `entrez_gene` to `ncbi_gene` -- confirm against the
# installed version.
hallmarks <- msigdbr(category = 'H', species = 'Homo sapiens')
hallmarks_t2g <- hallmarks[c('gs_name', 'entrez_gene')]

# Generic GSEA against the Hallmark term-to-gene table.
gse_hallmark <- GSEA(
    TERM2GENE = hallmarks_t2g,
    geneList = gene_list_entrez,
    pvalueCutoff = 0.05
)

# Other categories: C1 (positional), C2 (curated), C3 (motif), C5 (GO), C6 (oncogenic), C7 (immunologic)

# View results: preview the first rows of the GO GSEA result, then convert
# the whole result object to a plain data frame for further processing.
head(gse_go)
results <- as.data.frame(gse_go)

# Key columns of the result table:
# - NES: Normalized Enrichment Score (positive = gene set enriched among
#   upregulated genes, negative = enriched among downregulated genes)
# - pvalue: Nominal (unadjusted) p-value
# - p.adjust: p-value adjusted for multiple testing (BH method in the gseGO
#   call that produced gse_go)
# - core_enrichment: Leading-edge genes, stored as one '/'-separated string

Parameter	Default	Description
geneList	required	Named numeric vector, sorted in decreasing order
OrgDb	required	Organism annotation database (for gseGO)
organism	hsa	KEGG organism code (for gseKEGG)
ont	BP	Ontology: BP, MF, CC, or ALL (for gseGO)
minGSSize	10	Minimum number of genes in a gene set
maxGSSize	500	Maximum number of genes in a gene set
pvalueCutoff	0.05	Adjusted p-value threshold for reported gene sets
pAdjustMethod	BH	Multiple-testing adjustment method
nPerm	10000	Number of permutations (only if a permutation test is used)
eps	1e-10	Lower boundary for p-value calculation

# Write the GO GSEA table to CSV without a row-name column.
results_df <- as.data.frame(gse_go)
write.csv(results_df, file = 'gsea_go_results.csv', row.names = FALSE)

# Leading-edge genes of the first term: `core_enrichment` holds them as one
# '/'-separated string, so split it into a character vector.
leading_edge <- strsplit(results_df$core_enrichment[1], split = '/', fixed = TRUE)[[1]]

NES	Interpretation
Positive (> 0)	Gene set enriched in upregulated genes
Negative (< 0)	Gene set enriched in downregulated genes
Larger |NES|	Stronger enrichment (in either direction)

Bio Pathway Gsea

Version Compatibility

Gene Set Enrichment Analysis (GSEA)

Core Concept

Prepare Ranked Gene List

Bio Pathway Gsea

Version Compatibility

Gene Set Enrichment Analysis (GSEA)

Core Concept

Prepare Ranked Gene List

Convert Gene IDs for GSEA

Alternative Ranking Statistics

GSEA with GO

GSEA with KEGG

GSEA with Custom Gene Sets

MSigDB Gene Sets

Understanding Results

Interpreting NES (Normalized Enrichment Score)

Key Parameters

Export Results

Notes

Nanoclaw Repl

Bioinformatics

Smart Explore

Vector Database Engineer

Skin Health Analyzer

Scanpy

Bio Pathway Gsea

Version Compatibility

Gene Set Enrichment Analysis (GSEA)

Core Concept

Prepare Ranked Gene List

Bio Pathway Gsea

Version Compatibility

Gene Set Enrichment Analysis (GSEA)

Core Concept

Prepare Ranked Gene List

Convert Gene IDs for GSEA

Alternative Ranking Statistics

GSEA with GO

GSEA with KEGG

GSEA with Custom Gene Sets

MSigDB Gene Sets

Understanding Results

Interpreting NES (Normalized Enrichment Score)

Key Parameters

Export Results

Notes

Related Skills

Nanoclaw Repl

Bioinformatics

Smart Explore

Vector Database Engineer

Skin Health Analyzer

Scanpy