Query gnomAD (Genome Aggregation Database) for population allele frequencies, variant constraint scores (pLI, LOEUF), and loss-of-function intolerance. Essential for variant pathogenicity interpretation, rare disease genetics, and identifying loss-of-function intolerant genes.
The Genome Aggregation Database (gnomAD) is the largest publicly available collection of human genetic variation, aggregated from large-scale sequencing projects. gnomAD v4 contains exome sequences from 730,947 individuals and genome sequences from 76,215 individuals across diverse ancestries. It provides population allele frequencies, variant consequence annotations, and gene-level constraint metrics that are essential for interpreting the clinical significance of genetic variants.
Key resources:
Use gnomAD when:
gnomAD uses a GraphQL API accessible at https://gnomad.broadinstitute.org/api. Most queries fetch variants by gene or specific genomic position.
Datasets available:
- `gnomad_r4` — gnomAD v4 exomes (recommended default, GRCh38)
- `gnomad_r4_genomes` — gnomAD v4 genomes (GRCh38)
- `gnomad_r3` — gnomAD v3 genomes (GRCh38)
- `gnomad_r2_1` — gnomAD v2 exomes (GRCh37)

Reference genomes:

- `GRCh38` — default for v3/v4
- `GRCh37` — for v2

import requests
def query_gnomad_gene(gene_symbol, dataset="gnomad_r4", reference_genome="GRCh38", *, timeout=60):
    """Fetch the variants gnomAD lists for a gene.

    Args:
        gene_symbol: Gene symbol sent as the ``$gene_symbol`` variable, e.g. "BRCA1".
        dataset: Dataset ID sent as the ``$dataset`` variable.
        reference_genome: Reference build sent as the ``$reference_genome`` variable.
        timeout: Seconds ``requests`` waits on the server before raising a
            timeout error. Pass ``None`` to wait indefinitely.

    Returns:
        The decoded JSON body of the GraphQL response. The body is returned
        as-is; it is not checked for an ``errors`` entry.
    """
    url = "https://gnomad.broadinstitute.org/api"
    query = """
query GeneVariants($gene_symbol: String!, $dataset: DatasetId!, $reference_genome: ReferenceGenomeId!) {
gene(gene_symbol: $gene_symbol, reference_genome: $reference_genome) {
gene_id
gene_symbol
variants(dataset: $dataset) {
variant_id
pos
ref
alt
consequence
genome {
af
ac
an
ac_hom
populations {
id
ac
an
af
}
}
exome {
af
ac
an
ac_hom
}
lof
lof_flags
lof_filter
}
}
}
"""
    variables = {
        "gene_symbol": gene_symbol,
        "dataset": dataset,
        "reference_genome": reference_genome,
    }
    # Without a timeout a stalled connection would block the caller forever.
    response = requests.post(
        url,
        json={"query": query, "variables": variables},
        timeout=timeout,
    )
    return response.json()
# Example
result = query_gnomad_gene("BRCA1")
gene_data = result["data"]["gene"]
variants = gene_data["variants"]


def _genome_af(variant):
    """Return the variant's genome allele frequency, or 1 when none is reported.

    A ``genome`` key that is present with a null value makes
    ``variant.get("genome", {})`` return ``None``, so the null is replaced
    with an empty dict before the frequency is read. Falling back to 1 keeps
    variants without a genome frequency out of the "rare" set.
    """
    genome = variant.get("genome") or {}
    af = genome.get("af")
    return 1 if af is None else af


# Filter to rare PTVs
# NOTE(review): "LC" selects low-confidence LoF calls; confirm whether "HC"
# (high-confidence) was intended here.
# The parentheses are required: `and` binds tighter than `or`, so without them
# every variant with lof == "LC" would pass regardless of its frequency.
rare_ptvs = [
    v for v in variants
    if (v.get("lof") == "LC" or v.get("consequence") in ["stop_gained", "frameshift_variant"])
    and _genome_af(v) < 0.001
]
print(f"Found {len(rare_ptvs)} rare PTVs in {gene_data['gene_symbol']}")
import requests
def query_gnomad_variant(variant_id, dataset="gnomad_r4", *, timeout=60):
    """Fetch details for a specific variant (e.g., '1-55516888-G-GA').

    Args:
        variant_id: Variant identifier sent as the ``$variantId`` variable.
        dataset: Dataset ID sent as the ``$dataset`` variable.
        timeout: Seconds ``requests`` waits on the server before raising a
            timeout error. Pass ``None`` to wait indefinitely.

    Returns:
        The decoded JSON body of the GraphQL response. The body is returned
        as-is; it is not checked for an ``errors`` entry.
    """
    url = "https://gnomad.broadinstitute.org/api"
    query = """
query VariantDetails($variantId: String!, $dataset: DatasetId!) {
variant(variantId: $variantId, dataset: $dataset) {
variant_id
chrom
pos
ref
alt
genome {
af
ac
an
ac_hom
populations {
id
ac
an
af
}
}
exome {
af
ac
an
ac_hom
populations {
id
ac
an
af
}
}
consequence
lof
rsids
in_silico_predictors {
id
value
flags
}
clinvar_variation_id
}
}
"""
    variables = {"variantId": variant_id, "dataset": dataset}
    # Without a timeout a stalled connection would block the caller forever.
    response = requests.post(
        url,
        json={"query": query, "variables": variables},
        timeout=timeout,
    )
    return response.json()
# Example: query a specific variant
result = query_gnomad_variant("17-43094692-G-A") # BRCA1 missense
variant = result["data"]["variant"]
if variant:
    # `dict.get(key, {})` only falls back when the key is missing. If the API
    # sends the key with a null value (presumably when the variant is absent
    # from that data type — confirm), it returns None, so replace the null
    # with an empty dict before reading `af`.
    genome_af = (variant.get("genome") or {}).get("af", "N/A")
    exome_af = (variant.get("exome") or {}).get("af", "N/A")
    print(f"Variant: {variant['variant_id']}")
    print(f" Consequence: {variant['consequence']}")
    print(f" Genome AF: {genome_af}")
    print(f" Exome AF: {exome_af}")
    print(f" LoF: {variant.get('lof')}")
gnomAD constraint scores assess how tolerant a gene is to variation relative to expectation:
import requests
def query_gnomad_constraint(gene_symbol, reference_genome="GRCh38", *, timeout=60):
    """Fetch constraint scores for a gene.

    Args:
        gene_symbol: Gene symbol sent as the ``$gene_symbol`` variable, e.g. "KCNQ2".
        reference_genome: Reference build sent as the ``$reference_genome`` variable.
        timeout: Seconds ``requests`` waits on the server before raising a
            timeout error. Pass ``None`` to wait indefinitely.

    Returns:
        The decoded JSON body of the GraphQL response. The body is returned
        as-is; it is not checked for an ``errors`` entry.
    """
    url = "https://gnomad.broadinstitute.org/api"
    query = """
query GeneConstraint($gene_symbol: String!, $reference_genome: ReferenceGenomeId!) {
gene(gene_symbol: $gene_symbol, reference_genome: $reference_genome) {
gene_id
gene_symbol
gnomad_constraint {
exp_lof
exp_mis
exp_syn
obs_lof
obs_mis
obs_syn
oe_lof
oe_mis
oe_syn
oe_lof_lower
oe_lof_upper
lof_z
mis_z
syn_z
pLI
}
}
}
"""
    variables = {"gene_symbol": gene_symbol, "reference_genome": reference_genome}
    # Without a timeout a stalled connection would block the caller forever.
    response = requests.post(
        url,
        json={"query": query, "variables": variables},
        timeout=timeout,
    )
    return response.json()
# Example
def _fmt_metric(value):
    """Format a constraint metric to three decimals, or "N/A" when it is missing."""
    return "N/A" if value is None else f"{value:.3f}"


result = query_gnomad_constraint("KCNQ2")
gene = result["data"]["gene"]
# A null `gnomad_constraint` (or a null individual metric) would otherwise
# raise TypeError in the `:.3f` formatting below.
constraint = gene["gnomad_constraint"] or {}
print(f"Gene: {gene['gene_symbol']}")
print(f" pLI: {_fmt_metric(constraint.get('pLI'))} (>0.9 = LoF intolerant)")
print(f" LOEUF: {_fmt_metric(constraint.get('oe_lof_upper'))} (<0.35 = highly constrained)")
print(f" Obs/Exp LoF: {_fmt_metric(constraint.get('oe_lof'))}")
print(f" Missense Z: {_fmt_metric(constraint.get('mis_z'))}")
Constraint score interpretation:
| Score | Range | Meaning |
|---|---|---|
| pLI | 0–1 | Probability of LoF intolerance; >0.9 = highly intolerant |
| LOEUF | 0–∞ | LoF observed/expected upper bound; <0.35 = constrained |
| oe_lof | 0–∞ | Observed/expected ratio for LoF variants |
| mis_z | −∞ to ∞ | Missense constraint z-score; >3.09 = constrained |
| syn_z | −∞ to ∞ | Synonymous z-score (control; should be near 0) |
import requests
import pandas as pd
def get_population_frequencies(variant_id, dataset="gnomad_r4", *, response_data=None, timeout=60):
    """Extract per-population genome allele frequencies for a variant.

    Args:
        variant_id: Variant identifier sent as the ``$variantId`` variable.
        dataset: Dataset ID sent as the ``$dataset`` variable.
        response_data: Optional already-decoded response payload (a dict with
            the same ``data.variant.genome.populations`` layout this function
            requests). When given, no HTTP request is made, so a cached or
            previously fetched response can be reused.
        timeout: Seconds ``requests`` waits on the server before raising a
            timeout error. Pass ``None`` to wait indefinitely.

    Returns:
        A DataFrame with one row per population that has ``an > 0``, sorted by
        ``af`` in descending order. ``af`` is recomputed as ``ac / an`` rather
        than taken from the payload. When the payload carries no genome
        population rows (null variant, null genome, or an empty list), an
        empty DataFrame with columns ``id, ac, an, af, ac_hom`` is returned.
    """
    if response_data is None:
        url = "https://gnomad.broadinstitute.org/api"
        query = """
query PopFreqs($variantId: String!, $dataset: DatasetId!) {
variant(variantId: $variantId, dataset: $dataset) {
variant_id
genome {
populations {
id
ac
an
af
ac_hom
}
}
}
}
"""
        # Without a timeout a stalled connection would block the caller forever.
        response = requests.post(
            url,
            json={"query": query, "variables": {"variantId": variant_id, "dataset": dataset}},
            timeout=timeout,
        )
        response_data = response.json()

    # Each level may be null in the payload, so replace nulls with empty
    # containers instead of subscripting None.
    variant = (response_data.get("data") or {}).get("variant") or {}
    genome = variant.get("genome") or {}
    populations = genome.get("populations") or []
    if not populations:
        # An empty list would yield a frame without an "an" column and fail below.
        return pd.DataFrame(columns=["id", "ac", "an", "af", "ac_hom"])

    df = pd.DataFrame(populations)
    # Populations with no called alleles would divide by zero.
    df = df[df["an"] > 0].copy()
    df["af"] = df["ac"] / df["an"]
    df = df.sort_values("af", ascending=False)
    return df
# Population IDs in gnomAD v4:
# afr = African/African American
# ami = Amish
# amr = Admixed American
# asj = Ashkenazi Jewish
# eas = East Asian
# fin = Finnish
# mid = Middle Eastern
# nfe = Non-Finnish European
# sas = South Asian
# remaining = Other
gnomAD also contains a structural variant dataset:
import requests
def query_gnomad_sv(gene_symbol, *, timeout=60):
    """Query structural variants overlapping a gene.

    The reference genome is fixed to GRCh38 inside the query text.

    Args:
        gene_symbol: Gene symbol sent as the ``$gene_symbol`` variable.
        timeout: Seconds ``requests`` waits on the server before raising a
            timeout error. Pass ``None`` to wait indefinitely.

    Returns:
        The decoded JSON body of the GraphQL response. The body is returned
        as-is; it is not checked for an ``errors`` entry.
    """
    url = "https://gnomad.broadinstitute.org/api"
    # NOTE(review): the gnomAD schema may require a `dataset` argument on
    # `structural_variants` — verify against the live schema before relying
    # on this query.
    query = """
query SVsByGene($gene_symbol: String!) {
gene(gene_symbol: $gene_symbol, reference_genome: GRCh38) {
structural_variants {
variant_id
type
chrom
pos
end
af
ac
an
}
}
}
"""
    # Without a timeout a stalled connection would block the caller forever.
    response = requests.post(
        url,
        json={"query": query, "variables": {"gene_symbol": gene_symbol}},
        timeout=timeout,
    )
    return response.json()
Check population frequency — Is the variant rare enough to be pathogenic?
Assess functional impact — LoF variants have highest prior probability
- `lof` field: HC = high-confidence LoF, LC = low-confidence
- Check `lof_flags` for issues like "NAGNAG_SITE", "PHYLOCSF_WEAK"

Apply ACMG criteria:
- Homozygote counts (`ac_hom`) are relevant for recessive disease analysis