Name: Screploading
Author: pwwang

Screploading | Skills Pool

# Process enablement
[ScRepLoading]
# Enable caching (default: true)
cache = true

# Input specification
[ScRepLoading.in]
# metafile (file, required): tab-delimited sample metadata file
# holding the TCR/BCR data paths.
metafile = "path/to/sample_info.txt"

[ScRepLoading.envs]
# type: choice - Data type to load (default: "auto")
# Options:
#   "TCR" - T cell receptor data
#   "BCR" - B cell receptor data
#   "auto" - Auto-detect from column names in sample info
# Note: If both TCRData and BCRData present, TCR selected by default
type = "auto"

# format: choice - Format of TCR/BCR data files (optional)
# Options: auto, 10X, AIRR, BD, Dandelion, Immcantation,
#          JSON, MiXCR, Omniscope, ParseBio, TRUST4, WAT3R
# If not provided, scRepertoire guesses from filename
format = "auto"

# combineTCR: json - Extra arguments for scRepertoire::combineTCR()
# See: https://rdrr.io/github/ncborcherding/scRepertoire/man/combinetcr
# Written as a TOML inline table (key = value). JSON-style {"key": value}
# is not valid TOML. An inline table is closed: use either this form or a
# separate [ScRepLoading.envs.combineTCR] table in one file, never both.
combineTCR = { samples = true }

# combineBCR: json - Extra arguments for scRepertoire::combineBCR()
# See: https://rdrr.io/github/ncborcherding/scRepertoire/man/combinebcr
# Same rule as combineTCR: inline table here, or a
# [ScRepLoading.envs.combineBCR] table, but not both in one file.
combineBCR = { samples = true }

# exclude: auto or list - Columns to exclude from metadata (default: auto)
# auto = ["BCRData", "TCRData", "RNAData"]
# Can also be comma-separated string: "BCRData,TCRData,RNAData"
exclude = "auto"

# tmpdir: str - Temporary directory for symbolic links (default: "/tmp")
tmpdir = "/tmp"

[ScRepLoading.envs.combineTCR]
# samples: bool or list - Sample labels (default: true)
# true = use Sample column from metadata
# false = no sample grouping
# list = explicit sample labels
samples = true

# ID: str - Additional sample labeling (optional)
# Adds prefix to barcodes to prevent duplicate issues
# TOML has no null literal: leave the key out to keep it unset,
# or uncomment and fill in a label.
# ID = "<label>"

# removeNA: bool - Remove cells with missing chain values (default: false)
# true = filter out cells with NA in any chain
# false = include cells with 1 NA value (default)
removeNA = false

# removeMulti: bool - Remove cells with >2 chains (default: false)
# true = filter out multi-chain cells (>2 chains)
# false = include multi-chain cells (default)
removeMulti = false

# filterMulti: bool - Select highest-expression chain for multi-chain (TCR default: false)
# true = keep highest UMI count chain if multiple chains present
# false = keep all chains (default)
filterMulti = false

# filterNonproductive: bool - Remove non-productive rearrangements (default: true)
# true = filter out non-functional receptors
# false = include all rearrangements
filterNonproductive = true

[ScRepLoading.envs.combineBCR]
# samples: bool or list - Sample labels (default: true)
samples = true

# ID: str - Additional sample labeling (optional)
# TOML has no null literal: leave the key out to keep it unset,
# or uncomment and fill in a label.
# ID = "<label>"

# call.related.clones: bool - Cluster related BCR clones (default: true)
# Uses nucleotide sequence + V gene with Levenshtein distance
# false = uses V gene + amino acid sequence for CTstrict
# The key must be quoted: unquoted, TOML reads the dots as nesting
# (call -> related -> clones) instead of one argument name.
"call.related.clones" = true

# threshold: num - Normalized edit distance for clustering (default: 0.85)
# Higher = more stringent clustering (sequences must be more similar to group)
# Lower = more permissive clustering (more sequences grouped)
# Range: 0.0 - 1.0
threshold = 0.85

# removeNA: bool - Remove cells with missing chain values (default: false)
removeNA = false

# removeMulti: bool - Remove cells with >2 chains (default: false)
removeMulti = false

# filterMulti: bool - Select highest-expression chain (default: true)
# true = keep highest UMI count chain
# false = keep all chains
filterMulti = true

# filterNonproductive: bool - Remove non-productive rearrangements (default: true)
filterNonproductive = true

# Minimal configuration (10x TCR data)
[SampleInfo.in]
infile = "sample_info.txt"

# Sample info file contents:
# Sample  Age  Sex  Diagnosis  RNAData       TCRData
# C1      62   F    Colitis    /data/C1/rna  /data/C1/tcr
# C2      71   F    Colitis    /data/C2/rna  /data/C2/tcr

# ScRepLoading is enabled automatically when a TCRData column is present,
# so no explicit ScRepLoading section is needed.

# Single sample with format specification
[ScRepLoading]
cache = true

[ScRepLoading.in]
metafile = "metadata/single_sample.txt"

# Data type and file format are set explicitly rather than auto-detected.
[ScRepLoading.envs]
format = "10X"
type = "TCR"

[ScRepLoading.envs.combineTCR]
filterNonproductive = true
removeNA = true

# Multi-sample BCR analysis with clustering
[ScRepLoading]
cache = true

[ScRepLoading.in]
metafile = "metadata/bcr_samples.txt"

[ScRepLoading.envs]
type = "BCR"

[ScRepLoading.envs.combineBCR]
# Quoted so TOML treats the dotted name as one key, not nested tables.
"call.related.clones" = true
# Default value. Raise it for more stringent clustering, lower it for
# more permissive clustering.
threshold = 0.85
filterMulti = true
removeMulti = false

# Non-10x format (AIRR)
[ScRepLoading]

[ScRepLoading.in]
metafile = "metadata/airr_samples.txt"

[ScRepLoading.envs]
type = "auto"
format = "AIRR"

[ScRepLoading.envs.combineTCR]
removeMulti = false
removeNA = false

# TRUST4 format
[ScRepLoading]

[ScRepLoading.in]
metafile = "metadata/trust4_samples.txt"

[ScRepLoading.envs]
format = "TRUST4"

[ScRepLoading.envs.combineTCR]
filterNonproductive = true
removeNA = true

# Pattern 1: 10x Genomics TCR data (most common)

# sample_info.txt
# Sample    RNAData             TCRData
# Sample1   /data/Sample1/rna   /data/Sample1/vdj
# Sample2   /data/Sample2/rna   /data/Sample2/vdj

[SampleInfo.in]
infile = "sample_info.txt"

# Each TCR directory must contain filtered_contig_annotations.csv.
# No ScRepLoading configuration is needed; the process is auto-detected.

# Pattern 2: both TCR and BCR data (auto-detect picks TCR)

# sample_info.txt
# Sample    RNAData             TCRData             BCRData
# Sample1   /data/Sample1/rna   /data/Sample1/tcr   /data/Sample1/bcr

[SampleInfo.in]
infile = "sample_info.txt"

# TCR is selected by default when both columns are present.
# To analyze BCR instead, set the type explicitly:
[ScRepLoading.envs]
type = "BCR"

# Pattern 3: filtered TCR data (remove NA and multi-chain)
[ScRepLoading]

[ScRepLoading.in]
metafile = "metadata/tcr_filtered.txt"

[ScRepLoading.envs.combineTCR]
# Remove cells with missing chains
removeNA = true
# Remove cells with >2 chains
removeMulti = true
# Remove non-functional receptors
filterNonproductive = true

# Pattern 4: relaxed filtering for exploratory analysis
[ScRepLoading]

[ScRepLoading.in]
metafile = "metadata/tcr_exploratory.txt"

[ScRepLoading.envs.combineTCR]
# Keep cells with single chain
removeNA = false
# Include multi-chain cells for inspection
removeMulti = false
# Include non-productive rearrangements
filterNonproductive = false

# Pattern 5: BCR clone clustering with custom threshold
[ScRepLoading]

[ScRepLoading.in]
metafile = "metadata/bcr_clustering.txt"

[ScRepLoading.envs.combineBCR]
# Quoted so TOML treats the dotted name as one key, not nested tables.
"call.related.clones" = true
threshold = 0.90  # More stringent clustering (lower = more permissive)

# Pattern 6: sample-specific labeling
[ScRepLoading]

[ScRepLoading.in]
metafile = "metadata/longitudinal.txt"

[ScRepLoading.envs.combineTCR]
# Use Sample column from metadata
samples = true
# Add Timepoint as additional label prefix
# NOTE(review): confirm whether "Timepoint" is resolved as a metadata column
# or passed through as a literal label.
ID = "Timepoint"

# Creates barcodes like: "Sample1_Timepoint1_AAACCC..."
# Prevents duplicate barcode issues across timepoints

# Pattern 7: custom metadata exclusion
[ScRepLoading]

[ScRepLoading.in]
metafile = "metadata/custom_columns.txt"

[ScRepLoading.envs]
# Columns listed here are left out of the scRepertoire object metadata,
# which reduces metadata clutter in downstream analysis.
exclude = [
    "RNAData",
    "TCRData",
    "BCRData",
    "ExperimentID",
    "Batch",
]

# Pattern 8: paired chain analysis (TRA+TRB for TCR)
# By default ScRepLoading pairs chains at the cell barcode level
# when both TRA and TRB are present.

[ScRepLoading]

[ScRepLoading.in]
metafile = "metadata/tcr_paired.txt"

[ScRepLoading.envs.combineTCR]
# Keep single-chain cells for inspection
removeNA = false
# Don't filter multi-chain cells
filterMulti = false

# Filtering for true paired chains can happen later, in downstream
# processes such as CDR3Clustering.

Screploading

ScRepLoading Process Configuration

Purpose

When to Use

Configuration Structure

Screploading

ScRepLoading Process Configuration

Purpose

When to Use

Configuration Structure

Process Enablement

Input Specification

Environment Variables

Detailed combineTCR Parameters

Detailed combineBCR Parameters

Configuration Examples

Minimal Configuration (10x TCR Data)

Single Sample with Format Specification

Multi-Sample BCR Analysis with Clustering

Non-10x Format (AIRR)

TRUST4 Format

Common Patterns

Pattern 1: 10x Genomics TCR Data (Most Common)

Pattern 2: Both TCR and BCR Data (Auto-Detect TCR)

Pattern 3: Filtered TCR Data (Remove NA and Multi-Chain)

Pattern 4: Relaxed Filtering for Exploratory Analysis

Pattern 5: BCR Clone Clustering with Custom Threshold

Pattern 6: Sample-Specific Labeling

Pattern 7: Custom Metadata Exclusion

Pattern 8: Paired Chain Analysis (TRA+TRB for TCR)

Dependencies

Upstream Processes

Downstream Processes

Validation Rules

Common Configuration Errors

File Format Requirements

Chain Compatibility

Troubleshooting

Issue: ScRepLoading not running

Issue: "File format not recognized"

Issue: "No cells loaded" or empty output

Issue: Duplicate barcode errors

Issue: BCR clustering too strict/too permissive

Issue: Single-chain cells lost

Issue: Metadata columns missing from output

Issue: Cannot load from specific directory path

Issue: Combining TCR and BCR data separately

Issue: Integration with ScRepCombiningExpression fails

Nanoclaw Repl

Bioinformatics

Smart Explore

Vector Database Engineer

Skin Health Analyzer

Scanpy