Method×Setting matrices and systematic gap identification
Systematic framework for identifying research opportunities in statistical methodology
Use this skill when: positioning research contributions, finding gaps in methodology literature, identifying unexplored combinations of methods and settings, building literature reviews, or deciding on research directions.
A publishable gap usually falls into one of these types:

| Gap Type | Description | Example |
|---|---|---|
| Method Gap | No method exists for the setting | No mediation analysis for network data |
| Theory Gap | Method exists but lacks theory | Bootstrap for mediation lacks a consistency proof |
| Efficiency Gap | Methods exist but are inefficient | A more efficient doubly robust mediation estimator |
| Robustness Gap | Methods fail under assumption violations | Mediation under measurement error |
| Computational Gap | Existing methods don't scale | Mediation with high-dimensional confounders |
| Extension Gap | Existing method needs generalization | Binary → continuous mediator |
The method-setting matrix is the core tool for finding research gaps systematically:
```r
# Build a method-setting matrix programmatically
create_gap_matrix <- function() {
  methods <- c("Regression", "Weighting/IPW", "DR/AIPW", "TMLE", "ML-based")
  settings <- c("Binary treatment", "Continuous treatment",
                "Time-varying", "Clustered", "High-dimensional",
                "Measurement error", "Missing data", "Network")
  matrix_data <- expand.grid(method = methods, setting = settings)
  matrix_data$status <- "unknown"  # To be filled: "developed", "partial", "gap"
  matrix_data$priority <- NA
  matrix_data$references <- ""
  matrix_data
}
```
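A quick usage sketch; the statuses below are placeholders, not actual literature findings:

```r
# Illustrative fill: mark a few cells after a first literature pass
gm <- create_gap_matrix()
gm$status[gm$method == "Regression" & gm$setting == "Binary treatment"] <- "developed"
gm$status[gm$method == "TMLE" & gm$setting == "Network"] <- "gap"
gm$status[gm$method == "ML-based" & gm$setting == "Measurement error"] <- "partial"
head(gm)
```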
```r
# Visualize the gap matrix
visualize_gaps <- function(gap_matrix) {
  library(ggplot2)
  ggplot(gap_matrix, aes(x = method, y = setting, fill = status)) +
    geom_tile(color = "white") +
    scale_fill_manual(values = c(
      "developed" = "#2ecc71",
      "partial"   = "#f39c12",
      "gap"       = "#e74c3c",
      "unknown"   = "#95a5a6"
    )) +
    theme_minimal() +
    labs(title = "Method × Setting Gap Matrix",
         x = "Method", y = "Setting") +
    theme(axis.text.x = element_text(angle = 45, hjust = 1))
}
```
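Calling it on a filled matrix draws the heat map; for example (statuses again illustrative):

```r
# Draw the heat map for a matrix with one setting flagged as open
gm <- create_gap_matrix()
gm$status <- "developed"
gm$status[gm$setting == "Measurement error"] <- "gap"
visualize_gaps(gm)
```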
Before claiming a gap, verify systematically:
| Step | Action | Tools |
|---|---|---|
| 1 | Search major databases | Google Scholar, Web of Science, Scopus |
| 2 | Search preprint servers | arXiv, bioRxiv, SSRN |
| 3 | Search R packages | CRAN, GitHub, R-universe |
| 4 | Check conference proceedings | ICML, NeurIPS, JSM, ENAR |
| 5 | Search dissertations | ProQuest, university repositories |
| 6 | Email domain experts | 2-3 experts for confirmation |
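Step 1's queries can be assembled mechanically from method and setting keywords. A small helper sketch, using generic quoted-phrase AND syntax rather than any database-specific API:

```r
# Build boolean search strings for every method-setting pair
build_search_queries <- function(method_terms, setting_terms) {
  pairs <- expand.grid(m = method_terms, s = setting_terms,
                       stringsAsFactors = FALSE)
  paste0('"', pairs$m, '" AND "', pairs$s, '"')
}

build_search_queries(
  method_terms  = c("mediation analysis", "natural direct effect"),
  setting_terms = c("network data", "interference")
)
```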
```r
# Systematic verification checklist
verify_gap <- function(topic, keywords) {
  checklist <- list(
    topic = topic,
    databases_searched = c("google_scholar", "web_of_science", "pubmed", "scopus"),
    search_terms = keywords,
    date_range = paste(Sys.Date() - 365 * 5, "to", Sys.Date()),
    results = list(
      papers_found = 0,
      closest_related = c(),
      why_not_the_same = ""
    ),
    expert_consultation = list(
      experts_contacted = c(),
      responses = c()
    ),
    verification_status = "pending"  # pending, confirmed, rejected
  )
  checklist
}
```
```r
# Document the verification
document_verification <- function(gap_description, search_log) {
  cat("## Gap Verification Report\n\n")
  cat("**Gap:**", gap_description, "\n\n")
  cat("**Search Date:**", as.character(Sys.Date()), "\n\n")
  cat("**Databases Searched:**\n")
  for (db in search_log$databases_searched) {
    cat("- ", db, "\n")
  }
  cat("\n**Search Terms:**", paste(search_log$search_terms, collapse = ", "), "\n")
  cat("\n**Conclusion:**", search_log$verification_status, "\n")
}
```
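Used together, the two functions give a minimal audit trail:

```r
# Open a verification log, record the outcome, print the report
log <- verify_gap(
  topic    = "Mediation analysis for network data",
  keywords = c("mediation", "network", "interference")
)
log$verification_status <- "confirmed"  # set after the searches come back empty
document_verification("No mediation analysis for network data", log)
```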
Score each verified gap against weighted criteria:

| Criterion | Weight | Score (1-5) |
|---|---|---|
| Impact (how many benefit?) | 0.25 | ___ |
| Novelty (how new?) | 0.20 | ___ |
| Tractability (can we solve it?) | 0.20 | ___ |
| Timeliness (is it hot now?) | 0.15 | ___ |
| Fit (matches our expertise?) | 0.10 | ___ |
| Publication potential | 0.10 | ___ |
Priority Score = Σ(weight × score)
```r
# Priority scoring function
score_research_gap <- function(
    impact,       # 1-5: how many researchers would benefit
    novelty,      # 1-5: how new/original this is
    tractability, # 1-5: how likely we can solve it
    timeliness,   # 1-5: whether this is currently hot
    fit,          # 1-5: match with our expertise
    publication   # 1-5: publication potential
) {
  weights <- c(0.25, 0.20, 0.20, 0.15, 0.10, 0.10)
  scores <- c(impact, novelty, tractability, timeliness, fit, publication)
  priority <- sum(weights * scores)
  interpretation <- if (priority >= 4.0) {
    "High priority - pursue immediately"
  } else if (priority >= 3.0) {
    "Medium priority - develop further"
  } else if (priority >= 2.0) {
    "Low priority - back burner"
  } else {
    "Skip - not worth pursuing"
  }
  list(
    priority_score = priority,
    interpretation = interpretation,
    breakdown = data.frame(
      criterion = c("Impact", "Novelty", "Tractability",
                    "Timeliness", "Fit", "Publication"),
      weight = weights,
      score = scores,
      weighted = weights * scores
    )
  )
}
```
```r
# Compare multiple gaps
rank_gaps <- function(gaps_list) {
  scores <- sapply(gaps_list, function(g) g$priority_score)
  order(scores, decreasing = TRUE)
}
```
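For example, scoring two candidate gaps (the scores are made up for illustration):

```r
# Score two candidate gaps and rank them
gap_network <- score_research_gap(impact = 4, novelty = 5, tractability = 3,
                                  timeliness = 4, fit = 5, publication = 4)
gap_bounds  <- score_research_gap(impact = 3, novelty = 3, tractability = 4,
                                  timeliness = 2, fit = 3, publication = 3)

gap_network$priority_score  # 4.1
gap_network$interpretation  # "High priority - pursue immediately"
rank_gaps(list(gap_network, gap_bounds))  # 1 2
```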
Systematically map methods against settings to find gaps:
```
              METHODS →
SETTINGS ↓  │ Regression │ Weighting │ DR/TMLE │ ML-based │
────────────┼────────────┼───────────┼─────────┼──────────┤
Binary A    │     ✓      │     ✓     │    ✓    │    ✓     │
Continuous  │     ✓      │     ?     │    ✓    │    ?     │
Time-vary   │     ?      │     ✓     │    ✓    │    ✗     │
Clustered   │     ✓      │     ?     │    ?    │    ✗     │
High-dim    │     ✗      │     ✗     │    ?    │    ✓     │

✓ = Well-developed   ? = Partial/emerging   ✗ = Gap
```
Step 1: Identify Dimensions
For mediation analysis:
| Dimension | Variations |
|---|---|
| Treatment | Binary, continuous, multi-level, time-varying |
| Mediator | Single, multiple, high-dimensional, latent |
| Outcome | Continuous, binary, count, survival, longitudinal |
| Confounding | Measured, unmeasured, time-varying |
| Structure | Single mediator, parallel, sequential, moderated |
| Data | Cross-sectional, longitudinal, clustered, network |
| Assumptions | Standard, relaxed positivity, measurement error |
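These dimensions multiply quickly. Crossing even a subset of the levels above shows how large the setting space is before methods are considered:

```r
# Count the settings implied by four of the dimensions above
dims <- list(
  treatment = c("binary", "continuous", "multi-level", "time-varying"),
  mediator  = c("single", "multiple", "high-dimensional", "latent"),
  outcome   = c("continuous", "binary", "count", "survival", "longitudinal"),
  data      = c("cross-sectional", "longitudinal", "clustered", "network")
)
design_space <- expand.grid(dims, stringsAsFactors = FALSE)
nrow(design_space)  # 4 * 4 * 5 * 4 = 320 settings
```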
Step 2: List Methods
| Method Family | Specific Methods |
|---|---|
| Regression | Baron-Kenny, product of coefficients, difference |
| Weighting | IPW, MSM, sequential g-estimation |
| Doubly Robust | AIPW, TMLE, cross-fitted |
| Semiparametric | Influence function-based |
| Bayesian | MCMC, variational |
| Machine Learning | Causal forests, DML, neural |
| Bounds | Partial identification, sensitivity |
Step 3: Fill and Analyze
Mark each cell:
```
                         │ Product │ Weighting │ DR │ Bounds │
─────────────────────────┼─────────┼───────────┼────┼────────┤
2 mediators, linear      │    ✓    │     ✓     │ ✓  │   ?    │
2 mediators, nonlinear   │    ?    │     ✓     │ ?  │   ✗    │
3+ mediators, linear     │    ?    │     ?     │ ✗  │   ✗    │
3+ mediators, nonlinear  │    ✗    │     ?     │ ✗  │   ✗    │
With measurement error   │    ✗    │     ✗     │ ✗  │   ✗    │
With unmeasured conf.    │    ✗    │     ✗     │ ✗  │   ?    │
```
Gaps identified from the ✗ cells: measurement error is open across every method family (a full row of ✗), 3+ nonlinear mediators has at most partial weighting results, and unmeasured confounding has nothing beyond partial bounds work.
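Once statuses are recorded in the matrix data frame, pulling the open cells is a one-liner; a minimal sketch:

```r
# List the open or partially developed cells of a gap matrix
find_open_gaps <- function(gap_matrix) {
  subset(gap_matrix, status %in% c("gap", "partial"),
         select = c(method, setting, status))
}
```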
Map how assumptions have been relaxed over time:
```
            Standard Mediation (Baron-Kenny 1986)
                            │
          ┌─────────────────┼─────────────────┐
          ↓                 ↓                 ↓
    No unmeasured       Linearity       No interaction
     confounding         assumed           assumed
          │                 │                 │
          ↓                 ↓                 ↓
   ┌──────┴──────┐    Nonparametric      VanderWeele
   ↓             ↓     (Imai 2010)      4-way decomp
Sensitivity    Bounds       │                 │
(Imai 2010) (partial ID)    ↓                 ↓
   │             │    Multiple mediators? Longitudinal?
   ↓             ↓    Measurement error?      │
E-value    Sharp bounds?    │                 ↓
(Ding 2016)      │          ↓            [YOUR GAP?]
   │             ↓     [YOUR GAP?]
   ↓        [YOUR GAP?]
[YOUR GAP?]
```
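The lineage becomes queryable if stored as an edge list; the node names below follow the diagram, and the leaf computation is a sketch:

```r
# Relaxation lineage as an edge list; leaves are candidate gap sites
relaxations <- data.frame(
  from = c("Baron-Kenny 1986", "Baron-Kenny 1986", "Baron-Kenny 1986",
           "No unmeasured confounding", "No unmeasured confounding",
           "Sensitivity (Imai 2010)", "Linearity", "No interaction"),
  to   = c("No unmeasured confounding", "Linearity", "No interaction",
           "Sensitivity (Imai 2010)", "Bounds (partial ID)",
           "E-value (Ding 2016)", "Nonparametric (Imai 2010)",
           "4-way decomposition (VanderWeele)"),
  stringsAsFactors = FALSE
)
setdiff(relaxations$to, relaxations$from)  # endpoints: the [YOUR GAP?] sites
```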
Step 1: Identify Original Assumptions
For a classic method, list ALL assumptions. For Baron-Kenny mediation the tree above starts from three: no unmeasured confounding, linearity, and no treatment-mediator interaction.
Step 2: Trace Relaxation History
For each assumption, find papers that:
- relax it outright (e.g., nonparametric identification),
- quantify sensitivity to it (e.g., sensitivity analysis, E-values), or
- bound the effect without it (partial identification).
Step 3: Find Unexplored Branches
Look for branches that terminate in open questions rather than methods. Worked example for the positivity assumption:
```
       Positivity: P(A = a | X) > ε > 0 for all a, x
                          │
          ┌───────────────┼───────────────┐
          ↓               ↓               ↓
   Near-violations    Practical       Structural
                      violations      violations
          │               │               │
          ↓               ↓               ↓
      Trimming         Overlap       Extrapolation
       weights        assessment        methods
          │               │               │
          ↓               ↓               ↓
    Truncation?      Diagnostics?    Bounds under
                                      violations?
```
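As a concrete instance of the trimming/truncation branch, percentile truncation of inverse-probability weights is the usual quick fix for near-violations; a minimal sketch:

```r
# Truncate IP weights at the 1st/99th percentiles to tame near-violations
truncate_weights <- function(w, lower = 0.01, upper = 0.99) {
  q <- quantile(w, probs = c(lower, upper))
  pmin(pmax(w, q[1]), q[2])
}

set.seed(1)
ps <- runif(500, 0.02, 0.98)  # simulated propensity scores
w  <- 1 / ps                  # weights blow up as ps approaches 0
summary(truncate_weights(w))
```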
Backward: from a recent key paper, trace its reference list back to the foundational work to see which results the field builds on.
Forward: using Google Scholar's "Cited by", follow who builds on that paper; the newest citing papers mark the frontier and your likely competition.
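Both passes amount to breadth-first search over the citation graph. A sketch over a hand-entered citation list (no scraping API is assumed; for the forward pass, record who cites whom instead):

```r
# Snowball outward from a key paper over manually recorded citations
snowball <- function(citations, start, depth = 2) {
  seen <- character(0)
  frontier <- start
  for (d in seq_len(depth)) {
    seen <- union(seen, frontier)
    frontier <- setdiff(unlist(citations[frontier]), seen)
    if (length(frontier) == 0) break
  }
  union(seen, frontier)
}

citations <- list(
  "Recent paper 2023" = c("Imai 2010", "VanderWeele 2014"),
  "Imai 2010"         = c("Baron-Kenny 1986")
)
snowball(citations, "Recent paper 2023")
```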
For any topic, identify these categories of literature:
| Category | Description | How to Find |
|---|---|---|
| Foundational | Original method papers | Most-cited, oldest |
| Textbook | Comprehensive treatments | Citations across subfields |
| Recent reviews | State-of-the-art summaries | "Review" in title, last 5 years |
| Frontier | Latest developments | Top journals, last 2 years |
| Your competition | Groups working on same gap | Recent similar titles |