Statistical analysis for psychology research using R. Use when conducting power analysis, data cleaning, hypothesis testing (t-test, ANOVA, regression, mixed models), effect size calculation, data visualization with ggplot2, or generating APA-formatted results tables. Also use for meta-analysis (metafor) and structural equation modeling (lavaan).
42:T1eed,
This skill provides R code templates and guidance for psychology data analysis.
# Required packages — install if not present
required_packages <- c(
"tidyverse", # data wrangling + ggplot2
"pwr", # power analysis
"effectsize", # Cohen's d, eta-squared, etc.
"lme4", # mixed effects models
"lmerTest", # p-values for mixed models
"lavaan", # SEM
"metafor", # meta-analysis
"psych", # psychometric functions (alpha, EFA)
"apaTables", # APA-formatted tables
"report", # automatic APA reporting
"here", # reproducible file paths
"janitor" # data cleaning utilities
)
install.packages(setdiff(required_packages, rownames(installed.packages())))
lapply(required_packages, library, character.only = TRUE)
# Always set seed for reproducibility
set.seed(42)
library(pwr)
# Independent samples t-test
pwr.t.test(d = 0.5, sig.level = .05, power = .80, type = "two.sample")
# Paired t-test
pwr.t.test(d = 0.5, sig.level = .05, power = .80, type = "paired")
# One-way ANOVA (k groups)
pwr.anova.test(k = 3, f = 0.25, sig.level = .05, power = .80)
# Correlation
pwr.r.test(r = 0.3, sig.level = .05, power = .80)
# Multiple regression (u predictors)
pwr.f2.test(u = 3, f2 = 0.15, sig.level = .05, power = .80)
# Chi-square
pwr.chisq.test(w = 0.3, df = 2, sig.level = .05, power = .80)
library(tidyverse)
library(here)
# Load data
raw_data <- read_csv(here("data", "raw_data.csv"))
# ── Exclusion criteria ────────────────────────────────────────────────────────
# 1. Completion time (< 50% of median = speeder)
median_time <- median(raw_data$duration_s, na.rm = TRUE)
excluded_speeders <- raw_data %>%
filter(duration_s < 0.5 * median_time)
# 2. Attention checks
excluded_attn <- raw_data %>%
filter(attn_check_1 != 4 | attn_check_2 != 2) # expected responses
# 3. Apply exclusions
data_clean <- raw_data %>%
filter(
duration_s >= 0.5 * median_time,
attn_check_1 == 4,
attn_check_2 == 2
)
# Report exclusions
cat("Original N:", nrow(raw_data), "\n")
cat("Excluded (speeders):", nrow(excluded_speeders), "\n")
cat("Excluded (attention):", nrow(excluded_attn) - sum(excluded_attn$id %in% excluded_speeders$id), "\n")
cat("Final N:", nrow(data_clean), "\n")
# ── Scale scoring ─────────────────────────────────────────────────────────────
# Reverse score items (replace 4 with relevant max)
data_clean <- data_clean %>%
mutate(
item_3r = 8 - item_3, # 7-point scale
item_7r = 8 - item_7
)
# Compute scale means
data_clean <- data_clean %>%
mutate(
scale_dv = rowMeans(select(., item_1, item_2, item_3r), na.rm = TRUE),
scale_cv = rowMeans(select(., item_4, item_5), na.rm = TRUE)
)
# Internal reliability
library(psych)
alpha_dv <- alpha(data_clean %>% select(item_1, item_2, item_3r))
cat("DV reliability: α =", round(alpha_dv$total$raw_alpha, 3), "\n")
# ── Independent samples t-test ────────────────────────────────────────────────
t_result <- t.test(dv ~ condition, data = data_clean, var.equal = FALSE)
d_result <- cohens_d(dv ~ condition, data = data_clean)
# APA report
cat(sprintf(
"t(%s) = %.2f, p = %s, d = %.2f, 95%% CI [%.2f, %.2f]\n",
round(t_result$parameter, 1),
t_result$statistic,
ifelse(t_result$p.value < .001, "< .001", sprintf("= .%03d", round(t_result$p.value * 1000))),
d_result$Cohens_d,
d_result$CI_low,
d_result$CI_high
))
# ── One-way ANOVA ─────────────────────────────────────────────────────────────
library(effectsize)
aov_result <- aov(dv ~ condition, data = data_clean)
eta_sq <- eta_squared(aov_result, partial = TRUE, ci = 0.90)
summary(aov_result)
# Post-hoc (Tukey)
TukeyHSD(aov_result)
# ── Multiple regression ───────────────────────────────────────────────────────
model <- lm(dv ~ pred1 + pred2 + pred3, data = data_clean)
summary(model)
# Standardized betas
library(effectsize)
standardize_parameters(model)
# ── Mixed effects model ───────────────────────────────────────────────────────
library(lme4); library(lmerTest)
mixed_model <- lmer(dv ~ time * condition + (1 + time | subject_id), data = data_clean)
summary(mixed_model)
# Effect sizes for mixed models
library(effectsize)
eta_squared(mixed_model)
# Function to format p values APA style
apa_p <- function(p) {
if (p < .001) "< .001"
else sprintf("= .%03d", round(p * 1000))
}
# Function to generate APA t-test string
apa_ttest <- function(t_result, d_result) {
sprintf("t(%s) = %.2f, p %s, d = %.2f, 95%% CI [%.2f, %.2f]",
round(t_result$parameter, 1),
t_result$statistic,
apa_p(t_result$p.value),
d_result$Cohens_d,
d_result$CI_low, d_result$CI_high
)
}
# Usage example
result_string <- apa_ttest(t_result, d_result)
cat("H1:", result_string)
library(tidyverse)
# ── Bar plot with error bars (means ± 95% CI) ─────────────────────────────────
data_summary <- data_clean %>%
group_by(condition) %>%
summarize(
M = mean(dv),
SE = sd(dv) / sqrt(n()),
CI_lower = M - 1.96 * SE,
CI_upper = M + 1.96 * SE,
.groups = "drop"
)
ggplot(data_summary, aes(x = condition, y = M, fill = condition)) +
geom_col(width = 0.6, alpha = 0.85) +
geom_errorbar(aes(ymin = CI_lower, ymax = CI_upper), width = 0.2) +
labs(x = "Condition", y = "Mean DV Score", title = "Figure 1") +
theme_classic() +
theme(legend.position = "none") +
scale_fill_grey() # APA-friendly greyscale
# Save for manuscript
ggsave("figures/figure1.pdf", width = 6, height = 4)
# ── Violin plot ───────────────────────────────────────────────────────────────
ggplot(data_clean, aes(x = condition, y = dv, fill = condition)) +
geom_violin(alpha = 0.5, trim = FALSE) +
geom_boxplot(width = 0.1, outlier.shape = NA) +
stat_summary(fun = mean, geom = "point", size = 3, shape = 18) +
theme_classic() +
scale_fill_grey()
library(metafor)
# Input: effect sizes from primary studies
meta_data <- data.frame(
study = c("Smith2020", "Jones2021", "Lee2022"),
yi = c(0.45, 0.32, 0.58), # Cohen's d
vi = c(0.04, 0.05, 0.06) # variance
)
# Random-effects meta-analysis
res <- rma(yi, vi, data = meta_data, method = "REML")
summary(res)
# Forest plot
forest(res, slab = meta_data$study)
# Funnel plot (publication bias)
funnel(res)
regtest(res) # Egger's test
Always provide results in this format for 礼部 to embed in the manuscript:
# Analysis Results Summary (APA Format)
## Descriptive Statistics
M = [x.xx], SD = [x.xx], N = [N], α = [.xx]
## H1: [Hypothesis statement]
[APA result string]
Interpretation: [one sentence: supported / not supported, why]
## H2: ...
Save results to memory/analyses/[study-name]/results.md.