Sample size and power analysis with effect size estimation and Type I/II error control
Protocol for a priori sample size calculation, effect size estimation, and power determination for common study designs. Ensures studies are adequately powered to detect clinically or scientifically meaningful effects.
Statistical power is the probability of correctly rejecting a false null hypothesis (1 - beta).
Four interrelated quantities (knowing any three determines the fourth):
| Parameter | Symbol | Description | Typical Value |
|---|---|---|---|
| Significance level | alpha | Probability of Type I error (false positive) | 0.05 |
| Power | 1 - beta | Probability of detecting a true effect | 0.80 or 0.90 |
| Effect size | d, r, f, w, etc. | Magnitude of the effect to detect | Study-specific |
| Sample size | N or n | Number of participants needed | Calculated |
Choosing the effect size is the most critical and often the most difficult step. Sources for effect size estimates, in order of preference: meta-analyses or prior studies of the same phenomenon, pilot data, the minimal clinically (or scientifically) important difference, and — only as a last resort — Cohen's conventional benchmarks below:
| Effect Size Measure | Small | Medium | Large | Used With |
|---|---|---|---|---|
| Cohen's d | 0.2 | 0.5 | 0.8 | Two-group mean comparisons |
| Cohen's f | 0.10 | 0.25 | 0.40 | ANOVA |
| Cohen's w | 0.10 | 0.30 | 0.50 | Chi-square tests |
| Pearson r | 0.10 | 0.30 | 0.50 | Correlation |
| Cohen's f-squared | 0.02 | 0.15 | 0.35 | Multiple regression |
| Odds Ratio | 1.5 | 2.5 | 4.3 | Logistic regression |
| Hazard Ratio | 1.3 | 1.7 | 2.5 | Survival analysis |
import numpy as np
def d_to_r(d):
    """Convert Cohen's d to a correlation r (assumes equal group sizes)."""
    denom = np.sqrt(d**2 + 4)
    return d / denom
def r_to_d(r):
    """Convert a correlation r to Cohen's d (inverse of d_to_r)."""
    numerator = 2 * r
    return numerator / np.sqrt(1 - r**2)
def d_to_f(d):
    """Convert Cohen's d to Cohen's f for a two-group one-way ANOVA (f = d/2)."""
    return 0.5 * d
def or_to_d(odds_ratio):
    """Convert an odds ratio to Cohen's d via the logistic-distribution conversion."""
    log_or = np.log(odds_ratio)
    return log_or * np.sqrt(3) / np.pi
def d_to_or(d):
    """Convert Cohen's d to an odds ratio (inverse of or_to_d)."""
    exponent = d * np.pi / np.sqrt(3)
    return np.exp(exponent)
def eta_sq_to_f(eta_sq):
    """Convert eta-squared (proportion of variance explained) to Cohen's f."""
    variance_ratio = eta_sq / (1 - eta_sq)
    return np.sqrt(variance_ratio)
# Two-sample (independent) t-test: solve for the per-group sample size
from statsmodels.stats.power import TTestIndPower

analysis = TTestIndPower()

# Study design inputs
effect_size = 0.5   # Cohen's d
alpha = 0.05        # two-sided significance level
power = 0.80        # target power (1 - beta)
ratio = 1.0         # allocation ratio n2/n1 (1.0 = balanced design)

# Leaving nobs1 unspecified tells solve_power to solve for it
n_per_group = analysis.solve_power(
    effect_size=effect_size,
    alpha=alpha,
    power=power,
    ratio=ratio,
    alternative='two-sided',
)

n_rounded = np.ceil(n_per_group)
print(f"Required n per group: {n_rounded:.0f}")
print(f"Total N: {n_rounded * 2:.0f}")
# Generate power curve: achieved power as a function of per-group sample size
import matplotlib.pyplot as plt

# analysis.power is vectorized over nobs1, so one call evaluates the whole sweep
sample_sizes = np.arange(10, 200)
powers = analysis.power(effect_size=effect_size, nobs1=sample_sizes,
                        alpha=alpha, ratio=ratio, alternative='two-sided')

plt.figure(figsize=(8, 5))
plt.plot(sample_sizes, powers)
# Reference lines: the target power and the solved sample size from above
plt.axhline(y=0.80, color='r', linestyle='--', label='Power = 0.80')
plt.axvline(x=n_per_group, color='g', linestyle='--', label=f'n = {np.ceil(n_per_group):.0f}')
plt.xlabel('Sample Size per Group')
plt.ylabel('Power')
plt.title(f'Power Curve (d={effect_size}, alpha={alpha})')
plt.legend()
plt.grid(True, alpha=0.3)
# bbox_inches='tight' trims surrounding whitespace in the saved figure
plt.savefig('power_curve_ttest.png', dpi=150, bbox_inches='tight')
# Paired-samples t-test: each pair contributes one difference score
from statsmodels.stats.power import TTestPower

analysis = TTestPower()

effect_size = 0.5  # Cohen's d computed on the paired differences
alpha = 0.05
power = 0.80

# Solve for the number of pairs
n = analysis.solve_power(effect_size=effect_size, alpha=alpha,
                         power=power, alternative='two-sided')
print(f"Required n (pairs): {np.ceil(n):.0f}")
# Chi-square goodness-of-fit test
from statsmodels.stats.power import GofChisquarePower

analysis = GofChisquarePower()

effect_size = 0.3  # Cohen's w
alpha = 0.05
power = 0.80
n_bins = 4  # number of cells; test df = n_bins - 1

n = analysis.solve_power(effect_size=effect_size, alpha=alpha,
                         power=power, n_bins=n_bins)
print(f"Required total N: {np.ceil(n):.0f}")
# Alternative: using proportion difference (2x2 table)
from statsmodels.stats.power import NormalIndPower

# Expected proportions in each arm
p1 = 0.30
p2 = 0.45

# Cohen's h: difference of the arcsine-transformed proportions
phi1 = 2 * np.arcsin(np.sqrt(p1))
phi2 = 2 * np.arcsin(np.sqrt(p2))
h = phi1 - phi2

analysis = NormalIndPower()
n_per_group = analysis.solve_power(
    effect_size=abs(h),  # sign of h is irrelevant for a two-sided test
    alpha=alpha,
    power=power,
    ratio=1.0,
    alternative='two-sided',
)
print(f"Required n per group (proportions): {np.ceil(n_per_group):.0f}")
# One-way ANOVA with equal group sizes
from statsmodels.stats.power import FTestAnovaPower

analysis = FTestAnovaPower()

effect_size = 0.25  # Cohen's f
alpha = 0.05
power = 0.80
k_groups = 3  # number of groups

n_per_group = analysis.solve_power(effect_size=effect_size, alpha=alpha,
                                   power=power, k_groups=k_groups)
print(f"Required n per group: {np.ceil(n_per_group):.0f}")
print(f"Total N: {np.ceil(n_per_group) * k_groups:.0f}")
from statsmodels.stats.power import NormalIndPower
import numpy as np
def power_correlation(r, alpha=0.05, power=0.80):
    """Sample size for testing Pearson correlation != 0 (two-sided).

    Uses the Fisher z-transformation approximation:
    n = ((z_alpha + z_beta) / z_r)**2 + 3.

    Parameters
    ----------
    r : float
        Expected population correlation; must satisfy 0 < |r| < 1.
    alpha : float
        Two-sided significance level.
    power : float
        Desired power (1 - beta).

    Returns
    -------
    int
        Required total sample size, rounded up.

    Raises
    ------
    ValueError
        If r is 0 or |r| >= 1 (the formula is undefined there).
    """
    # Local import: in the original file `from scipy import stats` appeared
    # only *after* this definition, so the function silently depended on
    # import ordering at call time. Importing here makes it self-contained.
    from scipy import stats

    if not 0 < abs(r) < 1:
        raise ValueError("r must satisfy 0 < |r| < 1")

    z_alpha = stats.norm.ppf(1 - alpha / 2)  # two-sided critical value
    z_beta = stats.norm.ppf(power)
    z_r = 0.5 * np.log((1 + r) / (1 - r))    # Fisher's z transform of r
    n = ((z_alpha + z_beta) / z_r) ** 2 + 3
    return int(np.ceil(n))
from scipy import stats

# Single design: expected correlation of 0.30
r = 0.30
n = power_correlation(r, alpha=0.05, power=0.80)
print(f"Required N for r={r}: {n}")

# Tabulate required N across a range of plausible effect sizes
for r in [0.10, 0.20, 0.30, 0.40, 0.50]:
    n = power_correlation(r)
    print(f"r = {r:.2f}: N = {n}")
# Multiple regression: sample size for the F test of a subset of predictors
from statsmodels.stats.power import FTestPower
from scipy.optimize import brentq

analysis = FTestPower()

# Parameters
f2 = 0.15          # Cohen's f-squared (medium effect)
alpha = 0.05
power = 0.80
n_predictors = 5   # number of predictors in the full model
n_tested = 2       # number of predictors whose joint contribution is tested

# Numerator df = n_tested; denominator df = N - n_predictors - 1.
# NOTE(review): statsmodels' FTestPower is known to use df_num/df_denom in a
# confusing (effectively reversed) way relative to its documentation, which is
# why FTestPowerF2 was later added as the unambiguous API. Verify the df
# argument order against the installed statsmodels version (or switch to
# FTestPowerF2) before relying on this number.
def power_equation(n):
    """Achieved power at total sample size n minus the target power.

    brentq finds the root of this function, i.e. the N where the achieved
    power equals the target. Returns -1 below the feasible region so the
    bracket stays well-defined.
    """
    df_denom = n - n_predictors - 1
    if df_denom <= 0:
        return -1
    achieved = analysis.power(
        effect_size=np.sqrt(f2),  # Cohen's f = sqrt(f-squared)
        df_num=n_tested,
        df_denom=df_denom,
        alpha=alpha,
    )
    return achieved - power

# Lower bracket n_predictors + 2 guarantees df_denom >= 1
n_required = int(np.ceil(brentq(power_equation, n_predictors + 2, 10000)))
print(f"Required N for {n_predictors} predictors (f2={f2}): {n_required}")
def adjust_for_dropout(n_calculated, dropout_rate):
    """Inflate a calculated sample size to offset expected attrition.

    Parameters
    ----------
    n_calculated : int
        Sample size from the power analysis (per group or total).
    dropout_rate : float
        Expected proportion of participants lost to follow-up; 0 <= rate < 1.

    Returns
    -------
    int
        Inflated sample size, rounded up, so that the expected number of
        completers still meets n_calculated.

    Raises
    ------
    ValueError
        If dropout_rate is outside [0, 1) — a rate of 1 or more would make
        the formula divide by zero or yield a negative sample size.
    """
    if not 0 <= dropout_rate < 1:
        raise ValueError("dropout_rate must be in [0, 1)")
    return int(np.ceil(n_calculated / (1 - dropout_rate)))
# Example: a calculated per-group n of 64 with 20% expected attrition
n_base = 64
dropout = 0.20
n_adjusted = adjust_for_dropout(n_base, dropout)
print(f"Adjusted n per group (for {dropout*100:.0f}% dropout): {n_adjusted}")
When randomization ratios are not 1:1 (e.g., 2:1 treatment:control):
# Unequal (2:1) allocation: solve_power returns the smaller (control) group
from statsmodels.stats.power import TTestIndPower

analysis = TTestIndPower()

n_smaller_group = analysis.solve_power(
    effect_size=0.5,
    alpha=0.05,
    power=0.80,
    ratio=2.0,  # n_treatment / n_control
    alternative='two-sided',
)

n_control = np.ceil(n_smaller_group)
n_treatment = np.ceil(n_smaller_group * 2)
print(f"Control group n: {n_control:.0f}")
print(f"Treatment group n: {n_treatment:.0f}")
print(f"Total N: {n_control + n_treatment:.0f}")
When testing multiple primary outcomes, adjust alpha for multiplicity:
# Bonferroni correction: split the family-wise alpha across primary outcomes
n_outcomes = 3
alpha_adjusted = 0.05 / n_outcomes

n_per_group = TTestIndPower().solve_power(
    effect_size=0.5,
    alpha=alpha_adjusted,
    power=0.80,
    alternative='two-sided',
)
print(f"Adjusted alpha: {alpha_adjusted:.4f}")
print(f"Required n per group (Bonferroni-adjusted): {np.ceil(n_per_group):.0f}")
Pre-computed sample sizes per group for two-sample t-test, alpha=0.05, two-sided:
| Cohen's d | Power 0.80 | Power 0.90 | Power 0.95 |
|---|---|---|---|
| 0.2 (small) | 394 | 527 | 651 |
| 0.3 | 176 | 235 | 290 |
| 0.4 | 100 | 133 | 164 |
| 0.5 (medium) | 64 | 86 | 105 |
| 0.6 | 45 | 60 | 74 |
| 0.7 | 33 | 44 | 55 |
| 0.8 (large) | 26 | 34 | 42 |
| 1.0 | 17 | 23 | 28 |
| 1.2 | 12 | 16 | 20 |
Post-hoc (observed) power analysis using the observed effect size is generally discouraged because observed power is a one-to-one function of the observed p-value and therefore adds no new information, and a nonsignificant result necessarily corresponds to low observed power, making the argument circular.
Acceptable alternatives for negative studies include reporting confidence intervals around the effect estimate and a sensitivity analysis of the minimal detectable effect:
# Sensitivity analysis: what effect could we detect?
from statsmodels.stats.power import TTestIndPower

analysis = TTestIndPower()

actual_n = 50  # realized sample size per group
# Leaving effect_size unspecified solves for the minimal detectable d
detectable_d = analysis.solve_power(
    nobs1=actual_n,
    alpha=0.05,
    power=0.80,
    alternative='two-sided',
)
print(f"With n={actual_n} per group, the study could detect d >= {detectable_d:.3f} at 80% power")