Radiocarbon Dating Analysis

Radiocarbon (14C) dating is the most widely used chronometric technique in archaeology, providing calendar age estimates for organic materials up to ~50,000 years old. This skill covers calibration against internationally agreed reference curves, Bayesian sequence modeling, wiggle matching, and proper reporting conventions.

Background

Raw radiocarbon measurements are reported as "conventional radiocarbon ages" in years BP (Before Present, where Present = 1950 CE). Because atmospheric 14C concentration has varied over time, a calibration step is required to convert these measurements into calendar ages. The IntCal20, SHCal20, and Marine20 calibration curves (Reimer et al. 2020) provide the relationship between radiocarbon years and calendar years.

Key Concepts

BP (Before Present): Radiocarbon years before 1950 CE
cal BP: Calibrated (calendar) years before 1950 CE
BCE/CE: Calendar years; BCE = cal BP + 1950 (for dates before 1950)
: Per-mil deviation of 14C/12C ratio from a standard

import numpy as np import pandas as pd import requests from io import StringIO from pathlib import Path def load_calibration_curve(curve_name: str = "intcal20") -> pd.DataFrame: """ Load a calibration curve from file or download from intcal.org. Parameters ---------- curve_name : str One of 'intcal20', 'shcal20', 'marine20' Returns ------- pd.DataFrame Columns: cal_BP, c14_age, c14_error, delta14C, delta14C_error """ curve_urls = { "intcal20": "https://intcal.org/curves/intcal20.14c", "shcal20": "https://intcal.org/curves/shcal20.14c", "marine20": "https://intcal.org/curves/marine20.14c", } local_paths = { "intcal20": Path("intcal20.14c"), "shcal20": Path("shcal20.14c"), "marine20": Path("marine20.14c"), } curve_name = curve_name.lower() if curve_name not in curve_urls: raise ValueError(f"Unknown curve: {curve_name}. Choose from {list(curve_urls)}") local = local_paths[curve_name] if local.exists(): raw = local.read_text() else: print(f"Downloading {curve_name} from intcal.org ...") response = requests.get(curve_urls[curve_name], timeout=30) response.raise_for_status() raw = response.text local.write_text(raw) # Skip comment lines starting with '#' lines = [ln for ln in raw.splitlines() if not ln.strip().startswith("#") and ln.strip()] text = "\n".join(lines) df = pd.read_csv( StringIO(text), header=None, names=["cal_BP", "c14_age", "c14_error", "delta14C", "delta14C_error"], sep=r"\s*,\s*", engine="python", ) df = df.sort_values("cal_BP").reset_index(drop=True) return df def _interpolate_curve(curve_df: pd.DataFrame, cal_years: np.ndarray): """Interpolate curve c14_age and c14_error at given cal BP values.""" mu = np.interp(cal_years, curve_df["cal_BP"].values, curve_df["c14_age"].values) sigma = np.interp(cal_years, curve_df["cal_BP"].values, curve_df["c14_error"].values) return mu, sigma

import matplotlib.pyplot as plt import matplotlib.gridspec as gridspec from scipy.stats import norm def calibrate_date( c14_age: float, c14_error: float, curve_df: pd.DataFrame, cal_range: tuple[float, float] = (0, 55000), resolution: float = 1.0, ) -> pd.DataFrame: """ Calibrate a single radiocarbon date against a calibration curve. Parameters ---------- c14_age : float Conventional radiocarbon age in BP. c14_error : float 1-sigma laboratory measurement error. curve_df : pd.DataFrame Calibration curve loaded by load_calibration_curve(). cal_range : tuple (min_cal_BP, max_cal_BP) search window. resolution : float Step size in calendar years (default 1 year). Returns ------- pd.DataFrame Columns: cal_BP, probability (unnormalized then normalized), cal_CE. """ cal_years = np.arange(cal_range[0], cal_range[1] + resolution, resolution) curve_mu, curve_sigma = _interpolate_curve(curve_df, cal_years) # Combined variance: measurement error + calibration curve error combined_sigma = np.sqrt(c14_error**2 + curve_sigma**2) # Likelihood: N(c14_age | curve_mu, combined_sigma) prob = norm.pdf(c14_age, loc=curve_mu, scale=combined_sigma) prob /= prob.sum() # normalize to sum to 1 cal_CE = 1950 - cal_years # negative = BCE return pd.DataFrame({ "cal_BP": cal_years, "cal_CE": cal_CE, "probability": prob, }) def hpd_intervals(cal_dist: pd.DataFrame, confidence: float = 0.95) -> list[tuple]: """ Compute Highest Posterior Density (HPD) intervals. Parameters ---------- cal_dist : pd.DataFrame Output of calibrate_date(). confidence : float Credible mass (e.g. 0.68 or 0.95). Returns ------- list of (start_cal_BP, end_cal_BP, probability_mass) tuples """ sorted_idx = np.argsort(cal_dist["probability"].values)[::-1] cum_prob = np.cumsum(cal_dist["probability"].values[sorted_idx]) in_hpd = np.zeros(len(cal_dist), dtype=bool) in_hpd[sorted_idx[cum_prob <= confidence]] = True # Also include the first point that pushes past the threshold if cum_prob[np.sum(cum_prob <= confidence)] <= confidence + 1e-9: idx = np.sum(cum_prob <= confidence) if idx < len(sorted_idx): in_hpd[sorted_idx[idx]] = True cal_years = cal_dist["cal_BP"].values intervals = [] in_block = False start = None block_prob = 0.0 for i, (yr, flag, p) in enumerate( zip(cal_years, in_hpd, cal_dist["probability"].values) ): if flag and not in_block: in_block = True start = yr block_prob = 0.0 if flag: block_prob += p if not flag and in_block: intervals.append((start, cal_years[i - 1], block_prob)) in_block = False if in_block: intervals.append((start, cal_years[-1], block_prob)) return intervals def summary_statistics(cal_dist: pd.DataFrame) -> dict: """ Compute summary statistics for a calibrated date distribution. Returns dict with keys: mean_cal_BP, median_cal_BP, mode_cal_BP, sigma_cal_BP, hpd68, hpd95 (each as list of interval tuples). """ probs = cal_dist["probability"].values years = cal_dist["cal_BP"].values mean_bp = np.sum(years * probs) variance = np.sum(probs * (years - mean_bp) ** 2) sigma = np.sqrt(variance) cdf = np.cumsum(probs) median_bp = years[np.searchsorted(cdf, 0.5)] mode_bp = years[np.argmax(probs)] return { "mean_cal_BP": float(mean_bp), "median_cal_BP": float(median_bp), "mode_cal_BP": float(mode_bp), "sigma_cal_BP": float(sigma), "mean_cal_CE": float(1950 - mean_bp), "hpd68": hpd_intervals(cal_dist, 0.68), "hpd95": hpd_intervals(cal_dist, 0.95), } def plot_calibrated_date( cal_dist: pd.DataFrame, title: str = "Calibrated Radiocarbon Date", c14_age: float = None, c14_error: float = None, curve_df: pd.DataFrame = None, show_hpd: bool = True, output_path: str = None, ) -> None: """ Plot a calibrated radiocarbon date with optional calibration curve overlay. Parameters ---------- cal_dist : pd.DataFrame Output of calibrate_date(). title : str Plot title. c14_age, c14_error : float, optional Original measurement for Gaussian overlay on 14C axis. curve_df : pd.DataFrame, optional Calibration curve to display. show_hpd : bool Shade 95% HPD intervals. output_path : str, optional Save figure to this path if provided. """ fig = plt.figure(figsize=(12, 8)) gs = gridspec.GridSpec(2, 2, width_ratios=[3, 1], height_ratios=[1, 3], hspace=0.05, wspace=0.05) ax_curve = fig.add_subplot(gs[0, 0]) # top: 14C curve ax_cal = fig.add_subplot(gs[1, 0]) # bottom-left: calendar dist ax_c14 = fig.add_subplot(gs[1, 1]) # bottom-right: 14C Gaussian cal_years = cal_dist["cal_BP"].values probs = cal_dist["probability"].values # --- Calibration curve panel --- if curve_df is not None: mask = (curve_df["cal_BP"] >= cal_years.min()) & \ (curve_df["cal_BP"] <= cal_years.max()) sub = curve_df[mask] ax_curve.plot(sub["cal_BP"], sub["c14_age"], "k-", lw=1.5, label="IntCal20") ax_curve.fill_between( sub["cal_BP"], sub["c14_age"] - 2 * sub["c14_error"], sub["c14_age"] + 2 * sub["c14_error"], alpha=0.3, color="gray", ) if c14_age is not None: ax_curve.axhline(c14_age, color="red", lw=1, ls="--") ax_curve.axhspan(c14_age - c14_error, c14_age + c14_error, color="red", alpha=0.15) ax_curve.set_ylabel("14C Age (BP)") ax_curve.xaxis.set_visible(False) ax_curve.invert_xaxis() # --- Calendar probability panel --- ax_cal.fill_between(cal_years, probs, alpha=0.5, color="steelblue") ax_cal.plot(cal_years, probs, color="steelblue", lw=1) if show_hpd: intervals_95 = hpd_intervals(cal_dist, 0.95) for start, end, mass in intervals_95: mask = (cal_years >= start) & (cal_years <= end) ax_cal.fill_between(cal_years[mask], probs[mask], alpha=0.6, color="steelblue") ax_cal.set_xlabel("Calibrated Age (cal BP)") ax_cal.set_ylabel("Probability Density") ax_cal.invert_xaxis() # --- 14C Gaussian panel --- if c14_age is not None and c14_error is not None: y_range = np.linspace(c14_age - 4 * c14_error, c14_age + 4 * c14_error, 300) gauss = norm.pdf(y_range, c14_age, c14_error) ax_c14.plot(gauss, y_range, color="red", lw=1.5) ax_c14.fill_betweenx(y_range, gauss, alpha=0.3, color="red") ax_c14.yaxis.set_visible(False) ax_c14.set_xlabel("Probability") fig.suptitle(title, fontsize=14, fontweight="bold") if output_path: plt.savefig(output_path, dpi=150, bbox_inches="tight") plt.tight_layout() plt.show()

from scipy.optimize import minimize from typing import Optional def sequence_model( dates: list[dict], constraints: list[tuple[int, int]] = None, n_iterations: int = 50000, burn_in: int = 10000, seed: int = 42, ) -> list[dict]: """ Bayesian sequence model using MCMC for stratigraphically ordered dates. Each date dict must have keys: label: str c14_age: float (BP) c14_error: float curve_df: pd.DataFrame (calibration curve) constraints: list of (i, j) meaning date[i] is OLDER than date[j] (i.e., date[i].cal_BP > date[j].cal_BP) Returns list of dicts with posterior samples and statistics. Parameters ---------- dates : list of dict Each entry: {label, c14_age, c14_error, curve_df} constraints : list of (int, int) Ordering constraints: (older_index, younger_index) n_iterations : int Total MCMC iterations. burn_in : int Discarded warm-up iterations. seed : int Random seed for reproducibility. """ rng = np.random.default_rng(seed) n = len(dates) # Initialize theta (cal BP values) from unconstrained calibrations theta = np.zeros(n) cal_dists = [] for i, d in enumerate(dates): cd = calibrate_date(d["c14_age"], d["c14_error"], d["curve_df"]) cal_dists.append(cd) theta[i] = float(summary_statistics(cd)["median_cal_BP"]) if constraints is None: constraints = [] def log_likelihood(th): ll = 0.0 for i, d in enumerate(dates): cd = cal_dists[i] # Interpolate probability at this theta value prob = np.interp(th[i], cd["cal_BP"].values, cd["probability"].values, left=0.0, right=0.0) if prob <= 0: return -np.inf ll += np.log(prob) return ll def satisfies_constraints(th): for (older, younger) in constraints: if th[older] <= th[younger]: # older must have larger cal_BP return False return True # Metropolis-Hastings MCMC proposal_sigma = 50.0 # years current_ll = log_likelihood(theta) samples = np.zeros((n_iterations - burn_in, n)) n_accepted = 0 for it in range(n_iterations): # Propose new theta by perturbing one date at a time idx = rng.integers(0, n) theta_prop = theta.copy() theta_prop[idx] += rng.normal(0, proposal_sigma) if not satisfies_constraints(theta_prop): continue prop_ll = log_likelihood(theta_prop) log_alpha = prop_ll - current_ll if np.log(rng.uniform()) < log_alpha: theta = theta_prop current_ll = prop_ll n_accepted += 1 if it >= burn_in: samples[it - burn_in] = theta acceptance_rate = n_accepted / n_iterations print(f"MCMC acceptance rate: {acceptance_rate:.3f}") results = [] for i, d in enumerate(dates): samp = samples[:, i] results.append({ "label": d["label"], "c14_age": d["c14_age"], "c14_error": d["c14_error"], "posterior_samples": samp, "mean_cal_BP": float(np.mean(samp)), "median_cal_BP": float(np.median(samp)), "std_cal_BP": float(np.std(samp)), "hpd95_lower": float(np.percentile(samp, 2.5)), "hpd95_upper": float(np.percentile(samp, 97.5)), "mean_cal_CE": float(1950 - np.mean(samp)), }) return results def wiggle_match( radiocarbon_ages: list[float], errors: list[float], time_gaps: list[float], curve_df: pd.DataFrame, search_range: tuple = (0, 10000), ) -> dict: """ Wiggle matching: fit a floating tree-ring sequence to the calibration curve. Parameters ---------- radiocarbon_ages : list of float 14C ages (BP) for each ring, outermost first. errors : list of float Measurement errors for each ring. time_gaps : list of float Known gaps in years between rings (len = len(radiocarbon_ages) - 1). curve_df : pd.DataFrame Calibration curve. search_range : tuple (min_cal_BP, max_cal_BP) for the outermost ring. Returns ------- dict with best_fit_cal_BP (outermost ring), chi2, and probability array. """ n_rings = len(radiocarbon_ages) candidate_years = np.arange(search_range[0], search_range[1]) chi2_values = np.zeros(len(candidate_years)) for idx, outer_cal_BP in enumerate(candidate_years): chi2 = 0.0 for ring_i in range(n_rings): gap = sum(time_gaps[:ring_i]) if ring_i > 0 else 0 cal_bp_ring = outer_cal_BP + gap curve_mu, curve_sigma = _interpolate_curve(curve_df, np.array([cal_bp_ring])) combined_var = errors[ring_i]**2 + curve_sigma[0]**2 chi2 += (radiocarbon_ages[ring_i] - curve_mu[0])**2 / combined_var chi2_values[idx] = chi2 prob = np.exp(-0.5 * chi2_values) prob /= prob.sum() best_idx = np.argmin(chi2_values) return { "best_fit_cal_BP": float(candidate_years[best_idx]), "best_fit_cal_CE": float(1950 - candidate_years[best_idx]), "chi2_min": float(chi2_values[best_idx]), "candidate_years": candidate_years, "probability": prob, }

# Example 1: Single date calibration # A charcoal sample from a hearth, measured at 2850 ± 35 BP def example_single_date(): print("=== Example 1: Single Date Calibration ===\n") # Load calibration curve (will download on first run) curve = load_calibration_curve("intcal20") print(f"Loaded IntCal20: {len(curve)} data points") # Calibrate the date c14_age = 2850 c14_error = 35 cal_dist = calibrate_date(c14_age, c14_error, curve_df=curve) # Summary statistics stats = summary_statistics(cal_dist) print(f"\nRadiocarbon age: {c14_age} ± {c14_error} BP") print(f"Calibrated mean: {stats['mean_cal_BP']:.0f} cal BP " f"({stats['mean_cal_CE']:.0f} CE/BCE)") print(f"Calibrated median: {stats['median_cal_BP']:.0f} cal BP") print(f"Sigma (1σ): ± {stats['sigma_cal_BP']:.0f} years") print("\n68.3% HPD intervals (cal BP):") for start, end, mass in stats["hpd68"]: ce_start = 1950 - end ce_end = 1950 - start era_s = "BCE" if ce_start < 0 else "CE" era_e = "BCE" if ce_end < 0 else "CE" print(f" {start:.0f} – {end:.0f} cal BP " f"({abs(ce_start):.0f} {era_s} – {abs(ce_end):.0f} {era_e}) " f"[{mass*100:.1f}%]") print("\n95.4% HPD intervals (cal BP):") for start, end, mass in stats["hpd95"]: ce_start = 1950 - end ce_end = 1950 - start era_s = "BCE" if ce_start < 0 else "CE" era_e = "BCE" if ce_end < 0 else "CE" print(f" {start:.0f} – {end:.0f} cal BP " f"({abs(ce_start):.0f} {era_s} – {abs(ce_end):.0f} {era_e}) " f"[{mass*100:.1f}%]") # Plot (comment out in headless environments) plot_calibrated_date( cal_dist, title=f"Charcoal from Hearth — {c14_age} ± {c14_error} BP", c14_age=c14_age, c14_error=c14_error, curve_df=curve, output_path="calibrated_hearth.png", ) print("\nPlot saved to calibrated_hearth.png") if __name__ == "__main__": example_single_date()

# Example 2: Bayesian sequence model — Bronze Age burial site # Five radiocarbon dates from successive burial layers (top to bottom = younger to older) def example_burial_sequence(): print("=== Example 2: Bayesian Sequence Model — Bronze Age Burial Site ===\n") curve = load_calibration_curve("intcal20") # Dates from burial layers — outermost (youngest) to innermost (oldest) burial_dates = [ {"label": "Layer 1 (youngest)", "c14_age": 2980, "c14_error": 40, "curve_df": curve}, {"label": "Layer 2", "c14_age": 3060, "c14_error": 35, "curve_df": curve}, {"label": "Layer 3", "c14_age": 3150, "c14_error": 45, "curve_df": curve}, {"label": "Layer 4", "c14_age": 3280, "c14_error": 50, "curve_df": curve}, {"label": "Layer 5 (oldest)", "c14_age": 3390, "c14_error": 40, "curve_df": curve}, ] # Stratigraphic constraints: each layer must be younger than the one below # Index 0 (youngest) < index 1 < index 2 < index 3 < index 4 (oldest) # constraint (i, j): date[i] is older (larger cal BP) than date[j] constraints = [ (1, 0), # Layer 2 older than Layer 1 (2, 1), # Layer 3 older than Layer 2 (3, 2), # Layer 4 older than Layer 3 (4, 3), # Layer 5 older than Layer 4 ] print("Running MCMC sequence model (this may take 10–30 seconds)...") results = sequence_model( burial_dates, constraints=constraints, n_iterations=30000, burn_in=5000, seed=2024, ) print("\nPosterior estimates (sequence-constrained):\n") print(f"{'Label':<25} {'Mean (cal BP)':>14} {'95% HPD (cal BP)':>22} {'Mean (CE/BCE)':>14}") print("-" * 80) for r in results: ce = r["mean_cal_CE"] era = "BCE" if ce < 0 else "CE" print( f"{r['label']:<25} " f"{r['mean_cal_BP']:>14.0f} " f"{r['hpd95_lower']:>10.0f} – {r['hpd95_upper']:<10.0f} " f"{abs(ce):>10.0f} {era}" ) # Check that sequence is respected print("\nSequence consistency check:") for i in range(len(results) - 1): older = results[i + 1]["mean_cal_BP"] younger = results[i]["mean_cal_BP"] ok = "OK" if older > younger else "VIOLATION" print(f" {results[i+1]['label']} > {results[i]['label']}: " f"{older:.0f} > {younger:.0f} [{ok}]") # Plot posterior distributions fig, axes = plt.subplots(len(results), 1, figsize=(12, 10), sharex=True) colors = plt.cm.viridis(np.linspace(0.2, 0.8, len(results))) for i, (r, ax) in enumerate(zip(results, axes)): samp = r["posterior_samples"] ax.hist(samp, bins=80, density=True, color=colors[i], alpha=0.7, edgecolor="none") ax.axvline(r["mean_cal_BP"], color="red", lw=1.5, ls="--", label="Mean") ax.axvspan(r["hpd95_lower"], r["hpd95_upper"], alpha=0.15, color="red") ax.set_ylabel(r["label"][:20], fontsize=8) ax.yaxis.set_ticklabels([]) axes[-1].set_xlabel("Calibrated Age (cal BP)") axes[-1].invert_xaxis() fig.suptitle("Bayesian Sequence Model — Bronze Age Burial Site", fontweight="bold") plt.tight_layout() plt.savefig("burial_sequence.png", dpi=150, bbox_inches="tight") print("\nSequence plot saved to burial_sequence.png") if __name__ == "__main__": example_burial_sequence()

Radiocarbon Dating