Name: Spectroscopy Analysis Guide
Author: wentorai

スキルを検索.../

Spectroscopy Analysis Guide | Skills Pool

import numpy as np
from scipy.signal import find_peaks, savgol_filter

def _jcamp_metadata_step(metadata: dict) -> float:
    """Return the X spacing implied by ##DELTAX (divided by ##XFACTOR), or NaN."""
    try:
        # Data rows hold unscaled values, so the nominal spacing is divided
        # by XFACTOR (treated as 1 when the record is absent).
        return float(metadata["DELTAX"]) / float(metadata.get("XFACTOR", 1.0))
    except (KeyError, ValueError, ZeroDivisionError):
        return float("nan")


def read_jcamp(filepath: str) -> dict:
    """
    Read an uncompressed JCAMP-DX spectral file.

    Lines starting with ``##`` are parsed as ``KEY=value`` records and stored
    in ``metadata`` (a later duplicate key overwrites an earlier one). Lines
    starting with ``$$`` and lines that are not purely whitespace-separated
    numbers are skipped. Two numeric row layouts are understood:

    * ``X Y`` -- one point per row.
    * ``X Y1 Y2 ... Yn`` -- the ``(X++(Y..Y))`` layout: X belongs to the
      first ordinate and the remaining ordinates are evenly spaced. The
      spacing is inferred from the X that starts the following row; the last
      row reuses the previously inferred spacing, and if none is available
      the ``##DELTAX`` record is used. When no spacing can be determined the
      trailing x values of that row are NaN.

    Values are returned as stored in the file; XFACTOR / YFACTOR are not
    applied to the arrays.

    Returns a dict with:
        "x": abscissa array (wavenumber / chemical shift / m/z)
        "y": intensity array, same length as "x"
        "metadata": dict of the ``##`` records
    """
    rows = []  # (first x of the row, list of ordinates on the row)
    metadata = {}

    with open(filepath, "r") as f:
        for line in f:
            line = line.strip()
            if line.startswith("##"):
                key, sep, value = line[2:].partition("=")
                if sep:
                    metadata[key.strip()] = value.strip()
                continue
            if not line or line.startswith("$$"):
                continue
            try:
                values = [float(v) for v in line.split()]
            except ValueError:
                # Non-numeric row (free text, compressed ASDF data, ...).
                continue
            if len(values) >= 2:
                rows.append((values[0], values[1:]))

    x_data, y_data = [], []
    step = None
    for i, (x_start, ys) in enumerate(rows):
        if len(ys) > 1:
            if i + 1 < len(rows):
                # The next row starts where this one would continue.
                step = (rows[i + 1][0] - x_start) / len(ys)
            elif step is None:
                step = _jcamp_metadata_step(metadata)
        x_data.append(x_start)
        # Empty for single-ordinate rows, so `step` is only read when set.
        x_data.extend(x_start + k * step for k in range(1, len(ys)))
        y_data.extend(ys)

    return {
        "x": np.array(x_data),
        "y": np.array(y_data),
        "metadata": metadata,
    }

import nmrglue as ng

def process_1h_nmr(bruker_dir: str) -> dict:
    """
    Load a Bruker 1H NMR experiment with nmrglue and turn it into a spectrum.

    bruker_dir: path to the Bruker experiment directory.

    Returns a dict with the ppm axis ("ppm"), the real part of the processed
    data ("spectrum"), the spectrometer frequency in MHz ("sf", taken from
    acqus SFO1) and the sweep width in ppm ("sw_ppm", taken from acqus SW).
    """
    params, signal = ng.bruker.read(bruker_dir)

    # The digital-filter removal needs the parameter dictionary; every later
    # step only consumes the output of the step before it.
    signal = ng.bruker.remove_digital_filter(params, signal)
    pipeline = (
        (ng.proc_base.zf_size, (65536,)),        # zero-fill
        (ng.proc_base.fft, ()),                  # Fourier transform
        (ng.proc_autophase.autops, ("acme",)),   # automatic phasing
        (ng.proc_base.rev, ()),                  # reverse spectrum
        (ng.proc_base.di, ()),                   # discard imaginary
    )
    for step, extra_args in pipeline:
        signal = step(signal, *extra_args)

    # Chemical shift axis (ppm) derived from the processed data.
    universal = ng.bruker.guess_udic(params, signal)
    converter = ng.fileiobase.uc_from_udic(universal)
    shift_axis = converter.ppm_scale()

    acqus = params["acqus"]
    result = {
        "ppm": shift_axis,
        "spectrum": signal.real,
        "sf": acqus["SFO1"],
        "sw_ppm": acqus["SW"],
    }
    return result

def pick_nmr_peaks(ppm: np.ndarray, spectrum: np.ndarray,
                    threshold: float = 0.05) -> list[dict]:
    """
    Automatic peak picking for 1H NMR.

    ppm: chemical shift axis, indexed in parallel with ``spectrum``.
    spectrum: intensity values.
    threshold: minimum peak height as fraction of max intensity. Half of
        that height is also required as peak prominence.

    Returns a list of ``{"ppm", "intensity"}`` dicts (ppm rounded to three
    decimals) sorted from high to low chemical shift. An empty spectrum
    yields an empty list.
    """
    spectrum = np.asarray(spectrum)
    if spectrum.size == 0:
        # np.max raises on empty input; there is nothing to pick anyway.
        return []

    min_height = threshold * np.max(spectrum)
    indices, _ = find_peaks(
        spectrum, height=min_height, distance=10, prominence=min_height * 0.5
    )

    peaks = [
        {"ppm": round(float(ppm[idx]), 3), "intensity": float(spectrum[idx])}
        for idx in indices
    ]

    # Sort by chemical shift (high to low, NMR convention)
    peaks.sort(key=lambda p: p["ppm"], reverse=True)
    return peaks

Chemical Shift (ppm)	Functional Group
0.8-1.0	CH3 (methyl, alkyl)
1.2-1.4	CH2 (methylene, alkyl chain)
2.0-2.5	CH next to C=O
3.3-3.9	CH next to O or N (ethers, amines)
4.5-5.5	Vinyl C=CH2, OCH
6.5-8.5	Aromatic H
9.0-10.0	Aldehyde CHO
10.0-12.0	Carboxylic acid OH

from pyteomics import mzml
import numpy as np

def read_mzml_spectra(filepath: str, ms_level: int = 1) -> list[dict]:
    """
    Collect the spectra of one MS level from an mzML file.

    filepath: path handed to ``mzml.read``.
    ms_level: 1 for MS1 (survey scans), 2 for MS/MS; spectra whose
        "ms level" entry differs are ignored.

    Each returned dict carries the spectrum index ("scan"), the
    "scan start time" of its first scan entry ("rt", 0 when absent), the
    m/z and intensity arrays ("mz", "intensity") and the summed
    intensity ("tic").
    """
    selected = []
    with mzml.read(filepath) as reader:
        for entry in reader:
            if not entry.get("ms level") == ms_level:
                continue
            record = {
                "scan": entry["index"],
                "rt": entry["scanList"]["scan"][0].get("scan start time", 0),
                "mz": entry["m/z array"],
                "intensity": entry["intensity array"],
                "tic": np.sum(entry["intensity array"]),
            }
            selected.append(record)
    return selected

def find_molecular_ion(mz: np.ndarray, intensity: np.ndarray,
                        expected_mw: float = None,
                        tolerance_da: float = 0.5,
                        top_n: int = 20) -> list[dict]:
    """
    Identify molecular ion peaks among the most intense peaks of a spectrum.

    mz, intensity: parallel arrays describing one spectrum.
    expected_mw: expected neutral molecular weight. When given (and
        non-zero), each of the ``top_n`` most intense peaks is tested
        against the adducts [M+H]+, [M+Na]+, [M+K]+, [M-H]- and [M+NH4]+,
        and a candidate is reported for every adduct whose back-calculated
        weight lies within ``tolerance_da`` of ``expected_mw``. When omitted,
        the ``top_n`` peaks are returned as-is.
    tolerance_da: allowed absolute mass error.
    top_n: number of most intense peaks to examine (default 20).

    Returns a list of dicts ordered by decreasing intensity. All numeric
    fields are plain Python floats.

    Raises ValueError if ``top_n`` is negative.
    """
    if top_n < 0:
        raise ValueError("top_n must be non-negative")

    # Indices of the most intense peaks, strongest first.
    top_indices = np.argsort(intensity)[::-1][:top_n]
    candidates = []

    # Mass shift of each adduct relative to the neutral molecule.
    adducts = {
        "[M+H]+": 1.00728,
        "[M+Na]+": 22.98922,
        "[M+K]+": 38.96316,
        "[M-H]-": -1.00728,
        "[M+NH4]+": 18.03437,
    }

    for idx in top_indices:
        # Convert once so every derived value is a built-in float rather
        # than a NumPy scalar.
        peak_mz = float(mz[idx])
        peak_int = float(intensity[idx])

        if not expected_mw:
            candidates.append({
                "mz": round(peak_mz, 4),
                "intensity": peak_int,
            })
            continue

        target = float(expected_mw)
        for adduct_name, adduct_mass in adducts.items():
            calc_mw = peak_mz - adduct_mass
            error = abs(calc_mw - target)
            if error < tolerance_da:
                candidates.append({
                    "mz": round(peak_mz, 4),
                    "intensity": peak_int,
                    "adduct": adduct_name,
                    "calc_mw": round(calc_mw, 4),
                    "error_da": round(error, 4),
                })

    return candidates

# Standard IR functional group frequency table.
# Keys are inclusive (low, high) wavenumber windows in cm-1. Windows may
# overlap (the N-H window lies inside the O-H window), so a peak can match
# more than one entry; insertion order decides which match is primary.
# NOTE: the C=O description mentions amide ~1650, which falls in the C=C
# window below rather than in the C=O window.
IR_ASSIGNMENTS = {
    (3200, 3600): "O-H stretch (broad: alcohol, acid; sharp: free OH)",
    (3300, 3500): "N-H stretch (primary amine: 2 bands; secondary: 1 band)",
    # Upper bound is 3100 so the sp2 range named in the description is covered.
    (2850, 3100): "C-H stretch (sp3: 2850-2960; sp2: 3000-3100)",
    (2100, 2260): "Triple bond stretch (C-triple-N: 2210-2260; C-triple-C: 2100-2150)",
    (1680, 1750): "C=O stretch (ketone ~1715; ester ~1735; acid ~1710; amide ~1650)",
    (1600, 1680): "C=C stretch (alkene ~1640; aromatic ~1600, 1500)",
    (1000, 1300): "C-O stretch (ether, ester, alcohol)",
}


def assign_ir_peaks(wavenumber: np.ndarray, absorbance: np.ndarray,
                     threshold: float = 0.1) -> list[dict]:
    """
    Detect and assign IR absorption peaks to functional groups.

    wavenumber, absorbance: parallel arrays describing the spectrum.
    threshold: minimum absorbance for a local maximum to count as a peak
        (a prominence of at least 0.05 is also required).

    Returns one dict per peak, sorted by decreasing wavenumber, with:
        "wavenumber_cm-1": peak position rounded to one decimal
        "absorbance": peak height rounded to four decimals
        "assignment": first matching entry of IR_ASSIGNMENTS, or "unassigned"
        "candidates": every matching entry, in table order (empty if none)
    """
    # Absorbance peaks are positive, so the maxima are searched directly.
    peaks, _ = find_peaks(absorbance, height=threshold, prominence=0.05)

    assignments = []
    for idx in peaks:
        wn = float(wavenumber[idx])
        # Collect all matches: stopping at the first hit would hide entries
        # whose window is covered by an earlier one.
        matches = [
            group
            for (low, high), group in IR_ASSIGNMENTS.items()
            if low <= wn <= high
        ]
        assignments.append({
            "wavenumber_cm-1": round(wn, 1),
            "absorbance": round(float(absorbance[idx]), 4),
            "assignment": matches[0] if matches else "unassigned",
            "candidates": matches,
        })

    return sorted(assignments, key=lambda x: x["wavenumber_cm-1"], reverse=True)

def baseline_correction(y: np.ndarray, lam: float = 1e6,
                         p: float = 0.001, n_iter: int = 10) -> np.ndarray:
    """
    Asymmetric least squares baseline correction (Eilers and Boelens, 2005).

    y: 1-D signal with at least three points.
    lam: smoothness parameter (larger = smoother baseline)
    p: asymmetry parameter (smaller = more emphasis on fitting below peaks)
    n_iter: number of reweighting iterations (at least 1).

    Returns ``y`` minus the estimated baseline, as a float array.

    Raises ValueError if ``y`` is not 1-D, has fewer than three points, or
    ``n_iter`` is smaller than 1.
    """
    from scipy.sparse import diags
    from scipy.sparse.linalg import spsolve

    y = np.asarray(y, dtype=float)
    if y.ndim != 1 or y.size < 3:
        raise ValueError("y must be a 1-D array with at least 3 points")
    if n_iter < 1:
        raise ValueError("n_iter must be at least 1")

    n = y.size
    # Second-difference operator and smoothness penalty, kept sparse so
    # memory grows with n rather than n**2.
    D = diags([1.0, -2.0, 1.0], [0, -1, -2], shape=(n, n - 2), format="csc")
    H = lam * D.dot(D.T)
    w = np.ones(n)

    for _ in range(n_iter):
        W = diags(w, 0, shape=(n, n), format="csc")
        Z = (W + H).tocsc()
        baseline = spsolve(Z, w * y)
        # Points above the baseline get weight p, all others 1 - p, so a
        # point lying exactly on the baseline never ends up with weight 0.
        w = np.where(y > baseline, p, 1.0 - p)

    return y - baseline

def smooth_spectrum(y: np.ndarray, window: int = 11,
                     polyorder: int = 3) -> np.ndarray:
    """
    Smooth a spectrum with a Savitzky-Golay filter.

    y: signal to smooth.
    window: filter window length passed to ``savgol_filter``.
    polyorder: order of the fitted polynomial passed to ``savgol_filter``.
    """
    smoothed = savgol_filter(y, window_length=window, polyorder=polyorder)
    return smoothed

Format	Spectroscopy	Description
JCAMP-DX (.jdx, .dx)	All types	IUPAC standard exchange format
Bruker (1r, fid, acqu)	NMR	Raw and processed Bruker data
mzML / mzXML	MS	Open mass spectrometry format
SPC (.spc)	IR, UV-Vis	Galactic/Thermo spectral format
CSV / TXT	All	Simple x,y pairs (wavelength/wavenumber, intensity)

Format	Spectroscopy	Description
JCAMP-DX (.jdx, .dx)	All types	IUPAC standard exchange format
Bruker (1r, fid, acqu)	NMR	Raw and processed Bruker data
mzML / mzXML	MS	Open mass spectrometry format
SPC (.spc)	IR, UV-Vis	Galactic/Thermo spectral format
CSV / TXT	All	Simple x,y pairs (wavelength/wavenumber, intensity)

Spectroscopy Analysis Guide

Spectral Data Formats

Common File Formats

Reading Spectral Data

Spectroscopy Analysis Guide

Spectral Data Formats

Common File Formats

Reading Spectral Data

NMR Spectroscopy

1H NMR Processing

Common 1H NMR Chemical Shift Ranges

Mass Spectrometry

Processing MS Data

Infrared Spectroscopy

IR Peak Assignment

Spectral Processing Utilities

Baseline Correction and Smoothing

Tools and Software

Database Migrations Migration Observability

Computer Vision Expert

Ai Studio Image

Astropy

Performance Engineer

Cosmosdb Datamodeling