Incremental audio production with duration mismatch handling, adaptive stem extension, and pre-mix alignment verification
This skill provides a resilient pattern for audio production that emphasizes incremental verification, fail-fast principles, and adaptive duration handling. Each major step produces verified outputs before proceeding, with explicit strategies for handling stems of different durations.
Follow these steps in strict order. Each step must complete successfully and pass verification before proceeding to the next:
Calculate all timing parameters before generating any audio. This ensures consistent timing across all stems:
def calculate_section_transitions(bpm, total_duration_sec, sections):
    """Calculate beat-aligned transition points for song sections.

    Args:
        bpm: Tempo in beats per minute; must be greater than zero.
        total_duration_sec: Intended total length in seconds. Accepted for
            interface compatibility; the layout is derived from ``bpm`` and
            ``sections`` only, so this value does not affect the result.
        sections: Mapping of section name to its length in beats, iterated
            in insertion order.

    Returns:
        A dict keyed by section name. Each value is a dict with ``'start'``
        and ``'end'`` (seconds), ``'beats'`` (the section's length in beats)
        and ``'start_beat'`` (the beat position at which the section starts).

    Raises:
        ValueError: If ``bpm`` is not positive.
    """
    if bpm <= 0:
        raise ValueError(f"bpm must be positive, got {bpm}")

    beats_per_second = bpm / 60.0
    section_durations = {}

    # Accumulate beats, not seconds: summing per-section float durations and
    # multiplying back by the tempo lets rounding error creep into the beat
    # positions. Dividing the running beat total once keeps each section's
    # 'end' identical to the next section's 'start'.
    elapsed_beats = 0
    for section_name, beat_count in sections.items():
        next_elapsed_beats = elapsed_beats + beat_count
        section_durations[section_name] = {
            'start': elapsed_beats / beats_per_second,
            'end': next_elapsed_beats / beats_per_second,
            'beats': beat_count,
            'start_beat': float(elapsed_beats),
        }
        elapsed_beats = next_elapsed_beats
    return section_durations
# Configuration: tempo, nominal length in seconds, and section lengths in beats.
BPM = 120
DURATION = 137
SECTIONS = {
    'intro': 16,
    'verse': 32,
    'chorus': 32,
    'bridge': 16,
    'outro': 16,
}

timing = calculate_section_transitions(BPM, DURATION, SECTIONS)

# Report each section's time window so the layout can be checked by eye.
print("Timing calculated:")
for section_name, bounds in timing.items():
    start_sec = bounds['start']
    end_sec = bounds['end']
    beat_total = bounds['beats']
    print(f" {section_name}: {start_sec:.2f}s - {end_sec:.2f}s ({beat_total} beats)")
Validate that the reference file exists and has the expected properties:
import soundfile as sf
import os
def verify_reference_file(filepath, expected_sample_rate=None, min_duration=None):
    """Check a reference audio file and return a summary of its properties.

    Args:
        filepath: Path of the audio file to inspect.
        expected_sample_rate: Required sample rate; the check is skipped
            when this is falsy (``None`` or ``0``).
        min_duration: Minimum acceptable duration in seconds; the check is
            skipped when this is falsy.

    Returns:
        A dict with the keys ``'sample_rate'``, ``'duration'``,
        ``'channels'`` and ``'subtype'`` taken from ``sf.info``.

    Raises:
        FileNotFoundError: If ``filepath`` does not exist.
        ValueError: If any requested check fails; the message lists every
            failed check.
    """
    if not os.path.exists(filepath):
        raise FileNotFoundError(f"Reference file not found: {filepath}")

    info = sf.info(filepath)

    # Collect every failure before raising so the caller sees them all.
    problems = []
    if expected_sample_rate:
        if info.samplerate != expected_sample_rate:
            problems.append(
                f"Sample rate mismatch: expected {expected_sample_rate}, got {info.samplerate}"
            )
    if min_duration:
        if info.duration < min_duration:
            problems.append(
                f"Duration too short: expected >= {min_duration}s, got {info.duration}s"
            )
    if problems:
        details = '; '.join(problems)
        raise ValueError(f"Reference file validation failed: {details}")

    summary = {
        'sample_rate': info.samplerate,
        'duration': info.duration,
        'channels': info.channels,
        'subtype': info.subtype,
    }
    print(
        f"Reference verified: {summary['duration']:.2f}s @ {summary['sample_rate']}Hz, "
        f"{summary['channels']}ch, {summary['subtype']}"
    )
    return summary
# Verify reference: require 48 kHz and at least 130 seconds of audio.
ref_info = verify_reference_file(
    'reference.wav',
    expected_sample_rate=48000,
    min_duration=130,
)

# Use reference duration as target
TARGET_DURATION = ref_info['duration']
Generate one stem at a time, and verify each stem immediately before proceeding to the next:
import numpy as np
def generate_stem(name, duration_sec, sample_rate, subtype='FLOAT', section_timing=None):
    """Generate a single sine-tone stem and write it to ``{name}_stem.wav``.

    Args:
        name: Stem name. ``'bass'``, ``'guitars'``, ``'synths'`` and
            ``'bridge'`` select a specific tone; any other name falls back
            to 440 Hz at amplitude 0.5.
        duration_sec: Length of the stem in seconds.
        sample_rate: Sample rate in Hz.
        subtype: soundfile subtype used for the written file.
        section_timing: Accepted for interface compatibility; not used by
            this placeholder generator.

    Returns:
        ``(filepath, audio_data)``. ``audio_data`` is float32 for
        ``'FLOAT'``, int32 scaled to the 24-bit range for ``'PCM_24'``, and
        the unconverted float array for any other subtype.
    """
    frames = int(duration_sec * sample_rate)
    # Sample instants at exactly 1/sample_rate spacing. An endpoint-inclusive
    # linspace spreads `frames` points over the whole duration, which makes
    # the step duration/(frames - 1) and detunes every tone slightly.
    t = np.arange(frames) / sample_rate

    # Generate stem-specific content (customize per stem type):
    # name -> (frequency in Hz, peak amplitude)
    tone_by_stem = {
        'bass': (110, 0.8),      # A2
        'guitars': (440, 0.6),   # A4
        'synths': (880, 0.5),    # A5
        'bridge': (220, 0.7),    # A3
    }
    freq, amplitude = tone_by_stem.get(name, (440, 0.5))
    audio_data = np.sin(2 * np.pi * freq * t) * amplitude

    # Ensure proper data type
    samples_to_write = audio_data
    if subtype == 'FLOAT':
        audio_data = audio_data.astype(np.float32)
        samples_to_write = audio_data
    elif subtype == 'PCM_24':
        # Write the clipped float samples and let soundfile convert them to
        # 24-bit. int32 input is treated as full-scale 32-bit data, so an
        # array scaled only to the 24-bit range would be stored roughly
        # 48 dB too quiet. The int32 array is still returned for callers
        # that expect the 24-bit integer representation.
        samples_to_write = np.clip(audio_data, -1, 1)
        audio_data = (samples_to_write * (2**23 - 1)).astype(np.int32)

    filepath = f'{name}_stem.wav'
    sf.write(filepath, samples_to_write, sample_rate, subtype=subtype, format='WAV')
    return filepath, audio_data
def verify_stem(filepath, expected_sample_rate, expected_subtype, expected_duration):
    """Check that a written stem matches the requested specification.

    Args:
        filepath: Path of the stem file to inspect.
        expected_sample_rate: Sample rate the file must have.
        expected_subtype: soundfile subtype the file must have.
        expected_duration: Expected length in seconds; a deviation of up to
            one second is accepted.

    Returns:
        ``{'success': True, 'info': info}`` when every check passes, where
        ``info`` is the object returned by ``sf.info``. Otherwise
        ``{'success': False, 'error': message}``, with ``message`` joining
        all failed checks with ``'; '``.
    """
    if not os.path.exists(filepath):
        return {'success': False, 'error': f'File not found: {filepath}'}

    info = sf.info(filepath)

    # Each entry pairs "did this check fail?" with the message to report.
    checks = (
        (
            info.samplerate != expected_sample_rate,
            f'sample_rate: expected {expected_sample_rate}, got {info.samplerate}',
        ),
        (
            info.subtype != expected_subtype,
            f'subtype: expected {expected_subtype}, got {info.subtype}',
        ),
        (
            abs(info.duration - expected_duration) > 1.0,  # Allow 1s tolerance
            f'duration: expected ~{expected_duration}s, got {info.duration}s',
        ),
    )
    problems = [message for failed, message in checks if failed]

    if problems:
        return {'success': False, 'error': '; '.join(problems)}
    return {'success': True, 'info': info}
# Generate stems one at a time with verification
SAMPLE_RATE = 48000
SUBTYPE = 'FLOAT'
STEM_NAMES = ['bass', 'guitars', 'synths', 'bridge']

generated_stems = []
stem_durations = {}  # Track actual durations for alignment step

for stem_name in STEM_NAMES:
    print(f"\n=== Generating {stem_name} stem ===")

    # Generate, then verify immediately so a bad stem stops the run here.
    filepath, data = generate_stem(stem_name, DURATION, SAMPLE_RATE, subtype=SUBTYPE)
    result = verify_stem(filepath, SAMPLE_RATE, SUBTYPE, DURATION)

    if not result['success']:
        failure = result['error']
        print(f"✗ {stem_name} stem FAILED: {failure}")
        raise RuntimeError(f"Stem generation failed for {stem_name}: {failure}")

    stem_info = result['info']
    print(f"✓ {stem_name} stem verified: {stem_info.duration:.2f}s @ {stem_info.samplerate}Hz")
    generated_stems.append(filepath)
    stem_durations[stem_name] = stem_info.duration

print(f"\nAll {len(generated_stems)} stems generated and verified successfully")
Drums require different processing (rhythm patterns, percussion sounds):
def _add_drum_hits(audio_data, sample_rate, hit_times_sec, hit_duration_sec, synth, gain):
    """Mix one synthesized hit into ``audio_data`` at each time in ``hit_times_sec``."""
    total_frames = len(audio_data)
    hit_frames = int(hit_duration_sec * sample_rate)
    for hit_time in hit_times_sec:
        # Round rather than truncate so float error in the hit time cannot
        # place a hit one frame early.
        start_frame = int(round(hit_time * sample_rate))
        end_frame = min(start_frame + hit_frames, total_frames)
        if end_frame <= start_frame:
            continue
        # Real elapsed time per sample. A hit cut short by the end of the
        # buffer is truncated, not squeezed into the remaining frames.
        t = np.arange(end_frame - start_frame) / sample_rate
        audio_data[start_frame:end_frame] += synth(t) * gain


def generate_drum_stem(duration_sec, sample_rate, bpm, section_timing, subtype='FLOAT'):
    """Generate a kick/snare drum stem and write it to ``drums_stem.wav``.

    The kick sounds on every beat and the snare on the 2nd and 4th beat
    (every other beat, starting one beat in).

    Args:
        duration_sec: Length of the stem in seconds.
        sample_rate: Sample rate in Hz.
        bpm: Tempo in beats per minute; must be greater than zero.
        section_timing: Accepted for interface compatibility; the pattern is
            currently the same throughout and does not read it.
        subtype: soundfile subtype used for the written file.

    Returns:
        ``(filepath, audio_data)`` where ``audio_data`` is the float32 mono
        signal clipped to [-1, 1].

    Raises:
        ValueError: If ``bpm`` is not positive.
    """
    if bpm <= 0:
        raise ValueError(f"bpm must be positive, got {bpm}")

    frames = int(duration_sec * sample_rate)
    audio_data = np.zeros(frames, dtype=np.float32)
    seconds_per_beat = 60.0 / bpm

    # Simple kick drum pattern (every beat): decaying 60 Hz sine.
    kick_freq = 60
    kick_duration = 0.1
    _add_drum_hits(
        audio_data,
        sample_rate,
        np.arange(0, duration_sec, seconds_per_beat),
        kick_duration,
        lambda t: np.exp(-5 * t) * np.sin(2 * np.pi * kick_freq * t),
        0.9,
    )

    # Simple snare pattern (every 2nd and 4th beat): decaying noise burst.
    # The first snare is one full beat in; a half-beat offset would put the
    # hits between beats instead of on the backbeat.
    snare_duration = 0.05
    _add_drum_hits(
        audio_data,
        sample_rate,
        np.arange(seconds_per_beat, duration_sec, 2.0 * seconds_per_beat),
        snare_duration,
        lambda t: np.exp(-10 * t) * np.random.uniform(-1, 1, len(t)) * 0.5,
        0.7,
    )

    # Overlapping kick and snare energy can exceed full scale.
    audio_data = np.clip(audio_data, -1, 1)

    filepath = 'drums_stem.wav'
    sf.write(filepath, audio_data, sample_rate, subtype=subtype, format='WAV')
    return filepath, audio_data
print("\n=== Generating drum stem ===")
drums_filepath, drums_data = generate_drum_stem(DURATION, SAMPLE_RATE, BPM, timing, subtype=SUBTYPE)
drums_result = verify_stem(drums_filepath, SAMPLE_RATE, SUBTYPE, DURATION)
if drums_result['success']:
print(f"✓ Drum stem verified: {drums_result['info'].duration:.2f}s @ {drums_result['info'].samplerate}Hz")
generated_stems.append(drums_filepath)
stem_durations['drums'] = drums_result['info'].duration