Adaptive Stem Alignment Workflow

This skill provides a resilient pattern for audio production that emphasizes incremental verification, fail-fast principles, and adaptive duration handling. Each major step produces verified outputs before proceeding, with explicit strategies for handling stems of different durations.

Overview

Follow these steps in strict order. Each step must complete successfully and pass verification before proceeding to the next:

1. Early timing calculation - Derive section transitions from BPM and duration first
2. Verify reference audio - Validate input file properties and establish target duration
3. Generate and verify each stem individually - One stem at a time with immediate verification
4. Generate drum stem separately - Dedicated drum extension with rhythm patterns
5. Align stem durations - Handle duration mismatches with adaptive extension strategies
6. Apply effects with verification - Process each stem and verify output

Adaptive Stem Alignment Workflow

Overview

Follow these steps in strict order. Each step must complete successfully and pass verification before proceeding to the next:

1. Early timing calculation - Derive section transitions from BPM and duration first
2. Verify reference audio - Validate input file properties and establish target duration
3. Generate and verify each stem individually - One stem at a time with immediate verification
4. Generate drum stem separately - Dedicated drum extension with rhythm patterns
5. Align stem durations - Handle duration mismatches with adaptive extension strategies
6. Apply effects with verification - Process each stem and verify output

import os

import numpy as np

# NOTE(review): `sf` (presumably the soundfile package) and DURATION are used
# below but are not defined in this snippet; they are expected to come from an
# earlier step of the workflow -- confirm before running this on its own.

# Sine frequency in Hz and peak amplitude for each known stem name.
_STEM_TONES = {
    'bass': (110, 0.8),     # A2
    'guitars': (440, 0.6),  # A4
    'synths': (880, 0.5),   # A5
    'bridge': (220, 0.7),   # A3
}
# Tone used for any stem name that is not listed above.
_DEFAULT_TONE = (440, 0.5)


def generate_stem(name, duration_sec, sample_rate, subtype='FLOAT', section_timing=None):
    """Generate a single sine-tone stem and write it to ``{name}_stem.wav``.

    Args:
        name: Stem name. 'bass', 'guitars', 'synths' and 'bridge' each get a
            dedicated frequency/amplitude; any other name falls back to
            440 Hz at amplitude 0.5.
        duration_sec: Length of the stem in seconds.
        sample_rate: Sample rate in Hz.
        subtype: Subtype passed to ``sf.write``. 'FLOAT' converts the samples
            to float32 and 'PCM_24' to 24-bit-range int32; any other value
            leaves the samples as generated.
        section_timing: Accepted for interface compatibility; not used here.

    Returns:
        Tuple ``(filepath, audio_data)`` with the path that was written and
        the sample array (after the subtype conversion above).
    """
    frames = int(duration_sec * sample_rate)
    # Sample n sits at n / sample_rate. A linspace over [0, duration_sec] that
    # includes the endpoint stretches the sample period by
    # frames / (frames - 1), which slightly detunes every tone.
    t = np.arange(frames) / sample_rate

    # Generate stem-specific content (customize per stem type)
    freq, amplitude = _STEM_TONES.get(name, _DEFAULT_TONE)
    audio_data = np.sin(2 * np.pi * freq * t) * amplitude

    # Ensure proper data type
    if subtype == 'FLOAT':
        audio_data = audio_data.astype(np.float32)
    elif subtype == 'PCM_24':
        audio_data = np.clip(audio_data, -1, 1) * (2**23 - 1)
        audio_data = audio_data.astype(np.int32)

    file_data = audio_data
    if subtype == 'PCM_24':
        # Integer input to soundfile is interpreted as full-scale int32, so
        # 24-bit-range values must be left-justified or the file ends up
        # 8 bits (~48 dB) too quiet. Only the written copy is shifted; the
        # returned array keeps the 24-bit sample values.
        file_data = audio_data << 8

    filepath = f'{name}_stem.wav'
    sf.write(filepath, file_data, sample_rate, subtype=subtype, format='WAV')
    return filepath, audio_data


def verify_stem(filepath, expected_sample_rate, expected_subtype, expected_duration):
    """Verify a single stem file meets specifications.

    Args:
        filepath: Path of the audio file to inspect.
        expected_sample_rate: Required sample rate in Hz.
        expected_subtype: Required subtype string as reported by ``sf.info``.
        expected_duration: Target duration in seconds (1 s tolerance).

    Returns:
        ``{'success': True, 'info': info}`` when every check passes, otherwise
        ``{'success': False, 'error': message}`` where ``message`` lists all
        failed checks joined by '; '.
    """
    if not os.path.exists(filepath):
        return {'success': False, 'error': f'File not found: {filepath}'}

    info = sf.info(filepath)
    errors = []
    if info.samplerate != expected_sample_rate:
        errors.append(f'sample_rate: expected {expected_sample_rate}, got {info.samplerate}')
    if info.subtype != expected_subtype:
        errors.append(f'subtype: expected {expected_subtype}, got {info.subtype}')
    if abs(info.duration - expected_duration) > 1.0:  # Allow 1s tolerance
        errors.append(f'duration: expected ~{expected_duration}s, got {info.duration}s')

    if errors:
        return {'success': False, 'error': '; '.join(errors)}
    return {'success': True, 'info': info}


# Generate stems one at a time with verification
SAMPLE_RATE = 48000
SUBTYPE = 'FLOAT'
STEM_NAMES = ['bass', 'guitars', 'synths', 'bridge']

generated_stems = []
stem_durations = {}  # Track actual durations for alignment step

for stem_name in STEM_NAMES:
    print(f"\n=== Generating {stem_name} stem ===")

    # Generate
    filepath, data = generate_stem(stem_name, DURATION, SAMPLE_RATE, subtype=SUBTYPE)

    # Verify immediately; fail fast so a bad stem never reaches later steps
    result = verify_stem(filepath, SAMPLE_RATE, SUBTYPE, DURATION)
    if not result['success']:
        print(f"✗ {stem_name} stem FAILED: {result['error']}")
        raise RuntimeError(f"Stem generation failed for {stem_name}: {result['error']}")

    print(f"✓ {stem_name} stem verified: {result['info'].duration:.2f}s @ {result['info'].samplerate}Hz")
    generated_stems.append(filepath)
    stem_durations[stem_name] = result['info'].duration

print(f"\nAll {len(generated_stems)} stems generated and verified successfully")

def generate_drum_stem(duration_sec, sample_rate, bpm, section_timing, subtype='FLOAT'):
    """Generate a kick/snare drum stem and write it to ``drums_stem.wav``.

    A decaying 60 Hz sine "kick" is placed on every beat and a decaying noise
    burst "snare" on the 2nd and 4th beat of each group of four. The mix is
    clipped to [-1, 1] before it is written.

    Args:
        duration_sec: Length of the stem in seconds.
        sample_rate: Sample rate in Hz.
        bpm: Tempo in beats per minute; sets the spacing of the hits.
        section_timing: Accepted for interface compatibility; not used here.
        subtype: Subtype passed to ``sf.write``.

    Returns:
        Tuple ``(filepath, audio_data)`` with the path that was written and
        the float32 sample array.
    """
    frames = int(duration_sec * sample_rate)
    audio_data = np.zeros(frames, dtype=np.float32)
    beats_per_second = bpm / 60.0
    beat_period = 1.0 / beats_per_second  # seconds per beat

    # Simple kick drum pattern (every beat)
    kick_freq = 60
    kick_duration = 0.1
    kick_frames = int(kick_duration * sample_rate)
    for beat_time in np.arange(0, duration_sec, beat_period):
        start_frame = int(beat_time * sample_rate)
        end_frame = min(start_frame + kick_frames, frames)
        if start_frame < frames:
            # Build the time axis from the sample rate so a hit that is cut
            # off by the end of the buffer is truncated, not squeezed into
            # fewer samples (which would change its pitch and envelope).
            t = np.arange(end_frame - start_frame) / sample_rate
            kick = np.exp(-5 * t) * np.sin(2 * np.pi * kick_freq * t)
            audio_data[start_frame:end_frame] += kick * 0.9

    # Simple snare pattern (every 2nd and 4th beat)
    snare_duration = 0.05
    snare_frames = int(snare_duration * sample_rate)
    for beat_time in np.arange(0, duration_sec, 2.0 / beats_per_second):
        # Step two beats at a time and offset by one full beat so the hits
        # land on beats 2 and 4; a half-beat offset would put them on the
        # off-beat eighth notes instead.
        start_frame = int((beat_time + beat_period) * sample_rate)
        end_frame = min(start_frame + snare_frames, frames)
        if start_frame < frames:
            t = np.arange(end_frame - start_frame) / sample_rate
            snare = np.exp(-10 * t) * np.random.uniform(-1, 1, len(t)) * 0.5
            audio_data[start_frame:end_frame] += snare * 0.7

    audio_data = np.clip(audio_data, -1, 1)

    filepath = 'drums_stem.wav'
    sf.write(filepath, audio_data, sample_rate, subtype=subtype, format='WAV')
    return filepath, audio_data


# NOTE(review): DURATION, BPM and timing are not defined in this snippet;
# presumably they are set by the earlier timing/reference steps -- confirm.
print("\n=== Generating drum stem ===")
drums_filepath, drums_data = generate_drum_stem(DURATION, SAMPLE_RATE, BPM, timing, subtype=SUBTYPE)

drums_result = verify_stem(drums_filepath, SAMPLE_RATE, SUBTYPE, DURATION)
if drums_result['success']:
    print(f"✓ Drum stem verified: {drums_result['info'].duration:.2f}s @ {drums_result['info'].samplerate}Hz")
    generated_stems.append(drums_filepath)
    stem_durations['drums'] = drums_result['info'].duration
else:
    # Fail fast, matching how the other stems are handled, so a bad drum
    # stem is never passed on to the alignment step.
    print(f"✗ Drum stem FAILED: {drums_result['error']}")
    raise RuntimeError(f"Drum stem generation failed: {drums_result['error']}")

Adaptive Stem Alignment

Adaptive Stem Alignment Workflow

Overview

Adaptive Stem Alignment

Adaptive Stem Alignment Workflow

Overview

Key Differences from Standard Workflow

Step 1: Calculate Timing Parameters (Early)

Step 2: Verify Reference Audio

Step 3: Generate and Verify Each Stem Individually

Step 4: Generate Drum Stem Separately

Songsee

Video Frames

Gifgrep

Qqbot Media

Camsnap

Openai Whisper Api