Create audiobooks from web content or text files. Handles content fetching, text processing, and TTS conversion with automatic fallback between ElevenLabs, OpenAI TTS, and gTTS.
name audiobook description Create audiobooks from web content or text files. Handles content fetching, text processing, and TTS conversion with automatic fallback between ElevenLabs, OpenAI TTS, and gTTS. Audiobook Creation Guide Create audiobooks from web articles, essays, or text files. This skill covers the full pipeline: content fetching, text processing, and audio generation. Quick Start import os
def get_tts_provider (): if os.environ.get( "ELEVENLABS_API_KEY" ): return "elevenlabs" elif os.environ.get( "OPENAI_API_KEY" ): return "openai" else : return "gtts"
provider = get_tts_provider() print ( f"Using TTS provider: {provider} " ) Step 1: Fetching Web Content IMPORTANT: Verify fetched content is complete WebFetch and similar tools may return summaries instead of full text. Always verify: import subprocess def fetch_article_content ( url ): """Fetch article content using curl for reliability."""
result = subprocess.run( [ "curl" , "-s" , url], capture_output= True , text= True ) html = result.stdout
import re text = re.sub( r'<script[^>]>.?</script>' , '' , html, flags=re.DOTALL) text = re.sub( r'<style[^>]>.?</style>' , '' , html, flags=re.DOTALL) text = re.sub( r'<[^>]+>' , ' ' , text) text = re.sub( r'\s+' , ' ' , text).strip() return text Content verification checklist Before converting to audio, verify: Text length is reasonable for the source (articles typically 1,000-10,000+ words) Content includes actual article text, not just navigation/headers No "summary" or "key points" headers that indicate truncation def verify_content ( text, expected_min_chars= 1000 ): """Basic verification that content is complete.""" if len (text) < expected_min_chars: print ( f"WARNING: Content may be truncated ( { len (text)} chars)" ) return False if "summary" in text.lower()[: 500 ] or "key points" in text.lower()[: 500 ]: print ( "WARNING: Content appears to be a summary, not full text" ) return False return True Step 2: Text Processing Clean and prepare text for TTS import re def clean_text_for_tts ( text ): """Clean text for better TTS output."""
text = re.sub( r'http[s]?://\S+' , '' , text)
text = re.sub( r'[\d+]' , '' , text)
text = re.sub( r'\s+' , ' ' , text)
text = re.sub( r'[^\w\s.,!?;:'"()-]' , '' , text) return text.strip() def chunk_text ( text, max_chars= 4000 ): """Split text into chunks at sentence boundaries.""" sentences = re.split( r'(?<=[.!?])\s+' , text) chunks = [] current_chunk = "" for sentence in sentences: if len (current_chunk) + len (sentence) < max_chars: current_chunk += sentence + " " else : if current_chunk: chunks.append(current_chunk.strip()) current_chunk = sentence + " " if current_chunk: chunks.append(current_chunk.strip()) return chunks Step 3: TTS Conversion with Fallback Automatic provider selection import os import subprocess def create_audiobook ( text, output_path ): """Convert text to audiobook with automatic TTS provider selection."""
has_elevenlabs = bool (os.environ.get( "ELEVENLABS_API_KEY" )) has_openai = bool (os.environ.get( "OPENAI_API_KEY" )) if has_elevenlabs: print ( "Using ElevenLabs TTS (highest quality)" ) return create_with_elevenlabs(text, output_path) elif has_openai: print ( "Using OpenAI TTS (high quality)" ) return create_with_openai(text, output_path) else : print ( "Using gTTS (free, no API key required)" ) return create_with_gtts(text, output_path) ElevenLabs implementation import requests def create_with_elevenlabs ( text, output_path ): """Generate audiobook using ElevenLabs API.""" api_key = os.environ.get( "ELEVENLABS_API_KEY" ) voice_id = "21m00Tcm4TlvDq8ikWAM"
{response.text} " ) return False return concatenate_audio(audio_files, output_path) OpenAI TTS implementation def create_with_openai ( text, output_path ): """Generate audiobook using OpenAI TTS API.""" api_key = os.environ.get( "OPENAI_API_KEY" )
chunks = chunk_text(text, max_chars=
4000 ) audio_files = [] for i, chunk in enumerate (chunks): chunk_file = f"/tmp/chunk_ {i:03d} .mp3" response = requests.post( "https://api.openai.com/v1/audio/speech" , headers={ "Authorization" : f"Bearer {api_key} " , "Content-Type" : "application/json" }, json={ "model" : "tts-1" , "input" : chunk, "voice" : "onyx" ,
{response.text} " ) return False return concatenate_audio(audio_files, output_path) gTTS implementation (free fallback) def create_with_gtts ( text, output_path ): """Generate audiobook using gTTS (free, no API key).""" from gtts import gTTS from pydub import AudioSegment
chunks = chunk_text(text, max_chars=
4500 ) audio_files = [] for i, chunk in enumerate (chunks): chunk_file = f"/tmp/chunk_ {i:03d} .mp3" tts = gTTS(text=chunk, lang= 'en' , slow= False ) tts.save(chunk_file) audio_files.append(chunk_file) return concatenate_audio(audio_files, output_path) Audio concatenation def concatenate_audio ( audio_files, output_path ): """Concatenate multiple audio files using ffmpeg.""" if not audio_files: return False
list_file = "/tmp/audio_list.txt" with open (list_file, "w" ) as f: for audio_file in audio_files: f.write( f"file ' {audio_file} '\n" )
result = subprocess.run([ "ffmpeg" , "-y" , "-f" , "concat" , "-safe" , "0" , "-i" , list_file, "-c" , "copy" , output_path ], capture_output= True )
import os for f in audio_files: os.unlink(f) os.unlink(list_file) return result.returncode == 0 Complete Example #!/usr/bin/env python3 """Create audiobook from web articles.""" import os import re import subprocess import requests
def main ():
urls = [ "https://example.com/article1" , "https://example.com/article2" ]
all_text =
"" for url in urls: print ( f"Fetching: {url} " ) text = fetch_article_content(url) if not verify_content(text): print ( f"WARNING: Content from {url} may be incomplete" )
all_text +=
f"\n\n {text} "
clean_text = clean_text_for_tts(all_text) print ( f"Total text: { len (clean_text)} characters" )
success = create_audiobook(clean_text, "/root/audiobook.mp3" ) if success: print ( "Audiobook created successfully!" ) else : print ( "Failed to create audiobook" ) if name == "main" : main() TTS Provider Comparison Provider Quality Cost API Key Required Best For ElevenLabs Excellent Paid Yes Professional audiobooks OpenAI TTS Very Good Paid Yes General purpose gTTS Good Free No Testing, budget projects Troubleshooting "Content appears to be a summary" Use curl directly instead of web fetch tools Verify the URL is correct and accessible Check if the site requires JavaScript rendering "API key not found" Check environment variables: echo $OPENAI_API_KEY Ensure keys are exported in the shell Fall back to gTTS if no paid API keys available "Audio chunks don't sound continuous" Ensure chunking happens at sentence boundaries Consider adding small pauses between sections Use consistent voice settings across all chunks