Loading source
Pulling the file list, source metadata, and syntax-aware rendering for this listing.
Source from bundle
Generate talking-head avatar videos from text. Pipeline: ElevenLabs V3 TTS → OmniHuman 1.5 lipsync → Kling v3 motion enhancement.
Files
Skill
Size
Entrypoint
Format
Open file
Syntax-highlighted preview of this file as included in the skill package.
scripts/tts_elevenlabs_v3.py
1#!/usr/bin/env python32"""ElevenLabs V3 text-to-speech. Any voice, any text."""3import argparse4import json5import os6from pathlib import Path7from urllib import request8from urllib.error import HTTPError91011def load_api_key(explicit: str | None, env_file: str | None) -> str:12if explicit:13return explicit14for name in ("ELEVENLABS_API_KEY", "XI_API_KEY"):15value = os.getenv(name)16if value:17return value18if env_file:19path = Path(env_file)20if path.exists():21for line in path.read_text().splitlines():22if line.startswith("ELEVENLABS_API_KEY=") or line.startswith("XI_API_KEY="):23return line.split("=", 1)[1].strip()24raise SystemExit("Missing ELEVENLABS_API_KEY. Pass --api-key, --env-file, or set the env var.")252627def list_voices(api_key: str) -> list[dict]:28"""Fetch all available voices from ElevenLabs."""29req = request.Request(30"https://api.elevenlabs.io/v1/voices",31headers={"xi-api-key": api_key, "Accept": "application/json"}32)33with request.urlopen(req, timeout=120) as resp:34payload = json.loads(resp.read())35return payload.get("voices", [])363738def resolve_voice_id(api_key: str, voice_name: str) -> str:39voices = list_voices(api_key)40exact = fallback = fallback_name = None41for voice in voices:42name = voice.get("name", "")43if name.lower() == voice_name.lower():44exact = voice["voice_id"]45break46if voice_name.lower() in name.lower():47fallback = voice["voice_id"]48fallback_name = name49vid = exact or fallback50if not vid:51# Show available voices to help the user find the right one52available = sorted(v.get("name", "?") for v in voices)53raise SystemExit(54f"Voice not found: {voice_name}\n"55f"Available voices ({len(available)}):\n"56+ "\n".join(f" - {n}" for n in available)57)58if fallback and not exact:59import sys60print(f"Exact match not found for '{voice_name}', using partial match: '{fallback_name}'", file=sys.stderr)61return vid626364def main() -> None:65parser = argparse.ArgumentParser(description="ElevenLabs V3 text-to-speech")66parser.add_argument("--text", help="Text to speak. If omitted, reads from --text-file.")67parser.add_argument("--text-file", help="Path to a .txt file with the text to speak.")68parser.add_argument("--voice-id", help="ElevenLabs voice ID.")69parser.add_argument("--voice-name", help="ElevenLabs voice name (resolved via API).")70parser.add_argument("--output", help="Output mp3 path (required unless --list-voices).")71parser.add_argument("--api-key", help="ElevenLabs API key.")72parser.add_argument("--env-file", default=os.path.expanduser("~/.secrets/elevenlabs.env"),73help="Path to env file with ELEVENLABS_API_KEY.")74parser.add_argument("--model-id", default="eleven_v3",75help="TTS model. Recommended: eleven_v3 (default)")76parser.add_argument("--stability", type=float, default=0.34)77parser.add_argument("--similarity-boost", type=float, default=0.91)78parser.add_argument("--style", type=float, default=0.49)79parser.add_argument("--list-voices", action="store_true",80help="List all available voices and exit.")81args = parser.parse_args()8283if args.list_voices:84api_key = load_api_key(args.api_key, args.env_file)85voices = list_voices(api_key)86for v in sorted(voices, key=lambda x: x.get("name", "")):87print(f"{v['voice_id']} {v.get('name', '?')}")88raise SystemExit(0)8990if not args.output:91raise SystemExit("Provide --output (required for TTS generation)")92if not args.text and not args.text_file:93raise SystemExit("Provide --text or --text-file")94if not args.voice_id and not args.voice_name:95raise SystemExit("Provide --voice-id or --voice-name")9697text = args.text or Path(args.text_file).read_text().strip()98api_key = load_api_key(args.api_key, args.env_file)99voice_id = args.voice_id or resolve_voice_id(api_key, args.voice_name)100101output_path = Path(args.output).expanduser().resolve()102output_path.parent.mkdir(parents=True, exist_ok=True)103104payload = json.dumps({105"text": text,106"model_id": args.model_id,107"voice_settings": {108"stability": args.stability,109"similarity_boost": args.similarity_boost,110"style": args.style,111"use_speaker_boost": True,112}113}).encode()114115req = request.Request(116f"https://api.elevenlabs.io/v1/text-to-speech/{voice_id}",117data=payload,118method="POST",119headers={120"xi-api-key": api_key,121"Content-Type": "application/json",122"Accept": "audio/mpeg",123}124)125126try:127with request.urlopen(req, timeout=300) as resp:128audio = resp.read()129except HTTPError as exc:130detail = exc.read().decode("utf-8", errors="replace")131raise SystemExit(f"ElevenLabs TTS failed: HTTP {exc.code}\n{detail}")132133output_path.write_bytes(audio)134print(json.dumps({"ok": True, "output": str(output_path), "voice_id": voice_id, "bytes": len(audio), "model": args.model_id}))135136137if __name__ == "__main__":138main()139