Source from bundle

Avatar Video from Text

Generate talking-head avatar videos from text. Pipeline: ElevenLabs V3 TTS → OmniHuman 1.5 lipsync → Kling v3 motion enhancement.

Костянтин@Latand

Files

Skill

1.1K

Size

15.2 KB

Entrypoint

SKILL.md

Format

folder

Open file

scripts/tts_elevenlabs_v3.py

Syntax-highlighted preview of this file as included in the skill package.

Rendered Source

code · 139 lines · Free

scripts/tts_elevenlabs_v3.py

1#!/usr/bin/env python3
2"""ElevenLabs V3 text-to-speech. Any voice, any text."""
3import argparse
4import json
5import os
6from pathlib import Path
7from urllib import request
8from urllib.error import HTTPError
9 
10 
11def load_api_key(explicit: str | None, env_file: str | None) -> str:
12    if explicit:
13        return explicit
14    for name in ("ELEVENLABS_API_KEY", "XI_API_KEY"):
15        value = os.getenv(name)
16        if value:
17            return value
18    if env_file:
19        path = Path(env_file)
20        if path.exists():
21            for line in path.read_text().splitlines():
22                if line.startswith("ELEVENLABS_API_KEY=") or line.startswith("XI_API_KEY="):
23                    return line.split("=", 1)[1].strip()
24    raise SystemExit("Missing ELEVENLABS_API_KEY. Pass --api-key, --env-file, or set the env var.")
25 
26 
def list_voices(api_key: str) -> list[dict]:
    """Fetch all available voices from ElevenLabs.

    Args:
        api_key: Value sent in the ``xi-api-key`` request header.

    Returns:
        The ``voices`` list from the JSON response, or an empty list when that
        key is missing or null.

    Raises:
        SystemExit: If the server answers with an HTTP error status or the
            request cannot be sent at all.
    """
    # Local import: the file-level imports only bring in HTTPError.
    from urllib.error import URLError

    req = request.Request(
        "https://api.elevenlabs.io/v1/voices",
        headers={"xi-api-key": api_key, "Accept": "application/json"},
    )
    try:
        with request.urlopen(req, timeout=120) as resp:
            payload = json.loads(resp.read())
    except HTTPError as exc:
        # Same reporting style as the TTS request: status code plus response body.
        detail = exc.read().decode("utf-8", errors="replace")
        raise SystemExit(f"ElevenLabs voice listing failed: HTTP {exc.code}\n{detail}") from exc
    except URLError as exc:
        raise SystemExit(f"ElevenLabs voice listing failed: {exc.reason}") from exc
    # `or []` also covers an explicit "voices": null in the response.
    return payload.get("voices") or []
36 
37 
def resolve_voice_id(api_key: str, voice_name: str) -> str:
    """Look up a voice ID by name, case-insensitively.

    An exact name match wins. Otherwise a voice whose name contains
    ``voice_name`` is used; when several names contain it, the last one in the
    API listing is chosen and a notice is written to stderr.

    Args:
        api_key: ElevenLabs API key used to list the voices.
        voice_name: Full voice name, or a fragment of it.

    Returns:
        The ``voice_id`` of the matched voice.

    Raises:
        SystemExit: If no voice matches; the message lists the available names.
    """
    import sys

    voices = list_voices(api_key)
    wanted = voice_name.lower()  # loop-invariant, computed once
    exact = fallback = fallback_name = None
    for voice in voices:
        # Tolerate entries with a missing or null name / voice_id instead of crashing.
        name = voice.get("name") or ""
        voice_id = voice.get("voice_id")
        if not voice_id:
            continue
        lowered = name.lower()
        if lowered == wanted:
            exact = voice_id
            break
        if wanted in lowered:
            fallback = voice_id
            fallback_name = name
    vid = exact or fallback
    if not vid:
        # Show available voices to help the user find the right one
        available = sorted(v.get("name") or "?" for v in voices)
        raise SystemExit(
            f"Voice not found: {voice_name}\n"
            f"Available voices ({len(available)}):\n"
            + "\n".join(f"  - {n}" for n in available)
        )
    if fallback and not exact:
        print(f"Exact match not found for '{voice_name}', using partial match: '{fallback_name}'", file=sys.stderr)
    return vid
62 
63 
def _build_parser() -> argparse.ArgumentParser:
    """Create the command-line parser for the TTS script."""
    parser = argparse.ArgumentParser(description="ElevenLabs V3 text-to-speech")
    parser.add_argument("--text", help="Text to speak. If omitted, reads from --text-file.")
    parser.add_argument("--text-file", help="Path to a .txt file with the text to speak.")
    parser.add_argument("--voice-id", help="ElevenLabs voice ID.")
    parser.add_argument("--voice-name", help="ElevenLabs voice name (resolved via API).")
    parser.add_argument("--output", help="Output mp3 path (required unless --list-voices).")
    parser.add_argument("--api-key", help="ElevenLabs API key.")
    parser.add_argument("--env-file", default=os.path.expanduser("~/.secrets/elevenlabs.env"),
                        help="Path to env file with ELEVENLABS_API_KEY.")
    parser.add_argument("--model-id", default="eleven_v3",
                        help="TTS model. Recommended: eleven_v3 (default)")
    parser.add_argument("--stability", type=float, default=0.34)
    parser.add_argument("--similarity-boost", type=float, default=0.91)
    parser.add_argument("--style", type=float, default=0.49)
    parser.add_argument("--list-voices", action="store_true",
                        help="List all available voices and exit.")
    return parser


def main() -> None:
    """Run the command-line tool.

    With ``--list-voices``, prints ``voice_id  name`` for every voice and
    exits with status 0. Otherwise sends the text to the ElevenLabs
    text-to-speech endpoint, writes the returned audio to ``--output`` and
    prints a one-line JSON summary to stdout.

    Raises:
        SystemExit: On missing arguments, empty text, a missing API key, an
            unknown voice name, or a failed request.
    """
    # Local imports: the file-level imports do not bring these names in.
    from urllib.error import URLError
    from urllib.parse import quote

    args = _build_parser().parse_args()

    if args.list_voices:
        api_key = load_api_key(args.api_key, args.env_file)
        voices = list_voices(api_key)
        for v in sorted(voices, key=lambda x: x.get("name", "")):
            print(f"{v['voice_id']}  {v.get('name', '?')}")
        raise SystemExit(0)

    if not args.output:
        raise SystemExit("Provide --output (required for TTS generation)")
    if not args.text and not args.text_file:
        raise SystemExit("Provide --text or --text-file")
    if not args.voice_id and not args.voice_name:
        raise SystemExit("Provide --voice-id or --voice-name")

    # Read the text file as UTF-8 (BOM tolerated) rather than the locale
    # encoding, so non-ASCII text is not garbled on non-UTF-8 locales.
    text = args.text or Path(args.text_file).read_text(encoding="utf-8-sig").strip()
    if not text.strip():
        # Fail before any network call instead of sending an empty request.
        raise SystemExit("Text is empty; nothing to synthesize.")

    api_key = load_api_key(args.api_key, args.env_file)
    voice_id = args.voice_id or resolve_voice_id(api_key, args.voice_name)

    output_path = Path(args.output).expanduser().resolve()
    output_path.parent.mkdir(parents=True, exist_ok=True)

    payload = json.dumps({
        "text": text,
        "model_id": args.model_id,
        "voice_settings": {
            "stability": args.stability,
            "similarity_boost": args.similarity_boost,
            "style": args.style,
            "use_speaker_boost": True,
        },
    }).encode()

    req = request.Request(
        # Escape the ID so a stray "/" or "?" cannot change the request path.
        f"https://api.elevenlabs.io/v1/text-to-speech/{quote(voice_id, safe='')}",
        data=payload,
        method="POST",
        headers={
            "xi-api-key": api_key,
            "Content-Type": "application/json",
            "Accept": "audio/mpeg",
        },
    )

    try:
        with request.urlopen(req, timeout=300) as resp:
            audio = resp.read()
    except HTTPError as exc:
        detail = exc.read().decode("utf-8", errors="replace")
        raise SystemExit(f"ElevenLabs TTS failed: HTTP {exc.code}\n{detail}") from exc
    except URLError as exc:
        # Connection-level failure (DNS, refused connection, TLS, ...).
        raise SystemExit(f"ElevenLabs TTS failed: {exc.reason}") from exc

    output_path.write_bytes(audio)
    print(json.dumps({"ok": True, "output": str(output_path), "voice_id": voice_id, "bytes": len(audio), "model": args.model_id}))
135 
136 
137if __name__ == "__main__":
138    main()
139

Marketplace

Source from bundle

Avatar Video from Text

Generate talking-head avatar videos from text. Pipeline: ElevenLabs V3 TTS → OmniHuman 1.5 lipsync → Kling v3 motion enhancement.

Костянтин@Latand

Files

Skill

1.1K

Size

15.2 KB

Entrypoint

SKILL.md

Format

folder

Open file

scripts/tts_elevenlabs_v3.py

Syntax-highlighted preview of this file as included in the skill package.

Rendered Source

code · 139 lines · Free

scripts/tts_elevenlabs_v3.py

1#!/usr/bin/env python3
2"""ElevenLabs V3 text-to-speech. Any voice, any text."""
3import argparse
4import json
5import os
6from pathlib import Path
7from urllib import request
8from urllib.error import HTTPError
9 
10 
11def load_api_key(explicit: str | None, env_file: str | None) -> str:
12    if explicit:
13        return explicit
14    for name in ("ELEVENLABS_API_KEY", "XI_API_KEY"):
15        value = os.getenv(name)
16        if value:
17            return value
18    if env_file:
19        path = Path(env_file)
20        if path.exists():
21            for line in path.read_text().splitlines():
22                if line.startswith("ELEVENLABS_API_KEY=") or line.startswith("XI_API_KEY="):
23                    return line.split("=", 1)[1].strip()
24    raise SystemExit("Missing ELEVENLABS_API_KEY. Pass --api-key, --env-file, or set the env var.")
25 
26 
def list_voices(api_key: str) -> list[dict]:
    """Fetch all available voices from ElevenLabs.

    Args:
        api_key: Value sent in the ``xi-api-key`` request header.

    Returns:
        The ``voices`` list from the JSON response, or an empty list when that
        key is missing or null.

    Raises:
        SystemExit: If the server answers with an HTTP error status or the
            request cannot be sent at all.
    """
    # Local import: the file-level imports only bring in HTTPError.
    from urllib.error import URLError

    req = request.Request(
        "https://api.elevenlabs.io/v1/voices",
        headers={"xi-api-key": api_key, "Accept": "application/json"},
    )
    try:
        with request.urlopen(req, timeout=120) as resp:
            payload = json.loads(resp.read())
    except HTTPError as exc:
        # Same reporting style as the TTS request: status code plus response body.
        detail = exc.read().decode("utf-8", errors="replace")
        raise SystemExit(f"ElevenLabs voice listing failed: HTTP {exc.code}\n{detail}") from exc
    except URLError as exc:
        raise SystemExit(f"ElevenLabs voice listing failed: {exc.reason}") from exc
    # `or []` also covers an explicit "voices": null in the response.
    return payload.get("voices") or []
36 
37 
def resolve_voice_id(api_key: str, voice_name: str) -> str:
    """Look up a voice ID by name, case-insensitively.

    An exact name match wins. Otherwise a voice whose name contains
    ``voice_name`` is used; when several names contain it, the last one in the
    API listing is chosen and a notice is written to stderr.

    Args:
        api_key: ElevenLabs API key used to list the voices.
        voice_name: Full voice name, or a fragment of it.

    Returns:
        The ``voice_id`` of the matched voice.

    Raises:
        SystemExit: If no voice matches; the message lists the available names.
    """
    import sys

    voices = list_voices(api_key)
    wanted = voice_name.lower()  # loop-invariant, computed once
    exact = fallback = fallback_name = None
    for voice in voices:
        # Tolerate entries with a missing or null name / voice_id instead of crashing.
        name = voice.get("name") or ""
        voice_id = voice.get("voice_id")
        if not voice_id:
            continue
        lowered = name.lower()
        if lowered == wanted:
            exact = voice_id
            break
        if wanted in lowered:
            fallback = voice_id
            fallback_name = name
    vid = exact or fallback
    if not vid:
        # Show available voices to help the user find the right one
        available = sorted(v.get("name") or "?" for v in voices)
        raise SystemExit(
            f"Voice not found: {voice_name}\n"
            f"Available voices ({len(available)}):\n"
            + "\n".join(f"  - {n}" for n in available)
        )
    if fallback and not exact:
        print(f"Exact match not found for '{voice_name}', using partial match: '{fallback_name}'", file=sys.stderr)
    return vid
62 
63 
def _build_parser() -> argparse.ArgumentParser:
    """Create the command-line parser for the TTS script."""
    parser = argparse.ArgumentParser(description="ElevenLabs V3 text-to-speech")
    parser.add_argument("--text", help="Text to speak. If omitted, reads from --text-file.")
    parser.add_argument("--text-file", help="Path to a .txt file with the text to speak.")
    parser.add_argument("--voice-id", help="ElevenLabs voice ID.")
    parser.add_argument("--voice-name", help="ElevenLabs voice name (resolved via API).")
    parser.add_argument("--output", help="Output mp3 path (required unless --list-voices).")
    parser.add_argument("--api-key", help="ElevenLabs API key.")
    parser.add_argument("--env-file", default=os.path.expanduser("~/.secrets/elevenlabs.env"),
                        help="Path to env file with ELEVENLABS_API_KEY.")
    parser.add_argument("--model-id", default="eleven_v3",
                        help="TTS model. Recommended: eleven_v3 (default)")
    parser.add_argument("--stability", type=float, default=0.34)
    parser.add_argument("--similarity-boost", type=float, default=0.91)
    parser.add_argument("--style", type=float, default=0.49)
    parser.add_argument("--list-voices", action="store_true",
                        help="List all available voices and exit.")
    return parser


def main() -> None:
    """Run the command-line tool.

    With ``--list-voices``, prints ``voice_id  name`` for every voice and
    exits with status 0. Otherwise sends the text to the ElevenLabs
    text-to-speech endpoint, writes the returned audio to ``--output`` and
    prints a one-line JSON summary to stdout.

    Raises:
        SystemExit: On missing arguments, empty text, a missing API key, an
            unknown voice name, or a failed request.
    """
    # Local imports: the file-level imports do not bring these names in.
    from urllib.error import URLError
    from urllib.parse import quote

    args = _build_parser().parse_args()

    if args.list_voices:
        api_key = load_api_key(args.api_key, args.env_file)
        voices = list_voices(api_key)
        for v in sorted(voices, key=lambda x: x.get("name", "")):
            print(f"{v['voice_id']}  {v.get('name', '?')}")
        raise SystemExit(0)

    if not args.output:
        raise SystemExit("Provide --output (required for TTS generation)")
    if not args.text and not args.text_file:
        raise SystemExit("Provide --text or --text-file")
    if not args.voice_id and not args.voice_name:
        raise SystemExit("Provide --voice-id or --voice-name")

    # Read the text file as UTF-8 (BOM tolerated) rather than the locale
    # encoding, so non-ASCII text is not garbled on non-UTF-8 locales.
    text = args.text or Path(args.text_file).read_text(encoding="utf-8-sig").strip()
    if not text.strip():
        # Fail before any network call instead of sending an empty request.
        raise SystemExit("Text is empty; nothing to synthesize.")

    api_key = load_api_key(args.api_key, args.env_file)
    voice_id = args.voice_id or resolve_voice_id(api_key, args.voice_name)

    output_path = Path(args.output).expanduser().resolve()
    output_path.parent.mkdir(parents=True, exist_ok=True)

    payload = json.dumps({
        "text": text,
        "model_id": args.model_id,
        "voice_settings": {
            "stability": args.stability,
            "similarity_boost": args.similarity_boost,
            "style": args.style,
            "use_speaker_boost": True,
        },
    }).encode()

    req = request.Request(
        # Escape the ID so a stray "/" or "?" cannot change the request path.
        f"https://api.elevenlabs.io/v1/text-to-speech/{quote(voice_id, safe='')}",
        data=payload,
        method="POST",
        headers={
            "xi-api-key": api_key,
            "Content-Type": "application/json",
            "Accept": "audio/mpeg",
        },
    )

    try:
        with request.urlopen(req, timeout=300) as resp:
            audio = resp.read()
    except HTTPError as exc:
        detail = exc.read().decode("utf-8", errors="replace")
        raise SystemExit(f"ElevenLabs TTS failed: HTTP {exc.code}\n{detail}") from exc
    except URLError as exc:
        # Connection-level failure (DNS, refused connection, TLS, ...).
        raise SystemExit(f"ElevenLabs TTS failed: {exc.reason}") from exc

    output_path.write_bytes(audio)
    print(json.dumps({"ok": True, "output": str(output_path), "voice_id": voice_id, "bytes": len(audio), "model": args.model_id}))
135 
136 
137if __name__ == "__main__":
138    main()
139

Avatar Video from Text

scripts/tts_elevenlabs_v3.py

Preparing the source view

Avatar Video from Text

scripts/tts_elevenlabs_v3.py