Loading source
Pulling the file list, source metadata, and syntax-aware rendering for this listing.
Source from bundle
Generate videos, transfer motion, create speaking avatars, and optionally deliver finished media to Telegram via fal.ai. Supports Kling O3/v3 Pro/Turbo, Seedance 1.5 Pro, HeyGen Avatar, Seedream v5/v4.5, and Grok Imagine (text-to-video, image-to-video, video edit).
Files
Skill
Size
Entrypoint
Format
Open file
Syntax-highlighted preview of this file as included in the skill package.
scripts/fal_video_toolkit.py
1#!/usr/bin/env python32"""Unified fal.ai video generation CLI.34Supports image-to-video, motion control, speaking avatar, and image editing.5"""6from __future__ import annotations78import argparse9import json10import os11import subprocess12from pathlib import Path13from typing import Any141516MODELS = {17"kling-o3": "fal-ai/kling-video/o3/standard/image-to-video",18"kling-v3-pro": "fal-ai/kling-video/v3/pro/image-to-video",19"kling-turbo": "fal-ai/kling-video/v2.5-turbo/pro/image-to-video",20"kling-motion": "fal-ai/kling-video/v3/pro/motion-control",21"kling-motion-std": "fal-ai/kling-video/v3/standard/motion-control",22"seedance": "fal-ai/bytedance/seedance/v1.5/pro/image-to-video",23"heygen": "fal-ai/heygen/avatar4/image-to-video",24"seedream-v5": "fal-ai/bytedance/seedream/v5/lite/edit",25"seedream-v4": "fal-ai/bytedance/seedream/v4.5/edit",26"grok-text": "xai/grok-imagine-video/text-to-video",27"grok-image": "xai/grok-imagine-video/image-to-video",28"grok-edit": "xai/grok-imagine-video/edit-video",29}3031MOTION_MODELS = {"kling-motion", "kling-motion-std"}32IMAGE_EDIT_MODELS = {"seedream-v5", "seedream-v4"}33AVATAR_MODELS = {"heygen"}34STRING_DURATION_MODELS = {"seedance"}353637def _load_api_key(args: argparse.Namespace) -> str:38if args.api_key:39return args.api_key.strip()40if args.api_key_file:41return Path(args.api_key_file).read_text(encoding="utf-8").strip()42for var in ("FAL_KEY", "FAL_API_KEY", "FAL_AI_KEY"):43if value := os.getenv(var):44return value.strip()45if (key_id := os.getenv("FAL_KEY_ID")) and (key_secret := os.getenv("FAL_KEY_SECRET")):46return f"{key_id.strip()}:{key_secret.strip()}"47raise SystemExit(48"Missing fal.ai credentials. 
Pass --api-key / --api-key-file or set FAL_KEY / FAL_AI_KEY."49)505152def _resolve_path(path_str: str) -> Path:53path = Path(path_str).expanduser()54if not path.is_absolute():55path = Path.cwd() / path56path.parent.mkdir(parents=True, exist_ok=True)57return path585960def _print_logs(update: Any) -> None:61status = getattr(update, "status", None)62logs = getattr(update, "logs", None) or []63if status:64print(f"STATUS: {status}")65for log in logs:66message = log.get("message") if isinstance(log, dict) else str(log)67if message:68print(message)697071def _is_motion_endpoint(endpoint: str) -> bool:72return endpoint.endswith("/motion-control")737475def _is_image_edit_endpoint(endpoint: str) -> bool:76return endpoint.endswith("/edit") and "seedream" in endpoint777879def _is_avatar_endpoint(endpoint: str) -> bool:80return "heygen/avatar4/image-to-video" in endpoint818283def _is_text_to_video_endpoint(endpoint: str) -> bool:84return endpoint.endswith("/text-to-video")858687def _is_video_edit_endpoint(endpoint: str) -> bool:88return endpoint.endswith("/edit-video")899091def _is_image_to_video_endpoint(endpoint: str) -> bool:92return endpoint.endswith("/image-to-video") and not _is_avatar_endpoint(endpoint)939495def _supports_audio_flag(endpoint: str) -> bool:96return "kling-video" in endpoint or "seedance" in endpoint979899def _supports_negative_prompt(endpoint: str) -> bool:100return "kling-video" in endpoint and endpoint.endswith("/image-to-video")101102103def _supports_end_image(endpoint: str) -> bool:104return (105("kling-video" in endpoint and endpoint.endswith("/image-to-video"))106or "seedance" in endpoint107)108109110def _supports_safety_checker(endpoint: str) -> bool:111return ("kling-video" in endpoint and endpoint.endswith("/image-to-video")) or (112"seedream" in endpoint and endpoint.endswith("/edit")113)114115116def _supports_resolution(endpoint: str) -> bool:117return (118"seedance" in endpoint119or _is_avatar_endpoint(endpoint)120or 
endpoint.startswith("xai/grok-imagine-video/")121)122123124def _supports_aspect_ratio(endpoint: str) -> bool:125return (126"seedance" in endpoint127or _is_avatar_endpoint(endpoint)128or endpoint in {129"xai/grok-imagine-video/text-to-video",130"xai/grok-imagine-video/image-to-video",131}132)133134135def _extract_frame(video_path: Path, frame_time: str, out_path: Path) -> Path:136out_path.parent.mkdir(parents=True, exist_ok=True)137cmd = [138"ffmpeg",139"-y",140"-ss",141frame_time,142"-i",143str(video_path),144"-frames:v",145"1",146str(out_path),147]148try:149subprocess.run(cmd, check=True, capture_output=True, text=True)150except FileNotFoundError as exc:151raise SystemExit("ffmpeg is required to extract a frame from video.") from exc152except subprocess.CalledProcessError as exc:153stderr = (exc.stderr or "").strip()154raise SystemExit(155f"Failed to extract frame from {video_path}: {stderr or exc}"156) from exc157return out_path158159160def _send_to_telegram(161media_path: Path,162*,163target: str,164thread_id: str | None,165reply_to: str | None,166message: str | None,167) -> None:168cmd = [169"openclaw",170"message",171"send",172"--channel",173"telegram",174"--target",175target,176"--media",177str(media_path),178]179if message:180cmd.extend(["--message", message])181if thread_id:182cmd.extend(["--thread-id", thread_id])183if reply_to:184cmd.extend(["--reply-to", reply_to])185186print(f"Sending to Telegram target {target}...")187try:188result = subprocess.run(cmd, check=True, capture_output=True, text=True)189except FileNotFoundError as exc:190raise SystemExit("openclaw CLI is required for Telegram delivery.") from exc191except subprocess.CalledProcessError as exc:192stderr = (exc.stderr or "").strip()193stdout = (exc.stdout or "").strip()194details = stderr or stdout or str(exc)195raise SystemExit(f"Telegram send failed: {details}") from exc196197if result.stdout.strip():198print(result.stdout.strip())199if 
result.stderr.strip():200print(result.stderr.strip())201print("Telegram send complete.")202203204def _warn(message: str) -> None:205print(f"WARNING: {message}")206207208def main() -> None:209model_list = "\n".join(f" {k:20s} {v}" for k, v in MODELS.items())210parser = argparse.ArgumentParser(211description="fal.ai video/image generation CLI.",212epilog=f"Available model shortcuts:\n{model_list}",213formatter_class=argparse.RawDescriptionHelpFormatter,214)215parser.add_argument("--image", help="Input image path.")216parser.add_argument(217"--video",218help="Input video path for motion control or video-edit models.",219)220parser.add_argument(221"--extract-first-frame-from-video",222help="Extract a frame from this video and use it as the input image.",223)224parser.add_argument(225"--extract-frame-time",226default="00:00:00",227help="Timestamp for the extracted frame (default: 00:00:00).",228)229parser.add_argument("--prompt", default="", help="Guidance prompt or speech text (HeyGen).")230parser.add_argument(231"--model", default="kling-o3",232help=f"Model shortcut or full fal endpoint. 
Shortcuts: {', '.join(MODELS.keys())}",233)234parser.add_argument("--duration", default="10", help="Video duration in seconds.")235parser.add_argument("--orientation", choices=["image", "video"], default="video",236help="Motion control orientation.")237parser.add_argument("--resolution", help="Resolution for Seedance/HeyGen (480p/720p/1080p).")238parser.add_argument("--aspect-ratio", help="Aspect ratio (16:9, 9:16, 1:1, etc).")239parser.add_argument("--no-audio", action="store_true", help="Disable audio generation.")240parser.add_argument("--no-safety", action="store_true", help="Disable client safety checker.")241parser.add_argument("--negative-prompt", default="blur, distortion, low quality, artifacts, glitch, deformed hands, watermark",242help="Negative prompt for Kling models.")243parser.add_argument("--talking-style", choices=["stable", "expressive"], default="expressive",244help="HeyGen talking style.")245parser.add_argument("--image-size", help="Output image size for Seedream (e.g. 
2048x2048).")246parser.add_argument("--end-image", help="End frame image path (Kling O3 / Seedance).")247parser.add_argument("--out", required=True, help="Output file path.")248parser.add_argument("--json-out", help="Save raw fal response JSON.")249parser.add_argument("--api-key", help="fal key value.")250parser.add_argument("--api-key-file", help="Read fal key from file.")251parser.add_argument("--telegram-target", help="Telegram chat id or @username for optional delivery.")252parser.add_argument("--telegram-thread-id", help="Optional Telegram thread/topic id.")253parser.add_argument("--telegram-reply-to", help="Optional Telegram message id to reply to.")254parser.add_argument("--telegram-message", default="", help="Optional Telegram caption/message.")255args = parser.parse_args()256257api_key = _load_api_key(args)258os.environ["FAL_KEY"] = api_key259260model_key = args.model261endpoint = MODELS.get(model_key, model_key)262263out_path = _resolve_path(args.out)264json_out_path = _resolve_path(args.json_out) if args.json_out else None265266import fal_client267from urllib.request import urlopen268269# Build payload based on model type270payload: dict[str, Any] = {}271image_url: str | None = None272image_path: Path | None = None273274if args.image:275image_path = Path(args.image).expanduser()276if not image_path.exists():277raise SystemExit(f"Image not found: {image_path}")278279if args.extract_first_frame_from_video:280source_video = Path(args.extract_first_frame_from_video).expanduser()281if not source_video.exists():282raise SystemExit(f"Frame source video not found: {source_video}")283if image_path:284_warn("--image was provided, so --extract-first-frame-from-video is ignored.")285else:286frame_path = out_path.with_suffix(".first-frame.png")287print(f"Extracting frame at {args.extract_frame_time}: {source_video} -> {frame_path}")288image_path = _extract_frame(source_video, args.extract_frame_time, frame_path)289290if _is_motion_endpoint(endpoint) or 
_is_image_edit_endpoint(endpoint) or _is_avatar_endpoint(endpoint) or _is_image_to_video_endpoint(endpoint):291if not image_path:292raise SystemExit(293f"This model requires an image. Pass --image or --extract-first-frame-from-video."294)295print(f"Uploading image: {image_path}")296image_url = fal_client.upload_file(str(image_path))297print(f"IMAGE_URL: {image_url}")298299if _is_motion_endpoint(endpoint):300if not args.video:301raise SystemExit("--video is required for motion control models.")302video_path = Path(args.video).expanduser()303if not video_path.exists():304raise SystemExit(f"Video not found: {video_path}")305print(f"Uploading video: {video_path}")306video_url = fal_client.upload_file(str(video_path))307print(f"VIDEO_URL: {video_url}")308payload = {309"image_url": image_url,310"video_url": video_url,311"character_orientation": args.orientation,312"keep_original_sound": not args.no_audio,313}314315elif _is_image_edit_endpoint(endpoint):316payload = {317"image_urls": [image_url],318"prompt": args.prompt.strip() or "enhance image quality",319}320if _supports_safety_checker(endpoint):321payload["enable_safety_checker"] = not args.no_safety322if args.image_size:323w, h = args.image_size.split("x")324payload["image_size"] = {"width": int(w), "height": int(h)}325326elif _is_avatar_endpoint(endpoint):327payload = {328"image_url": image_url,329"prompt": args.prompt.strip(),330"talking_style": args.talking_style,331}332if args.resolution and _supports_resolution(endpoint):333payload["resolution"] = args.resolution334if args.aspect_ratio and _supports_aspect_ratio(endpoint):335payload["aspect_ratio"] = args.aspect_ratio336337elif _is_text_to_video_endpoint(endpoint):338if not args.prompt.strip():339raise SystemExit("--prompt is required for text-to-video models.")340payload = {341"prompt": args.prompt.strip(),342"duration": int(args.duration),343}344if args.aspect_ratio and _supports_aspect_ratio(endpoint):345payload["aspect_ratio"] = args.aspect_ratio346if 
args.resolution and _supports_resolution(endpoint):347payload["resolution"] = args.resolution348349elif _is_video_edit_endpoint(endpoint):350if not args.video:351raise SystemExit("--video is required for video-edit models.")352if not args.prompt.strip():353raise SystemExit("--prompt is required for video-edit models.")354video_path = Path(args.video).expanduser()355if not video_path.exists():356raise SystemExit(f"Video not found: {video_path}")357print(f"Uploading video: {video_path}")358video_url = fal_client.upload_file(str(video_path))359print(f"VIDEO_URL: {video_url}")360payload = {361"prompt": args.prompt.strip(),362"video_url": video_url,363}364if args.resolution and _supports_resolution(endpoint):365payload["resolution"] = args.resolution366367else:368# Image-to-video (Kling, Seedance, Grok Image)369if not args.prompt.strip():370raise SystemExit("--prompt is required for image-to-video models.")371duration = args.duration372if model_key in STRING_DURATION_MODELS or "seedance" in endpoint:373duration = str(duration)374else:375try:376duration = int(duration)377except ValueError:378pass379380payload = {381"image_url": image_url,382"prompt": args.prompt.strip(),383"duration": duration,384}385386if _supports_safety_checker(endpoint):387payload["enable_safety_checker"] = not args.no_safety388389if model_key in STRING_DURATION_MODELS or "seedance" in endpoint:390if args.resolution and _supports_resolution(endpoint):391payload["resolution"] = args.resolution392if args.aspect_ratio and _supports_aspect_ratio(endpoint):393payload["aspect_ratio"] = args.aspect_ratio394if _supports_audio_flag(endpoint):395payload["generate_audio"] = not args.no_audio396else:397if args.resolution and _supports_resolution(endpoint):398payload["resolution"] = args.resolution399if args.aspect_ratio and _supports_aspect_ratio(endpoint):400payload["aspect_ratio"] = args.aspect_ratio401if _supports_audio_flag(endpoint):402payload["generate_audio"] = not args.no_audio403if args.negative_prompt 
and _supports_negative_prompt(endpoint):404payload["negative_prompt"] = args.negative_prompt405406if args.end_image and _supports_end_image(endpoint):407end_path = Path(args.end_image).expanduser()408if not end_path.exists():409raise SystemExit(f"End image not found: {end_path}")410print(f"Uploading end image: {end_path}")411payload["end_image_url"] = fal_client.upload_file(str(end_path))412413if args.no_safety and not _supports_safety_checker(endpoint):414_warn(415f"{endpoint} does not expose a client-side safety toggle in the fal.ai schema. "416"--no-safety has no effect here."417)418if args.no_audio and not _su