Loading source
Pulling the file list, source metadata, and syntax-aware rendering for this listing.
Source from repo
Generate images via OpenAI, Google, OpenRouter, DashScope, Jimeng, Seedream, and Replicate APIs with batch support.
Files
Skill
Size
Entrypoint
Format
Open file
Syntax-highlighted preview of this file as included in the skill package.
scripts/providers/google.ts
import path from "node:path";
import { readFile } from "node:fs/promises";
import { execFileSync } from "node:child_process";
import type { CliArgs } from "../types";

/** Model ids treated as Gemini multimodal; only these may receive reference images. */
const GOOGLE_MULTIMODAL_MODELS = [
  "gemini-3-pro-image-preview",
  "gemini-3-flash-preview",
  "gemini-3.1-flash-image-preview",
];

/** Model ids treated as Imagen; these are sent to the `:predict` endpoint. */
const GOOGLE_IMAGEN_MODELS = [
  "imagen-3.0-generate-002",
  "imagen-3.0-generate-001",
];

/**
 * Environment variables consulted for a proxy URL, highest priority first.
 * `all_proxy` is listed last so that every previously recognised variable
 * keeps its precedence.
 */
const PROXY_ENV_VARS = [
  "https_proxy",
  "HTTPS_PROXY",
  "http_proxy",
  "HTTP_PROXY",
  "ALL_PROXY",
  "all_proxy",
] as const;

/** Maps a lower-cased file extension to the MIME type sent with a reference image. */
const MIME_TYPE_BY_EXTENSION = new Map<string, string>([
  [".jpg", "image/jpeg"],
  [".jpeg", "image/jpeg"],
  [".gif", "image/gif"],
  [".webp", "image/webp"],
]);

/** Keys probed, in this order, when looking for base64 image bytes in a prediction. */
const BASE64_IMAGE_KEYS = ["imageBytes", "bytesBase64Encoded", "data"] as const;

/**
 * Returns the model to use when none is given: `GOOGLE_IMAGE_MODEL` from the
 * environment if set and non-empty, otherwise `gemini-3-pro-image-preview`.
 */
export function getDefaultModel(): string {
  return process.env.GOOGLE_IMAGE_MODEL || "gemini-3-pro-image-preview";
}

/** Strips a leading `models/` prefix from a model id, if present. */
export function normalizeGoogleModelId(model: string): string {
  return model.startsWith("models/") ? model.slice("models/".length) : model;
}

/** True when the normalized model id contains one of the known Gemini multimodal ids. */
export function isGoogleMultimodal(model: string): boolean {
  const normalized = normalizeGoogleModelId(model);
  return GOOGLE_MULTIMODAL_MODELS.some((m) => normalized.includes(m));
}

/** True when the normalized model id contains one of the known Imagen ids. */
export function isGoogleImagen(model: string): boolean {
  const normalized = normalizeGoogleModelId(model);
  return GOOGLE_IMAGEN_MODELS.some((m) => normalized.includes(m));
}

/** Reads the API key from `GOOGLE_API_KEY`, then `GEMINI_API_KEY`; `null` if neither is set. */
function getGoogleApiKey(): string | null {
  return process.env.GOOGLE_API_KEY || process.env.GEMINI_API_KEY || null;
}

/**
 * Resolves the image size to request.
 *
 * An explicit `args.imageSize` wins; otherwise quality `"2k"` maps to `"2K"`
 * and anything else to `"1K"`.
 *
 * NOTE(review): `args.imageSize` is cast without validation, so an unexpected
 * value is passed through unchanged — confirm the CLI parser restricts it.
 */
export function getGoogleImageSize(args: CliArgs): "1K" | "2K" | "4K" {
  if (args.imageSize) return args.imageSize as "1K" | "2K" | "4K";
  return args.quality === "2k" ? "2K" : "1K";
}

/** Base URL from `GOOGLE_BASE_URL` (or the public endpoint) with trailing slashes removed. */
function getGoogleBaseUrl(): string {
  const base =
    process.env.GOOGLE_BASE_URL || "https://generativelanguage.googleapis.com";
  return base.replace(/\/+$/g, "");
}

/**
 * Joins the base URL and `pathname`, inserting `/v1beta` unless the base URL
 * already ends with it. Leading slashes on `pathname` are ignored.
 */
export function buildGoogleUrl(pathname: string): string {
  const base = getGoogleBaseUrl();
  const cleanedPath = pathname.replace(/^\/+/g, "");
  if (base.endsWith("/v1beta")) return `${base}/${cleanedPath}`;
  return `${base}/v1beta/${cleanedPath}`;
}

/** Returns `models/<id>` for a model id given with or without the `models/` prefix. */
function toModelPath(model: string): string {
  return `models/${normalizeGoogleModelId(model)}`;
}

/**
 * Returns the first non-empty proxy URL found in the environment, or `null`.
 * Both upper- and lower-case spellings are checked, including `all_proxy`.
 */
function getHttpProxy(): string | null {
  for (const name of PROXY_ENV_VARS) {
    const value = process.env[name];
    if (value) return value;
  }
  return null;
}

/** Replaces every occurrence of `secret` in `text` with `[REDACTED]`. */
function redactSecret(text: string, secret: string): string {
  if (!secret) return text;
  return text.split(secret).join("[REDACTED]");
}

/**
 * Parses a raw response body and surfaces API-level errors.
 *
 * @throws Error if `raw` is not valid JSON (for example an HTML error page
 *   returned by a proxy, or an empty body), or if the parsed object carries a
 *   truthy `error` field.
 */
function parseGoogleJsonText(raw: string): unknown {
  let parsed: unknown;
  try {
    parsed = JSON.parse(raw);
  } catch {
    // Include a short excerpt so the failure is diagnosable without dumping
    // a potentially huge body into the error message.
    const snippet = raw.trim().slice(0, 200) || "<empty response>";
    throw new Error(`Google API response was not valid JSON: ${snippet}`);
  }

  if (parsed !== null && typeof parsed === "object") {
    const apiError = (
      parsed as { error?: { code?: unknown; message?: unknown } }
    ).error;
    if (apiError) {
      throw new Error(
        `Google API error (${apiError.code}): ${apiError.message}`,
      );
    }
  }
  return parsed;
}

/**
 * POSTs `body` as JSON by spawning `curl`, sending the payload on stdin.
 *
 * NOTE(review): `execFileSync` blocks the event loop for the duration of the
 * request, and the API key is passed as a command-line argument, which makes
 * it visible in the process list while curl runs.
 *
 * @throws Error if curl fails to run or exits non-zero, if the response is
 *   not valid JSON, or if the response contains an `error` field.
 */
async function postGoogleJsonViaCurl<T>(
  url: string,
  apiKey: string,
  body: unknown,
): Promise<T> {
  const proxy = getHttpProxy();
  const curlArgs = [
    "-s",
    // Keep curl's own error text on stderr; with "-s" alone it is suppressed.
    "-S",
    "--connect-timeout",
    "30",
    "--max-time",
    "300",
    ...(proxy ? ["-x", proxy] : []),
    url,
    "-H",
    "Content-Type: application/json",
    "-H",
    `x-goog-api-key: ${apiKey}`,
    "-d",
    "@-",
  ];

  let raw: string;
  try {
    raw = execFileSync("curl", curlArgs, {
      input: JSON.stringify(body),
      encoding: "utf8",
      maxBuffer: 100 * 1024 * 1024,
      // Slightly longer than curl's --max-time (300 s).
      timeout: 310000,
    });
  } catch (error: unknown) {
    const e = error as { message?: string; stderr?: string | Buffer };
    const stderrText =
      typeof e.stderr === "string"
        ? e.stderr
        : e.stderr
          ? e.stderr.toString("utf8")
          : "";
    // Node's "Command failed: ..." message echoes the full argument list,
    // including the API key header, so scrub the key before rethrowing.
    const details = redactSecret(
      stderrText.trim() || e.message || "curl request failed",
      apiKey,
    );
    throw new Error(`Google API request failed via curl: ${details}`);
  }

  return parseGoogleJsonText(raw) as T;
}

/**
 * POSTs `body` as JSON using `fetch`.
 *
 * @throws Error carrying the HTTP status and response text when the response
 *   is not OK.
 */
async function postGoogleJsonViaFetch<T>(
  url: string,
  apiKey: string,
  body: unknown,
): Promise<T> {
  const res = await fetch(url, {
    method: "POST",
    headers: {
      "Content-Type": "application/json",
      "x-goog-api-key": apiKey,
    },
    body: JSON.stringify(body),
  });

  if (!res.ok) {
    const err = await res.text();
    throw new Error(`Google API error (${res.status}): ${err}`);
  }

  return (await res.json()) as T;
}

/**
 * POSTs `body` to the given API path using the configured key and base URL.
 *
 * @throws Error if neither `GOOGLE_API_KEY` nor `GEMINI_API_KEY` is set, or
 *   if the underlying transport reports a failure.
 */
async function postGoogleJson<T>(pathname: string, body: unknown): Promise<T> {
  const apiKey = getGoogleApiKey();
  if (!apiKey) throw new Error("GOOGLE_API_KEY or GEMINI_API_KEY is required");

  const url = buildGoogleUrl(pathname);

  // When an HTTP proxy is detected, use curl instead of fetch.
  // Bun's fetch has a known issue where long-lived connections through
  // HTTP proxies get their sockets closed unexpectedly, causing image
  // generation requests to fail with "socket connection was closed
  // unexpectedly". Using curl as the HTTP client works around this.
  if (getHttpProxy()) {
    return postGoogleJsonViaCurl<T>(url, apiKey, body);
  }

  return postGoogleJsonViaFetch<T>(url, apiKey, body);
}

/**
 * Appends an aspect-ratio hint (when `ar` is set) and, for quality `"2k"`,
 * a high-resolution hint to the prompt text.
 */
export function buildPromptWithAspect(
  prompt: string,
  ar: string | null,
  quality: CliArgs["quality"],
): string {
  const withAspect = addAspectRatioToPrompt(prompt, ar);
  return quality === "2k"
    ? `${withAspect} High resolution 2048px.`
    : withAspect;
}

/** Appends ` Aspect ratio: <ar>.` to the prompt, or returns it unchanged when `ar` is empty. */
export function addAspectRatioToPrompt(prompt: string, ar: string | null): string {
  if (!ar) return prompt;
  return `${prompt} Aspect ratio: ${ar}.`;
}

/**
 * Reads a file and returns its contents base64-encoded together with a MIME
 * type chosen from the file extension; unrecognised extensions are reported
 * as `image/png`.
 */
async function readImageAsBase64(
  p: string,
): Promise<{ data: string; mimeType: string }> {
  const buf = await readFile(p);
  const ext = path.extname(p).toLowerCase();
  const mimeType = MIME_TYPE_BY_EXTENSION.get(ext) ?? "image/png";
  return { data: buf.toString("base64"), mimeType };
}

/**
 * Returns the first non-empty `inlineData.data` string found in any part of
 * any candidate, or `null` when there is none.
 */
export function extractInlineImageData(response: {
  candidates?: Array<{
    content?: { parts?: Array<{ inlineData?: { data?: string } }> };
  }>;
}): string | null {
  for (const candidate of response.candidates || []) {
    for (const part of candidate.content?.parts || []) {
      const data = part.inlineData?.data;
      if (typeof data === "string" && data.length > 0) return data;
    }
  }
  return null;
}

/** Returns the first string-valued base64 field of `holder`, or `null` if it has none. */
function pickBase64Field(holder: Record<string, unknown>): string | null {
  for (const key of BASE64_IMAGE_KEYS) {
    const value = holder[key];
    if (typeof value === "string") return value;
  }
  return null;
}

/**
 * Returns the first base64 image string found in `predictions` followed by
 * `generatedImages`. For each entry the fields `imageBytes`,
 * `bytesBase64Encoded` and `data` are checked on the entry itself and then on
 * its nested `image` object. Returns `null` when nothing matches.
 */
export function extractPredictedImageData(response: {
  predictions?: Array<any>;
  generatedImages?: Array<any>;
}): string | null {
  const entries: unknown[] = [
    ...(response.predictions || []),
    ...(response.generatedImages || []),
  ];
  for (const entry of entries) {
    if (!entry || typeof entry !== "object") continue;
    const record = entry as Record<string, unknown>;

    const direct = pickBase64Field(record);
    if (direct !== null) return direct;

    const image = record.image;
    if (image && typeof image === "object") {
      const nested = pickBase64Field(image as Record<string, unknown>);
      if (nested !== null) return nested;
    }
  }
  return null;
}

/**
 * Generates an image through the `:generateContent` endpoint.
 *
 * Reference images are sent as inline data ahead of the text part. The aspect
 * ratio is conveyed by appending it to the prompt text, while the image size
 * is sent in `generationConfig.imageConfig`.
 *
 * @throws Error with "No image in response" when the response has no inline image data.
 */
async function generateWithGemini(
  prompt: string,
  model: string,
  args: CliArgs,
): Promise<Uint8Array> {
  const promptWithAspect = addAspectRatioToPrompt(prompt, args.aspectRatio);
  const parts: Array<{
    text?: string;
    inlineData?: { data: string; mimeType: string };
  }> = [];
  for (const refPath of args.referenceImages) {
    const { data, mimeType } = await readImageAsBase64(refPath);
    parts.push({ inlineData: { data, mimeType } });
  }
  parts.push({ text: promptWithAspect });

  const imageConfig: { imageSize: "1K" | "2K" | "4K" } = {
    imageSize: getGoogleImageSize(args),
  };

  console.log("Generating image with Gemini...", imageConfig);
  const response = await postGoogleJson<{
    candidates?: Array<{
      content?: { parts?: Array<{ inlineData?: { data?: string } }> };
    }>;
  }>(`${toModelPath(model)}:generateContent`, {
    contents: [
      {
        role: "user",
        parts,
      },
    ],
    generationConfig: {
      responseModalities: ["IMAGE"],
      imageConfig,
    },
  });
  console.log("Generation completed.");

  const imageData = extractInlineImageData(response);
  if (imageData) return Uint8Array.from(Buffer.from(imageData, "base64"));

  throw new Error("No image in response");
}

/**
 * Generates an image through the `:predict` endpoint.
 *
 * A requested size of `"4K"` triggers a warning and is sent as `"2K"`; any
 * other value that is not `"1K"` or `"2K"` is also sent as `"2K"`.
 *
 * @throws Error with "No image in response" when no image bytes are found.
 */
async function generateWithImagen(
  prompt: string,
  model: string,
  args: CliArgs,
): Promise<Uint8Array> {
  const fullPrompt = buildPromptWithAspect(
    prompt,
    args.aspectRatio,
    args.quality,
  );
  const requestedSize = getGoogleImageSize(args);
  if (requestedSize === "4K") {
    console.error(
      "Warning: Imagen models do not support 4K imageSize, using 2K instead.",
    );
  }

  const parameters: Record<string, unknown> = {
    sampleCount: args.n,
  };
  if (args.aspectRatio) {
    parameters.aspectRatio = args.aspectRatio;
  }
  parameters.imageSize =
    requestedSize === "1K" || requestedSize === "2K" ? requestedSize : "2K";

  const response = await postGoogleJson<{
    predictions?: Array<any>;
    generatedImages?: Array<any>;
  }>(`${toModelPath(model)}:predict`, {
    instances: [
      {
        prompt: fullPrompt,
      },
    ],
    parameters,
  });

  const imageData = extractPredictedImageData(response);
  if (imageData) return Uint8Array.from(Buffer.from(imageData, "base64"));

  throw new Error("No image in response");
}

/**
 * Generates one image and returns its decoded bytes.
 *
 * Imagen model ids are routed to the `:predict` endpoint; every other model
 * id is routed to `:generateContent`.
 *
 * @throws Error if reference images are supplied for a model that is not one
 *   of the known Gemini multimodal ids, or if the API returns no image.
 */
export async function generateImage(
  prompt: string,
  model: string,
  args: CliArgs,
): Promise<Uint8Array> {
  const hasReferenceImages = args.referenceImages.length > 0;

  if (isGoogleImagen(model)) {
    if (hasReferenceImages) {
      throw new Error(
        "Reference images are not supported with Imagen models. Use gemini-3-pro-image-preview, gemini-3-flash-preview, or gemini-3.1-flash-image-preview.",
      );
    }
    return generateWithImagen(prompt, model, args);
  }

  if (hasReferenceImages && !isGoogleMultimodal(model)) {
    throw new Error(
      "Reference images are only supported with Gemini multimodal models. Use gemini-3-pro-image-preview, gemini-3-flash-preview, or gemini-3.1-flash-image-preview.",
    );
  }

  return generateWithGemini(prompt, model, args);
}