Loading source
Pulling the file list, source metadata, and syntax-aware rendering for this listing.
Source from repo
Generate images via OpenAI, Google, OpenRouter, DashScope, Jimeng, Seedream, and Replicate APIs with batch support.
Files
Skill
Size
Entrypoint
Format
Open file
Syntax-highlighted preview of this file as included in the skill package.
scripts/providers/google.ts
import path from "node:path";
import { readFile } from "node:fs/promises";
import { execFileSync } from "node:child_process";
import type { CliArgs } from "../types";

/** Model-id fragments that `isGoogleMultimodal` matches by substring. */
const GOOGLE_MULTIMODAL_MODELS = [
  "gemini-3-pro-image",
  "gemini-3.1-flash-image",
  "gemini-3-pro-image-preview",
  "gemini-3-flash-preview",
  "gemini-3.1-flash-image-preview",
];

/** Model-id fragments that `isGoogleImagen` matches by substring. */
const GOOGLE_IMAGEN_MODELS = [
  "imagen-3.0-generate-002",
  "imagen-3.0-generate-001",
];

/**
 * Returns the model to use when none is specified: the `GOOGLE_IMAGE_MODEL`
 * environment variable if set (and non-empty), otherwise "gemini-3-pro-image".
 */
export function getDefaultModel(): string {
  return process.env.GOOGLE_IMAGE_MODEL || "gemini-3-pro-image";
}

/** Strips a leading "models/" prefix from a model id, if present. */
export function normalizeGoogleModelId(model: string): string {
  return model.startsWith("models/") ? model.slice("models/".length) : model;
}

/** True when the normalized model id contains one of the known multimodal ids. */
export function isGoogleMultimodal(model: string): boolean {
  const normalized = normalizeGoogleModelId(model);
  return GOOGLE_MULTIMODAL_MODELS.some((m) => normalized.includes(m));
}

/** True when the normalized model id contains one of the known Imagen ids. */
export function isGoogleImagen(model: string): boolean {
  const normalized = normalizeGoogleModelId(model);
  return GOOGLE_IMAGEN_MODELS.some((m) => normalized.includes(m));
}

/** Reads the API key from `GOOGLE_API_KEY`, falling back to `GEMINI_API_KEY`. */
function getGoogleApiKey(): string | null {
  return process.env.GOOGLE_API_KEY || process.env.GEMINI_API_KEY || null;
}

/**
 * Resolves the image size to request.
 *
 * An explicit `args.imageSize` wins and is passed through as-is (it is not
 * validated here); otherwise quality "2k" maps to "2K" and anything else to "1K".
 */
export function getGoogleImageSize(args: CliArgs): "1K" | "2K" | "4K" {
  if (args.imageSize) return args.imageSize as "1K" | "2K" | "4K";
  return args.quality === "2k" ? "2K" : "1K";
}

/**
 * Returns the API base URL (`GOOGLE_BASE_URL` or the public Generative
 * Language endpoint) with any trailing slashes removed.
 */
function getGoogleBaseUrl(): string {
  const base =
    process.env.GOOGLE_BASE_URL || "https://generativelanguage.googleapis.com";
  return base.replace(/\/+$/g, "");
}

/**
 * Joins the base URL and `pathname`, inserting the "/v1beta" segment unless
 * the configured base URL already ends with it.
 */
export function buildGoogleUrl(pathname: string): string {
  const base = getGoogleBaseUrl();
  const cleanedPath = pathname.replace(/^\/+/g, "");
  if (base.endsWith("/v1beta")) return `${base}/${cleanedPath}`;
  return `${base}/v1beta/${cleanedPath}`;
}

/** Returns the `models/<id>` resource path for a (possibly prefixed) model id. */
function toModelPath(model: string): string {
  const modelId = normalizeGoogleModelId(model);
  return `models/${modelId}`;
}

/**
 * Returns the first proxy URL found in the conventional proxy environment
 * variables, or null when none is set.
 */
function getHttpProxy(): string | null {
  return (
    process.env.https_proxy ||
    process.env.HTTPS_PROXY ||
    process.env.http_proxy ||
    process.env.HTTP_PROXY ||
    process.env.ALL_PROXY ||
    null
  );
}

/**
 * Replaces every occurrence of `secret` in `text` with "[REDACTED]".
 * An empty secret leaves the text untouched.
 */
function redactSecret(text: string, secret: string): string {
  if (!secret) return text;
  return text.split(secret).join("[REDACTED]");
}

/**
 * Parses a raw response body received via curl.
 *
 * @throws Error with a short preview when the body is not valid JSON (for
 *   example an empty body or an HTML error page from a proxy).
 * @throws Error "Google API error (<code>): <message>" when the JSON payload
 *   carries a truthy `error` member.
 */
function parseGoogleJsonBody<T>(raw: string): T {
  let parsed: unknown;
  try {
    parsed = JSON.parse(raw);
  } catch {
    const preview = raw.trim().slice(0, 200) || "<empty response>";
    throw new Error(
      `Google API returned a non-JSON response via curl: ${preview}`,
    );
  }

  const apiError = (
    parsed as { error?: { code?: unknown; message?: unknown } } | null
  )?.error;
  if (apiError) {
    throw new Error(
      `Google API error (${apiError.code}): ${apiError.message}`,
    );
  }
  return parsed as T;
}

/**
 * POSTs `body` as JSON to `url` by spawning `curl` synchronously, routing
 * through the detected proxy when one is set.
 *
 * @throws Error when curl fails or the response is not usable JSON. The API
 *   key is redacted from every error message produced here.
 */
async function postGoogleJsonViaCurl<T>(
  url: string,
  apiKey: string,
  body: unknown,
): Promise<T> {
  const proxy = getHttpProxy();
  const bodyStr = JSON.stringify(body);
  // NOTE(review): the API key is passed in curl's argv and is therefore
  // visible in the local process list while the request runs.
  const args = [
    "-s",
    // -S re-enables curl's error text on stderr, which -s alone suppresses.
    "-S",
    "--connect-timeout",
    "30",
    "--max-time",
    "300",
    ...(proxy ? ["-x", proxy] : []),
    url,
    "-H",
    "Content-Type: application/json",
    "-H",
    `x-goog-api-key: ${apiKey}`,
    "-d",
    "@-", // read the request body from stdin
  ];

  let result = "";
  try {
    result = execFileSync("curl", args, {
      input: bodyStr,
      encoding: "utf8",
      maxBuffer: 100 * 1024 * 1024,
      timeout: 310000, // slightly above curl's own --max-time
    });
  } catch (error) {
    const e = error as { message?: string; stderr?: string | Buffer };
    const stderrText =
      typeof e.stderr === "string"
        ? e.stderr
        : e.stderr
          ? e.stderr.toString("utf8")
          : "";
    const details = stderrText.trim() || e.message || "curl request failed";
    // execFileSync's message embeds the full command line (including the
    // x-goog-api-key header), so the key must be scrubbed before rethrowing.
    throw new Error(
      `Google API request failed via curl: ${redactSecret(details, apiKey)}`,
    );
  }

  return parseGoogleJsonBody<T>(result);
}

/**
 * POSTs `body` as JSON to `url` using `fetch`.
 *
 * @throws Error "Google API error (<status>): <body>" on a non-2xx response.
 */
async function postGoogleJsonViaFetch<T>(
  url: string,
  apiKey: string,
  body: unknown,
): Promise<T> {
  const res = await fetch(url, {
    method: "POST",
    headers: {
      "Content-Type": "application/json",
      "x-goog-api-key": apiKey,
    },
    body: JSON.stringify(body),
  });

  if (!res.ok) {
    const err = await res.text();
    throw new Error(`Google API error (${res.status}): ${err}`);
  }

  return (await res.json()) as T;
}

/**
 * POSTs `body` to the API path `pathname`, choosing the transport based on
 * whether a proxy is configured.
 *
 * @throws Error when neither `GOOGLE_API_KEY` nor `GEMINI_API_KEY` is set.
 */
async function postGoogleJson<T>(pathname: string, body: unknown): Promise<T> {
  const apiKey = getGoogleApiKey();
  if (!apiKey) throw new Error("GOOGLE_API_KEY or GEMINI_API_KEY is required");

  const url = buildGoogleUrl(pathname);
  const proxy = getHttpProxy();

  // When an HTTP proxy is detected, use curl instead of fetch.
  // Bun's fetch has a known issue where long-lived connections through
  // HTTP proxies get their sockets closed unexpectedly, causing image
  // generation requests to fail with "socket connection was closed
  // unexpectedly". Using curl as the HTTP client works around this.
  if (proxy) {
    return postGoogleJsonViaCurl<T>(url, apiKey, body);
  }

  return postGoogleJsonViaFetch<T>(url, apiKey, body);
}

/**
 * Appends an aspect-ratio hint (when `ar` is set) and a high-resolution hint
 * (when `quality` is "2k") to the prompt text.
 */
export function buildPromptWithAspect(
  prompt: string,
  ar: string | null,
  quality: CliArgs["quality"],
): string {
  let result = prompt;
  if (ar) {
    result += ` Aspect ratio: ${ar}.`;
  }
  if (quality === "2k") {
    result += " High resolution 2048px.";
  }
  return result;
}

/** Appends an aspect-ratio hint to the prompt, or returns it unchanged when `ar` is empty. */
export function addAspectRatioToPrompt(prompt: string, ar: string | null): string {
  if (!ar) return prompt;
  return `${prompt} Aspect ratio: ${ar}.`;
}

/**
 * Reads a file and returns its base64 content together with a MIME type
 * derived from the extension (.jpg/.jpeg, .gif, .webp; anything else is
 * reported as "image/png").
 */
async function readImageAsBase64(
  p: string,
): Promise<{ data: string; mimeType: string }> {
  const buf = await readFile(p);
  const ext = path.extname(p).toLowerCase();
  let mimeType = "image/png";
  if (ext === ".jpg" || ext === ".jpeg") mimeType = "image/jpeg";
  else if (ext === ".gif") mimeType = "image/gif";
  else if (ext === ".webp") mimeType = "image/webp";
  return { data: buf.toString("base64"), mimeType };
}

/**
 * Returns the first non-empty `inlineData.data` string found across all
 * candidates' parts, or null when there is none.
 */
export function extractInlineImageData(response: {
  candidates?: Array<{
    content?: { parts?: Array<{ inlineData?: { data?: string } }> };
  }>;
}): string | null {
  for (const candidate of response.candidates || []) {
    for (const part of candidate.content?.parts || []) {
      const data = part.inlineData?.data;
      if (typeof data === "string" && data.length > 0) return data;
    }
  }
  return null;
}

/**
 * Returns the first image payload found in `predictions` followed by
 * `generatedImages`. For each entry it checks `imageBytes`,
 * `bytesBase64Encoded` and `data` directly on the entry, then the same keys
 * on its nested `image` object. Returns null when nothing matches.
 */
export function extractPredictedImageData(response: {
  predictions?: Array<any>;
  generatedImages?: Array<any>;
}): string | null {
  const candidates = [
    ...(response.predictions || []),
    ...(response.generatedImages || []),
  ];
  for (const candidate of candidates) {
    if (!candidate || typeof candidate !== "object") continue;
    if (typeof candidate.imageBytes === "string") return candidate.imageBytes;
    if (typeof candidate.bytesBase64Encoded === "string")
      return candidate.bytesBase64Encoded;
    if (typeof candidate.data === "string") return candidate.data;
    const image = candidate.image;
    if (image && typeof image === "object") {
      if (typeof image.imageBytes === "string") return image.imageBytes;
      if (typeof image.bytesBase64Encoded === "string")
        return image.bytesBase64Encoded;
      if (typeof image.data === "string") return image.data;
    }
  }
  return null;
}

/**
 * Generates an image through the `:generateContent` endpoint.
 *
 * Reference images are sent as inline parts ahead of the text prompt, and
 * the aspect ratio is conveyed as a hint inside the prompt text.
 *
 * @throws Error "No image in response" when no inline image data comes back.
 */
async function generateWithGemini(
  prompt: string,
  model: string,
  args: CliArgs,
): Promise<Uint8Array> {
  const promptWithAspect = addAspectRatioToPrompt(prompt, args.aspectRatio);
  const parts: Array<{
    text?: string;
    inlineData?: { data: string; mimeType: string };
  }> = [];
  for (const refPath of args.referenceImages) {
    const { data, mimeType } = await readImageAsBase64(refPath);
    parts.push({ inlineData: { data, mimeType } });
  }
  parts.push({ text: promptWithAspect });

  const imageConfig: { imageSize: "1K" | "2K" | "4K" } = {
    imageSize: getGoogleImageSize(args),
  };

  console.log("Generating image with Gemini...", imageConfig);
  const response = await postGoogleJson<{
    candidates?: Array<{
      content?: { parts?: Array<{ inlineData?: { data?: string } }> };
    }>;
  }>(`${toModelPath(model)}:generateContent`, {
    contents: [
      {
        role: "user",
        parts,
      },
    ],
    generationConfig: {
      responseModalities: ["IMAGE"],
      imageConfig,
    },
  });
  console.log("Generation completed.");

  const imageData = extractInlineImageData(response);
  if (imageData) return Uint8Array.from(Buffer.from(imageData, "base64"));

  throw new Error("No image in response");
}

/**
 * Generates an image through the `:predict` endpoint.
 *
 * A requested size of "4K" triggers a warning and is replaced by "2K"; any
 * size other than "1K"/"2K" is likewise sent as "2K". Only the first image
 * found in the response is returned, even though `args.n` is sent as
 * `sampleCount`.
 *
 * @throws Error "No image in response" when no image payload comes back.
 */
async function generateWithImagen(
  prompt: string,
  model: string,
  args: CliArgs,
): Promise<Uint8Array> {
  const fullPrompt = buildPromptWithAspect(
    prompt,
    args.aspectRatio,
    args.quality,
  );
  const imageSize = getGoogleImageSize(args);
  if (imageSize === "4K") {
    console.error(
      "Warning: Imagen models do not support 4K imageSize, using 2K instead.",
    );
  }

  const parameters: Record<string, unknown> = {
    sampleCount: args.n,
  };
  if (args.aspectRatio) {
    parameters.aspectRatio = args.aspectRatio;
  }
  if (imageSize === "1K" || imageSize === "2K") {
    parameters.imageSize = imageSize;
  } else {
    parameters.imageSize = "2K";
  }

  const response = await postGoogleJson<{
    predictions?: Array<any>;
    generatedImages?: Array<any>;
  }>(`${toModelPath(model)}:predict`, {
    instances: [
      {
        prompt: fullPrompt,
      },
    ],
    parameters,
  });

  const imageData = extractPredictedImageData(response);
  if (imageData) return Uint8Array.from(Buffer.from(imageData, "base64"));

  throw new Error("No image in response");
}

/**
 * Generates one image with the given Google model and returns its raw bytes.
 *
 * Imagen model ids are routed to the `:predict` flow; everything else goes
 * through `:generateContent`.
 *
 * @throws Error when reference images are supplied for an Imagen model or for
 *   a model that is not a recognised Gemini multimodal model.
 */
export async function generateImage(
  prompt: string,
  model: string,
  args: CliArgs,
): Promise<Uint8Array> {
  if (isGoogleImagen(model)) {
    if (args.referenceImages.length > 0) {
      throw new Error(
        "Reference images are not supported with Imagen models. Use a Gemini multimodal model such as gemini-3-pro-image, gemini-3.1-flash-image, gemini-3-pro-image-preview, gemini-3-flash-preview, or gemini-3.1-flash-image-preview.",
      );
    }
    return generateWithImagen(prompt, model, args);
  }

  if (!isGoogleMultimodal(model) && args.referenceImages.length > 0) {
    throw new Error(
      "Reference images are only supported with Gemini multimodal models such as gemini-3-pro-image, gemini-3.1-flash-image, gemini-3-pro-image-preview, gemini-3-flash-preview, or gemini-3.1-flash-image-preview.",
    );
  }

  return generateWithGemini(prompt, model, args);
}