Loading source
Pulling the file list, source metadata, and syntax-aware rendering for this listing.
Source from repo
Fetch any URL via Chrome CDP and convert the rendered page to clean markdown with YouTube transcript support.
Files
Skill
Size
Entrypoint
Format
Open file
Syntax-highlighted preview of this file as included in the skill package.
scripts/lib/media/default-downloader.ts
import path from "node:path";
import { mkdir, writeFile } from "node:fs/promises";
import {
  buildFileName,
  isDataUri,
  normalizeContentType,
  normalizeMediaUrl,
  resolveExtensionFromContentType,
  resolveExtensionFromUrl,
  resolveMediaKind,
  resolveOutputExtension,
  toPosixPath,
} from "./media-utils";
import type { MediaAsset, MediaDownloadRequest, MediaDownloadResult, MediaKind } from "./types";

/** User-agent sent with every media request unless the asset's own headers override it. */
const DOWNLOAD_USER_AGENT =
  "Mozilla/5.0 (Macintosh; Intel Mac OS X 10_15_7) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/130.0.0.0 Safari/537.36";

/** Number of leading characters of a data URI that are kept when it is written to the log. */
const LOG_DATA_URI_PREFIX_LENGTH = 32;

/**
 * Returns a log-safe rendering of a media URL.
 *
 * Data URIs embed their whole payload, so only a short prefix followed by
 * `...` is kept; every other URL is returned unchanged.
 */
function describeAssetUrl(rawUrl: string): string {
  return isDataUri(rawUrl) ? `${rawUrl.slice(0, LOG_DATA_URI_PREFIX_LENGTH)}...` : rawUrl;
}

/**
 * Decodes a base64 `data:` URI into its content type and raw bytes.
 *
 * Media-type parameters between the type and `;base64` (for example
 * `;charset=utf-8`) are tolerated and ignored.
 *
 * @param rawUrl - Candidate data URI.
 * @returns The normalized content type and decoded bytes, or `null` when the
 *   input is not a base64 data URI, its content type normalizes to an empty
 *   value, or the payload decodes to zero bytes.
 */
function parseBase64DataUri(rawUrl: string): { contentType: string; bytes: Buffer } | null {
  // Group 1: media type. Group 2: base64 payload (whitespace allowed, stripped below).
  const match = rawUrl.match(/^data:([^;,]+)(?:;[^;,]+)*?;base64,([A-Za-z0-9+/=\s]+)$/i);
  if (!match?.[1] || !match[2]) {
    return null;
  }

  const contentType = normalizeContentType(match[1]);
  if (!contentType) {
    return null;
  }

  try {
    const bytes = Buffer.from(match[2].replace(/\s+/g, ""), "base64");
    if (bytes.length === 0) {
      return null;
    }
    return { contentType, bytes };
  } catch {
    return null;
  }
}

/**
 * Drops assets whose normalized URL is empty or was already seen.
 *
 * The returned items are shallow copies whose `url` is the normalized URL;
 * the first occurrence of each URL wins and input order is preserved.
 */
function dedupeMedia(media: MediaAsset[]): MediaAsset[] {
  const deduped: MediaAsset[] = [];
  const seen = new Set<string>();
  for (const item of media) {
    const normalizedUrl = normalizeMediaUrl(item.url);
    if (!normalizedUrl || seen.has(normalizedUrl)) {
      continue;
    }
    seen.add(normalizedUrl);
    deduped.push({
      ...item,
      url: normalizedUrl,
    });
  }
  return deduped;
}

/**
 * Expresses `absoluteTarget` relative to `fromDir`, passed through `toPosixPath`.
 *
 * Falls back to the target's base name when the relative path is empty
 * (i.e. both paths resolve to the same location).
 */
function toRelativePath(fromDir: string, absoluteTarget: string): string {
  const relative = path.relative(fromDir, absoluteTarget) || path.basename(absoluteTarget);
  return toPosixPath(relative);
}

/**
 * Downloads (or decodes) every unique media asset of a request and writes it to disk.
 *
 * Files are written below `request.mediaDir` when it is set, otherwise next to
 * `request.outputPath`: assets resolved as `"image"` go to `imgs/`, everything
 * else to `videos/`. Assets are processed one at a time, so the per-kind index
 * passed to `buildFileName` follows input order.
 *
 * Failures are isolated per asset: a non-OK HTTP status, an unparsable data
 * URI, an unresolved media kind, or a thrown error is logged through
 * `request.log` and the asset is skipped.
 *
 * @param request - Media list, output locations and logger.
 * @returns One replacement per written file (its `localPath` is relative to
 *   the directory of `request.outputPath`), the number of images and videos
 *   written, and the directories used (`null` when nothing of that kind was
 *   written).
 */
export async function downloadMediaAssets(
  request: MediaDownloadRequest,
): Promise<MediaDownloadResult> {
  const dedupedMedia = dedupeMedia(request.media);
  const absoluteOutputPath = path.resolve(request.outputPath);
  const markdownDir = path.dirname(absoluteOutputPath);
  const baseDir = request.mediaDir ? path.resolve(request.mediaDir) : markdownDir;
  const replacements: MediaDownloadResult["replacements"] = [];

  let downloadedImages = 0;
  let downloadedVideos = 0;

  for (const asset of dedupedMedia) {
    // Data URIs carry their whole payload; never write them to the log in full.
    const loggedUrl = describeAssetUrl(asset.url);

    try {
      let sourceUrl = normalizeMediaUrl(asset.url);
      let contentType = "";
      let extension: string | undefined;
      let kind: MediaKind | undefined;
      let bytes: Buffer | null = null;

      if (isDataUri(asset.url)) {
        const parsed = parseBase64DataUri(asset.url);
        if (!parsed) {
          request.log.warn(`Skipping unsupported embedded media: ${loggedUrl}`);
          continue;
        }

        contentType = parsed.contentType;
        // A data URI has no path to inspect, so the content type is consulted first.
        extension =
          resolveExtensionFromContentType(contentType) ??
          resolveExtensionFromUrl(asset.fileNameHint ?? "");
        kind = resolveMediaKind(sourceUrl, contentType, extension, asset.kind);
        bytes = parsed.bytes;
      } else {
        const response = await fetch(sourceUrl, {
          method: "GET",
          redirect: "follow",
          headers: {
            "user-agent": DOWNLOAD_USER_AGENT,
            // Spread last so per-asset headers take precedence over the default.
            ...(asset.headers ?? {}),
          },
        });

        if (!response.ok) {
          request.log.warn(`Skipping media (${response.status}): ${asset.url}`);
          continue;
        }

        // Prefer the final URL after redirects when deriving the extension and kind.
        sourceUrl = normalizeMediaUrl(response.url || sourceUrl);
        contentType = normalizeContentType(response.headers.get("content-type"));
        extension =
          resolveExtensionFromUrl(sourceUrl) ??
          resolveExtensionFromUrl(asset.url) ??
          resolveExtensionFromUrl(asset.fileNameHint ?? "");
        kind = resolveMediaKind(sourceUrl, contentType, extension, asset.kind);
        bytes = Buffer.from(await response.arrayBuffer());
      }

      if (!kind || !bytes) {
        request.log.debug(`Skipping media with unresolved kind: ${loggedUrl}`);
        continue;
      }

      const outputExtension = resolveOutputExtension(contentType, extension, kind);
      const nextIndex = kind === "image" ? downloadedImages + 1 : downloadedVideos + 1;
      const dirName = kind === "image" ? "imgs" : "videos";
      const targetDir = path.join(baseDir, dirName);
      await mkdir(targetDir, { recursive: true });

      const fileName = buildFileName(kind, nextIndex, sourceUrl, outputExtension, asset.fileNameHint);
      const absolutePath = path.join(targetDir, fileName);
      await writeFile(absolutePath, bytes);

      replacements.push({
        url: asset.url,
        localPath: toRelativePath(markdownDir, absolutePath),
        absolutePath,
        kind,
      });

      // Counters advance only after a successful write, so a failed asset does
      // not consume an index.
      if (kind === "image") {
        downloadedImages = nextIndex;
      } else {
        downloadedVideos = nextIndex;
      }
    } catch (error) {
      const message = error instanceof Error ? error.message : String(error);
      request.log.warn(`Failed to download media ${loggedUrl}: ${message}`);
    }
  }

  return {
    replacements,
    downloadedImages,
    downloadedVideos,
    imageDir: downloadedImages > 0 ? path.join(baseDir, "imgs") : null,
    videoDir: downloadedVideos > 0 ? path.join(baseDir, "videos") : null,
  };
}