Loading source
Pulling the file list, source metadata, and syntax-aware rendering for this listing.
Source from repo
Fetch any URL via Chrome CDP and convert the rendered page to clean markdown with YouTube transcript support.
Files
Skill
Size
Entrypoint
Format
Open file
Syntax-highlighted preview of this file as included in the skill package.
scripts/lib/media/media-utils.ts
import path from "node:path";
import type { MediaKind } from "./types";

/** File extensions (lowercase, no dot) that are treated as images. */
const IMAGE_EXTENSIONS = new Set([
  "jpg",
  "jpeg",
  "png",
  "webp",
  "gif",
  "bmp",
  "avif",
  "heic",
  "heif",
  "svg",
]);

/** File extensions (lowercase, no dot) that are treated as videos. */
const VIDEO_EXTENSIONS = new Set(["mp4", "m4v", "mov", "webm", "mkv"]);

/**
 * Normalized MIME type -> preferred file extension.
 *
 * Kept in a Map rather than an object literal so that lookups with arbitrary
 * header values (e.g. "constructor") can never hit inherited
 * `Object.prototype` members.
 */
const MIME_EXTENSION_MAP: ReadonlyMap<string, string> = new Map([
  ["image/jpeg", "jpg"],
  ["image/jpg", "jpg"],
  ["image/png", "png"],
  ["image/webp", "webp"],
  ["image/gif", "gif"],
  ["image/bmp", "bmp"],
  ["image/avif", "avif"],
  ["image/heic", "heic"],
  ["image/heif", "heif"],
  ["image/svg+xml", "svg"],
  ["video/mp4", "mp4"],
  ["video/webm", "webm"],
  ["video/quicktime", "mov"],
  ["video/x-m4v", "m4v"],
]);

/** Longest file-name segment produced by {@link sanitizeFileSegment}. */
const MAX_FILE_SEGMENT_LENGTH = 48;

/**
 * Reduces a raw `Content-Type` header value to its bare media type.
 *
 * @param raw - Header value, possibly carrying parameters such as `; charset=utf-8`.
 * @returns The lowercased, trimmed media type, or `""` when `raw` is `null`.
 */
export function normalizeContentType(raw: string | null): string {
  return raw?.split(";")[0]?.trim().toLowerCase() ?? "";
}

/**
 * Canonicalizes a file extension: surrounding whitespace and one leading dot
 * are removed, the result is lowercased, and `jpeg` is folded into `jpg`.
 *
 * @param raw - Extension with or without a leading dot.
 * @returns The canonical extension, or `undefined` when nothing is left.
 */
export function normalizeExtension(raw: string | undefined | null): string | undefined {
  if (!raw) {
    return undefined;
  }
  // Trim before stripping the dot so " .png" is handled, and again afterwards
  // so ". png" still collapses to "png".
  const trimmed = raw.trim().replace(/^\./, "").trim().toLowerCase();
  if (!trimmed) {
    return undefined;
  }
  return trimmed === "jpeg" ? "jpg" : trimmed;
}

/**
 * Derives an extension from a URL, preferring the path's extension and
 * falling back to a `format` query parameter.
 *
 * @param rawUrl - Absolute URL to inspect.
 * @returns The canonical extension, or `undefined` when the URL cannot be
 *   parsed or carries neither source of information.
 */
export function resolveExtensionFromUrl(rawUrl: string): string | undefined {
  let parsed: URL;
  try {
    parsed = new URL(rawUrl);
  } catch {
    return undefined;
  }
  return (
    normalizeExtension(path.posix.extname(parsed.pathname)) ??
    normalizeExtension(parsed.searchParams.get("format"))
  );
}

/**
 * Looks up the preferred extension for a media type.
 *
 * @param contentType - Media type as produced by {@link normalizeContentType}.
 * @returns The mapped extension, or `undefined` for unknown types.
 */
export function resolveExtensionFromContentType(contentType: string): string | undefined {
  return normalizeExtension(MIME_EXTENSION_MAP.get(contentType));
}

/**
 * Classifies a media type by its top-level type.
 *
 * @param contentType - Media type as produced by {@link normalizeContentType}.
 * @returns `"image"` for `image/*`, `"video"` for `video/*`, otherwise `undefined`.
 */
export function resolveKindFromContentType(contentType: string): MediaKind | undefined {
  if (!contentType) {
    return undefined;
  }
  if (contentType.startsWith("image/")) {
    return "image";
  }
  if (contentType.startsWith("video/")) {
    return "video";
  }
  return undefined;
}

/**
 * Classifies an extension against the known image and video extension sets.
 *
 * The extension is normalized first, so `"JPEG"` and `".png"` are recognised
 * as well as the canonical lowercase forms.
 *
 * @param extension - Extension with or without a leading dot.
 * @returns The media kind, or `undefined` for missing or unknown extensions.
 */
export function resolveKindFromExtension(extension: string | undefined): MediaKind | undefined {
  const normalized = normalizeExtension(extension);
  if (!normalized) {
    return undefined;
  }
  if (IMAGE_EXTENSIONS.has(normalized)) {
    return "image";
  }
  if (VIDEO_EXTENSIONS.has(normalized)) {
    return "video";
  }
  return undefined;
}

/**
 * Decides whether a resource is an image or a video.
 *
 * Sources are consulted in order: content type, extension, caller hint, and
 * finally a `data:image/` or `data:video/` URL prefix. A content type that is
 * present, is not `application/octet-stream`, and is neither `image/*` nor
 * `video/*` rejects the resource when the extension is not recognised either:
 * the hint and the data-URL prefix are not consulted in that case.
 *
 * @param rawUrl - Source URL (may be a data URI).
 * @param contentType - Media type as produced by {@link normalizeContentType}.
 * @param extension - Extension resolved for the resource, if any.
 * @param hint - Kind suggested by the caller, used only as a fallback.
 * @returns The resolved kind, or `undefined` when it cannot be determined.
 */
export function resolveMediaKind(
  rawUrl: string,
  contentType: string,
  extension: string | undefined,
  hint?: MediaKind,
): MediaKind | undefined {
  const kindFromType = resolveKindFromContentType(contentType);
  if (kindFromType) {
    return kindFromType;
  }

  const kindFromExtension = resolveKindFromExtension(extension);
  if (kindFromExtension) {
    return kindFromExtension;
  }

  // An explicit, non-generic content type that is not image/video wins over
  // the weaker signals below.
  if (contentType && contentType !== "application/octet-stream") {
    return undefined;
  }

  if (hint) {
    return hint;
  }

  if (rawUrl.startsWith("data:image/")) {
    return "image";
  }

  if (rawUrl.startsWith("data:video/")) {
    return "video";
  }

  return undefined;
}

/**
 * Chooses the extension to use when writing a resource to disk.
 *
 * @param contentType - Media type as produced by {@link normalizeContentType}.
 * @param extension - Extension resolved from the URL, if any.
 * @param kind - Resolved media kind, used for the final default.
 * @returns The MIME-derived extension, else the normalized `extension`, else
 *   `"mp4"` for videos and `"jpg"` for everything else.
 */
export function resolveOutputExtension(
  contentType: string,
  extension: string | undefined,
  kind: MediaKind,
): string {
  return (
    resolveExtensionFromContentType(contentType) ??
    normalizeExtension(extension) ??
    (kind === "video" ? "mp4" : "jpg")
  );
}

/** Returns `true` when `value` starts with the `data:` scheme prefix. */
export function isDataUri(value: string): boolean {
  return value.startsWith("data:");
}

/**
 * `decodeURIComponent` that never throws.
 *
 * @returns The decoded string, or `value` unchanged when it contains a
 *   malformed escape sequence.
 */
export function safeDecodeURIComponent(value: string): string {
  try {
    return decodeURIComponent(value);
  } catch {
    return value;
  }
}

/** Parses `candidate` as an absolute URL and returns its serialized form, or `undefined`. */
function toAbsoluteHref(candidate: string): string | undefined {
  try {
    return new URL(candidate).href;
  } catch {
    return undefined;
  }
}

/**
 * Finds an `http(s)` URL embedded at the tail of `value`, either
 * percent-encoded (`https%3A%2F%2F...`) or literal (`https://...`).
 *
 * The percent-encoded form takes precedence; if it is present but does not
 * decode to a valid URL, the literal form is not tried.
 */
function extractEmbeddedUrl(value: string): string | undefined {
  const encodedMatch = value.match(/https?%3A%2F%2F.+$/i)?.[0];
  if (encodedMatch) {
    return toAbsoluteHref(safeDecodeURIComponent(encodedMatch));
  }

  const literalMatch = value.match(/https?:\/\/.+$/i)?.[0];
  return literalMatch ? toAbsoluteHref(literalMatch) : undefined;
}

/**
 * Normalizes a media URL.
 *
 * Data URIs and unparsable input are returned unchanged. URLs on
 * `substackcdn.com` (or a subdomain) that embed another `http(s)` URL in
 * their path or query are unwrapped to that embedded URL. Every other URL is
 * returned in its serialized (`URL#href`) form.
 *
 * @param rawUrl - URL as found in the page.
 * @returns The normalized URL.
 */
export function normalizeMediaUrl(rawUrl: string): string {
  if (isDataUri(rawUrl)) {
    return rawUrl;
  }

  try {
    const parsed = new URL(rawUrl);
    const hostname = parsed.hostname.toLowerCase();

    if (hostname === "substackcdn.com" || hostname.endsWith(".substackcdn.com")) {
      const embeddedUrl = extractEmbeddedUrl(`${parsed.pathname}${parsed.search}`);
      if (embeddedUrl) {
        return embeddedUrl;
      }
    }

    return parsed.href;
  } catch {
    return rawUrl;
  }
}

/**
 * Turns arbitrary text into a short file-name-safe segment.
 *
 * Runs of characters outside `[a-zA-Z0-9_-]` become a single `-`, leading and
 * trailing `-`/`_` are removed, and the result is capped at 48 characters.
 *
 * @param input - Text to sanitize.
 * @returns The sanitized segment; may be empty.
 */
export function sanitizeFileSegment(input: string): string {
  return (
    input
      .replace(/[^a-zA-Z0-9_-]+/g, "-")
      .replace(/-+/g, "-")
      .replace(/^[-_]+|[-_]+$/g, "")
      .slice(0, MAX_FILE_SEGMENT_LENGTH)
      // Truncation can cut right after a separator; trim again so the segment
      // never ends in "-" or "_".
      .replace(/[-_]+$/, "")
  );
}

/**
 * Removes `extension` from the end of `fileName` when present.
 *
 * Matching is literal (no regular expression is built from `extension`) and
 * case-insensitive. When the literal suffix does not match, the file name's
 * own extension is compared after normalization, so `photo.jpeg` is stripped
 * for the extension `jpg`.
 */
function stripKnownExtension(fileName: string, extension: string): string {
  const wanted = normalizeExtension(extension);
  if (!wanted) {
    return fileName;
  }

  const literalSuffix = `.${wanted}`;
  if (fileName.slice(-literalSuffix.length).toLowerCase() === literalSuffix) {
    return fileName.slice(0, -literalSuffix.length);
  }

  const actual = path.posix.extname(fileName);
  if (actual && normalizeExtension(actual) === wanted) {
    return fileName.slice(0, -actual.length);
  }

  return fileName;
}

/**
 * Derives the descriptive part of an output file name.
 *
 * A non-blank `fileNameHint` wins: its base name without extension is
 * sanitized and returned. Otherwise the last path segment of `rawUrl` is
 * percent-decoded, stripped of `extension`, and sanitized.
 *
 * @param rawUrl - Source URL of the resource.
 * @param extension - Extension that will be appended to the final file name.
 * @param fileNameHint - Optional preferred file name.
 * @returns The sanitized stem, or `""` for data URIs, unparsable URLs, and
 *   URLs without a final path segment.
 */
export function resolveFileStem(rawUrl: string, extension: string, fileNameHint?: string): string {
  const hintBase = fileNameHint?.trim();
  if (hintBase) {
    const parsedHint = path.posix.parse(hintBase);
    return sanitizeFileSegment(parsedHint.name || parsedHint.base);
  }

  if (isDataUri(rawUrl)) {
    return "";
  }

  let pathname: string;
  try {
    pathname = new URL(rawUrl).pathname;
  } catch {
    return "";
  }

  const base = path.posix.basename(pathname);
  if (!base) {
    return "";
  }
  return sanitizeFileSegment(stripKnownExtension(safeDecodeURIComponent(base), extension));
}

/**
 * Builds an output file name of the form `<prefix>-<serial>[-<stem>].<extension>`.
 *
 * @param kind - `"image"` yields the prefix `img`; any other kind yields `video`.
 * @param index - Serial number, zero-padded to at least three digits.
 * @param sourceUrl - Source URL used to derive the stem.
 * @param extension - Extension appended verbatim after the final dot.
 * @param fileNameHint - Optional preferred file name used for the stem.
 * @returns The assembled file name.
 */
export function buildFileName(
  kind: MediaKind,
  index: number,
  sourceUrl: string,
  extension: string,
  fileNameHint?: string,
): string {
  const stem = resolveFileStem(sourceUrl, extension, fileNameHint);
  const prefix = kind === "image" ? "img" : "video";
  const serial = String(index).padStart(3, "0");
  const suffix = stem ? `-${stem}` : "";
  return `${prefix}-${serial}${suffix}.${extension}`;
}

/** Replaces the platform path separator in `value` with the POSIX separator `/`. */
export function toPosixPath(value: string): string {
  return value.split(path.sep).join(path.posix.sep);
}