Loading source
Pulling the file list, source metadata, and syntax-aware rendering for this listing.
Source from repo
Convert X (Twitter) tweets, threads, and articles to Markdown with YAML front matter via a reverse-engineered API.
Files
Skill
Size
Entrypoint
Format
Open file
Syntax-highlighted preview of this file as included in the skill package.
scripts/media-localizer.ts
import path from "node:path";
import { mkdir, writeFile } from "node:fs/promises";

/** Media category; selects the output directory and the file-name prefix. */
type MediaKind = "image" | "video";

/** What the Markdown syntax itself suggested about a link (`![...]` => "image"). */
type MediaHint = "image" | "unknown";

/** A remote URL found in the Markdown that may be worth downloading. */
type MarkdownLinkCandidate = {
  url: string;
  hint: MediaHint;
};

export type LocalizeMarkdownMediaOptions = {
  /** Path of the Markdown file; `imgs/` and `videos/` are created next to it. */
  markdownPath: string;
  /** Optional sink for skip/failure messages. Defaults to a no-op. */
  log?: (message: string) => void;
};

export type LocalizeMarkdownMediaResult = {
  /** Markdown with every successfully downloaded URL replaced by a relative path. */
  markdown: string;
  downloadedImages: number;
  downloadedVideos: number;
  /** Image directory, or `null` when no image was downloaded. */
  imageDir: string | null;
  /** Video directory, or `null` when no video was downloaded. */
  videoDir: string | null;
};

/**
 * Matches `[label](url)` and `![label](url)` with an http(s) URL, optionally
 * wrapped in `<...>`. Groups: 1 = label including brackets (and leading `!`),
 * 2 = `<`, 3 = URL, 4 = `>`.
 */
const MARKDOWN_LINK_RE = /(!?\[[^\]\n]*\])\((<)?(https?:\/\/[^)\s>]+)(>)?\)/g;

const IMAGE_EXTENSIONS = new Set([
  "jpg",
  "jpeg",
  "png",
  "webp",
  "gif",
  "bmp",
  "avif",
  "heic",
  "heif",
  "svg",
]);

const VIDEO_EXTENSIONS = new Set(["mp4", "m4v", "mov", "webm", "mkv"]);

const MIME_EXTENSION_MAP: Record<string, string> = {
  "image/jpeg": "jpg",
  "image/jpg": "jpg",
  "image/png": "png",
  "image/webp": "webp",
  "image/gif": "gif",
  "image/bmp": "bmp",
  "image/avif": "avif",
  "image/heic": "heic",
  "image/heif": "heif",
  "image/svg+xml": "svg",
  "video/mp4": "mp4",
  "video/webm": "webm",
  "video/quicktime": "mov",
  "video/x-m4v": "m4v",
};

/** User agent sent with every download request. */
const DOWNLOAD_USER_AGENT =
  "Mozilla/5.0 (Macintosh; Intel Mac OS X 10_15_7) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/130.0.0.0 Safari/537.36";

/** Matches a `coverImage: "<http(s) url>"` line. Groups: 1 = prefix, 2 = URL, 3 = closing quote. */
const FRONTMATTER_COVER_RE = /^(coverImage:\s*")(https?:\/\/[^"]+)(")/m;

/**
 * Shape an extension must have before it may become part of a file name.
 * Extensions come from remote URLs (path suffix or `?format=`), so anything
 * containing separators, dots or other punctuation is rejected outright.
 */
const SAFE_EXTENSION_RE = /^[a-z0-9]{1,16}$/;

/** Strips parameters (`; charset=...`) from a Content-Type header and lowercases it. */
function normalizeContentType(raw: string | null): string {
  return raw?.split(";")[0]?.trim().toLowerCase() ?? "";
}

/**
 * Canonicalizes a file extension: no leading dot, lowercase, `jpeg` -> `jpg`.
 *
 * @returns the canonical extension, or `undefined` when the input is empty or
 *   is not a plain alphanumeric token (which keeps values such as `/../../x`
 *   from ever reaching a file name).
 */
function normalizeExtension(raw: string | undefined | null): string | undefined {
  if (!raw) return undefined;
  const candidate = raw.trim().replace(/^\./, "").toLowerCase();
  if (!SAFE_EXTENSION_RE.test(candidate)) return undefined;
  return candidate === "jpeg" ? "jpg" : candidate;
}

/**
 * Derives an extension from a URL: first from the path suffix, then from a
 * `format` query parameter. Returns `undefined` for unparsable URLs.
 */
function resolveExtensionFromUrl(rawUrl: string): string | undefined {
  let parsed: URL;
  try {
    parsed = new URL(rawUrl);
  } catch {
    return undefined;
  }
  return (
    normalizeExtension(path.posix.extname(parsed.pathname)) ??
    normalizeExtension(parsed.searchParams.get("format"))
  );
}

/** Maps an `image/*` or `video/*` content type to its media kind. */
function resolveKindFromContentType(contentType: string): MediaKind | undefined {
  if (!contentType) return undefined;
  if (contentType.startsWith("image/")) return "image";
  if (contentType.startsWith("video/")) return "video";
  return undefined;
}

/** Maps a known image/video extension to its media kind. */
function resolveKindFromExtension(ext: string | undefined): MediaKind | undefined {
  if (!ext) return undefined;
  if (IMAGE_EXTENSIONS.has(ext)) return "image";
  if (VIDEO_EXTENSIONS.has(ext)) return "video";
  return undefined;
}

/** True when `hostname` is `domain` itself or one of its subdomains. */
function isHostOrSubdomain(hostname: string, domain: string): boolean {
  return hostname === domain || hostname.endsWith(`.${domain}`);
}

/**
 * Infers the media kind from the two twimg.com media hosts.
 * Matches on host boundaries, so `pbs.twimg.com.example.net` is not recognized.
 */
function resolveKindFromHostname(rawUrl: string): MediaKind | undefined {
  try {
    const hostname = new URL(rawUrl).hostname.toLowerCase();
    if (isHostOrSubdomain(hostname, "video.twimg.com")) return "video";
    if (isHostOrSubdomain(hostname, "pbs.twimg.com")) return "image";
  } catch {
    return undefined;
  }
  return undefined;
}

/**
 * Decides whether a downloaded resource is an image or a video.
 *
 * Precedence: content type, then extension, then host name. The Markdown hint
 * is only trusted when the server sent no content type or the generic
 * `application/octet-stream`; any other non-media content type yields `undefined`.
 */
function resolveMediaKind(
  rawUrl: string,
  contentType: string,
  extension: string | undefined,
  hint: MediaHint
): MediaKind | undefined {
  const kindFromType = resolveKindFromContentType(contentType);
  if (kindFromType) return kindFromType;

  const kindFromExtension = resolveKindFromExtension(extension);
  if (kindFromExtension) return kindFromExtension;

  const kindFromHost = resolveKindFromHostname(rawUrl);
  if (kindFromHost) return kindFromHost;

  if (contentType && contentType !== "application/octet-stream") {
    return undefined;
  }

  return hint === "image" ? "image" : undefined;
}

/**
 * Looks up the extension for a MIME type using own properties only, so a
 * content type such as `constructor` cannot resolve to an inherited member.
 */
function lookupMimeExtension(contentType: string): string | undefined {
  return Object.prototype.hasOwnProperty.call(MIME_EXTENSION_MAP, contentType)
    ? MIME_EXTENSION_MAP[contentType]
    : undefined;
}

/**
 * Picks the extension for the file on disk: MIME-derived first, then the
 * URL-derived one, then a per-kind default (`mp4` / `jpg`).
 */
function resolveOutputExtension(
  contentType: string,
  extension: string | undefined,
  kind: MediaKind
): string {
  const extFromMime = normalizeExtension(lookupMimeExtension(contentType));
  if (extFromMime) return extFromMime;

  const normalizedExt = normalizeExtension(extension);
  if (normalizedExt) return normalizedExt;

  return kind === "video" ? "mp4" : "jpg";
}

/** `decodeURIComponent` that returns the input unchanged on malformed escapes. */
function safeDecodeURIComponent(value: string): string {
  try {
    return decodeURIComponent(value);
  } catch {
    return value;
  }
}

/**
 * Reduces arbitrary text to `[A-Za-z0-9_-]`, collapses dash runs, trims
 * separators from both ends and caps the result at 48 characters.
 */
function sanitizeFileSegment(input: string): string {
  return input
    .replace(/[^a-zA-Z0-9_-]+/g, "-")
    .replace(/-+/g, "-")
    .replace(/^[-_]+|[-_]+$/g, "")
    .slice(0, 48)
    // Truncation can expose a separator at the new end; trim it again.
    .replace(/[-_]+$/, "");
}

/**
 * Removes a trailing `.ext` (case-insensitive) from `base`. For `jpg` the
 * `.jpeg` spelling is removed as well, since both normalize to `jpg`.
 */
function stripExtension(base: string, normalizedExt: string | undefined): string {
  if (!normalizedExt) return base;
  const suffixes = normalizedExt === "jpg" ? [".jpg", ".jpeg"] : [`.${normalizedExt}`];
  for (const suffix of suffixes) {
    if (base.length >= suffix.length && base.slice(-suffix.length).toLowerCase() === suffix) {
      return base.slice(0, base.length - suffix.length);
    }
  }
  return base;
}

/**
 * Builds a file-system-safe stem from the last path segment of `rawUrl`,
 * without the given extension. Returns `""` when nothing usable is found.
 */
function resolveFileStem(rawUrl: string, extension: string): string {
  try {
    const parsed = new URL(rawUrl);
    const base = path.posix.basename(parsed.pathname);
    if (!base) return "";
    const decodedBase = safeDecodeURIComponent(base);
    return sanitizeFileSegment(stripExtension(decodedBase, normalizeExtension(extension)));
  } catch {
    return "";
  }
}

/** Produces `img-001-<stem>.<ext>` / `video-001-<stem>.<ext>`; the stem part is omitted when empty. */
function buildFileName(kind: MediaKind, index: number, sourceUrl: string, extension: string): string {
  const stem = resolveFileStem(sourceUrl, extension);
  const prefix = kind === "image" ? "img" : "video";
  const serial = String(index).padStart(3, "0");
  const suffix = stem ? `-${stem}` : "";
  return `${prefix}-${serial}${suffix}.${extension}`;
}

/**
 * Rewrites a `pbs.twimg.com` image URL into its query-parameter form
 * (`?format=<ext>&name=4096x4096`), presumably to request the largest
 * rendition. Any other URL, or one without a known image extension, is
 * returned unchanged.
 */
function toHighResUrl(rawUrl: string): string {
  try {
    const parsed = new URL(rawUrl);
    if (parsed.hostname !== "pbs.twimg.com") return rawUrl;
    const dottedExt = path.posix.extname(parsed.pathname);
    const ext = dottedExt.replace(/^\./, "").toLowerCase();
    if (!ext || !IMAGE_EXTENSIONS.has(ext)) return rawUrl;
    // Slice by length so that `.JPG` is removed just like `.jpg`.
    parsed.pathname = parsed.pathname.slice(0, -dottedExt.length);
    parsed.searchParams.set("format", ext === "jpeg" ? "jpg" : ext);
    parsed.searchParams.set("name", "4096x4096");
    return parsed.toString();
  } catch {
    return rawUrl;
  }
}

/** True when a plain (non-image) link looks like media by extension or host. */
function isPlausibleMediaUrl(rawUrl: string): boolean {
  const ext = resolveExtensionFromUrl(rawUrl);
  if (ext && (IMAGE_EXTENSIONS.has(ext) || VIDEO_EXTENSIONS.has(ext))) return true;
  return resolveKindFromHostname(rawUrl) !== undefined;
}

/**
 * Collects the distinct URLs worth downloading, in document order: the
 * front-matter `coverImage` first, then every `![...]()` image link, then
 * plain links that look like media. Front matter may use LF or CRLF endings.
 */
function collectMarkdownLinkCandidates(markdown: string): MarkdownLinkCandidate[] {
  const candidates: MarkdownLinkCandidate[] = [];
  const seen = new Set<string>();

  const fmMatch = markdown.match(/^---\r?\n([\s\S]*?)\r?\n---/);
  const coverUrl = fmMatch?.[1]?.match(FRONTMATTER_COVER_RE)?.[2];
  if (coverUrl) {
    seen.add(coverUrl);
    candidates.push({ url: coverUrl, hint: "image" });
  }

  // The regex is global and shared, so start scanning from the beginning.
  MARKDOWN_LINK_RE.lastIndex = 0;
  let match: RegExpExecArray | null;
  while ((match = MARKDOWN_LINK_RE.exec(markdown))) {
    const label = match[1] ?? "";
    const rawUrl = match[3] ?? "";
    if (!rawUrl || seen.has(rawUrl)) continue;
    const isImage = label.startsWith("![");
    if (!isImage && !isPlausibleMediaUrl(rawUrl)) continue;
    seen.add(rawUrl);
    candidates.push({ url: rawUrl, hint: isImage ? "image" : "unknown" });
  }

  return candidates;
}

/**
 * Replaces every link target (and the `coverImage` value) that has an entry in
 * `replacements` with its local path. Angle brackets around a replaced URL are
 * dropped; links without a replacement are left untouched.
 */
function rewriteMarkdownMediaLinks(markdown: string, replacements: Map<string, string>): string {
  if (replacements.size === 0) return markdown;

  const withLinks = markdown.replace(
    MARKDOWN_LINK_RE,
    (full: string, label: string, _openAngle: string | undefined, rawUrl: string) => {
      const localPath = replacements.get(rawUrl);
      return localPath ? `${label}(${localPath})` : full;
    }
  );

  return withLinks.replace(
    FRONTMATTER_COVER_RE,
    (full: string, prefix: string, rawUrl: string, suffix: string) => {
      const localPath = replacements.get(rawUrl);
      return localPath ? `${prefix}${localPath}${suffix}` : full;
    }
  );
}

/** Best-effort release of a response whose body will not be read. */
async function discardResponseBody(response: Response): Promise<void> {
  try {
    await response.body?.cancel();
  } catch {
    // Nothing useful can be done if cancelling fails; the response is skipped anyway.
  }
}

/**
 * Downloads the remote images and videos referenced by `markdown` into `imgs/`
 * and `videos/` next to `options.markdownPath`, and rewrites the links to
 * relative paths.
 *
 * Downloads run one at a time. A failed or skipped download never rejects the
 * returned promise: HTTP errors and exceptions are reported through
 * `options.log`, and the affected link keeps its remote URL.
 *
 * @param markdown - Markdown source, optionally with YAML front matter.
 * @param options - Location of the Markdown file and an optional logger.
 * @returns The rewritten Markdown plus download counts and output directories.
 */
export async function localizeMarkdownMedia(
  markdown: string,
  options: LocalizeMarkdownMediaOptions
): Promise<LocalizeMarkdownMediaResult> {
  const log = options.log ?? (() => {});
  const markdownDir = path.dirname(options.markdownPath);
  const candidates = collectMarkdownLinkCandidates(markdown);

  if (candidates.length === 0) {
    return {
      markdown,
      downloadedImages: 0,
      downloadedVideos: 0,
      imageDir: null,
      videoDir: null,
    };
  }

  const replacements = new Map<string, string>();
  let downloadedImages = 0;
  let downloadedVideos = 0;

  for (const candidate of candidates) {
    try {
      const downloadUrl = toHighResUrl(candidate.url);
      const response = await fetch(downloadUrl, {
        method: "GET",
        redirect: "follow",
        headers: {
          "user-agent": DOWNLOAD_USER_AGENT,
        },
      });

      if (!response.ok) {
        log(`[x-to-markdown] Skip media (${response.status}): ${candidate.url}`);
        await discardResponseBody(response);
        continue;
      }

      // Prefer the post-redirect URL: it reflects what was actually served.
      const sourceUrl = response.url || candidate.url;
      const contentType = normalizeContentType(response.headers.get("content-type"));
      const extension = resolveExtensionFromUrl(sourceUrl) ?? resolveExtensionFromUrl(candidate.url);
      const kind = resolveMediaKind(sourceUrl, contentType, extension, candidate.hint);
      if (!kind) {
        await discardResponseBody(response);
        continue;
      }

      const outputExtension = resolveOutputExtension(contentType, extension, kind);
      const nextIndex = kind === "image" ? downloadedImages + 1 : downloadedVideos + 1;
      const dirName = kind === "image" ? "imgs" : "videos";
      const targetDir = path.join(markdownDir, dirName);
      const fileName = buildFileName(kind, nextIndex, sourceUrl, outputExtension);

      // Read the body before touching the disk so a failed download leaves no empty directory.
      const bytes = Buffer.from(await response.arrayBuffer());
      await mkdir(targetDir, { recursive: true });
      await writeFile(path.join(targetDir, fileName), bytes);

      // Links inside Markdown always use forward slashes.
      replacements.set(candidate.url, path.posix.join(dirName, fileName));

      if (kind === "image") {
        downloadedImages = nextIndex;
      } else {
        downloadedVideos = nextIndex;
      }
    } catch (error) {
      const message = error instanceof Error ? error.message : String(error ?? "");
      log(`[x-to-markdown] Failed to download media ${candidate.url}: ${message}`);
    }
  }

  return {
    markdown: rewriteMarkdownMediaLinks(markdown, replacements),
    downloadedImages,
    downloadedVideos,
    imageDir: downloadedImages > 0 ? path.join(markdownDir, "imgs") : null,
    videoDir: downloadedVideos > 0 ? path.join(markdownDir, "videos") : null,
  };
}