Loading source
Pulling the file list, source metadata, and syntax-aware rendering for this listing.
Source from repo
Convert X (Twitter) tweets, threads, and articles to Markdown with YAML front matter via reverse-engineered API.
Files
Skill
Size
Entrypoint
Format
Open file
Syntax-highlighted preview of this file as included in the skill package.
scripts/markdown.ts
import type {
  ArticleBlock,
  ArticleContentState,
  ArticleEntity,
  ArticleEntityMapEntry,
  ArticleMediaInfo,
} from "./types.js";

/** Caller-supplied details about a tweet that an article embeds. */
export type ReferencedTweetInfo = {
  id: string;
  url: string;
  authorName?: string;
  authorUsername?: string;
  text?: string;
};

/** Options accepted by {@link formatArticleMarkdown}. */
export type FormatArticleOptions = {
  /** Lookup from tweet id to details used when rendering embedded tweets. */
  referencedTweets?: Map<string, ReferencedTweetInfo>;
};

/** A media item reduced to the URLs needed to render it as Markdown. */
type ResolvedMediaAsset =
  | {
      kind: "image";
      url: string;
    }
  | {
      kind: "video";
      url: string;
      posterUrl?: string;
    };

/** Kinds of rendered output tracked to decide where blank separator lines go. */
type RenderedKind = "list" | "quote" | "heading" | "text" | "code" | "media";

/** Markdown heading prefix for each heading block type. */
const HEADING_PREFIXES: ReadonlyMap<string, string> = new Map([
  ["header-one", "#"],
  ["header-two", "##"],
  ["header-three", "###"],
  ["header-four", "####"],
  ["header-five", "#####"],
  ["header-six", "######"],
]);

// Text blocks consisting solely of this token are not emitted as text.
// NOTE(review): presumably an inline-image placeholder — confirm against the API payloads.
const PLACEHOLDER_TEXT_PATTERN = /^XIMGPH_\d+$/;

/**
 * Accepts `value` as an article when it is an object that has a string
 * `title`, `plain_text` or `preview_text`, or a truthy `content_state`.
 *
 * @returns The same object typed as an article, or `null` when it does not qualify.
 */
function coerceArticleEntity(value: unknown): ArticleEntity | null {
  if (!value || typeof value !== "object") return null;
  const candidate = value as ArticleEntity;
  if (
    typeof candidate.title === "string" ||
    typeof candidate.plain_text === "string" ||
    typeof candidate.preview_text === "string" ||
    candidate.content_state
  ) {
    return candidate;
  }
  return null;
}

/** Backslash-escapes square brackets so the text is safe inside `![...]`. */
function escapeMarkdownAlt(text: string): string {
  return text.replace(/[\[\]]/g, "\\$&");
}

/** Trims a caption and collapses every whitespace run to a single space. */
function normalizeCaption(caption?: string): string {
  const trimmed = caption?.trim();
  if (!trimmed) return "";
  return trimmed.replace(/\s+/g, " ");
}

/**
 * Flattens tweet text onto one line and caps it at 280 UTF-16 code units;
 * longer text is cut to 277 units followed by `...`.
 */
function summarizeTweetText(text?: string): string {
  const trimmed = text?.trim();
  if (!trimmed) return "";
  const normalized = trimmed
    .split(/\r?\n+/)
    .map((line) => line.trim())
    .filter(Boolean)
    .join(" ");
  if (normalized.length <= 280) return normalized;
  return `${normalized.slice(0, 277)}...`;
}

/**
 * Builds a status URL for a tweet.
 *
 * @returns `null` without a tweet id; the author-scoped URL when a username is
 *   given; otherwise the username-less `/i/web/status/` form.
 */
function buildTweetUrl(tweetId?: string, username?: string): string | null {
  if (!tweetId) return null;
  if (username) {
    return `https://x.com/${username}/status/${tweetId}`;
  }
  return `https://x.com/i/web/status/${tweetId}`;
}

/** Entity-map entries indexed two ways: by map position and by their own `key` field. */
type EntityLookup = {
  byIndex: Map<number, ArticleEntityMapEntry>;
  byLogicalKey: Map<number, ArticleEntityMapEntry>;
};

/**
 * Indexes an entity map both by its property names (as numbers) and by each
 * entry's parsed `key`. For duplicate logical keys the first entry wins.
 */
function buildEntityLookup(
  entityMap: ArticleContentState["entityMap"] | undefined
): EntityLookup {
  const lookup: EntityLookup = {
    byIndex: new Map<number, ArticleEntityMapEntry>(),
    byLogicalKey: new Map<number, ArticleEntityMapEntry>(),
  };

  if (!entityMap) return lookup;

  for (const [idx, entry] of Object.entries(entityMap)) {
    const idxNum = Number(idx);
    if (Number.isFinite(idxNum)) {
      lookup.byIndex.set(idxNum, entry);
    }

    const logicalKey = parseInt(entry?.key ?? "", 10);
    if (Number.isFinite(logicalKey) && !lookup.byLogicalKey.has(logicalKey)) {
      lookup.byLogicalKey.set(logicalKey, entry);
    }
  }

  return lookup;
}

/**
 * Finds the entity entry a range refers to, trying the logical key first,
 * then the positional index, then a direct property lookup on the raw map.
 */
function resolveEntityEntry(
  entityKey: number | undefined,
  entityMap: ArticleContentState["entityMap"] | undefined,
  lookup: EntityLookup
): ArticleEntityMapEntry | undefined {
  if (entityKey === undefined) return undefined;

  const byLogicalKey = lookup.byLogicalKey.get(entityKey);
  if (byLogicalKey) return byLogicalKey;

  const byIndex = lookup.byIndex.get(entityKey);
  if (byIndex) return byIndex;

  if (!entityMap) return undefined;
  return entityMap[String(entityKey)];
}

/**
 * Picks the URL of the highest-`bit_rate` variant whose content type contains
 * "video"; falls back to the first variant that has a string URL.
 */
function resolveVideoUrl(info?: ArticleMediaInfo): string | undefined {
  if (!info) return undefined;
  const variants = info.variants ?? [];
  // `filter` returns a fresh array, so the in-place sort does not touch `info.variants`.
  const best = variants
    .filter((variant) => variant?.content_type?.includes("video"))
    .sort((a, b) => (b.bit_rate ?? 0) - (a.bit_rate ?? 0))[0];
  return best?.url ?? variants.find((variant) => typeof variant?.url === "string")?.url;
}

/**
 * Reduces media info to a renderable asset: a video (with optional poster)
 * when any variant URL exists, otherwise an image, otherwise `undefined`.
 */
function resolveMediaAsset(info?: ArticleMediaInfo): ResolvedMediaAsset | undefined {
  if (!info) return undefined;

  const posterUrl = info.preview_image?.original_img_url ?? info.original_img_url;
  const videoUrl = resolveVideoUrl(info);
  if (videoUrl) {
    return {
      kind: "video",
      url: videoUrl,
      posterUrl,
    };
  }

  const imageUrl = info.original_img_url ?? info.preview_image?.original_img_url;
  if (imageUrl) {
    return {
      kind: "image",
      url: imageUrl,
    };
  }

  return undefined;
}

/**
 * Classifies a bare URL as video (video.twimg.com host or a known video file
 * extension) or, failing that, as an image.
 */
function resolveFallbackMediaAsset(rawUrl?: string): ResolvedMediaAsset | undefined {
  if (!rawUrl) return undefined;

  if (
    /^https:\/\/video\.twimg\.com\//i.test(rawUrl) ||
    /\.(mp4|m4v|mov|webm)(?:$|[?#])/i.test(rawUrl)
  ) {
    return {
      kind: "video",
      url: rawUrl,
    };
  }

  return {
    kind: "image",
    url: rawUrl,
  };
}

/** Returns the cover image URL, preferring the original over the preview image. */
function resolveCoverUrl(info?: ArticleMediaInfo): string | undefined {
  if (!info) return undefined;
  return info.original_img_url ?? info.preview_image?.original_img_url;
}

/** Builds a string identity used to de-duplicate assets. */
function buildMediaIdentity(asset: ResolvedMediaAsset): string {
  return asset.kind === "video"
    ? `video:${asset.url}:${asset.posterUrl ?? ""}`
    : `image:${asset.url}`;
}

/**
 * Renders one asset as Markdown lines, skipping any URL already in `usedUrls`.
 *
 * A video yields its poster as an image (when present) followed by a
 * `[video](url)` link; an image yields a single `![alt](url)` line.
 *
 * @param asset Asset to render.
 * @param altText Alt text, already escaped for use inside `![...]`.
 * @param usedUrls URLs emitted so far; mutated to record what this call emits.
 * @returns Zero or more Markdown lines.
 */
function renderMediaLines(
  asset: ResolvedMediaAsset,
  altText: string,
  usedUrls: Set<string>
): string[] {
  if (asset.kind === "video") {
    const lines: string[] = [];
    if (asset.posterUrl && !usedUrls.has(asset.posterUrl)) {
      usedUrls.add(asset.posterUrl);
      // A poster without a caption still gets a meaningful alt text.
      lines.push(`![${altText || "video"}](${asset.posterUrl})`);
    }
    if (!usedUrls.has(asset.url)) {
      usedUrls.add(asset.url);
      lines.push(`[video](${asset.url})`);
    }
    return lines;
  }

  if (usedUrls.has(asset.url)) {
    return [];
  }

  usedUrls.add(asset.url);
  return [`![${altText}](${asset.url})`];
}

/** Maps each media entity's `media_id` to its resolved asset, skipping unresolvable ones. */
function buildMediaById(article: ArticleEntity): Map<string, ResolvedMediaAsset> {
  const map = new Map<string, ResolvedMediaAsset>();
  for (const entity of article.media_entities ?? []) {
    if (!entity?.media_id) continue;
    const asset = resolveMediaAsset(entity.media_info);
    if (asset) {
      map.set(entity.media_id, asset);
    }
  }
  return map;
}

/** Lists the article's resolvable media assets in order, without duplicates. */
function collectMediaAssets(article: ArticleEntity): ResolvedMediaAsset[] {
  const assets: ResolvedMediaAsset[] = [];
  const seen = new Set<string>();

  for (const entity of article.media_entities ?? []) {
    const asset = resolveMediaAsset(entity?.media_info);
    if (!asset) continue;
    const identity = buildMediaIdentity(asset);
    if (seen.has(identity)) continue;
    seen.add(identity);
    assets.push(asset);
  }

  return assets;
}

/**
 * Renders the media referenced by a `MEDIA`/`IMAGE` entity: each media item
 * found in `mediaById`, then the entity's own `data.url` when it has one.
 * The normalized caption is used as alt text. Returns `[]` for other entities.
 */
function resolveEntityMediaLines(
  entityKey: number | undefined,
  entityMap: ArticleContentState["entityMap"] | undefined,
  entityLookup: EntityLookup,
  mediaById: Map<string, ResolvedMediaAsset>,
  usedUrls: Set<string>
): string[] {
  if (entityKey === undefined) return [];
  const entry = resolveEntityEntry(entityKey, entityMap, entityLookup);
  const value = entry?.value;
  if (!value) return [];
  const type = value.type;
  if (type !== "MEDIA" && type !== "IMAGE") return [];

  const caption = normalizeCaption(value.data?.caption);
  const altText = caption ? escapeMarkdownAlt(caption) : "";
  const lines: string[] = [];

  const mediaItems = value.data?.mediaItems ?? [];
  for (const item of mediaItems) {
    // The id is read from either the camelCase or the snake_case field.
    const mediaId =
      typeof item?.mediaId === "string"
        ? item.mediaId
        : typeof item?.media_id === "string"
          ? item.media_id
          : undefined;
    const asset = mediaId ? mediaById.get(mediaId) : undefined;
    if (asset) {
      lines.push(...renderMediaLines(asset, altText, usedUrls));
    }
  }

  const fallbackUrl = typeof value.data?.url === "string" ? value.data.url : undefined;
  const fallbackAsset = resolveFallbackMediaAsset(fallbackUrl);
  if (fallbackAsset) {
    lines.push(...renderMediaLines(fallbackAsset, altText, usedUrls));
  }

  return lines;
}

/**
 * Renders a `TWEET` entity as blockquote lines: a label line (with the author
 * when known), an optional one-line summary of the tweet text, and the URL.
 * Returns `[]` for other entities or when the entity has no string `tweetId`.
 */
function resolveEntityTweetLines(
  entityKey: number | undefined,
  entityMap: ArticleContentState["entityMap"] | undefined,
  entityLookup: EntityLookup,
  referencedTweets?: Map<string, ReferencedTweetInfo>
): string[] {
  if (entityKey === undefined) return [];
  const entry = resolveEntityEntry(entityKey, entityMap, entityLookup);
  const value = entry?.value;
  if (!value || value.type !== "TWEET") return [];

  const tweetId = typeof value.data?.tweetId === "string" ? value.data.tweetId : "";
  if (!tweetId) return [];

  const referenced = referencedTweets?.get(tweetId);
  const url =
    referenced?.url ??
    buildTweetUrl(tweetId, referenced?.authorUsername) ??
    `https://x.com/i/web/status/${tweetId}`;

  const authorText =
    referenced?.authorName && referenced?.authorUsername
      ? `${referenced.authorName} (@${referenced.authorUsername})`
      : referenced?.authorUsername
        ? `@${referenced.authorUsername}`
        : referenced?.authorName;

  const lines: string[] = [];
  // The label is Chinese for "quoted tweet"; it is user-facing output.
  lines.push(`> 引用推文${authorText ? `:${authorText}` : ""}`);

  const summary = summarizeTweetText(referenced?.text);
  if (summary) {
    lines.push(`> ${summary}`);
  }

  lines.push(`> ${url}`);
  return lines;
}

/**
 * Returns the lines of a `MARKDOWN` entity's `data.markdown`, with CRLF
 * normalized to LF and trailing whitespace removed. `[]` for other entities.
 */
function resolveEntityMarkdownLines(
  entityKey: number | undefined,
  entityMap: ArticleContentState["entityMap"] | undefined,
  entityLookup: EntityLookup
): string[] {
  if (entityKey === undefined) return [];
  const entry = resolveEntityEntry(entityKey, entityMap, entityLookup);
  const value = entry?.value;
  if (!value || value.type !== "MARKDOWN") return [];

  const markdown = typeof value.data?.markdown === "string" ? value.data.markdown : "";
  const normalized = markdown.replace(/\r\n/g, "\n").trimEnd();
  if (!normalized) return [];
  return normalized.split("\n");
}

/**
 * Pairs `MEDIA`/`IMAGE` entities with `LINK` entity URLs.
 *
 * Both groups are ordered by logical key. Each media entity takes the first
 * unused link whose key is greater than its own, or the first remaining link
 * when none is greater. The URL is registered under both the media entity's
 * map index and its logical key.
 * NOTE(review): the pairing is positional; the reason the payload needs it is
 * not visible here — confirm against real API responses.
 */
function buildMediaLinkMap(
  entityMap: ArticleContentState["entityMap"] | undefined
): Map<number, string> {
  const map = new Map<number, string>();
  if (!entityMap) return map;

  const mediaEntries: { idx: number; key: number }[] = [];
  const linkEntries: { key: number; url: string }[] = [];

  for (const [idx, entry] of Object.entries(entityMap)) {
    const value = entry?.value;
    if (!value) continue;
    const key = parseInt(entry?.key ?? "", 10);
    if (Number.isNaN(key)) continue;

    if (value.type === "MEDIA" || value.type === "IMAGE") {
      mediaEntries.push({ idx: Number(idx), key });
    } else if (value.type === "LINK" && typeof value.data?.url === "string") {
      linkEntries.push({ key, url: value.data.url });
    }
  }

  if (mediaEntries.length === 0 || linkEntries.length === 0) return map;

  mediaEntries.sort((a, b) => a.key - b.key);
  linkEntries.sort((a, b) => a.key - b.key);

  const pool = [...linkEntries];
  for (const media of mediaEntries) {
    const nextIdx = pool.findIndex((candidate) => candidate.key > media.key);
    const [link] = pool.splice(nextIdx === -1 ? 0 : nextIdx, 1);
    if (!link) break; // every link has been consumed
    map.set(media.idx, link.url);
    map.set(media.key, link.url);
  }

  return map;
}

/**
 * Wraps the text covered by link-bearing entity ranges in `[text](url)`.
 *
 * `LINK` entities use their own URL; `MEDIA`/`IMAGE` entities use the URL
 * registered for the range key in `mediaLinkMap`. Ranges without a numeric
 * key, offset and positive length are ignored.
 */
function renderInlineLinks(
  text: string,
  entityRanges: Array<{ key?: number; offset?: number; length?: number }>,
  entityMap: ArticleContentState["entityMap"] | undefined,
  entityLookup: EntityLookup,
  mediaLinkMap: Map<number, string>
): string {
  if (!entityMap || entityRanges.length === 0) return text;

  const valid = entityRanges.flatMap((r) =>
    typeof r.key === "number" &&
    typeof r.offset === "number" &&
    typeof r.length === "number" &&
    r.length > 0
      ? [{ key: r.key, offset: r.offset, length: r.length }]
      : []
  );
  if (valid.length === 0) return text;

  // Apply from the rightmost range backwards so earlier offsets stay valid
  // while the string grows.
  valid.sort((a, b) => b.offset - a.offset);

  let result = text;
  for (const { key, offset, length } of valid) {
    const value = resolveEntityEntry(key, entityMap, entityLookup)?.value;
    if (!value) continue;

    let url: string | undefined;
    if (value.type === "LINK" && typeof value.data?.url === "string") {
      url = value.data.url;
    } else if (value.type === "MEDIA" || value.type === "IMAGE") {
      url = mediaLinkMap.get(key);
    }

    if (!url) continue;

    const linkText = result.slice(offset, offset + length);
    result = `${result.slice(0, offset)}[${linkText}](${url})${result.slice(offset + length)}`;
  }

  return result;
}

/**
 * Renders content blocks as Markdown lines.
 *
 * Consecutive `code-block` blocks share one fence. `atomic` blocks emit their
 * tweet, markdown, media and link entities, in that order. Every other block
 * emits its text followed by any media its entity ranges reference. A blank
 * line separates outputs, except between adjacent list, quote or media output.
 *
 * @param usedUrls Media URLs already emitted; mutated as media is rendered.
 */
function renderContentBlocks(
  blocks: ArticleBlock[],
  entityMap: ArticleContentState["entityMap"] | undefined,
  entityLookup: EntityLookup,
  mediaById: Map<string, ResolvedMediaAsset>,
  usedUrls: Set<string>,
  mediaLinkMap: Map<number, string>,
  referencedTweets?: Map<string, ReferencedTweetInfo>
): string[] {
  const lines: string[] = [];
  let previousKind: RenderedKind | null = null;
  let listKind: "ordered" | "unordered" | null = null;
  let orderedIndex = 0;
  let inCodeBlock = false;

  const pushBlock = (blockLines: string[], kind: Exclude<RenderedKind, "code">) => {
    if (blockLines.length === 0) return;
    const continuesRun =
      previousKind === kind && (kind === "list" || kind === "quote" || kind === "media");
    if (lines.length > 0 && previousKind && !continuesRun) {
      lines.push("");
    }
    lines.push(...blockLines);
    previousKind = kind;
  };

  // Runs `resolve` for each numerically keyed entity range of the block and
  // concatenates the results in range order.
  const collectFromEntities = (
    block: ArticleBlock,
    resolve: (entityKey: number) => string[]
  ): string[] => {
    const ranges = Array.isArray(block.entityRanges) ? block.entityRanges : [];
    const collected: string[] = [];
    for (const range of ranges) {
      if (typeof range?.key !== "number") continue;
      collected.push(...resolve(range.key));
    }
    return collected;
  };

  const collectMediaLines = (block: ArticleBlock): string[] =>
    collectFromEntities(block, (key) =>
      resolveEntityMediaLines(key, entityMap, entityLookup, mediaById, usedUrls)
    );

  const collectTweetLines = (block: ArticleBlock): string[] =>
    collectFromEntities(block, (key) =>
      resolveEntityTweetLines(key, entityMap, entityLookup, referencedTweets)
    );

  const collectMarkdownLines = (block: ArticleBlock): string[] =>
    collectFromEntities(block, (key) =>
      resolveEntityMarkdownLines(key, entityMap, entityLookup)
    );

  // Bare URLs of LINK entities, de-duplicated in first-seen order.
  const collectLinkLines = (block: ArticleBlock): string[] => [
    ...new Set(
      collectFromEntities(block, (key) => {
        const value = resolveEntityEntry(key, entityMap, entityLookup)?.value;
        if (value?.type !== "LINK") return [];
        const url = typeof value.data?.url === "string" ? value.data.url : "";
        return url ? [url] : [];
      })
    ),
  ];

  const closeCodeFence = () => {
    if (!inCodeBlock) return;
    lines.push("```");
    inCodeBlock = false;
    previousKind = "code";
  };

  const resetList = () => {
    listKind = null;
    orderedIndex = 0;
  };

  for (const block of blocks) {
    // A nullish entry would otherwise throw on the property reads below.
    if (block == null) continue;

    const type = typeof block.type === "string" ? block.type : "unstyled";
    const rawText = typeof block.text === "string" ? block.text : "";
    const ranges = Array.isArray(block.entityRanges) ? block.entityRanges : [];
    // Atomic and code blocks keep their raw text; inline links apply elsewhere.
    const text =
      type !== "atomic" && type !== "code-block"
        ? renderInlineLinks(rawText, ranges, entityMap, entityLookup, mediaLinkMap)
        : rawText;

    if (type === "code-block") {
      if (!inCodeBlock) {
        if (lines.length > 0) {
          lines.push("");
        }
        lines.push("```");
        inCodeBlock = true;
      }
      lines.push(text);
      previousKind = "code";
      resetList();
      continue;
    }

    closeCodeFence();

    if (type === "atomic") {
      resetList();
      // Each collector runs right before its push; media rendering mutates `usedUrls`.
      pushBlock(collectTweetLines(block), "quote");
      pushBlock(collectMarkdownLines(block), "text");
      pushBlock(collectMediaLines(block), "media");
      pushBlock(collectLinkLines(block), "text");
      continue;
    }

    if (type === "unordered-list-item") {
      listKind = "unordered";
      orderedIndex = 0;
      pushBlock([`- ${text}`], "list");
      pushBlock(collectMediaLines(block), "media");
      continue;
    }

    if (type === "ordered-list-item") {
      // Numbering restarts whenever the preceding block was not an ordered item.
      if (listKind !== "ordered") {
        orderedIndex = 0;
      }
      listKind = "ordered";
      orderedIndex += 1;
      pushBlock([`${orderedIndex}. ${text}`], "list");
      pushBlock(collectMediaLines(block), "media");
      continue;
    }

    resetList();

    const headingPrefix = HEADING_PREFIXES.get(type);
    if (headingPrefix !== undefined) {
      pushBlock([`${headingPrefix} ${text}`], "heading");
    } else if (type === "blockquote") {
      const quoteLines = text.length > 0 ? text.split("\n") : [""];
      pushBlock(
        quoteLines.map((line) => `> ${line}`),
        "quote"
      );
    } else if (!PLACEHOLDER_TEXT_PATTERN.test(text.trim())) {
      pushBlock([text], "text");
    }
    pushBlock(collectMediaLines(block), "media");
  }

  if (inCodeBlock) {
    lines.push("```");
  }

  return lines;
}

/** Result of {@link formatArticleMarkdown}. */
export type FormatArticleResult = {
  markdown: string;
  /** Cover image URL, or `null` when the article has none. */
  coverUrl: string | null;
};

/**
 * Lists the distinct tweet ids referenced by `TWEET` entities in the
 * article's entity map, in first-seen order.
 *
 * @param article Raw article payload; anything not recognised as an article yields `[]`.
 */
export function extractReferencedTweetIds(article: unknown): string[] {
  const candidate = coerceArticleEntity(article);
  const entityMap = candidate?.content_state?.entityMap;
  if (!entityMap) return [];

  const ids: string[] = [];
  const seen = new Set<string>();
  for (const entry of Object.values(entityMap)) {
    const value = entry?.value;
    if (value?.type !== "TWEET") continue;
    const tweetId = typeof value.data?.tweetId === "string" ? value.data.tweetId : "";
    if (!tweetId || seen.has(tweetId)) continue;
    seen.add(tweetId);
    ids.push(tweetId);
  }
  return ids;
}

/**
 * Converts an article payload to Markdown.
 *
 * The output is the title as a level-1 heading, then the rendered content
 * blocks (or, without blocks, the trimmed `plain_text`, else `preview_text`),
 * then a `## Media` section for media entities not already emitted. The cover
 * image URL is returned separately and is never repeated in the body.
 *
 * @param article Raw article payload. A value not recognised as an article is
 *   returned as a fenced JSON dump.
 * @param options Optional details for tweets embedded in the article.
 */
export function formatArticleMarkdown(
  article: unknown,
  options: FormatArticleOptions = {}
): FormatArticleResult {
  const candidate = coerceArticleEntity(article);
  if (!candidate) {
    return {
      markdown: `\`\`\`json\n${JSON.stringify(article, null, 2)}\n\`\`\``,
      coverUrl: null,
    };
  }

  const lines: string[] = [];
  const usedUrls = new Set<string>();
  const mediaById = buildMediaById(candidate);
  const title = typeof candidate.title === "string" ? candidate.title.trim() : "";
  if (title) {
    lines.push(`# ${title}`);
  }

  const coverUrl = resolveCoverUrl(candidate.cover_media?.media_info) ?? null;
  if (coverUrl) {
    // Marking the cover as used keeps it out of the body and the media section.
    usedUrls.add(coverUrl);
  }

  const blocks = candidate.content_state?.blocks;
  const entityMap = candidate.content_state?.entityMap;
  const entityLookup = buildEntityLookup(entityMap);
  if (Array.isArray(blocks) && blocks.length > 0) {
    const mediaLinkMap = buildMediaLinkMap(entityMap);
    const rendered = renderContentBlocks(
      blocks,
      entityMap,
      entityLookup,
      mediaById,
      usedUrls,
      mediaLinkMap,
      options.referencedTweets
    );
    if (rendered.length > 0) {
      if (lines.length > 0) lines.push("");
      lines.push(...rendered);
    }
  } else if (typeof candidate.plain_text === "string") {
    if (lines.length > 0) lines.push("");
    lines.push(candidate.plain_text.trim());
  } else if (typeof candidate.preview_text === "string") {
    if (lines.length > 0) lines.push("");
    lines.push(candidate.preview_text.trim());
  }

  const trailingMediaLines: string[] = [];
  for (const asset of collectMediaAssets(candidate)) {
    trailingMediaLines.push(...renderMediaLines(asset, "", usedUrls));
  }
  if (trailingMediaLines.length > 0) {
    lines.push("", "## Media", "");
    lines.push(...trailingMediaLines);
  }

  return { markdown: lines.join("\n").trimEnd(), coverUrl };
}