Loading source
Pulling the file list, source metadata, and syntax-aware rendering for this listing.
Source from repo
Fetch any URL via Chrome CDP and convert the rendered page to clean markdown with YouTube transcript support.
Files
Skill
Size
Entrypoint
Format
Open file
Syntax-highlighted preview of this file as included in the skill package.
scripts/lib/adapters/x/thread.ts
import type { ExtractedDocument } from "../../extract/document";
import {
  formatMediaList,
  formatTweetAuthor,
  getLegacy,
  getTweetAuthorMetadata,
  isRecord,
  normalizeTitle,
  toXTweet,
  unwrapTweetResult,
} from "./shared";
import type { JsonObject, XQuotedTweet, XTweet } from "./types";

/**
 * An `XTweet` plus the extra fields this module needs to stitch tweets
 * into a thread.
 */
interface ParsedThreadTweet extends XTweet {
  /** `legacy.user_id_str` of the tweet, when it is a string. */
  userId?: string;
  /** `legacy.conversation_id_str` of the tweet, when it is a string. */
  conversationId?: string;
  /** `legacy.in_reply_to_user_id_str` of the tweet, when it is a string. */
  inReplyToUserId?: string;
  /** `createdAt` parsed with `Date.parse`; `0` when missing or unparseable. */
  sortTimestamp: number;
}

/**
 * Orders two tweet id strings.
 *
 * Ids are compared numerically as `BigInt`s. If either id cannot be converted
 * (the `BigInt` constructor throws), the comparison falls back to
 * `String.prototype.localeCompare`.
 *
 * @returns A negative number, zero, or a positive number, as expected by
 * `Array.prototype.sort`.
 */
function compareTweetIds(left: string, right: string): number {
  try {
    const leftId = BigInt(left);
    const rightId = BigInt(right);
    if (leftId === rightId) {
      return 0;
    }
    return leftId < rightId ? -1 : 1;
  } catch {
    return left.localeCompare(right);
  }
}

/**
 * Converts a date string to epoch milliseconds via `Date.parse`.
 *
 * @returns `0` when `value` is empty/undefined or cannot be parsed.
 */
function toTimestamp(value: string | undefined): number {
  if (!value) {
    return 0;
  }
  const parsed = Date.parse(value);
  return Number.isNaN(parsed) ? 0 : parsed;
}

/**
 * Scores how complete a parsed tweet is, so that the richest copy wins when
 * the same tweet id is seen more than once.
 *
 * Text is worth 4, author handle 2, author name 2, and having any media 1.
 */
function scoreParsedTweet(tweet: ParsedThreadTweet): number {
  return (
    (tweet.text ? 4 : 0) +
    (tweet.author ? 2 : 0) +
    (tweet.authorName ? 2 : 0) +
    (tweet.media.length > 0 ? 1 : 0)
  );
}

/**
 * Sort comparator for sibling replies: earlier `sortTimestamp` first, with
 * ties broken by tweet id.
 */
function compareByTimestampThenId(left: ParsedThreadTweet, right: ParsedThreadTweet): number {
  if (left.sortTimestamp !== right.sortTimestamp) {
    return left.sortTimestamp - right.sortTimestamp;
  }
  return compareTweetIds(left.id, right.id);
}

/**
 * Builds a `ParsedThreadTweet` from a raw tweet object by combining the
 * result of `toXTweet` with identifiers read from the tweet's legacy record.
 */
function toParsedThreadTweet(tweet: JsonObject, pageUrl: string): ParsedThreadTweet {
  const legacy = getLegacy(tweet);
  const xTweet = toXTweet(tweet, pageUrl);

  return {
    ...xTweet,
    userId: typeof legacy.user_id_str === "string" ? legacy.user_id_str : undefined,
    conversationId: typeof legacy.conversation_id_str === "string" ? legacy.conversation_id_str : undefined,
    inReplyToUserId: typeof legacy.in_reply_to_user_id_str === "string" ? legacy.in_reply_to_user_id_str : undefined,
    sortTimestamp: toTimestamp(xTweet.createdAt),
  };
}

/**
 * Parses the tweet referenced by a timeline `itemContent` record (if any) and
 * stores it in `tweets`, keyed by tweet id.
 *
 * Items that are not records, or whose unwrapped tweet result has no string
 * `rest_id`, are ignored. When the id is already present, the new copy
 * replaces the old one only if its completeness score is at least as high.
 */
function collectTweetFromItemContent(
  itemContent: unknown,
  pageUrl: string,
  tweets: Map<string, ParsedThreadTweet>,
): void {
  if (!isRecord(itemContent)) {
    return;
  }

  const tweet = unwrapTweetResult(
    isRecord(itemContent.tweet_results) ? itemContent.tweet_results.result : null,
  );
  if (!tweet || typeof tweet.rest_id !== "string") {
    return;
  }

  const parsed = toParsedThreadTweet(tweet, pageUrl);
  const existing = tweets.get(parsed.id);
  if (!existing || scoreParsedTweet(parsed) >= scoreParsedTweet(existing)) {
    tweets.set(parsed.id, parsed);
  }
}

/**
 * Collects tweets from a list of timeline items.
 *
 * Each item may carry its content either nested as `item.item.itemContent` or
 * directly as `item.itemContent`; the nested form takes precedence.
 * Non-array input and non-record items are skipped.
 */
function collectTweetsFromItems(
  items: unknown,
  pageUrl: string,
  tweets: Map<string, ParsedThreadTweet>,
): void {
  if (!Array.isArray(items)) {
    return;
  }

  for (const item of items) {
    if (!isRecord(item)) {
      continue;
    }

    if (isRecord(item.item) && isRecord(item.item.itemContent)) {
      collectTweetFromItemContent(item.item.itemContent, pageUrl, tweets);
      continue;
    }

    if (isRecord(item.itemContent)) {
      collectTweetFromItemContent(item.itemContent, pageUrl, tweets);
    }
  }
}

/**
 * Locates the timeline `instructions` array inside a response payload.
 *
 * Three locations under `payload.data` are tried in order:
 * `threaded_conversation_with_injections_v2.instructions`,
 * `threaded_conversation_with_injections.instructions`, and
 * `tweetResult.result.timeline.instructions`.
 *
 * @returns The first instructions array found, or an empty array.
 */
function getInstructions(payload: unknown): unknown[] {
  if (!isRecord(payload) || !isRecord(payload.data)) {
    return [];
  }

  const { data } = payload;
  return (
    (isRecord(data.threaded_conversation_with_injections_v2) &&
    Array.isArray(data.threaded_conversation_with_injections_v2.instructions)
      ? data.threaded_conversation_with_injections_v2.instructions
      : undefined) ??
    (isRecord(data.threaded_conversation_with_injections) &&
    Array.isArray(data.threaded_conversation_with_injections.instructions)
      ? data.threaded_conversation_with_injections.instructions
      : undefined) ??
    (isRecord(data.tweetResult) &&
    isRecord(data.tweetResult.result) &&
    isRecord(data.tweetResult.result.timeline) &&
    Array.isArray(data.tweetResult.result.timeline.instructions)
      ? data.tweetResult.result.timeline.instructions
      : [])
  );
}

/**
 * Extracts every tweet found in one payload, de-duplicated by tweet id.
 *
 * For each instruction, tweets are gathered from `moduleItems` and, when
 * `entries` is an array, from each entry's `content.itemContent` and
 * `content.items`.
 */
function parseTweetDetailPayload(payload: unknown, pageUrl: string): ParsedThreadTweet[] {
  const tweets = new Map<string, ParsedThreadTweet>();

  const instructions = getInstructions(payload);
  for (const instruction of instructions) {
    if (!isRecord(instruction)) {
      continue;
    }

    collectTweetsFromItems(instruction.moduleItems, pageUrl, tweets);

    if (!Array.isArray(instruction.entries)) {
      continue;
    }

    for (const entry of instruction.entries) {
      if (!isRecord(entry)) {
        continue;
      }

      const content = isRecord(entry.content) ? entry.content : {};
      collectTweetFromItemContent(content.itemContent, pageUrl, tweets);
      collectTweetsFromItems(content.items, pageUrl, tweets);
    }
  }

  return Array.from(tweets.values());
}

/**
 * Builds the single reply chain that passes through the tweet `statusId`.
 *
 * The result is the chain of ancestors of the status tweet (oldest first),
 * the status tweet itself, and then, repeatedly, the earliest not-yet-used
 * reply to the current tail that was written by the same user in the same
 * conversation.
 *
 * @returns The ordered chain, or an empty array when the status tweet is not
 * among `tweets` or lacks a `userId` / `conversationId`.
 */
function buildContinuousThread(tweets: ParsedThreadTweet[], statusId: string): ParsedThreadTweet[] {
  // The same tweet can arrive from several payloads; keep the most complete copy.
  const byId = new Map<string, ParsedThreadTweet>();
  for (const tweet of tweets) {
    const existing = byId.get(tweet.id);
    if (!existing || scoreParsedTweet(tweet) >= scoreParsedTweet(existing)) {
      byId.set(tweet.id, tweet);
    }
  }

  const rootTweet = byId.get(statusId);
  if (!rootTweet?.userId || !rootTweet.conversationId) {
    return [];
  }

  // Only tweets by the status tweet's author within the same conversation may
  // extend the chain forward.
  const candidates = Array.from(byId.values()).filter(
    (tweet) =>
      tweet.id === statusId ||
      (tweet.userId === rootTweet.userId && tweet.conversationId === rootTweet.conversationId),
  );

  // Group candidate replies under the id of the tweet they reply to. The
  // status tweet's own reply edge is skipped here; it is handled by the
  // ancestor walk below.
  const repliesByParent = new Map<string, ParsedThreadTweet[]>();
  for (const tweet of candidates) {
    if (!tweet.inReplyTo || tweet.id === statusId) {
      continue;
    }
    const bucket = repliesByParent.get(tweet.inReplyTo) ?? [];
    bucket.push(tweet);
    repliesByParent.set(tweet.inReplyTo, bucket);
  }

  // Sort each bucket once after grouping instead of after every insertion.
  repliesByParent.forEach((bucket) => {
    bucket.sort(compareByTimestampThenId);
  });

  // Walk upwards from the status tweet. Parents are looked up among all parsed
  // tweets (not only same-author candidates); `ancestorSeen` guards against
  // reply cycles in malformed data.
  const ancestorPath: ParsedThreadTweet[] = [rootTweet];
  const ancestorSeen = new Set<string>([rootTweet.id]);
  let currentAncestor = rootTweet;

  while (currentAncestor.inReplyTo) {
    const parent = byId.get(currentAncestor.inReplyTo);
    if (!parent || ancestorSeen.has(parent.id)) {
      break;
    }
    ancestorPath.unshift(parent);
    ancestorSeen.add(parent.id);
    currentAncestor = parent;
  }

  // Walk downwards from the status tweet, always following the earliest
  // unused same-author reply.
  const chain = ancestorPath.slice();
  const seen = new Set<string>(chain.map((tweet) => tweet.id));
  let currentId = rootTweet.id;

  while (true) {
    const next = (repliesByParent.get(currentId) ?? []).find((tweet) => !seen.has(tweet.id));
    if (!next) {
      break;
    }
    chain.push(next);
    seen.add(next.id);
    currentId = next.id;
  }

  return chain;
}

/**
 * Parses all payloads and returns the continuous thread that contains the
 * tweet `statusId`.
 *
 * @param payloads Raw response payloads to scan for tweets.
 * @param statusId Id of the tweet the thread is built around.
 * @param pageUrl Page URL forwarded to `toXTweet` for each parsed tweet.
 * @returns The ordered thread with the internal `sortTimestamp` field
 * removed; empty when no thread can be built.
 */
export function extractThreadTweetsFromPayloads(
  payloads: unknown[],
  statusId: string,
  pageUrl: string,
): XTweet[] {
  // flatMap avoids passing every parsed tweet as a call argument to push().
  const parsedTweets = payloads.flatMap((payload) => parseTweetDetailPayload(payload, pageUrl));

  return buildContinuousThread(parsedTweets, statusId).map(({ sortTimestamp: _sortTimestamp, ...tweet }) => tweet);
}

/**
 * Renders a quoted tweet as a markdown blockquote: a "Quoted Tweet" header
 * (with author details when available), the tweet text line by line, and one
 * line per media entry. Empty lines are emitted as a bare `>`.
 */
function buildQuotedTweetMarkdown(quotedTweet: XQuotedTweet): string {
  const author = quotedTweet.author ? `@${quotedTweet.author}` : "Unknown";
  const name = quotedTweet.authorName ? `${quotedTweet.authorName} ` : "";
  const lines: string[] = [
    `Quoted Tweet${quotedTweet.author || quotedTweet.authorName ? `: ${name}${author}`.trim() : ""}`,
  ];

  if (quotedTweet.text) {
    lines.push(...quotedTweet.text.split("\n"));
  }

  for (const mediaLine of formatMediaList(quotedTweet.media)) {
    lines.push(mediaLine);
  }

  return lines.map((line) => (line ? `> ${line}` : ">")).join("\n");
}

/**
 * Renders the thread as markdown, one numbered `##` section per tweet with an
 * optional publish date, the text (or "(No text)"), a bullet list of media,
 * and the quoted tweet if present. Sections are separated by blank lines.
 */
function buildThreadMarkdown(tweets: XTweet[]): string {
  return tweets
    .map((tweet, index) => {
      const lines: string[] = [];
      const author = tweet.author ? `@${tweet.author}` : "Unknown";
      const name = tweet.authorName ? `${tweet.authorName} ` : "";
      lines.push(`## ${index + 1}. ${name}${author}`.trim());
      if (tweet.createdAt) {
        lines.push(`_Published: ${tweet.createdAt}_`);
      }
      lines.push(tweet.text || "(No text)");
      const mediaLines = formatMediaList(tweet.media);
      if (mediaLines.length > 0) {
        lines.push(mediaLines.map((line) => `- ${line}`).join("\n"));
      }
      if (tweet.quotedTweet) {
        lines.push(buildQuotedTweetMarkdown(tweet.quotedTweet));
      }
      return lines.join("\n\n");
    })
    .join("\n\n");
}

/**
 * Builds an `ExtractedDocument` describing the thread around `statusId`.
 *
 * Document-level fields (title, author, canonical URL, publish date, summary)
 * are taken from the first tweet of the reconstructed chain.
 *
 * @param payloads Raw response payloads to scan for tweets.
 * @param statusId Id of the tweet the thread is built around.
 * @param pageUrl URL of the page the payloads were captured from.
 * @returns The document, or `null` when the chain has fewer than two tweets
 * (i.e. there is no thread to speak of).
 */
export function extractThreadDocumentFromPayloads(
  payloads: unknown[],
  statusId: string,
  pageUrl: string,
): ExtractedDocument | null {
  const tweets = extractThreadTweetsFromPayloads(payloads, statusId, pageUrl);
  if (tweets.length <= 1) {
    return null;
  }

  const rootTweet = tweets[0];
  const rootAuthor = formatTweetAuthor(rootTweet);

  return {
    url: pageUrl,
    canonicalUrl: rootTweet.url,
    title: normalizeTitle(rootTweet.text, "X Thread"),
    author: rootAuthor,
    siteName: "X",
    publishedAt: rootTweet.createdAt,
    summary: rootTweet.text.slice(0, 200) || undefined,
    adapter: "x",
    metadata: {
      kind: "x/thread",
      tweetId: rootTweet.id,
      tweetCount: tweets.length,
      lastTweetId: tweets[tweets.length - 1]?.id,
      ...getTweetAuthorMetadata(rootTweet),
    },
    content: [{ type: "markdown", markdown: buildThreadMarkdown(tweets) }],
  };
}