Source from repo
Agent Skills for Context Engineering

A comprehensive collection of Agent Skills for context engineering, multi-agent architectures, and production agent systems.
muratcankoylan · GitHub: muratcankoylan · Source repo · Original GitHub link
Files
339
Skill
n/a
Size
4.3 MB
Entrypoint
SKILL.md
Format
git-repo
Open file
researcher/benchmarks/sdk-runner/src/common.ts

Syntax-highlighted preview of this file as included in the skill package.
Rendered Source
code · 355 lines · Free
researcher/benchmarks/sdk-runner/src/common.ts
/**
 * Shared utilities for the researcher SDK benchmark runner.
 *
 * Self-contained helpers only (argument parsing, filesystem, git, hashing).
 * No SDK calls live here so the helpers can be unit-tested or invoked from
 * --dry-run without an API key.
 */
7 
8import { createHash } from "node:crypto";
9import { execSync } from "node:child_process";
10import { mkdirSync, readFileSync, writeFileSync, existsSync, appendFileSync } from "node:fs";
11import { dirname, join, resolve } from "node:path";
12import { fileURLToPath } from "node:url";
13 
/** Directory that contains this module file. */
export const SDK_RUNNER_DIR = dirname(fileURLToPath(import.meta.url));
/** Parent directory of {@link SDK_RUNNER_DIR}. */
export const RUNNER_ROOT = resolve(SDK_RUNNER_DIR, "..");
/** Two levels above {@link RUNNER_ROOT}; used as the base for `corpus/index.json`. */
export const RESEARCHER_DIR = resolve(RUNNER_ROOT, "..", "..");
/** Parent of {@link RESEARCHER_DIR}; skill paths and git commands are resolved against it. */
export const REPO_ROOT = resolve(RESEARCHER_DIR, "..");
18 
/** Identifier of a skill; a plain string alias kept for readability at call sites. */
export type SkillId = string;
20 
/** Fully-defaulted runner configuration, as produced by `resolveConfig`. */
export interface ResolvedConfig {
  /** Model identifiers to include in the run plan. */
  models: string[];
  /** Repetitions per prompt/model pair. */
  reps: number;
  /** Upper bound on the number of planned runs. */
  maxRuns: number;
  /** Upper bound on the forecast total cost, in USD. */
  maxBudgetUsd: number;
  /** Base seed for the run. */
  seed: number;
  /** Path of the fixture file to load. */
  fixturePath: string;
  /** True when `--dry-run` was passed. */
  dryRun: boolean;
  /** True when `--unsafe-no-cost-cap` was passed. */
  unsafeNoCostCap: boolean;
  /** Requested worker concurrency. */
  concurrency: number;
  /** True when `--no-resume` was passed. */
  noResume: boolean;
}
33 
/** One planned run: a single prompt, on a single model, for a single repetition. */
export interface RunPlanItem {
  /** Identifier of the prompt to run. */
  promptId: string;
  /** Identifier of the model to run it on. */
  modelId: string;
  /** Zero-based repetition index. */
  rep: number;
  /** Per-run 32-bit seed derived from the prompt, model, repetition and base seed. */
  shuffleSeed: number;
}
40 
/**
 * Raw command-line flags as parsed from argv. Value-taking flags are optional
 * and stay `undefined` when not supplied; boolean switches default to `false`.
 */
export interface CliFlags {
  /** `--models a,b,c` */
  models?: string[];
  /** `--reps N` */
  reps?: number;
  /** `--max-runs N` */
  maxRuns?: number;
  /** `--max-budget-usd N` */
  maxBudgetUsd?: number;
  /** `--seed N` */
  seed?: number;
  /** `--fixture PATH` */
  fixture?: string;
  /** `--dry-run` */
  dryRun: boolean;
  /** `--unsafe-no-cost-cap` */
  unsafeNoCostCap: boolean;
  /** `--concurrency N` */
  concurrency?: number;
  /** `--no-resume` */
  noResume: boolean;
}
53 
/** Model list used when `--models` is absent or empty. */
const DEFAULT_MODELS = ["composer-2"];
55 
/**
 * Parses runner command-line arguments into a `CliFlags` object.
 *
 * Boolean switches (`--dry-run`, `--unsafe-no-cost-cap`, `--no-resume`) take
 * no value. Every other recognised flag consumes the following token as its
 * value. Tokens that do not start with `--` and are not consumed as a value
 * are ignored.
 *
 * @param argv Argument tokens, without the node binary and script path.
 * @returns The parsed flags; value-taking flags that were not supplied stay `undefined`.
 * @throws Error on an unknown `--flag`, on a value-taking flag with no value
 *   (end of argv, or the next token is itself a `--flag`), and on a numeric
 *   flag whose value is empty or not a finite number.
 */
export function parseCliFlags(argv: string[]): CliFlags {
  const flags: CliFlags = { dryRun: false, unsafeNoCostCap: false, noResume: false };

  // Fetches the value token for `flag`, refusing to swallow a following flag.
  const valueAt = (flag: string, index: number): string => {
    const value = argv[index];
    if (value === undefined || value.startsWith("--")) {
      throw new Error(`Missing value for ${flag}`);
    }
    return value;
  };

  // Number("") is 0 and Number("abc") is NaN; both would otherwise slip
  // through silently and corrupt cost caps downstream.
  const numberAt = (flag: string, index: number): number => {
    const raw = valueAt(flag, index);
    const parsed = Number(raw);
    if (raw.trim() === "" || !Number.isFinite(parsed)) {
      throw new Error(`Invalid numeric value for ${flag}: ${raw}`);
    }
    return parsed;
  };

  for (let i = 0; i < argv.length; i++) {
    const arg = argv[i];
    switch (arg) {
      case "--dry-run":
        flags.dryRun = true;
        break;
      case "--unsafe-no-cost-cap":
        flags.unsafeNoCostCap = true;
        break;
      case "--no-resume":
        flags.noResume = true;
        break;
      case "--models":
        flags.models = valueAt(arg, ++i)
          .split(",")
          .map((value) => value.trim())
          .filter(Boolean);
        break;
      case "--reps":
        flags.reps = numberAt(arg, ++i);
        break;
      case "--max-runs":
        flags.maxRuns = numberAt(arg, ++i);
        break;
      case "--max-budget-usd":
        flags.maxBudgetUsd = numberAt(arg, ++i);
        break;
      case "--seed":
        flags.seed = numberAt(arg, ++i);
        break;
      case "--fixture":
        flags.fixture = valueAt(arg, ++i);
        break;
      case "--concurrency":
        flags.concurrency = numberAt(arg, ++i);
        break;
      default:
        if (arg?.startsWith("--")) {
          throw new Error(`Unknown flag: ${arg}`);
        }
    }
  }
  return flags;
}
99 
/**
 * Applies defaults to parsed CLI flags and enforces that a real run has a cost cap.
 *
 * A run is allowed when at least one of `--max-runs` / `--max-budget-usd` is a
 * non-zero number, or when `--unsafe-no-cost-cap` or `--dry-run` is set.
 * Defaults: models fall back to `DEFAULT_MODELS`, reps to 3, seed to 1,
 * concurrency to 1, and unset caps to `Number.MAX_SAFE_INTEGER`.
 *
 * @param flags Flags as returned by the CLI parser.
 * @param defaultFixturePath Fixture path used when `--fixture` was not given.
 * @returns A fully-populated configuration. `models` is always a fresh array.
 * @throws Error when a supplied cap is NaN, or when no cap, override or dry-run is present.
 */
export function resolveConfig(
  flags: CliFlags,
  defaultFixturePath: string,
): ResolvedConfig {
  // A NaN cap compares false against everything, so it would silently disable
  // that cap while the other cap lets the guard below pass. Reject it outright.
  const caps: Array<[string, number | undefined]> = [
    ["--max-runs", flags.maxRuns],
    ["--max-budget-usd", flags.maxBudgetUsd],
  ];
  for (const [name, value] of caps) {
    if (value !== undefined && Number.isNaN(value)) {
      throw new Error(`${name} must be a number, got NaN`);
    }
  }

  const hasCap = Boolean(flags.maxRuns) || Boolean(flags.maxBudgetUsd);
  if (!flags.unsafeNoCostCap && !hasCap && !flags.dryRun) {
    throw new Error(
      "Refusing to run without a cost cap. Pass --max-runs or --max-budget-usd or --unsafe-no-cost-cap. " +
        "Use --dry-run to see the plan without any agent calls.",
    );
  }

  // Copy so callers mutating `config.models` cannot alter the shared default
  // list or the caller's flags object.
  const models = flags.models?.length ? [...flags.models] : [...DEFAULT_MODELS];

  return {
    models,
    reps: flags.reps && flags.reps > 0 ? flags.reps : 3,
    maxRuns: flags.maxRuns ?? Number.MAX_SAFE_INTEGER,
    maxBudgetUsd: flags.maxBudgetUsd ?? Number.MAX_SAFE_INTEGER,
    seed: flags.seed ?? 1,
    fixturePath: flags.fixture ?? defaultFixturePath,
    dryRun: flags.dryRun,
    unsafeNoCostCap: flags.unsafeNoCostCap,
    concurrency: flags.concurrency && flags.concurrency > 0 ? flags.concurrency : 1,
    noResume: flags.noResume,
  };
}
123 
/**
 * Bounded-concurrency executor. Runs `worker(item, index)` for every input,
 * keeping at most `limit` workers active at any time. Preserves output order.
 *
 * Failures inside a worker bubble up as a rejection of the returned promise.
 * Once a worker has failed, no further items are started; items already in
 * flight are left to finish. Callers are expected to wrap workers in their
 * own try/catch when partial failure is acceptable.
 *
 * @param items Inputs to process.
 * @param limit Maximum number of simultaneously active workers. Values below
 *   1 and NaN are treated as 1; fractional values are rounded down.
 * @param worker Async function invoked once per item with the item and its index.
 * @returns Results in the same order as `items`.
 */
export async function runConcurrently<T, R>(
  items: T[],
  limit: number,
  worker: (item: T, index: number) => Promise<R>,
): Promise<R[]> {
  const results: R[] = new Array(items.length);
  // Math.max/min propagate NaN, and Array.from({ length: NaN }) is empty, so a
  // NaN limit would spawn no runners and return without doing any work.
  const requested = Number.isNaN(limit) ? 1 : Math.floor(limit);
  const concurrency = Math.max(1, Math.min(requested, items.length));
  let cursor = 0;
  let failed = false;

  async function drain(): Promise<void> {
    // Each runner pulls the next unclaimed index until the list is exhausted
    // or a sibling runner has failed.
    while (!failed) {
      const index = cursor++;
      if (index >= items.length) return;
      try {
        results[index] = await worker(items[index] as T, index);
      } catch (error) {
        failed = true;
        throw error;
      }
    }
  }

  await Promise.all(Array.from({ length: concurrency }, () => drain()));
  return results;
}
149 
/**
 * Pure-function key derivation for a per-run result file. Used both by the
 * runner (when writing) and the resume scan (when checking). Keeping this in
 * one place prevents the two from drifting.
 *
 * NOTE(review): the IDs are interpolated verbatim, so IDs containing `-` or a
 * path separator are not escaped; confirm callers only pass filename-safe IDs.
 *
 * @returns `<promptId>-<modelId>-<rep>.json`
 */
export function resultFileName(promptId: string, modelId: string, rep: number): string {
  return `${promptId}-${modelId}-${rep}.json`;
}
158 
/**
 * Reads a JSON Lines file and returns one parsed record per non-blank line.
 *
 * Lines are trimmed before parsing, so trailing `\r` and surrounding
 * whitespace are tolerated and blank lines are skipped. Records are cast to
 * `T` without validation.
 *
 * @param path Path of the JSONL file.
 * @throws Error `Fixture missing: <path>` when the file does not exist, and
 *   `Invalid JSONL at <path>:<line>: <reason>` (1-based line) on a parse failure.
 */
export function loadJsonl<T>(path: string): T[] {
  if (!existsSync(path)) {
    throw new Error(`Fixture missing: ${path}`);
  }
  const records: T[] = [];
  const lines = readFileSync(path, "utf-8").split("\n");
  for (const [lineIndex, line] of lines.entries()) {
    const trimmed = line.trim();
    if (!trimmed) continue;
    try {
      records.push(JSON.parse(trimmed) as T);
    } catch (error) {
      const reason = error instanceof Error ? error.message : String(error);
      throw new Error(`Invalid JSONL at ${path}:${lineIndex + 1}: ${reason}`);
    }
  }
  return records;
}
176 
/**
 * Appends `entry` as one compact JSON line to the file at `historyPath`,
 * creating the parent directory first if it does not exist.
 */
export function appendHistoryEntry(historyPath: string, entry: Record<string, unknown>): void {
  const line = `${JSON.stringify(entry)}\n`;
  mkdirSync(dirname(historyPath), { recursive: true });
  appendFileSync(historyPath, line);
}
181 
/**
 * Writes `data` to `path` as 2-space-indented JSON followed by a newline,
 * creating the parent directory first if it does not exist.
 *
 * @throws TypeError when `data` has no JSON representation (for example
 *   `undefined` or a function), instead of writing the literal text
 *   `undefined` to disk. Nothing is created on disk in that case.
 */
export function writeJson(path: string, data: unknown): void {
  // JSON.stringify returns undefined (not a string) for unserialisable roots.
  const serialized: string | undefined = JSON.stringify(data, null, 2);
  if (serialized === undefined) {
    throw new TypeError(`Cannot serialize value of type ${typeof data} as JSON for ${path}`);
  }
  mkdirSync(dirname(path), { recursive: true });
  writeFileSync(path, serialized + "\n");
}
186 
/** Current time as an ISO-8601 UTC string with the milliseconds removed, e.g. `2024-01-31T12:34:56Z`. */
export function utcNow(): string {
  const iso = new Date().toISOString();
  return iso.replace(/\.\d{3}Z$/, "Z");
}
190 
/** Current UTC calendar date as `YYYY-MM-DD` (the first ten characters of the ISO timestamp). */
export function todayUtc(): string {
  const iso = new Date().toISOString();
  return iso.slice(0, 10);
}
194 
/**
 * Returns the output of `git rev-parse HEAD` run in `REPO_ROOT`, trimmed.
 *
 * Best-effort: returns `null` when the command fails for any reason (git not
 * installed, directory is not a repository, ...) or prints nothing.
 */
export function repoCommitSha(): string | null {
  try {
    const sha = execSync("git rev-parse HEAD", {
      cwd: REPO_ROOT,
      encoding: "utf-8",
      // The failure is swallowed below, so keep git's "fatal: ..." message off
      // the parent's stderr as well; only stdout is captured.
      stdio: ["ignore", "pipe", "ignore"],
    }).trim();
    return sha || null;
  } catch {
    return null;
  }
}
202 
/**
 * Short content fingerprint of a file: the first 16 hex characters of the
 * SHA-256 of its contents, read as UTF-8 text.
 *
 * @throws Whatever `readFileSync` throws when the file cannot be read.
 */
export function fixtureSha(path: string): string {
  const hasher = createHash("sha256");
  hasher.update(readFileSync(path, "utf-8"));
  return hasher.digest("hex").slice(0, 16);
}
207 
/**
 * Redacted identifier for the `CURSOR_API_KEY` environment variable.
 *
 * @returns `"unset"` when the variable is missing or shorter than 8
 *   characters; otherwise `***` followed by the key's last four characters.
 */
export function apiKeyFingerprint(): string {
  const key = process.env.CURSOR_API_KEY;
  const usable = key !== undefined && key.length >= 8;
  return usable ? `***${key.slice(-4)}` : "unset";
}
215 
/**
 * Deterministic Fisher-Yates shuffle using a seeded mulberry32 PRNG so the
 * skill ordering inside the routing prompt is reproducible for a given seed.
 *
 * @param input Array to shuffle; it is copied, never mutated.
 * @param seed Seed, coerced to an unsigned 32-bit integer.
 * @returns A new array holding the same elements in shuffled order.
 */
export function shuffleSeeded<T>(input: T[], seed: number): T[] {
  const out = input.slice();
  let state = seed >>> 0;
  const next = (): number => {
    // Keep the state wrapped to uint32. The bitwise operations below only see
    // the low 32 bits anyway, so the sequence is unchanged, but an unbounded
    // running sum would eventually lose integer precision as a double.
    state = (state + 0x6d2b79f5) >>> 0;
    let t = state;
    t = Math.imul(t ^ (t >>> 15), t | 1);
    t ^= t + Math.imul(t ^ (t >>> 7), t | 61);
    return ((t ^ (t >>> 14)) >>> 0) / 4294967296;
  };
  // Walk from the end, swapping each slot with a random slot at or before it.
  for (let i = out.length - 1; i > 0; i--) {
    const j = Math.floor(next() * (i + 1));
    [out[i], out[j]] = [out[j]!, out[i]!];
  }
  return out;
}
236 
/**
 * Expands prompts x models x repetitions into a flat run plan.
 *
 * Items are ordered by prompt, then model, then repetition (0-based). Each
 * item's `shuffleSeed` is `hash32` of `"<promptId>|<modelId>|<rep>|<baseSeed>"`,
 * so it is stable for a given combination and base seed.
 *
 * @param promptIds Prompt identifiers, in plan order.
 * @param models Model identifiers, in plan order.
 * @param reps Number of repetitions per prompt/model pair.
 * @param baseSeed Seed mixed into every item's `shuffleSeed`.
 */
export function buildRunPlan(
  promptIds: string[],
  models: string[],
  reps: number,
  baseSeed: number,
): RunPlanItem[] {
  const plan: RunPlanItem[] = [];
  for (const promptId of promptIds) {
    for (const modelId of models) {
      for (let rep = 0; rep < reps; rep++) {
        const shuffleSeed = hash32(`${promptId}|${modelId}|${rep}|${baseSeed}`);
        plan.push({ promptId, modelId, rep, shuffleSeed });
      }
    }
  }
  return plan;
}
258 
/**
 * Hashes `text` to an unsigned 32-bit integer: the first four bytes of its
 * SHA-256 digest, read big-endian.
 */
export function hash32(text: string): number {
  const hasher = createHash("sha256");
  hasher.update(text);
  return hasher.digest().readUInt32BE(0);
}
263 
/**
 * Loads the name and frontmatter description of every skill listed in
 * `<RESEARCHER_DIR>/corpus/index.json`.
 *
 * The index is expected to be an object with a `skills` array of
 * `{ name, path }` entries, where `path` is relative to `REPO_ROOT`.
 *
 * @returns One `{ name, description }` per index entry, in index order.
 * @throws Error when the index file is missing, is not valid JSON, has no
 *   `skills` array, contains an entry without string `name`/`path`, or when
 *   a skill file is missing or yields no description.
 */
export function loadSkillDescriptions(): Array<{ name: string; description: string }> {
  const corpusPath = join(RESEARCHER_DIR, "corpus", "index.json");
  if (!existsSync(corpusPath)) {
    throw new Error(`Corpus index missing at ${corpusPath}`);
  }

  let corpus: unknown;
  try {
    corpus = JSON.parse(readFileSync(corpusPath, "utf-8"));
  } catch (error) {
    const reason = error instanceof Error ? error.message : String(error);
    throw new Error(`Corpus index at ${corpusPath} is not valid JSON: ${reason}`);
  }

  // Validate the shape up front so a malformed index fails with a message
  // naming the file rather than an opaque "not iterable" TypeError.
  const skills: unknown =
    typeof corpus === "object" && corpus !== null
      ? (corpus as { skills?: unknown }).skills
      : undefined;
  if (!Array.isArray(skills)) {
    throw new Error(`Corpus index at ${corpusPath} has no "skills" array`);
  }

  const out: Array<{ name: string; description: string }> = [];
  for (const entry of skills as unknown[]) {
    const skill = entry as { name?: unknown; path?: unknown } | null;
    if (typeof skill?.name !== "string" || typeof skill.path !== "string") {
      throw new Error(`Corpus index at ${corpusPath} has a skill entry without string name/path`);
    }
    const skillPath = join(REPO_ROOT, skill.path);
    if (!existsSync(skillPath)) {
      throw new Error(`Skill missing: ${skillPath}`);
    }
    const description = extractDescription(readFileSync(skillPath, "utf-8"));
    if (!description) {
      throw new Error(`Skill ${skill.name} has no frontmatter description`);
    }
    out.push({ name: skill.name, description });
  }
  return out;
}
287 
/**
 * Extracts the `description` value from a leading `---` frontmatter block.
 *
 * This is a small line-based reader, not a YAML parser. It supports:
 * - a single-line value, optionally wrapped in matching single or double quotes;
 * - a block scalar introduced by `>`, `>-`, `>+`, `|`, `|-` or `|+`, whose
 *   following lines are trimmed and joined with single spaces until the next
 *   top-level `key:` line or the end of the frontmatter.
 *
 * @param text Full file contents.
 * @returns The description, or `null` when there is no frontmatter, no
 *   closing `---`, or no non-empty description.
 */
export function extractDescription(text: string): string | null {
  if (!text.startsWith("---")) return null;
  const end = text.indexOf("\n---", 4);
  if (end === -1) return null;
  const body = text.slice(4, end);

  const blockIndicators = new Set([">", ">-", ">+", "|", "|-", "|+"]);
  // An unindented `key:` line; keys may contain digits and hyphens
  // (e.g. `allowed-tools:`), which must end the description block too.
  const topLevelKey = /^[\w-]+:/;

  let inDescription = false;
  const lines: string[] = [];
  for (const raw of body.split("\n")) {
    if (raw.startsWith("description:")) {
      const value = raw.slice("description:".length).trim();
      if (value && !blockIndicators.has(value)) {
        lines.push(stripMatchingQuotes(value));
        inDescription = false;
      } else {
        // Empty value or a block-scalar indicator: the text is on the lines below.
        inDescription = true;
      }
      continue;
    }
    if (!inDescription) continue;
    if (topLevelKey.test(raw)) {
      inDescription = false;
      continue;
    }
    const trimmed = raw.trim();
    if (trimmed) lines.push(trimmed);
  }
  return lines.join(" ").trim() || null;
}

/** Removes one pair of surrounding quotes, only when both ends carry the same quote character. */
function stripMatchingQuotes(value: string): string {
  if (value.length >= 2) {
    const first = value[0];
    if ((first === '"' || first === "'") && value.endsWith(first)) {
      return value.slice(1, -1);
    }
  }
  return value;
}
316 
/** Cost estimate for a run plan, as produced by `forecastCost`. */
export interface CostForecast {
  /** Number of items in the plan. */
  totalRuns: number;
  /** Estimated input tokens for a single run. */
  estimatedTokensPerRunInput: number;
  /** Estimated output tokens for a single run. */
  estimatedTokensPerRunOutput: number;
  /** Estimated cost of a single run, in USD. */
  estimatedUsdPerRun: number;
  /** Estimated cost of the whole plan, in USD. */
  estimatedTotalUsd: number;
}
324 
/**
 * Builds a cost forecast for `plan` from per-run estimates.
 *
 * The token and per-run figures are passed through unchanged. The total is
 * `estimatedUsdPerRun * plan.length`, rounded to four decimal places.
 *
 * @param plan The run plan; only its length is used.
 * @param estimatedTokensPerRunInput Estimated input tokens per run.
 * @param estimatedTokensPerRunOutput Estimated output tokens per run.
 * @param estimatedUsdPerRun Estimated USD cost per run.
 */
export function forecastCost(
  plan: RunPlanItem[],
  estimatedTokensPerRunInput: number,
  estimatedTokensPerRunOutput: number,
  estimatedUsdPerRun: number,
): CostForecast {
  const totalRuns = plan.length;
  const rawTotalUsd = estimatedUsdPerRun * totalRuns;
  return {
    totalRuns,
    estimatedTokensPerRunInput,
    estimatedTokensPerRunOutput,
    estimatedUsdPerRun,
    estimatedTotalUsd: Number(rawTotalUsd.toFixed(4)),
  };
}
339 
/**
 * Throws when the plan exceeds either configured cap.
 *
 * Both checks fail closed: if either side of a comparison is NaN the check
 * throws rather than silently passing, since a cap that cannot be evaluated
 * provides no protection.
 *
 * @param plan The run plan; its length is compared with `config.maxRuns`.
 * @param forecast Forecast whose `estimatedTotalUsd` is compared with `config.maxBudgetUsd`.
 * @param config Resolved configuration holding the caps.
 * @throws Error naming the violated cap.
 */
export function assertBudget(plan: RunPlanItem[], forecast: CostForecast, config: ResolvedConfig): void {
  // Written as a negated `<=` (not `>`) so that NaN on either side throws.
  const withinRunCap = plan.length <= config.maxRuns;
  if (!withinRunCap) {
    throw new Error(`Plan size ${plan.length} exceeds --max-runs ${config.maxRuns}`);
  }
  const withinBudget = forecast.estimatedTotalUsd <= config.maxBudgetUsd;
  if (!withinBudget) {
    throw new Error(
      `Forecast ${forecast.estimatedTotalUsd} USD exceeds --max-budget-usd ${config.maxBudgetUsd}`,
    );
  }
}
350 
/** Formats `label` as a banner: the label on its own line between two rules of 72 `=` characters. */
export function runHeader(label: string): string {
  const rule = "=".repeat(72);
  return [rule, label, rule].join("\n");
}
355
Preparing the source view

Agent Skills for Context Engineering

researcher/benchmarks/sdk-runner/src/common.ts