Loading source
Pulling the file list, source metadata, and syntax-aware rendering for this listing.
Source from repo
A comprehensive collection of Agent Skills for context engineering, multi-agent architectures, and production agent systems.
Files
Skill
Size
Entrypoint
Format
Open file
Syntax-highlighted preview of this file as included in the skill package.
researcher/scripts/research_loop.py
#!/usr/bin/env python3
"""Create and manage file-based research-to-skill runs.

This runner does not call an LLM. It creates durable artifacts that autonomous
agents or humans can fill in, then runs deterministic validation.
"""

from __future__ import annotations

import argparse
import json
import re
import shutil
import subprocess
import sys
from datetime import datetime, timezone
from pathlib import Path
from typing import Any
from uuid import uuid4


ROOT = Path(__file__).resolve().parents[2]
RESEARCHER = ROOT / "researcher"
LOCKED_SURFACES = [
    "researcher/rubrics/content-curation.md",
    "researcher/rubrics/skill-change.md",
    "researcher/rubrics/harness-change.md",
    "researcher/mechanisms/registry.jsonl",
    ".claude-plugin/marketplace.json",
    ".plugin/plugin.json",
    "researcher/scripts/validate_repo.py",
]
VALID_CLOSE_STATUS = {"accepted", "rejected", "reference-only", "abandoned"}


def slugify(value: str) -> str:
    """Return a lowercase, hyphen-separated slug of at most 64 characters.

    Every run of characters outside ``[a-z0-9]`` becomes a single hyphen and
    leading/trailing hyphens are removed. Falls back to ``"research-run"``
    when nothing usable remains.
    """
    value = value.lower()
    value = re.sub(r"[^a-z0-9]+", "-", value)
    value = re.sub(r"-+", "-", value).strip("-")
    # Truncation can cut right after a separator; strip again so the slug
    # (and the run directory named after it) never ends with a hyphen.
    return value[:64].rstrip("-") or "research-run"


def utc_now() -> str:
    """Return the current UTC time as an ISO-8601 string with second precision."""
    return datetime.now(timezone.utc).replace(microsecond=0).isoformat()


def load_json(path: Path) -> Any:
    """Parse and return the UTF-8 JSON document stored at *path*."""
    return json.loads(path.read_text(encoding="utf-8"))


def write_json(path: Path, data: Any) -> None:
    """Write *data* to *path* as indented JSON followed by a newline."""
    path.write_text(json.dumps(data, indent=2) + "\n", encoding="utf-8")


def append_jsonl(path: Path, data: Any) -> None:
    """Append *data* as one key-sorted JSON line, creating parent directories."""
    path.parent.mkdir(parents=True, exist_ok=True)
    with path.open("a", encoding="utf-8") as handle:
        handle.write(json.dumps(data, sort_keys=True) + "\n")


def run_relative(run_dir: Path) -> str:
    """Return *run_dir* relative to ``ROOT`` as a string.

    Raises:
        ValueError: if *run_dir* is not located under ``ROOT``.
    """
    return str(run_dir.relative_to(ROOT))


def state_path(run_dir: Path) -> Path:
    """Return the path of the run's ``run-state.json`` file."""
    return run_dir / "run-state.json"


def initial_state(run_dir: Path, title: str, source_url: str | None) -> dict[str, Any]:
    """Build the state record for a freshly initialized run.

    The record starts in the ``initialized`` state with a single history
    entry; a missing *source_url* is stored as an empty string.
    """
    timestamp = utc_now()
    relative = run_relative(run_dir)
    return {
        "run_id": run_dir.name,
        "title": title,
        "source_url": source_url or "",
        "current_state": "initialized",
        "close_status": None,
        "close_reason": None,
        # Copy so callers mutating the state cannot alter the module constant.
        "locked_surfaces": list(LOCKED_SURFACES),
        "editable_surfaces": [
            f"{relative}/sources/",
            f"{relative}/proposals/",
            f"{relative}/reports/",
            f"{relative}/logs/",
        ],
        "state_history": [
            {
                "state": "initialized",
                "timestamp": timestamp,
                "reason": "run initialized",
                "evidence": relative,
            }
        ],
        "created_at": timestamp,
        "updated_at": timestamp,
    }


def load_state(run_dir: Path) -> dict[str, Any]:
    """Load the run state, or synthesize an initial one if none is on disk."""
    path = state_path(run_dir)
    if not path.exists():
        return initial_state(run_dir, run_dir.name, None)
    return load_json(path)


def write_state(run_dir: Path, state: dict[str, Any]) -> None:
    """Stamp ``updated_at`` on *state* (in place) and persist it."""
    state["updated_at"] = utc_now()
    write_json(state_path(run_dir), state)


def set_state(run_dir: Path, state_name: str, reason: str, evidence: str = "") -> None:
    """Move the run to *state_name*, recording the transition.

    Appends a history entry to ``run-state.json`` and a decision entry to
    ``THREAD.md`` (when that file exists).
    """
    state = load_state(run_dir)
    state["current_state"] = state_name
    state.setdefault("state_history", []).append(
        {
            "state": state_name,
            "timestamp": utc_now(),
            "reason": reason,
            "evidence": evidence,
        }
    )
    write_state(run_dir, state)
    append_thread_decision(run_dir, f"state -> {state_name}", reason, evidence, "continue run")


def append_thread_decision(run_dir: Path, decision: str, reason: str, evidence: str, next_action: str) -> None:
    """Append a fenced decision entry to the run's ``THREAD.md``.

    Does nothing when the thread file does not exist.
    """
    thread = run_dir / "THREAD.md"
    if not thread.exists():
        return
    entry = (
        "\n```text\n"
        f"{utc_now()} decision: {decision}\n"
        f"reason: {reason}\n"
        f"evidence: {evidence}\n"
        f"next: {next_action}\n"
        "```\n"
    )
    with thread.open("a", encoding="utf-8") as handle:
        handle.write(entry)


def create_thread(run_dir: Path, title: str, source_url: str | None) -> None:
    """Write the initial ``THREAD.md`` for a run.

    The locked-surface bullets are generated from ``LOCKED_SURFACES`` so the
    thread cannot drift from the state file.
    """
    relative = run_relative(run_dir)
    locked = "\n".join(f"- `{surface}`" for surface in LOCKED_SURFACES)
    # NOTE(review): run-state.json also lists `logs/` as an editable surface,
    # while this thread template does not; confirm which one is intended.
    thread = f"""# Research Thread: {run_dir.name}

## Mission

- Objective: {title}
- Scope: Evaluate candidate sources and produce reviewable skill proposals.
- Started: {utc_now()}
- Owner: autonomous-research-loop
- Current status: active

## Locked Surfaces

{locked}
- Merge policy: agents may prepare PRs, but human approval is required for push and merge.

## Editable Surfaces

- `{relative}/sources/`
- `{relative}/proposals/`
- `{relative}/reports/`

## Source Queue

| ID | Source | Status | Next Action |
| --- | --- | --- | --- |
| S001 | {source_url or 'TBD'} | discovered | evaluate |

## Decisions

```text
T+00:00 decision: run initialized
reason: file-based research loop created durable state
evidence: {relative}
next: fill source evaluation and skill proposal
```

## Experiments And Evaluations

| ID | Artifact | Rubric | Result | Notes |
| --- | --- | --- | --- | --- |

## Open Questions

- None recorded.

## Handover Summary

- Best current candidate: none yet
- Rejected candidates: none yet
- Unresolved risks: source evaluation is a draft until rubric fields are completed
- Files to read first: `THREAD.md`, `sources/evaluations/source-evaluation-draft.json`, `proposals/skill-proposal.md`
- Next concrete action: retrieve the source and complete content curation
"""
    (run_dir / "THREAD.md").write_text(thread, encoding="utf-8")


def create_source_evaluation(
    run_dir: Path,
    title: str,
    source_url: str,
    author_or_org: str,
    source_type: str,
) -> None:
    """Write a draft source evaluation based on the repository template.

    The draft is marked ``HUMAN_REVIEW`` with low confidence; its retrieval
    status is ``partial`` when a URL is given and ``failed`` otherwise.
    """
    template = load_json(RESEARCHER / "templates" / "source-evaluation.json")
    template["evaluation_id"] = str(uuid4())
    template["timestamp"] = utc_now()
    template["source"].update(
        {
            "url": source_url,
            "title": title,
            "author_or_org": author_or_org,
            "published_at": "",
            "source_type": source_type,
            "retrieval_status": "partial" if source_url else "failed",
            "primary_or_secondary": "primary",
        }
    )
    template["decision"].update(
        {
            "verdict": "HUMAN_REVIEW",
            # NOTE(review): this is the string "null", not JSON null;
            # presumably the template schema expects a string - confirm.
            "override_triggered": "null",
            "confidence": "low",
            "justification": "Draft scaffold. Complete gates and scoring after retrieval.",
        }
    )
    write_json(run_dir / "sources" / "evaluations" / "source-evaluation-draft.json", template)


def create_skill_proposal(run_dir: Path, title: str, source_url: str) -> None:
    """Instantiate the skill and mechanism proposal templates for a run.

    Placeholder lines of the skill-proposal template are filled in by plain
    text replacement; the mechanism proposal template is copied verbatim.
    """
    proposal = (RESEARCHER / "templates" / "skill-proposal.md").read_text(encoding="utf-8")
    replacements = [
        ("# Skill Proposal: [Short Title]", f"# Skill Proposal: {title}"),
        ("- URL:", f"- URL: {source_url}"),
        ("- Title:", f"- Title: {title}"),
        ("- Retrieval status:", "- Retrieval status: partial"),
        ("- Decision: APPROVE / HUMAN_REVIEW / REJECT", "- Decision: HUMAN_REVIEW"),
    ]
    for placeholder, filled in replacements:
        proposal = proposal.replace(placeholder, filled)
    (run_dir / "proposals" / "skill-proposal.md").write_text(proposal, encoding="utf-8")
    (run_dir / "proposals" / "mechanism-proposal.jsonl").write_text(
        (RESEARCHER / "templates" / "mechanism-proposal.jsonl").read_text(encoding="utf-8"),
        encoding="utf-8",
    )


def update_queue(run_dir: Path, **updates: Any) -> None:
    """Merge *updates* into the first record of the run's source queue.

    Raises:
        FileNotFoundError: if ``sources/queue.jsonl`` does not exist.
        ValueError: if the queue contains no records.
    """
    queue = run_dir / "sources" / "queue.jsonl"
    if not queue.exists():
        raise FileNotFoundError(f"source queue missing: {queue}")
    records = [json.loads(line) for line in queue.read_text(encoding="utf-8").splitlines() if line.strip()]
    if not records:
        raise ValueError("source queue is empty")
    records[0].update(updates)
    queue.write_text("\n".join(json.dumps(record, sort_keys=True) for record in records) + "\n", encoding="utf-8")


def completed_evaluation(run_dir: Path) -> Path:
    """Return the first (sorted) evaluation JSON whose stem lacks ``draft``.

    Raises:
        FileNotFoundError: if no such evaluation file exists.
    """
    eval_dir = run_dir / "sources" / "evaluations"
    completed = sorted(path for path in eval_dir.glob("*.json") if "draft" not in path.stem)
    if not completed:
        raise FileNotFoundError("completed source evaluation missing")
    return completed[0]


def proposal_file(run_dir: Path) -> Path:
    """Return the run's skill proposal path.

    Raises:
        FileNotFoundError: if the proposal file does not exist.
    """
    path = run_dir / "proposals" / "skill-proposal.md"
    if not path.exists():
        raise FileNotFoundError(f"skill proposal missing: {path}")
    return path


def retrieve_source(args: argparse.Namespace) -> int:
    """Copy raw evidence files into the run and mark the source retrieved.

    Files are copied by base name into ``sources/evidence/raw``, so two
    inputs sharing a name overwrite each other. Returns 0 on success.
    """
    run_dir = args.run_dir.resolve()
    raw_dir = run_dir / "sources" / "evidence" / "raw"
    raw_dir.mkdir(parents=True, exist_ok=True)
    copied: list[str] = []
    for source in args.file or []:
        target = raw_dir / source.name
        shutil.copy2(source, target)
        copied.append(str(target.relative_to(ROOT)))
    update_queue(
        run_dir,
        retrieval_status="retrieved",
        retrieved_at=utc_now(),
        raw_evidence=copied,
        retrieval_notes=args.notes,
    )
    set_state(
        run_dir,
        "retrieved",
        "source evidence retrieved",
        ", ".join(copied) if copied else "manual retrieval recorded",
    )
    return 0


def mark_evaluated(args: argparse.Namespace) -> int:
    """Record the completed source evaluation in the queue and run state."""
    run_dir = args.run_dir.resolve()
    eval_path = completed_evaluation(run_dir)
    data = load_json(eval_path)
    relative_eval = str(eval_path.relative_to(ROOT))
    update_queue(
        run_dir,
        retrieval_status=data.get("source", {}).get("retrieval_status", "retrieved"),
        evaluation_file=relative_eval,
        evaluation_decision=data.get("decision", {}).get("verdict", ""),
    )
    set_state(run_dir, "evaluated", "completed source evaluation recorded", relative_eval)
    return 0


def mark_proposed(args: argparse.Namespace) -> int:
    """Move the run to ``proposed`` once a skill proposal file exists."""
    run_dir = args.run_dir.resolve()
    path = proposal_file(run_dir)
    set_state(run_dir, "proposed", "skill proposal recorded", str(path.relative_to(ROOT)))
    return 0


def _parse_novelty_output(completed: subprocess.CompletedProcess[str]) -> dict[str, Any]:
    """Return the novelty checker's JSON object, or a human-review fallback."""
    if not completed.stdout:
        return {
            "verdict": "human_review",
            "error": completed.stderr or "novelty_check.py produced no output",
        }
    try:
        parsed = json.loads(completed.stdout)
    except json.JSONDecodeError:
        parsed = None
    if isinstance(parsed, dict):
        return parsed
    # Unparseable or non-object output is treated like missing output so the
    # run still gets a durable result file instead of a traceback.
    return {
        "verdict": "human_review",
        "error": completed.stderr or "novelty_check.py produced invalid JSON output",
    }


def run_novelty(args: argparse.Namespace) -> int:
    """Run ``novelty_check.py`` on the skill proposal and persist its result.

    The result is written to ``reports/novelty-result.json``. When the
    checker prints nothing or something that is not a JSON object, a
    ``human_review`` fallback record is stored instead. Returns the
    checker's exit code.
    """
    run_dir = args.run_dir.resolve()
    path = proposal_file(run_dir)
    result_path = run_dir / "reports" / "novelty-result.json"
    cmd = [
        sys.executable,
        str(RESEARCHER / "scripts" / "novelty_check.py"),
        "--file",
        str(path),
        "--json",
    ]
    completed = subprocess.run(cmd, cwd=ROOT, text=True, capture_output=True, check=False)
    result = _parse_novelty_output(completed)
    if args.human_review_rationale:
        result["human_review_rationale"] = args.human_review_rationale
    write_json(result_path, result)
    set_state(run_dir, "novelty_checked", "novelty result recorded", str(result_path.relative_to(ROOT)))
    return completed.returncode


def run_run_validator(run_dir: Path) -> int:
    """Run ``validate_run.py`` for *run_dir* and write readiness reports.

    Writes ``reports/run-readiness.json`` and ``reports/run-readiness.md``.
    The run state moves to ``validated`` only when the validator exits 0.
    Returns the validator's exit code.
    """
    report_json = run_dir / "reports" / "run-readiness.json"
    report_md = run_dir / "reports" / "run-readiness.md"
    cmd = [
        sys.executable,
        str(RESEARCHER / "scripts" / "validate_run.py"),
        "--run-dir",
        str(run_dir),
        "--json",
    ]
    completed = subprocess.run(cmd, cwd=ROOT, text=True, capture_output=True, check=False)
    report_json.write_text(completed.stdout or json.dumps({"ok": False, "error": completed.stderr}), encoding="utf-8")
    try:
        data = json.loads(completed.stdout)
    except json.JSONDecodeError:
        data = None
    if isinstance(data, dict):
        summary = data.get("summary", {})
        text = (
            "# Run Readiness Report\n\n"
            f"Run validation {'passed' if data.get('ok') else 'failed'}: "
            f"{summary.get('errors', 0)} errors, {summary.get('warnings', 0)} warnings.\n"
        )
    else:
        # Empty, invalid, or non-object output: report the command failure.
        text = f"# Run Readiness Report\n\nValidation command failed.\n\n{completed.stderr}\n"
    report_md.write_text(text, encoding="utf-8")
    if completed.returncode == 0:
        set_state(run_dir, "validated", "run readiness validation passed", str(report_json.relative_to(ROOT)))
    return completed.returncode


def run_validator(run_dir: Path) -> int:
    """Run ``validate_repo.py`` and store its reports under *run_dir*.

    Writes ``reports/validation-report.json`` (the validator's stdout, or a
    synthesized error record when it printed nothing) and a one-line
    Markdown summary. Returns the validator's exit code.
    """
    report_json = run_dir / "reports" / "validation-report.json"
    report_md = run_dir / "reports" / "validation-report.md"
    cmd = [
        sys.executable,
        str(RESEARCHER / "scripts" / "validate_repo.py"),
        "--root",
        str(ROOT),
        "--json",
    ]
    completed = subprocess.run(cmd, cwd=ROOT, text=True, capture_output=True, check=False)
    if completed.stdout:
        report_json.write_text(completed.stdout, encoding="utf-8")
    else:
        report_json.write_text(
            json.dumps(
                {
                    "ok": False,
                    "summary": {"errors": 1, "warnings": 0, "skill_count": 0},
                    "findings": [
                        {
                            "severity": "error",
                            "path": "researcher/scripts/validate_repo.py",
                            "message": completed.stderr or "validator produced no output",
                        }
                    ],
                },
                indent=2,
            )
            + "\n",
            encoding="utf-8",
        )
    summary = "Validation command exited with code " + str(completed.returncode)
    if completed.stdout:
        try:
            data = json.loads(completed.stdout)
            summary = (
                f"Validation {'passed' if data.get('ok') else 'failed'}: "
                f"{data.get('summary', {}).get('errors', 0)} errors, "
                f"{data.get('summary', {}).get('warnings', 0)} warnings."
            )
        except json.JSONDecodeError:
            # Keep the exit-code summary when stdout is not valid JSON.
            pass
    report_md.write_text(f"# Validation Report\n\n{summary}\n", encoding="utf-8")
    return completed.returncode


def write_pr_readiness(args: argparse.Namespace) -> int:
    """Write ``reports/pr-readiness.md`` and move the run to ``pr_ready``."""
    run_dir = args.run_dir.resolve()
    path = run_dir / "reports" / "pr-readiness.md"
    text = f"""# PR Readiness Notes

## Summary

{args.summary}

## Test Plan

{args.test_plan}

## Risks

{args.risks}

Merge and push require explicit human approval.
"""
    path.write_text(text, encoding="utf-8")
    set_state(run_dir, "pr_ready", "PR readiness notes recorded", str(path.relative_to(ROOT)))
    return 0


def close_run(args: argparse.Namespace) -> int:
    """Close a run: write ``reports/closure.json`` and update state/thread.

    Raises:
        ValueError: if ``args.status`` is not in ``VALID_CLOSE_STATUS``.
    """
    if args.status not in VALID_CLOSE_STATUS:
        raise ValueError(f"status must be one of {sorted(VALID_CLOSE_STATUS)}")
    run_dir = args.run_dir.resolve()
    closure = {
        "status": args.status,
        "reason": args.reason,
        "closed_at": utc_now(),
        "reviewed_by": args.reviewed_by,
    }
    path = run_dir / "reports" / "closure.json"
    write_json(path, closure)
    evidence = str(path.relative_to(ROOT))
    state = load_state(run_dir)
    state["current_state"] = "closed"
    state["close_status"] = args.status
    state["close_reason"] = args.reason
    state.setdefault("state_history", []).append(
        {
            "state": "closed",
            "timestamp": utc_now(),
            "reason": args.reason,
            "evidence": evidence,
        }
    )
    write_state(run_dir, state)
    append_thread_decision(run_dir, f"closed as {args.status}", args.reason, evidence, "stop run")
    return 0


def load_jsonl(path: Path) -> list[dict[str, Any]]:
    """Parse a JSONL file into a list of records, skipping blank lines.

    Raises:
        FileNotFoundError: if *path* does not exist.
        ValueError: if a line is not valid JSON (message includes the line number).
    """
    if not path.exists():
        raise FileNotFoundError(f"JSONL file missing: {path}")
    records: list[dict[str, Any]] = []
    for line_number, line in enumerate(path.read_text(encoding="utf-8").splitlines(), start=1):
        if not line.strip():
            continue
        try:
            records.append(json.loads(line))
        except json.JSONDecodeError as exc:
            raise ValueError(f"{path}:{line_number} invalid JSONL: {exc}") from exc
    return records


def run_readiness_ok(run_dir: Path) -> bool:
    """Return True when ``validate_run.py`` exits 0 and reports ``ok``."""
    cmd = [
        sys.executable,
        str(RESEARCHER / "scripts" / "validate_run.py"),
        "--run-dir",
        str(run_dir),
        "--json",
    ]
    completed = subprocess.run(cmd, cwd=ROOT, text=True, capture_output=True, check=False)
    if completed.returncode != 0:
        return False
    try:
        return bool(json.loads(completed.stdout).get("ok"))
    except json.JSONDecodeError:
        return False


def promote_mechanisms(args: argparse.Namespace) -> int:
    """Apply reviewed mechanism proposals to the registry and ledgers.

    ``accepted``/``candidate`` proposals are appended to the mechanism
    registry and the accepted ledger; ``rejected`` proposals go to the
    rejected ledger. Every proposal is validated before anything is written,
    so a bad entry cannot leave the registry partially updated.

    Raises:
        ValueError: on a missing reviewer, an empty proposal file, a
            placeholder or duplicate mechanism id, an unknown status, or a
            failed readiness check (unless ``--allow-unready`` is set).
    """
    if not args.reviewed_by:
        raise ValueError("--reviewed-by is required for mechanism promotion")
    run_dir = args.run_dir.resolve()
    proposal_path = run_dir / "proposals" / "mechanism-proposal.jsonl"
    proposals = load_jsonl(proposal_path)
    if not proposals:
        raise ValueError("mechanism proposal file is empty")

    registry_path = RESEARCHER / "mechanisms" / "registry.jsonl"
    accepted_ledger = RESEARCHER / "mechanisms" / "ledgers" / "accepted.jsonl"
    rejected_ledger = RESEARCHER / "mechanisms" / "ledgers" / "rejected.jsonl"
    existing_ids = {
        record.get("mechanism_id")
        for record in load_jsonl(registry_path)
        if record.get("mechanism_id")
    }
    readiness_ok = run_readiness_ok(run_dir)

    # Pass 1: validate everything up front; no file is touched until all pass.
    for proposal in proposals:
        status = proposal.get("status_recommendation")
        mechanism_id = proposal.get("mechanism_id")
        if not mechanism_id or mechanism_id == "kebab-case-id":
            raise ValueError("mechanism proposal contains placeholder mechanism_id")
        if status in {"accepted", "candidate"}:
            if not readiness_ok and not args.allow_unready:
                raise ValueError("run readiness must pass before promoting accepted or candidate mechanisms")
            if mechanism_id in existing_ids:
                raise ValueError(f"mechanism already exists in registry: {mechanism_id}")
            # Track ids so duplicates inside the proposal file are caught too.
            existing_ids.add(mechanism_id)
        elif status != "rejected":
            raise ValueError("status_recommendation must be accepted, candidate, or rejected")

    # Pass 2: write registry entries and ledger events.
    promoted = 0
    rejected = 0
    for proposal in proposals:
        status = proposal.get("status_recommendation")
        mechanism_id = proposal.get("mechanism_id")
        event = {
            "mechanism_id": mechanism_id,
            "status": status,
            "reviewed_by": args.reviewed_by,
            "run_dir": run_relative(run_dir),
            "rationale": proposal.get("review_rationale", ""),
            "timestamp": utc_now(),
        }
        if status == "rejected":
            append_jsonl(rejected_ledger, event)
            rejected += 1
            continue
        registry_entry = {
            "mechanism_id": mechanism_id,
            "owning_skill": proposal.get("owning_skill", ""),
            "status": status,
            "activation_scenario": proposal.get("activation_scenario", ""),
            "behavior_change": proposal.get("behavior_change", ""),
            "evidence": proposal.get("evidence_claim_ids", []),
            "failure_modes": proposal.get("failure_modes", []),
        }
        append_jsonl(registry_path, registry_entry)
        append_jsonl(accepted_ledger, event)
        promoted += 1

    set_state(
        run_dir,
        "validated" if promoted else "closed",
        f"mechanism promotion complete: {promoted} promoted, {rejected} rejected",
        str(proposal_path.relative_to(ROOT)),
    )
    return 0


def create_run(args: argparse.Namespace) -> Path:
    """Create a new timestamped run directory with all scaffold artifacts.

    Creates the directory tree, source queue, thread, state file, draft
    evaluation, and proposals, then runs repository validation (its exit
    code is not propagated). Returns the new run directory.

    Raises:
        FileExistsError: if the run directory tree already exists.
    """
    timestamp = datetime.now(timezone.utc).strftime("%Y%m%d-%H%M%S")
    run_dir = RESEARCHER / "runs" / f"{timestamp}-{slugify(args.title)}"
    for child in [
        run_dir / "sources" / "evaluations",
        run_dir / "proposals",
        run_dir / "reports",
        run_dir / "logs",
        run_dir / "sources" / "evidence" / "raw",
    ]:
        # exist_ok=False: refuse to scaffold over an existing run.
        child.mkdir(parents=True, exist_ok=False)

    source_record = {
        "id": "S001",
        "url": args.url,
        "title": args.title,
        "author_or_org": args.author_or_org,
        "source_type": args.source_type,
        "retrieval_status": "partial" if args.url else "failed",
        "candidate_reason": args.reason,
        "created_at": utc_now(),
    }
    (run_dir / "sources" / "queue.jsonl").write_text(
        json.dumps(source_record) + "\n",
        encoding="utf-8",
    )
    create_thread(run_dir, args.title, args.url)
    write_state(run_dir, initial_state(run_dir, args.title, args.url))
    create_source_evaluation(run_dir, args.title, args.url, args.author_or_org, args.source_type)
    create_skill_proposal(run_dir, args.title, args.url)
    run_validator(run_dir)
    return run_dir


def main() -> int:
    """Parse the command line and dispatch to the selected subcommand.

    Returns the subcommand's exit code.
    """
    parser = argparse.ArgumentParser(description="Create durable research-to-skill run artifacts")
    sub = parser.add_subparsers(dest="command", required=True)

    init = sub.add_parser("init", help="initialize a research run")
    init.add_argument("--title", required=True, help="candidate source or run title")
    init.add_argument("--url", default="", help="candidate source URL")
    init.add_argument("--author-or-org", default="", help="source author or organization")
    init.add_argument(
        "--source-type",
        default="other",
        choices=["paper", "engineering_blog", "documentation", "benchmark", "code", "talk", "other"],
    )
    init.add_argument("--reason", default="", help="why this source/run matters")

    validate = sub.add_parser("validate", help="run deterministic repo validation")
    validate.add_argument("--run-dir", type=Path, help="optional run directory to store report")

    retrieve = sub.add_parser("retrieve", help="record retrieved source evidence for a run")
    retrieve.add_argument("--run-dir", type=Path, required=True)
    retrieve.add_argument("--file", type=Path, action="append", help="raw evidence file to copy into the run")
    retrieve.add_argument("--notes", default="", help="retrieval notes")

    evaluate = sub.add_parser("evaluate", help="mark a run as source-evaluated")
    evaluate.add_argument("--run-dir", type=Path, required=True)

    propose = sub.add_parser("propose", help="mark a run as having a proposal")
    propose.add_argument("--run-dir", type=Path, required=True)

    novelty = sub.add_parser("novelty", help="run novelty check and persist result")
    novelty.add_argument("--run-dir", type=Path, required=True)
    novelty.add_argument("--human-review-rationale", default="")

    validate_run = sub.add_parser("validate-run", help="validate run publish readiness")
    validate_run.add_argument("--run-dir", type=Path, required=True)

    pr_ready = sub.add_parser("pr-ready", help="write PR readiness notes")
    pr_ready.add_argument("--run-dir", type=Path, required=True)
    pr_ready.add_argument("--summary", required=True)
    pr_ready.add_argument("--test-plan", required=True)
    pr_ready.add_argument("--risks", required=True)

    close = sub.add_parser("close", help="close a run with rationale")
    close.add_argument("--run-dir", type=Path, required=True)
    close.add_argument("--status", required=True, choices=sorted(VALID_CLOSE_STATUS))
    close.add_argument("--reason", required=True)
    close.add_argument("--reviewed-by", default="")

    promote = sub.add_parser("promote-mechanisms", help="promote reviewed mechanism proposals")
    promote.add_argument("--run-dir", type=Path, required=True)
    promote.add_argument("--reviewed-by", required=True)
    promote.add_argument(
        "--allow-unready",
        action="store_true",
        help="allow promotion before run readiness; intended only for bootstrapping fixtures",
    )

    args = parser.parse_args()
    if args.command == "init":
        run_dir = create_run(args)
        print(run_dir.relative_to(ROOT))
        return 0
    if args.command == "validate":
        if args.run_dir:
            (args.run_dir / "reports").mkdir(parents=True, exist_ok=True)
            return run_validator(args.run_dir)
        cmd = [sys.executable, str(RESEARCHER / "scripts" / "validate_repo.py")]
        return subprocess.run(cmd, cwd=ROOT, check=False).returncode
    if args.command == "validate-run":
        return run_run_validator(args.run_dir.resolve())

    # Remaining subcommands all take the parsed namespace directly.
    handlers = {
        "retrieve": retrieve_source,
        "evaluate": mark_evaluated,
        "propose": mark_proposed,
        "novelty": run_novelty,
        "pr-ready": write_pr_readiness,
        "close": close_run,
        "promote-mechanisms": promote_mechanisms,
    }
    handler = handlers.get(args.command)
    if handler is None:
        return 1
    return handler(args)


if __name__ == "__main__":
    sys.exit(main())