Source from repo
Agent Skills for Context Engineering

A comprehensive collection of Agent Skills for context engineering, multi-agent architectures, and production agent systems.
muratcankoylanGitHub muratcankoylanSource repo Original GitHub link
Files
339
Skill
n/a
Size
4.3 MB
Entrypoint
SKILL.md
Format
git-repo
Open file
researcher/scripts/validate_run.py

Syntax-highlighted preview of this file as included in the skill package.
Rendered Source
code266 linesFree
researcher/scripts/validate_run.py
1#!/usr/bin/env python3
2"""Validate publish readiness for a single research run.
3 
4This complements validate_repo.py. Repo validation answers whether the corpus is
5structurally healthy; run validation answers whether one run is ready to produce
6reviewable corpus changes.
7"""
8 
9from __future__ import annotations
10 
11import argparse
12import json
13import re
14import sys
15from dataclasses import asdict, dataclass
16from pathlib import Path
17from typing import Any
18 
19 
# Third ancestor directory of this file. With the script stored at
# researcher/scripts/validate_run.py this resolves to the repository root;
# RunValidator uses it to report paths relative to the repo.
ROOT = Path(__file__).resolve().parents[2]
# Values accepted for "current_state" in a run's run-state.json.
VALID_STATES = {
    "initialized",
    "retrieved",
    "evaluated",
    "proposed",
    "novelty_checked",
    "validated",
    "pr_ready",
    "closed",
}
# Values accepted for "status" in a run's reports/closure.json.
VALID_CLOSE_STATUS = {"accepted", "rejected", "reference-only", "abandoned"}
# Regular expressions (applied with re.search) whose presence in a skill
# proposal is reported as an unfilled placeholder.
PLACEHOLDER_PATTERNS = [
    r"\[Short Title\]",
    r"Describe the implementable mechanism",
    r"State one of:",
    r'path: ""',
    r'section: ""',
    r'summary: ""',
    r"Verdict: pending",
]
41 
42 
@dataclass
class Finding:
    """A single validation result recorded by RunValidator."""

    # "error" or "warning" (the two values RunValidator.error/warn pass in).
    severity: str
    # Path the finding refers to, as produced by RunValidator.rel().
    path: str
    # Human-readable description of the problem.
    message: str
48 
49 
class RunValidator:
    """Collect publish-readiness findings for one research run directory.

    Each ``validate_*`` method inspects one artifact under ``run_dir`` and
    records problems through :meth:`error` / :meth:`warn` instead of raising,
    so a single call to :meth:`run` reports every problem it can find.
    Malformed input (unreadable files, JSON of the wrong shape) is reported as
    an error finding rather than crashing the validator.
    """

    # Sentinel returned by _load() when a file could not be read or parsed.
    # Needed because JSON ``null`` legitimately parses to ``None``.
    _UNREADABLE = object()

    # Number of leading proposal lines skipped before scanning for evidence
    # table rows.
    # NOTE(review): presumably the proposal template has earlier tables in its
    # first 30 lines -- confirm against the template.
    _EVIDENCE_SCAN_START = 30

    def __init__(self, run_dir: Path) -> None:
        """Store the resolved run directory and start with no findings."""
        self.run_dir = run_dir.resolve()
        self.root = ROOT.resolve()
        self.findings: list[Finding] = []

    def rel(self, path: Path | str) -> str:
        """Return *path* relative to ``self.root``, or as given if outside it."""
        p = Path(path)
        try:
            return str(p.relative_to(self.root))
        except ValueError:
            return str(p)

    def error(self, path: Path | str, message: str) -> None:
        """Record an error-severity finding for *path*."""
        self.findings.append(Finding("error", self.rel(path), message))

    def warn(self, path: Path | str, message: str) -> None:
        """Record a warning-severity finding for *path*."""
        self.findings.append(Finding("warning", self.rel(path), message))

    def run(self) -> dict[str, Any]:
        """Run every applicable check and return a JSON-serializable report.

        Returns:
            A dict with ``ok`` (True when there are no error findings),
            ``run_dir``, ``summary`` (error and warning counts) and
            ``findings`` (each Finding converted with ``asdict``).
        """
        if not self.run_dir.exists():
            self.error(self.run_dir, "run directory missing")
        else:
            state_data = self.validate_state()
            current_state = (state_data or {}).get("current_state")
            if current_state == "closed":
                # Closed runs are terminal. validate_run only enforces publish
                # readiness for active runs; closed runs are validated by their
                # own closure.json plus repo-level checks.
                # NOTE(review): validate_closure() is silent when closure.json
                # is absent, so a closed run without one passes here -- confirm
                # that is intended.
                self.validate_closure()
            else:
                self.validate_queue()
                source_status = self.validate_source_evaluation()
                self.validate_proposal(source_status)
                self.validate_novelty()
                self.validate_pr_readiness()
                self.validate_closure()

        errors = sum(1 for finding in self.findings if finding.severity == "error")
        warnings = sum(1 for finding in self.findings if finding.severity == "warning")
        return {
            "ok": errors == 0,
            "run_dir": self.rel(self.run_dir),
            "summary": {"errors": errors, "warnings": warnings},
            "findings": [asdict(finding) for finding in self.findings],
        }

    def validate_state(self) -> dict[str, Any] | None:
        """Check run-state.json and return its contents.

        Returns:
            The parsed state object, or ``None`` when the file is missing,
            unreadable, invalid JSON, or not a JSON object.
        """
        path = self.run_dir / "run-state.json"
        data = self._load_object(path)
        if data is None:
            return None
        current = data.get("current_state")
        # The isinstance guard keeps unhashable JSON values (lists, objects)
        # from raising TypeError in the set membership test.
        if not isinstance(current, str) or current not in VALID_STATES:
            self.error(path, f"current_state must be one of {sorted(VALID_STATES)}")
        history = data.get("state_history")
        if not isinstance(history, list) or not history:
            self.error(path, "state_history must be a non-empty list")
        locked = data.get("locked_surfaces", [])
        if not isinstance(locked, list):
            # A string would be substring-matched and null would raise, so
            # report the wrong shape once instead of checking each surface.
            self.error(path, "locked_surfaces must be a list")
        else:
            for required in [
                "researcher/rubrics/content-curation.md",
                "researcher/mechanisms/registry.jsonl",
                ".claude-plugin/marketplace.json",
                ".plugin/plugin.json",
            ]:
                if required not in locked:
                    self.error(path, f"locked surface missing: {required}")
        return data

    def validate_queue(self) -> None:
        """Check that sources/queue.jsonl exists and every record has id and title."""
        queue = self.run_dir / "sources" / "queue.jsonl"
        if not queue.exists():
            self.error(queue, "source queue missing")
            return
        text = self._read_text(queue)
        if text is None:
            return
        for line_number, line in enumerate(text.splitlines(), start=1):
            if not line.strip():
                continue
            try:
                record = json.loads(line)
            except json.JSONDecodeError as exc:
                self.error(queue, f"line {line_number} invalid JSONL: {exc}")
                continue
            if not isinstance(record, dict):
                # Valid JSON that is not an object has no id/title to inspect.
                self.error(queue, f"line {line_number} must be a JSON object")
                continue
            if not record.get("id") or not record.get("title"):
                self.error(queue, f"line {line_number} must include id and title")

    def validate_source_evaluation(self) -> str | None:
        """Check the first completed source evaluation.

        Files whose stem contains "draft" count as drafts; all other ``*.json``
        files in sources/evaluations count as completed. Only the first
        completed file (in sorted order) is inspected.

        Returns:
            The evaluation's ``source.retrieval_status`` as a string, or
            ``None`` when it is absent/empty or no evaluation could be loaded.
        """
        eval_dir = self.run_dir / "sources" / "evaluations"
        if not eval_dir.exists():
            self.error(eval_dir, "source evaluations directory missing")
            return None
        completed = sorted(path for path in eval_dir.glob("*.json") if "draft" not in path.stem)
        drafts = sorted(eval_dir.glob("*draft*.json"))
        if drafts and not completed:
            self.error(eval_dir, "run has only draft source evaluations")
            return None
        if not completed:
            self.error(eval_dir, "completed source evaluation missing")
            return None

        data = self._load_object(completed[0])
        if data is None:
            return None
        # "source" / "decision" may be present but null or non-objects; treat
        # those the same as missing instead of raising AttributeError.
        source = data.get("source")
        status = source.get("retrieval_status") if isinstance(source, dict) else None
        if status != "retrieved":
            self.error(completed[0], "publish-ready run requires retrieved source evaluation")
        decision = data.get("decision")
        verdict = decision.get("verdict") if isinstance(decision, dict) else None
        if verdict == "REJECT":
            self.error(completed[0], "rejected source cannot produce publish-ready changes")
        return str(status) if status else None

    def validate_proposal(self, source_status: str | None) -> None:
        """Check proposals/skill-proposal.md for placeholders and blank fields.

        Args:
            source_status: Retrieval status returned by
                :meth:`validate_source_evaluation`. When it is not
                ``"retrieved"``, any evidence table row in the proposal is
                reported as an error.
        """
        path = self.run_dir / "proposals" / "skill-proposal.md"
        if not path.exists():
            self.error(path, "skill proposal missing")
            return
        text = self._read_text(path)
        if text is None:
            return
        for pattern in PLACEHOLDER_PATTERNS:
            if re.search(pattern, text):
                self.error(path, f"proposal still contains placeholder: {pattern}")
        required_prefixes = [
            "- URL:",
            "- Title:",
            "- Author or organization:",
            "- Source type:",
            "- Retrieval status:",
            "- Evaluation file:",
            "- Decision:",
            "- Target path:",
            "- Activation scenario:",
            "- Verdict:",
            "- Max mechanism overlap:",
            "- Top mechanism overlaps:",
            "- Evidence limitations:",
            "- Possible duplication:",
            "- Required human review:",
        ]
        # Only a field that is present with nothing after the colon is
        # reported; a field that is absent altogether is not flagged here.
        for prefix in required_prefixes:
            if re.search(rf"^{re.escape(prefix)}\s*$", text, flags=re.MULTILINE):
                self.error(path, f"proposal field is blank: {prefix}")
        has_evidence_row = False
        for line in text.splitlines()[self._EVIDENCE_SCAN_START:]:
            if not line.startswith("|") or line.count("|") < 3:
                continue
            cells = [cell.strip() for cell in line.strip("|").split("|")]
            # Skip the header row, separator rows (including Markdown
            # alignment markers such as ":---:") and rows with only empty cells.
            if cells[0] in {"Claim", "---"} or set("".join(cells)) <= {"-", ":", " "}:
                continue
            has_evidence_row = True
            break
        if source_status != "retrieved" and has_evidence_row:
            self.error(path, "proposal cites evidence from a source that is not fully retrieved")

    def validate_novelty(self) -> None:
        """Check reports/novelty-result.json for a valid verdict and rationale."""
        path = self.run_dir / "reports" / "novelty-result.json"
        data = self._load_object(path)
        if data is None:
            return
        verdict = data.get("verdict")
        # isinstance guard: unhashable JSON values would raise in the set test.
        if not isinstance(verdict, str) or verdict not in {"pass", "human_review", "likely_duplicate"}:
            self.error(path, "novelty verdict is invalid")
        if verdict != "pass" and not data.get("human_review_rationale"):
            self.error(path, "non-pass novelty verdict requires human_review_rationale")

    def validate_pr_readiness(self) -> None:
        """Check reports/pr-readiness.md for its required sections and phrases."""
        path = self.run_dir / "reports" / "pr-readiness.md"
        if not path.exists():
            self.error(path, "PR readiness notes missing")
            return
        text = self._read_text(path)
        if text is None:
            return
        # Plain, case-sensitive substring checks.
        for required in ["## Summary", "## Test Plan", "## Risks", "human approval"]:
            if required not in text:
                self.error(path, f"PR readiness notes missing {required}")

    def validate_closure(self) -> None:
        """Check reports/closure.json when present; a missing file is not an error."""
        path = self.run_dir / "reports" / "closure.json"
        if not path.exists():
            return
        data = self._load_object(path)
        if data is None:
            return
        status = data.get("status")
        # isinstance guard: unhashable JSON values would raise in the set test.
        if not isinstance(status, str) or status not in VALID_CLOSE_STATUS:
            self.error(path, f"closure status must be one of {sorted(VALID_CLOSE_STATUS)}")
        if not data.get("reason"):
            self.error(path, "closure reason is required")

    def load_json(self, path: Path) -> Any:
        """Return the parsed JSON content of *path*.

        Returns ``None`` after recording an error finding when the file is
        missing, unreadable, or not valid JSON. A file containing JSON
        ``null`` also yields ``None``, without a finding.
        """
        data = self._load(path)
        return None if data is self._UNREADABLE else data

    def _read_text(self, path: Path) -> str | None:
        """Read *path* as UTF-8; record an error and return None on failure."""
        try:
            return path.read_text(encoding="utf-8")
        except (OSError, UnicodeDecodeError) as exc:
            self.error(path, f"cannot read file: {exc}")
            return None

    def _load(self, path: Path) -> Any:
        """Parse *path* as JSON, returning ``_UNREADABLE`` after recording an error."""
        if not path.exists():
            self.error(path, "JSON file missing")
            return self._UNREADABLE
        text = self._read_text(path)
        if text is None:
            return self._UNREADABLE
        try:
            return json.loads(text)
        except json.JSONDecodeError as exc:
            self.error(path, f"invalid JSON: {exc}")
            return self._UNREADABLE

    def _load_object(self, path: Path) -> dict[str, Any] | None:
        """Load *path* and require a top-level JSON object; None (with a finding) otherwise."""
        data = self._load(path)
        if data is self._UNREADABLE:
            return None
        if not isinstance(data, dict):
            self.error(path, "expected a JSON object")
            return None
        return data
242 
243 
def main() -> int:
    """Parse CLI arguments, validate the run and print the report.

    ``--run-dir`` (required) names the run directory to validate; ``--json``
    switches the output from the human-readable summary to the raw report.

    Returns:
        0 when the report has no errors, 1 otherwise.
    """
    cli = argparse.ArgumentParser(description="Validate publish readiness for a research run")
    cli.add_argument("--run-dir", type=Path, required=True)
    cli.add_argument("--json", action="store_true")
    options = cli.parse_args()

    report = RunValidator(options.run_dir).run()
    exit_code = 0 if report["ok"] else 1

    if options.json:
        print(json.dumps(report, indent=2))
        return exit_code

    # Human-readable form: one summary line, then one line per finding.
    counts = report["summary"]
    print(
        f"Run validation {'passed' if report['ok'] else 'failed'}: "
        f"{counts['errors']} errors, {counts['warnings']} warnings"
    )
    for item in report["findings"]:
        print(f"[{item['severity']}] {item['path']}: {item['message']}")
    return exit_code
262 
263 
# Run the CLI only when executed as a script; main()'s return value becomes
# the process exit status.
if __name__ == "__main__":
    sys.exit(main())
266
Preparing the source view

Agent Skills for Context Engineering

researcher/scripts/validate_run.py