Source from repo
Agent Skills for Context Engineering

A comprehensive collection of Agent Skills for context engineering, multi-agent architectures, and production agent systems.
muratcankoylan · GitHub: muratcankoylan · Source repo · Original GitHub link
Files
339
Skill
n/a
Size
4.3 MB
Entrypoint
SKILL.md
Format
git-repo
Open file
researcher/scripts/skill_health.py

Syntax-highlighted preview of this file as included in the skill package.
Rendered Source
code · 388 lines · Free
researcher/scripts/skill_health.py
1#!/usr/bin/env python3
2"""Deterministic per-skill health metrics.
3 
4Produces a quality score per skill plus a corpus aggregate. Catches drift before
5users notice: missing sections, stale claims, dead internal links, weak gotchas.
6 
7Run with no arguments for the default report. Use --json for machine output.
8Output is written to researcher/reports/skill-health.json by default and can be
9piped into the daily snapshot for longitudinal trend tracking.
10 
11This script is intentionally deterministic. It does not call any LLM and does
12not make outbound HTTP requests unless --check-urls is set (off by default).
13"""
14 
15from __future__ import annotations
16 
17import argparse
18import json
19import re
20import sys
21import urllib.error
22import urllib.request
23from dataclasses import asdict, dataclass, field
24from pathlib import Path
25from typing import Any
26 
27 
# Filesystem layout. ROOT is the third ancestor of this file (parents[2]);
# every other path below is derived from it.
ROOT = Path(__file__).resolve().parents[2]
# Directory scanned for skills: each sub-directory is evaluated via its SKILL.md.
SKILLS_DIR = ROOT / "skills"
# JSONL inputs; records are read for their "claim_id", "owning_skill" and
# "expected_primary_skill" keys respectively.
CLAIMS_FILE = ROOT / "researcher" / "claims" / "index.jsonl"
MECHANISMS_FILE = ROOT / "researcher" / "mechanisms" / "registry.jsonl"
ACTIVATION_FILE = ROOT / "researcher" / "fixtures" / "activation-cases.jsonl"
# Default destination of the JSON report (overridable with --output).
DEFAULT_OUTPUT = ROOT / "researcher" / "reports" / "skill-health.json"
# One summary line is appended here per run unless --no-history is given.
HISTORY_FILE = ROOT / "researcher" / "reports" / "skill-health-history.jsonl"
35 
# Level-2 headings every SKILL.md is expected to contain. Each heading that is
# not found is recorded in SkillHealth.missing_sections, lowers the score and
# causes the skill to be flagged.
REQUIRED_SECTIONS = [
    "## When to Activate",
    "## Core Concepts",
    "## Practical Guidance",
    "## Gotchas",
    "## Integration",
    "## References",
]
44 
# Regexes that recognise quantitative statements in skill text.
# The patterns are not mutually exclusive: for example "10x" matches both the
# multiplier pattern and the magnitude-suffix pattern.
NUMERIC_CLAIM_PATTERNS = [
    re.compile(r"\b\d+(?:\.\d+)?\s*%"),  # percentages: "45%", "3.5 %"
    re.compile(r"\b\d+(?:\.\d+)?\s*x\b"),  # multipliers: "2x", "1.5 x"
    re.compile(r"\b\d+(?:\.\d+)?\s*ms\b"),  # milliseconds: "200ms"
    re.compile(r"\b\d+(?:\.\d+)?\s*s(?:ec|econds)?\b"),  # seconds: "5s", "5 sec", "5 seconds"
    re.compile(r"\b\d+(?:\.\d+)?\s*tokens?\b", re.IGNORECASE),  # token counts: "4000 tokens"
    re.compile(r"\b\d+(?:k|K|M|B|x)\b"),  # integer with a suffix: "128k", "7B", "10x"
]
53 
# Benchmark names whose literal, case-sensitive occurrences in a skill's text
# are added to its numeric-claim total by count_numeric_claims().
BENCHMARK_NAMES = {
    "LoCoMo",
    "LongMemEval",
    "BrowseComp",
    "SWE-bench",
    "RULER",
    "DMR",
    "HotPotQA",
    "MMLU",
    "GSM8K",
    "HumanEval",
}
66 
# "claim-" followed by at least two lowercase-alphanumeric characters, with
# hyphens allowed in between but not as the final character.
CLAIM_ID_PATTERN = re.compile(r"\bclaim-[a-z0-9][a-z0-9-]*[a-z0-9]\b")
# Markdown link whose target is "skills/<name>/..." (optionally prefixed with
# "../"). Group 1 is the link text, group 2 the skill directory name.
INTERNAL_SKILL_LINK = re.compile(r"\[([^\]]+)\]\((?:\.\./)?skills/([a-z0-9-]+)/(?:[^)]*)\)")
# Markdown link with an http(s) target. Group 1 is the URL.
EXTERNAL_LINK = re.compile(r"\[(?:[^\]]+)\]\((https?://[^)]+)\)")
# A line that begins, after optional whitespace, with a triple-backtick fence.
FENCE_LINE = re.compile(r"^\s*```")
71 
72 
@dataclass
class SkillHealth:
    """Health metrics collected for one skill directory.

    Instances are filled in by evaluate_skill() and serialised with
    dataclasses.asdict() for the JSON report, so field names double as the
    report's keys.
    """

    # Identity: the skill directory name and its SKILL.md path relative to ROOT.
    name: str
    path: str
    # Number of lines in SKILL.md, and whether that is within the 500-line limit.
    line_count: int = 0
    line_count_ok: bool = True
    # Frontmatter validation outcome and the human-readable problems found.
    frontmatter_valid: bool = True
    frontmatter_issues: list[str] = field(default_factory=list)
    # Entries of REQUIRED_SECTIONS that were not found in the file.
    missing_sections: list[str] = field(default_factory=list)
    # Numbered items under "## Gotchas" and number of fenced code blocks.
    gotcha_count: int = 0
    code_example_count: int = 0
    # Links to other skills: how many were found and how many point at an
    # existing SKILL.md.
    internal_links_total: int = 0
    internal_links_resolved: int = 0
    # External http(s) links; results are only populated when URL checking is
    # enabled.
    external_link_count: int = 0
    external_link_results: list[dict[str, Any]] = field(default_factory=list)
    # Quantitative statements detected, and the number of distinct claim IDs
    # referenced in the text (evaluate_skill stores that count here).
    numeric_claims_total: int = 0
    numeric_claims_with_id: int = 0
    # Claim IDs mentioned in the text, and the subset absent from the claims index.
    claim_ids_referenced: list[str] = field(default_factory=list)
    claim_ids_unknown: list[str] = field(default_factory=list)
    # Mechanism-registry and activation-fixture entries that name this skill.
    mechanism_count: int = 0
    activation_case_count: int = 0
    # Weighted score from compute_score() and whether the skill needs attention.
    score: float = 0.0
    flagged: bool = False
96 
97 
def load_jsonl(path: Path) -> list[dict[str, Any]]:
    """Load the JSON-object records of a JSON Lines file.

    Loading is deliberately best-effort: blank lines, lines that are not valid
    JSON, and lines whose JSON value is not an object (for example a list or a
    bare string) are skipped, so every returned record supports ``.get()``.

    Args:
        path: Location of the ``.jsonl`` file.

    Returns:
        The decoded objects in file order, or an empty list when the file does
        not exist.
    """
    try:
        content = path.read_text(encoding="utf-8")
    except (FileNotFoundError, NotADirectoryError):
        # A missing file (or missing parent directory) simply means "no records".
        return []

    records: list[dict[str, Any]] = []
    for line in content.splitlines():
        if not line.strip():
            continue
        try:
            value = json.loads(line)
        except json.JSONDecodeError:
            continue
        # Callers treat every record as a mapping; drop anything else rather
        # than letting it crash them later.
        if isinstance(value, dict):
            records.append(value)
    return records
110 
111 
def parse_frontmatter(text: str) -> tuple[dict[str, str], list[str]]:
    """Extract top-level ``key: value`` pairs from a leading ``---`` block.

    This is a minimal line-based reader, not a YAML parser: blank lines, lines
    that start with a space (nested or continuation content) and lines without
    a colon are ignored. Values are stripped of surrounding whitespace and
    quote characters.

    Args:
        text: Full contents of a SKILL.md file.

    Returns:
        A ``(data, issues)`` pair. ``issues`` is a fresh list holding a single
        message when the opening or closing delimiter is missing, and is empty
        otherwise; ``data`` is empty whenever a delimiter is missing.
    """
    if not text.startswith("---\n"):
        return {}, ["missing opening frontmatter delimiter"]
    # Search from index 3, the newline that ends the opening delimiter, so an
    # empty block ("---\n---") is recognised as closed.
    end = text.find("\n---", 3)
    if end == -1:
        return {}, ["missing closing frontmatter delimiter"]

    data: dict[str, str] = {}
    for raw in text[4:end].splitlines():
        if not raw.strip() or raw.startswith(" ") or ":" not in raw:
            continue
        key, _, value = raw.partition(":")
        data[key.strip()] = value.strip().strip('"').strip("'")
    return data, []
128 
129 
def count_gotchas(text: str) -> int:
    """Count the numbered list items inside the ``## Gotchas`` section.

    The section runs from the line after the heading up to the next line that
    starts with ``## `` (or the end of the text). An empty section yields 0
    and never borrows items from the section that follows it.

    Args:
        text: Full contents of a SKILL.md file.

    Returns:
        The number of lines in the section that start with ``<digits>.``
        followed by whitespace; 0 when the heading is absent.
    """
    # "[^\S\n]*" allows trailing spaces on the heading line without swallowing
    # the blank lines that follow it; "(.*?)" lets the section body be empty.
    match = re.search(
        r"^## Gotchas[^\S\n]*\n(.*?)(?=^## |\Z)",
        text,
        flags=re.DOTALL | re.MULTILINE,
    )
    if not match:
        return 0
    return len(re.findall(r"^\s*\d+\.\s+", match.group(1), flags=re.MULTILINE))
136 
137 
def count_code_examples(text: str) -> int:
    """Return the number of fenced code blocks in ``text``.

    Every line matching FENCE_LINE counts as one fence; two fences make one
    block, so an unpaired trailing fence is ignored.
    """
    fence_lines = [line for line in text.splitlines() if FENCE_LINE.match(line)]
    return len(fence_lines) // 2
144 
145 
def count_numeric_claims(text: str) -> int:
    """Count quantitative statements and benchmark mentions in ``text``.

    Matches from all NUMERIC_CLAIM_PATTERNS are pooled and overlapping matches
    are counted once, so a value such as "10x" or "1.5x", which several
    patterns recognise, contributes a single claim. Every literal occurrence
    of a name in BENCHMARK_NAMES adds one more.

    Args:
        text: Full contents of a SKILL.md file.

    Returns:
        Number of distinct numeric matches plus benchmark-name occurrences.
    """
    spans = sorted(
        match.span()
        for pattern in NUMERIC_CLAIM_PATTERNS
        for match in pattern.finditer(text)
    )
    total = 0
    covered_until = -1
    for start, end in spans:
        # A span that starts inside text already claimed by an earlier match
        # is the same number seen through a different pattern.
        if start >= covered_until:
            total += 1
        covered_until = max(covered_until, end)

    total += sum(text.count(name) for name in BENCHMARK_NAMES)
    return total
153 
154 
def collect_internal_links(text: str) -> tuple[int, int]:
    """Count links to other skills and how many of them resolve.

    Returns:
        ``(total, resolved)`` where ``total`` is the number of
        INTERNAL_SKILL_LINK matches and ``resolved`` the number whose target
        skill directory contains a SKILL.md under SKILLS_DIR.
    """
    targets = [skill for _label, skill in INTERNAL_SKILL_LINK.findall(text)]
    resolved = sum(1 for skill in targets if (SKILLS_DIR / skill / "SKILL.md").exists())
    return len(targets), resolved
163 
164 
def collect_external_links(text: str, check_urls: bool, timeout: float) -> tuple[int, list[dict[str, Any]]]:
    """Count external links and, optionally, probe each one with a HEAD request.

    Args:
        text: Full contents of a SKILL.md file.
        check_urls: When false no network access happens and the result list
            is empty.
        timeout: Seconds allowed for each HEAD request.

    Returns:
        ``(count, results)``. ``results`` holds one dict per link occurrence
        with ``url`` and ``status`` (the HTTP status code, or the string
        ``"unreachable"``), plus ``error`` when the request failed.
    """
    import http.client  # local import: only this function needs it

    urls = EXTERNAL_LINK.findall(text)
    results: list[dict[str, Any]] = []
    if not check_urls:
        return len(urls), results

    for url in urls:
        try:
            request = urllib.request.Request(url, method="HEAD", headers={"User-Agent": "skill-health/0.1"})
            with urllib.request.urlopen(request, timeout=timeout) as response:
                results.append({"url": url, "status": response.status})
        except urllib.error.HTTPError as exc:
            # The server answered, just not with a success status.
            results.append({"url": url, "status": exc.code, "error": str(exc)})
        except (OSError, ValueError, http.client.HTTPException) as exc:
            # OSError covers URLError, timeouts and connection resets;
            # ValueError and HTTPException cover malformed URLs and broken
            # responses. One bad link must not abort the whole report.
            results.append({"url": url, "status": "unreachable", "error": str(exc)})
    return len(urls), results
180 
181 
def collect_claim_ids(text: str, known_claim_ids: set[str]) -> tuple[list[str], list[str], int]:
    """Find the distinct claim IDs mentioned in ``text``.

    Returns:
        ``(referenced, unknown, count)``: the sorted distinct IDs, the sorted
        subset not present in ``known_claim_ids``, and the number of distinct
        IDs.
    """
    referenced = sorted(set(CLAIM_ID_PATTERN.findall(text)))
    # Filtering an already-sorted list keeps the unknown IDs sorted as well.
    unrecognized = [claim_id for claim_id in referenced if claim_id not in known_claim_ids]
    return referenced, unrecognized, len(referenced)
186 
187 
def normalize(value: int, target: int) -> float:
    """Return ``value / target`` capped at 1.0, or 0.0 for a non-positive target."""
    return min(1.0, value / target) if target > 0 else 0.0
192 
193 
def compute_score(record: SkillHealth) -> float:
    """Combine a skill's metrics into a single weighted score in [0, 1].

    Each component is scaled to [0, 1] and the weights sum to 1.0:
    required sections 0.20, gotchas 0.15 (target 3), code examples 0.10
    (target 2), resolved internal links 0.15, activation cases 0.10, claim
    coverage 0.15, mechanisms 0.10, frontmatter validity 0.05. Skills with no
    internal links or no numeric claims get full marks for that component;
    skills with no activation case or mechanism get half marks.

    Args:
        record: Metrics already collected for one skill.

    Returns:
        The weighted score rounded to four decimal places.
    """
    required_section_score = (len(REQUIRED_SECTIONS) - len(record.missing_sections)) / len(REQUIRED_SECTIONS)
    gotcha_score = normalize(record.gotcha_count, target=3)
    code_score = normalize(record.code_example_count, target=2)
    if record.internal_links_total:
        internal_score = record.internal_links_resolved / record.internal_links_total
    else:
        internal_score = 1.0
    if record.numeric_claims_total:
        # The numerator counts claim IDs and the denominator counts detected
        # numeric statements, so the ratio can exceed 1; cap it so this
        # component cannot push the overall score past its weight.
        claim_score = min(1.0, record.numeric_claims_with_id / record.numeric_claims_total)
    else:
        claim_score = 1.0
    activation_score = 1.0 if record.activation_case_count >= 1 else 0.5
    mechanism_score = 1.0 if record.mechanism_count >= 1 else 0.5
    frontmatter_score = 1.0 if record.frontmatter_valid else 0.0

    return round(
        0.20 * required_section_score
        + 0.15 * gotcha_score
        + 0.10 * code_score
        + 0.15 * internal_score
        + 0.10 * activation_score
        + 0.15 * claim_score
        + 0.10 * mechanism_score
        + 0.05 * frontmatter_score,
        4,
    )
221 
222 
def evaluate_skill(
    skill_dir: Path,
    known_claim_ids: set[str],
    mechanism_owners: dict[str, int],
    activation_owners: dict[str, int],
    check_urls: bool,
    url_timeout: float,
) -> SkillHealth:
    """Collect every health metric for one skill directory.

    Args:
        skill_dir: Directory expected to contain a SKILL.md.
        known_claim_ids: Claim IDs present in the claims index.
        mechanism_owners: Mechanism count per skill name.
        activation_owners: Activation-case count per skill name.
        check_urls: Whether external links are probed over the network.
        url_timeout: Seconds allowed per external request.

    Returns:
        A populated SkillHealth. When SKILL.md is missing, only the
        frontmatter fields and ``flagged`` are set and the score stays 0.0.
    """
    skill_file = skill_dir / "SKILL.md"
    record = SkillHealth(
        name=skill_dir.name,
        path=str(skill_file.relative_to(ROOT)),
    )
    if not skill_file.exists():
        record.frontmatter_valid = False
        record.frontmatter_issues.append("SKILL.md missing")
        record.flagged = True
        return record

    text = skill_file.read_text(encoding="utf-8")
    lines = text.splitlines()
    record.line_count = len(lines)
    record.line_count_ok = record.line_count <= 500

    frontmatter, issues = parse_frontmatter(text)
    # The record shares this list, so the checks below extend it in place.
    record.frontmatter_issues = issues
    name = frontmatter.get("name", "")
    description = frontmatter.get("description", "")
    if not name:
        issues.append("missing name")
    elif name != skill_dir.name:
        issues.append(f"name '{name}' does not match directory '{skill_dir.name}'")
    if not description:
        issues.append("missing description")
    elif len(description) > 1024:
        issues.append("description exceeds 1024 characters")
    if re.search(r"\b(I can|Use me|You can use this)\b", description):
        issues.append("description may not be third person")
    record.frontmatter_valid = not issues and bool(name) and bool(description)

    # A required section must appear as a heading at the start of a line. A
    # plain substring test would also accept "### Gotchas" or an inline
    # mention of the heading in prose.
    for section in REQUIRED_SECTIONS:
        if not any(line.startswith(section) for line in lines):
            record.missing_sections.append(section)

    record.gotcha_count = count_gotchas(text)
    record.code_example_count = count_code_examples(text)
    record.internal_links_total, record.internal_links_resolved = collect_internal_links(text)
    record.external_link_count, record.external_link_results = collect_external_links(text, check_urls, url_timeout)

    record.numeric_claims_total = count_numeric_claims(text)
    found, unknown, with_id = collect_claim_ids(text, known_claim_ids)
    record.claim_ids_referenced = found
    record.claim_ids_unknown = unknown
    record.numeric_claims_with_id = with_id

    record.mechanism_count = mechanism_owners.get(skill_dir.name, 0)
    record.activation_case_count = activation_owners.get(skill_dir.name, 0)

    record.score = compute_score(record)
    record.flagged = record.score < 0.75 or not record.line_count_ok or bool(record.missing_sections) or not record.frontmatter_valid
    return record
284 
285 
def build_report(check_urls: bool, url_timeout: float) -> dict[str, Any]:
    """Evaluate every skill directory and assemble the full report.

    Args:
        check_urls: Whether external links are probed over the network.
        url_timeout: Seconds allowed per external request.

    Returns:
        A dict with ``ok`` (true when nothing is flagged), a ``summary`` of
        corpus-level numbers and a ``skills`` list of per-skill records. When
        the skills directory is missing, a dict with ``ok`` false, an
        ``error`` message and an empty ``skills`` list instead.
    """
    if not SKILLS_DIR.exists():
        return {
            "ok": False,
            "error": f"skills directory missing at {SKILLS_DIR}",
            "skills": [],
        }

    def _count_owners(entries: list[dict[str, Any]], key: str) -> dict[str, int]:
        # Tally entries per owning skill, ignoring records without a string owner.
        counts: dict[str, int] = {}
        for entry in entries:
            owner = entry.get(key)
            if isinstance(owner, str):
                counts[owner] = counts.get(owner, 0) + 1
        return counts

    known_claim_ids = {
        claim.get("claim_id")
        for claim in load_jsonl(CLAIMS_FILE)
        if claim.get("claim_id")
    }
    mechanism_owners = _count_owners(load_jsonl(MECHANISMS_FILE), "owning_skill")
    activation_owners = _count_owners(load_jsonl(ACTIVATION_FILE), "expected_primary_skill")

    skill_dirs = sorted(entry for entry in SKILLS_DIR.iterdir() if entry.is_dir())
    skills: list[SkillHealth] = [
        evaluate_skill(
            skill_dir,
            known_claim_ids,
            mechanism_owners,
            activation_owners,
            check_urls,
            url_timeout,
        )
        for skill_dir in skill_dirs
    ]

    scores = [skill.score for skill in skills]
    if scores:
        corpus_score = round(sum(scores) / len(scores), 4)
        lowest, highest = min(scores), max(scores)
    else:
        corpus_score = lowest = highest = 0.0
    flagged = len([skill for skill in skills if skill.flagged])

    summary = {
        "skill_count": len(skills),
        "corpus_score": corpus_score,
        "min_score": lowest,
        "max_score": highest,
        "flagged": flagged,
        "known_claim_ids": len(known_claim_ids),
        "mechanism_owners": mechanism_owners,
        "activation_owners": activation_owners,
    }
    return {
        "ok": flagged == 0,
        "summary": summary,
        "skills": [asdict(skill) for skill in skills],
    }
341 
342 
def main() -> int:
    """Command-line entry point: build the report, persist it and print it.

    The report is always written to ``--output``. Unless ``--no-history`` is
    given, a one-line summary is appended to HISTORY_FILE. With ``--json`` the
    full report goes to stdout; otherwise a short per-skill table is printed.

    Returns:
        1 when ``--strict`` is set and the report is not ok, otherwise 0.
    """
    parser = argparse.ArgumentParser(description="Deterministic skill health report")
    parser.add_argument("--json", action="store_true", help="print machine-readable JSON to stdout")
    parser.add_argument("--output", type=Path, default=DEFAULT_OUTPUT, help="path to write the JSON report")
    parser.add_argument("--check-urls", action="store_true", help="perform HEAD requests on external URLs")
    parser.add_argument("--url-timeout", type=float, default=10.0, help="seconds per external HEAD request")
    parser.add_argument("--strict", action="store_true", help="exit non-zero if any skill is flagged")
    parser.add_argument("--no-history", action="store_true", help="do not append a one-line summary to the history file")
    args = parser.parse_args()

    report = build_report(args.check_urls, args.url_timeout)
    args.output.parent.mkdir(parents=True, exist_ok=True)
    args.output.write_text(json.dumps(report, indent=2) + "\n", encoding="utf-8")

    if not args.no_history:
        HISTORY_FILE.parent.mkdir(parents=True, exist_ok=True)
        history_entry = {
            "ok": report.get("ok"),
            "summary": report.get("summary"),
        }
        with HISTORY_FILE.open("a", encoding="utf-8") as handle:
            handle.write(json.dumps(history_entry, sort_keys=True) + "\n")

    if args.json:
        print(json.dumps(report, indent=2))
    else:
        if "error" in report:
            # Without this the all-zero summary line would hide the real problem.
            print(f"Skill health error: {report['error']}", file=sys.stderr)
        summary = report.get("summary", {})
        print(
            f"Skill health: corpus={summary.get('corpus_score', 0.0)} "
            f"min={summary.get('min_score', 0.0)} "
            f"flagged={summary.get('flagged', 0)} "
            f"skills={summary.get('skill_count', 0)}"
        )
        for record in report.get("skills", []):
            marker = "FLAG" if record["flagged"] else "ok  "
            print(f"  {marker} {record['name']:<28} score={record['score']:.3f} lines={record['line_count']}")
        # relative_to() raises ValueError for an --output outside the
        # repository or given as a relative path; show the path as supplied.
        try:
            shown_path = args.output.relative_to(ROOT)
        except ValueError:
            shown_path = args.output
        print(f"Wrote report to {shown_path}")

    if args.strict and not report.get("ok"):
        return 1
    return 0
384 
385 
# Script entry point: main()'s return value becomes the process exit status.
if __name__ == "__main__":
    sys.exit(main())
388
Preparing the source view

Agent Skills for Context Engineering

researcher/scripts/skill_health.py