Loading source
Pulling the file list, source metadata, and syntax-aware rendering for this listing.
Source from repo
Enterprise-grade research with multi-source synthesis, citation tracking, and verification. 8-phase pipeline with auto-continuation.
Files
Skill
Size
Entrypoint
Format
Open file
Syntax-highlighted preview of this file as included in the skill package.
scripts/verify_claim_support.py
#!/usr/bin/env python3
"""
Claim-Support Verification — checks whether evidence supports claims.

CLI subcommands:
  verify    Check all claims against evidence, update support_status
  report    Generate a support verification summary

Version 1 is deterministic and cheap: entity, number, date, and
lexical-overlap checks over stored evidence. No LLM calls.

Only factual claims hard-fail on unsupported status.
Synthesis/recommendation need traceability but softer thresholds.
"""

import argparse
import json
import os
import re
import sys
from collections import Counter
from datetime import datetime, timezone


# ---------------------------------------------------------------------------
# JSONL helpers
# ---------------------------------------------------------------------------

def read_jsonl(path: str) -> list[dict]:
    """Read a JSONL file into a list of dicts; missing file -> empty list."""
    rows: list[dict] = []
    if not os.path.exists(path):
        return rows
    # Explicit UTF-8: write_jsonl emits non-ASCII (ensure_ascii=False), so
    # decoding must not depend on the platform's locale encoding.
    with open(path, encoding='utf-8') as f:
        for line in f:
            line = line.strip()
            if line:
                rows.append(json.loads(line))
    return rows


def write_jsonl(path: str, rows: list[dict]) -> None:
    """Write rows as one JSON object per line (UTF-8, no ASCII escaping)."""
    with open(path, 'w', encoding='utf-8') as f:
        for row in rows:
            f.write(json.dumps(row, ensure_ascii=False) + '\n')


# ---------------------------------------------------------------------------
# Support verification logic
# ---------------------------------------------------------------------------

# Extract numbers (integers and decimals) with an optional %/x/X suffix.
# BUG FIX: the previous pattern ended with \b, which can never match between
# '%' and a following space/end-of-string, so '50%' was silently extracted as
# '50'. The (?!\w) lookahead keeps the suffix while still refusing to split
# word-attached digits such as '2020s'.
NUMBER_RE = re.compile(r'\b\d+(?:\.\d+)?[%xX]?(?!\w)')

# Extract year-like numbers (1900-2099).
# BUG FIX: the alternation must be NON-capturing — re.findall returns the
# capture-group contents when a group is present, so a capturing (19|20)
# yielded '19'/'20' instead of the full four-digit year.
YEAR_RE = re.compile(r'\b(?:19|20)\d{2}\b')

# Extract capitalized entities (naive NER: runs of Capitalized Words).
ENTITY_RE = re.compile(r'\b[A-Z][a-z]+(?:\s+[A-Z][a-z]+)*\b')

# Sentence-initial words and connectives that look like entities but aren't.
STOP_ENTITIES = frozenset([
    'The', 'This', 'That', 'These', 'However', 'Furthermore',
    'Moreover', 'Additionally', 'Therefore', 'Nevertheless',
])


def extract_tokens(text: str) -> set[str]:
    """Extract significant lowercase tokens (>3 chars)."""
    return set(re.findall(r'\b[a-z]{4,}\b', text.lower()))


def extract_numbers(text: str) -> set[str]:
    """Extract numeric values, keeping %/x suffixes attached."""
    return set(NUMBER_RE.findall(text))


def extract_years(text: str) -> set[str]:
    """Extract four-digit year mentions (1900-2099)."""
    return set(YEAR_RE.findall(text))


def extract_entities(text: str) -> set[str]:
    """Extract capitalized entity mentions, minus common stop words."""
    return set(ENTITY_RE.findall(text)) - STOP_ENTITIES


def _ratio(claim_items: set[str], ev_items: set[str], empty_default: float) -> float:
    """Fraction of claim_items present in ev_items.

    Returns empty_default when the claim has no items of this kind: 0.0 for
    tokens (no tokens = no support), 1.0 for numbers/years/entities (nothing
    to contradict).
    """
    if not claim_items:
        return empty_default
    return len(claim_items & ev_items) / len(claim_items)


def compute_support_score(claim_text: str, evidence_quotes: list[str]) -> tuple[str, float, str]:
    """
    Compute support status for a claim given its linked evidence quotes.

    Each quote is scored independently; the best-scoring quote wins, and the
    notes describe that quote's weaknesses.

    Returns (status, score, notes).
    Score range: 0.0 (no overlap) to 1.0 (strong support).
    """
    if not evidence_quotes:
        return ('unsupported', 0.0, 'no evidence linked')

    claim_tokens = extract_tokens(claim_text)
    claim_numbers = extract_numbers(claim_text)
    claim_years = extract_years(claim_text)
    claim_entities = extract_entities(claim_text)

    best_score = 0.0
    # None (not []) so the first quote always records its notes; previously a
    # quote list scoring uniformly 0.0 never updated the notes and the claim
    # was misreported as 'adequate overlap'.
    best_notes: list[str] | None = None

    for quote in evidence_quotes:
        token_overlap = _ratio(claim_tokens, extract_tokens(quote), 0.0)
        number_match = _ratio(claim_numbers, extract_numbers(quote), 1.0)
        year_match = _ratio(claim_years, extract_years(quote), 1.0)
        entity_match = _ratio(claim_entities, extract_entities(quote), 1.0)

        # Weighted composite: lexical overlap dominates, numbers next.
        score = (
            0.4 * token_overlap +
            0.25 * number_match +
            0.15 * year_match +
            0.2 * entity_match
        )

        notes = []
        if token_overlap < 0.3:
            notes.append('low lexical overlap')
        if claim_numbers and number_match < 0.5:
            notes.append('number mismatch')
        if claim_years and year_match < 1.0:
            notes.append('year mismatch')
        if claim_entities and entity_match < 0.3:
            notes.append('entity mismatch')

        if best_notes is None or score > best_score:
            best_score = score
            best_notes = notes

    # Threshold decision
    if best_score >= 0.6:
        status = 'supported'
    elif best_score >= 0.35:
        status = 'partial'
    else:
        status = 'needs_review'

    notes_str = '; '.join(best_notes) if best_notes else 'adequate overlap'
    return (status, round(best_score, 3), notes_str)


# ---------------------------------------------------------------------------
# Subcommands
# ---------------------------------------------------------------------------

def _dedupe_claims(claims: list[dict]) -> list[dict]:
    """Drop rows with duplicate claim_ids, keeping the first occurrence."""
    seen: set = set()
    unique: list[dict] = []
    for c in claims:
        cid = c.get('claim_id')
        if cid not in seen:
            seen.add(cid)
            unique.append(c)
    return unique


def cmd_verify(args: argparse.Namespace) -> None:
    """Verify all claims against evidence; rewrite claims.jsonl in place.

    Prints a JSON summary to stdout. With --strict, exits 1 if any FACTUAL
    claim ends up unsupported (other claim types never hard-fail).
    """
    claims_path = os.path.join(args.dir, 'claims.jsonl')
    evidence_path = os.path.join(args.dir, 'evidence.jsonl')
    # NOTE: sources.jsonl is not needed by the v1 deterministic checks; it
    # was previously loaded and never used.

    claims = read_jsonl(claims_path)
    evidence = read_jsonl(evidence_path)

    # Index evidence both by source_id (cited-source lookups) and by
    # evidence_id (direct links).
    ev_by_source: dict[str, list[str]] = {}
    ev_by_id: dict[str, dict] = {}
    for ev in evidence:
        ev_by_source.setdefault(ev.get('source_id', ''), []).append(ev.get('quote', ''))
        ev_by_id[ev.get('evidence_id', '')] = ev

    verified = 0
    updated_claims = []

    for claim in _dedupe_claims(claims):
        claim_type = claim.get('claim_type', 'factual')
        cited_ids = claim.get('cited_source_ids', [])
        evidence_ids = claim.get('evidence_ids', [])

        # Quotes from directly linked evidence ...
        quotes = [
            ev_by_id[eid].get('quote', '')
            for eid in evidence_ids
            if eid in ev_by_id
        ]
        # ... plus all evidence captured for cited sources.
        for sid in cited_ids:
            if sid in ev_by_source:
                quotes.extend(ev_by_source[sid])

        if not quotes and not cited_ids and not evidence_ids:
            # No links at all. Speculation doesn't need evidence.
            claim['support_status'] = (
                'supported' if claim_type == 'speculation' else 'unsupported'
            )
        elif not quotes:
            # Has cited sources but no evidence captured yet.
            claim['support_status'] = 'needs_review'
        else:
            status, score, notes = compute_support_score(claim.get('text', ''), quotes)
            claim['support_status'] = status
            claim['_support_score'] = score
            claim['_support_notes'] = notes

        verified += 1
        updated_claims.append(claim)

    # Rewrite claims.jsonl with updated statuses.
    write_jsonl(claims_path, updated_claims)

    # Summary statistics.
    status_counts = Counter(c.get('support_status') for c in updated_claims)
    total_factual = sum(1 for c in updated_claims if c.get('claim_type') == 'factual')
    factual_unsupported = sum(
        1 for c in updated_claims
        if c.get('claim_type') == 'factual' and c.get('support_status') == 'unsupported'
    )

    # Strict mode: only unsupported factual claims fail the run.
    passed = not (args.strict and factual_unsupported > 0)

    print(json.dumps({
        'status': 'pass' if passed else 'fail',
        'verified': verified,
        'support_status_counts': dict(status_counts),
        'factual_unsupported': factual_unsupported,
        'total_factual': total_factual,
        'unsupported_rate': round(factual_unsupported / max(total_factual, 1), 3),
    }, indent=2))

    if not passed:
        sys.exit(1)


def cmd_report(args: argparse.Namespace) -> None:
    """Generate a human-readable (Markdown) support verification report."""
    claims = read_jsonl(os.path.join(args.dir, 'claims.jsonl'))
    unique = _dedupe_claims(claims)

    lines = ['# Claim Support Verification Report', '']

    # Summary
    status_counts = Counter(c.get('support_status') for c in unique)
    type_counts = Counter(c.get('claim_type') for c in unique)
    lines.append(f'**Total claims:** {len(unique)}')
    lines.append(f'**By type:** {dict(type_counts)}')
    lines.append(f'**By status:** {dict(status_counts)}')
    lines.append('')

    # Unsupported factual claims (the failures).
    unsupported_factual = [
        c for c in unique
        if c.get('claim_type') == 'factual'
        and c.get('support_status') in ('unsupported', 'needs_review')
    ]
    if unsupported_factual:
        lines.append('## Unsupported/Review-needed Factual Claims')
        lines.append('')
        for c in unsupported_factual:
            # .get() guards: claims written by earlier phases may lack fields.
            lines.append(
                f'- [{c.get("support_status")}] `{c.get("section_id", "?")}`: '
                f'{c.get("text", "")[:100]}...'
            )
            if c.get('_support_notes'):
                lines.append(f'  Notes: {c["_support_notes"]}')
        lines.append('')
    else:
        lines.append('## All factual claims have adequate support.')
        lines.append('')

    print('\n'.join(lines))


# ---------------------------------------------------------------------------
# CLI entry point
# ---------------------------------------------------------------------------

def main() -> None:
    """Parse CLI arguments and dispatch to the chosen subcommand."""
    parser = argparse.ArgumentParser(
        prog='verify_claim_support',
        description='Claim-support verification for deep-research v3.0',
    )
    sub = parser.add_subparsers(dest='command', required=True)

    # verify
    p_ver = sub.add_parser('verify', help='Verify claims against evidence')
    p_ver.add_argument('--dir', required=True, help='Run directory')
    p_ver.add_argument('--strict', action='store_true',
                       help='Exit 1 if any factual claim unsupported')

    # report
    p_rep = sub.add_parser('report', help='Generate verification report')
    p_rep.add_argument('--dir', required=True, help='Run directory')

    args = parser.parse_args()
    dispatch = {
        'verify': cmd_verify,
        'report': cmd_report,
    }
    dispatch[args.command](args)


if __name__ == '__main__':
    main()