Loading source
Pulling the file list, source metadata, and syntax-aware rendering for this listing.
Source from repo
Enterprise-grade research with multi-source synthesis, citation tracking, and verification. 8-phase pipeline with auto-continuation.
Files
Skill
Size
Entrypoint
Format
Open file
Syntax-highlighted preview of this file as included in the skill package.
scripts/verify_claim_support.py
#!/usr/bin/env python3
"""
Claim-Support Verification — checks whether evidence supports claims.

CLI subcommands:
  verify    Check all claims against evidence, update support_status
  report    Generate a support verification summary

Version 1 is deterministic and cheap: entity, number, date, and
lexical-overlap checks over stored evidence. No LLM calls.

Only factual claims hard-fail on unsupported status.
Synthesis/recommendation need traceability but softer thresholds.
"""

import argparse
import json
import os
import re
import sys
from collections import Counter
from datetime import datetime, timezone


# ---------------------------------------------------------------------------
# JSONL helpers
# ---------------------------------------------------------------------------

def read_jsonl(path: str) -> list[dict]:
    """Read a JSONL file into a list of dicts; missing file -> empty list."""
    rows: list[dict] = []
    if not os.path.exists(path):
        return rows
    # Explicit UTF-8: write_jsonl emits non-ASCII (ensure_ascii=False), so
    # decoding must not depend on the platform's locale encoding.
    with open(path, encoding='utf-8') as f:
        for line in f:
            line = line.strip()
            if line:
                rows.append(json.loads(line))
    return rows


def write_jsonl(path: str, rows: list[dict]) -> None:
    """Write rows as one JSON object per line (UTF-8, no ASCII escaping)."""
    with open(path, 'w', encoding='utf-8') as f:
        for row in rows:
            f.write(json.dumps(row, ensure_ascii=False) + '\n')


# ---------------------------------------------------------------------------
# Support verification logic
# ---------------------------------------------------------------------------

# Extract numbers (integers and decimals) with an optional %/x/X suffix.
# BUG FIX: the previous pattern ended with \b, which can never match between
# '%' and a following space/end-of-string, so '50%' was silently extracted as
# '50'. The (?!\w) lookahead keeps the suffix while still refusing to split
# word-attached digits such as '2020s'.
NUMBER_RE = re.compile(r'\b\d+(?:\.\d+)?[%xX]?(?!\w)')

# Extract year-like numbers (1900-2099).
# BUG FIX: the alternation must be NON-capturing — re.findall returns the
# capture-group contents when a group is present, so a capturing (19|20)
# yielded '19'/'20' instead of the full four-digit year.
YEAR_RE = re.compile(r'\b(?:19|20)\d{2}\b')

# Extract capitalized entities (naive NER: runs of Capitalized Words).
ENTITY_RE = re.compile(r'\b[A-Z][a-z]+(?:\s+[A-Z][a-z]+)*\b')

# Sentence-initial words and connectives that look like entities but aren't.
STOP_ENTITIES = frozenset([
    'The', 'This', 'That', 'These', 'However', 'Furthermore',
    'Moreover', 'Additionally', 'Therefore', 'Nevertheless',
])


def extract_tokens(text: str) -> set[str]:
    """Extract significant lowercase tokens (>3 chars)."""
    return set(re.findall(r'\b[a-z]{4,}\b', text.lower()))


def extract_numbers(text: str) -> set[str]:
    """Extract numeric values, keeping %/x suffixes attached."""
    return set(NUMBER_RE.findall(text))


def extract_years(text: str) -> set[str]:
    """Extract four-digit year mentions (1900-2099)."""
    return set(YEAR_RE.findall(text))


def extract_entities(text: str) -> set[str]:
    """Extract capitalized entity mentions, minus common stop words."""
    return set(ENTITY_RE.findall(text)) - STOP_ENTITIES


def _ratio(claim_items: set[str], ev_items: set[str], empty_default: float) -> float:
    """Fraction of claim_items present in ev_items.

    Returns empty_default when the claim has no items of this kind: 0.0 for
    tokens (no tokens = no support), 1.0 for numbers/years/entities (nothing
    to contradict).
    """
    if not claim_items:
        return empty_default
    return len(claim_items & ev_items) / len(claim_items)


def compute_support_score(claim_text: str, evidence_quotes: list[str]) -> tuple[str, float, str]:
    """
    Compute support status for a claim given its linked evidence quotes.

    Each quote is scored independently; the best-scoring quote wins, and the
    notes describe that quote's weaknesses.

    Returns (status, score, notes).
    Score range: 0.0 (no overlap) to 1.0 (strong support).
    """
    if not evidence_quotes:
        return ('unsupported', 0.0, 'no evidence linked')

    claim_tokens = extract_tokens(claim_text)
    claim_numbers = extract_numbers(claim_text)
    claim_years = extract_years(claim_text)
    claim_entities = extract_entities(claim_text)

    best_score = 0.0
    # None (not []) so the first quote always records its notes; previously a
    # quote list scoring uniformly 0.0 never updated the notes and the claim
    # was misreported as 'adequate overlap'.
    best_notes: list[str] | None = None

    for quote in evidence_quotes:
        token_overlap = _ratio(claim_tokens, extract_tokens(quote), 0.0)
        number_match = _ratio(claim_numbers, extract_numbers(quote), 1.0)
        year_match = _ratio(claim_years, extract_years(quote), 1.0)
        entity_match = _ratio(claim_entities, extract_entities(quote), 1.0)

        # Weighted composite: lexical overlap dominates, numbers next.
        score = (
            0.4 * token_overlap +
            0.25 * number_match +
            0.15 * year_match +
            0.2 * entity_match
        )

        notes = []
        if token_overlap < 0.3:
            notes.append('low lexical overlap')
        if claim_numbers and number_match < 0.5:
            notes.append('number mismatch')
        if claim_years and year_match < 1.0:
            notes.append('year mismatch')
        if claim_entities and entity_match < 0.3:
            notes.append('entity mismatch')

        if best_notes is None or score > best_score:
            best_score = score
            best_notes = notes

    # Threshold decision
    if best_score >= 0.6:
        status = 'supported'
    elif best_score >= 0.35:
        status = 'partial'
    else:
        status = 'needs_review'

    notes_str = '; '.join(best_notes) if best_notes else 'adequate overlap'
    return (status, round(best_score, 3), notes_str)


# ---------------------------------------------------------------------------
# Subcommands
# ---------------------------------------------------------------------------

def _dedupe_claims(claims: list[dict]) -> list[dict]:
    """Drop rows with duplicate claim_ids, keeping the first occurrence."""
    seen: set = set()
    unique: list[dict] = []
    for c in claims:
        cid = c.get('claim_id')
        if cid not in seen:
            seen.add(cid)
            unique.append(c)
    return unique


def cmd_verify(args: argparse.Namespace) -> None:
    """Verify all claims against evidence; rewrite claims.jsonl in place.

    Prints a JSON summary to stdout. With --strict, exits 1 if any FACTUAL
    claim ends up unsupported (other claim types never hard-fail).
    """
    claims_path = os.path.join(args.dir, 'claims.jsonl')
    evidence_path = os.path.join(args.dir, 'evidence.jsonl')
    # NOTE: sources.jsonl is not needed by the v1 deterministic checks; it
    # was previously loaded and never used.

    claims = read_jsonl(claims_path)
    evidence = read_jsonl(evidence_path)

    # Index evidence both by source_id (cited-source lookups) and by
    # evidence_id (direct links).
    ev_by_source: dict[str, list[str]] = {}
    ev_by_id: dict[str, dict] = {}
    for ev in evidence:
        ev_by_source.setdefault(ev.get('source_id', ''), []).append(ev.get('quote', ''))
        ev_by_id[ev.get('evidence_id', '')] = ev

    verified = 0
    updated_claims = []

    for claim in _dedupe_claims(claims):
        claim_type = claim.get('claim_type', 'factual')
        cited_ids = claim.get('cited_source_ids', [])
        evidence_ids = claim.get('evidence_ids', [])

        # Quotes from directly linked evidence ...
        quotes = [
            ev_by_id[eid].get('quote', '')
            for eid in evidence_ids
            if eid in ev_by_id
        ]
        # ... plus all evidence captured for cited sources.
        for sid in cited_ids:
            if sid in ev_by_source:
                quotes.extend(ev_by_source[sid])

        if not quotes and not cited_ids and not evidence_ids:
            # No links at all. Speculation doesn't need evidence.
            claim['support_status'] = (
                'supported' if claim_type == 'speculation' else 'unsupported'
            )
        elif not quotes:
            # Has cited sources but no evidence captured yet.
            claim['support_status'] = 'needs_review'
        else:
            status, score, notes = compute_support_score(claim.get('text', ''), quotes)
            claim['support_status'] = status
            claim['_support_score'] = score
            claim['_support_notes'] = notes

        verified += 1
        updated_claims.append(claim)

    # Rewrite claims.jsonl with updated statuses.
    write_jsonl(claims_path, updated_claims)

    # Summary statistics.
    status_counts = Counter(c.get('support_status') for c in updated_claims)
    total_factual = sum(1 for c in updated_claims if c.get('claim_type') == 'factual')
    factual_unsupported = sum(
        1 for c in updated_claims
        if c.get('claim_type') == 'factual' and c.get('support_status') == 'unsupported'
    )

    # Strict mode: only unsupported factual claims fail the run.
    passed = not (args.strict and factual_unsupported > 0)

    print(json.dumps({
        'status': 'pass' if passed else 'fail',
        'verified': verified,
        'support_status_counts': dict(status_counts),
        'factual_unsupported': factual_unsupported,
        'total_factual': total_factual,
        'unsupported_rate': round(factual_unsupported / max(total_factual, 1), 3),
    }, indent=2))

    if not passed:
        sys.exit(1)


def cmd_report(args: argparse.Namespace) -> None:
    """Generate a human-readable (Markdown) support verification report."""
    claims = read_jsonl(os.path.join(args.dir, 'claims.jsonl'))
    unique = _dedupe_claims(claims)

    lines = ['# Claim Support Verification Report', '']

    # Summary
    status_counts = Counter(c.get('support_status') for c in unique)
    type_counts = Counter(c.get('claim_type') for c in unique)
    lines.append(f'**Total claims:** {len(unique)}')
    lines.append(f'**By type:** {dict(type_counts)}')
    lines.append(f'**By status:** {dict(status_counts)}')
    lines.append('')

    # Unsupported factual claims (the failures).
    unsupported_factual = [
        c for c in unique
        if c.get('claim_type') == 'factual'
        and c.get('support_status') in ('unsupported', 'needs_review')
    ]
    if unsupported_factual:
        lines.append('## Unsupported/Review-needed Factual Claims')
        lines.append('')
        for c in unsupported_factual:
            # .get() guards: claims written by earlier phases may lack fields.
            lines.append(
                f'- [{c.get("support_status")}] `{c.get("section_id", "?")}`: '
                f'{c.get("text", "")[:100]}...'
            )
            if c.get('_support_notes'):
                lines.append(f'  Notes: {c["_support_notes"]}')
        lines.append('')
    else:
        lines.append('## All factual claims have adequate support.')
        lines.append('')

    print('\n'.join(lines))


# ---------------------------------------------------------------------------
# CLI entry point
# ---------------------------------------------------------------------------

def main() -> None:
    """Parse CLI arguments and dispatch to the chosen subcommand."""
    parser = argparse.ArgumentParser(
        prog='verify_claim_support',
        description='Claim-support verification for deep-research v3.0',
    )
    sub = parser.add_subparsers(dest='command', required=True)

    # verify
    p_ver = sub.add_parser('verify', help='Verify claims against evidence')
    p_ver.add_argument('--dir', required=True, help='Run directory')
    p_ver.add_argument('--strict', action='store_true',
                       help='Exit 1 if any factual claim unsupported')

    # report
    p_rep = sub.add_parser('report', help='Generate verification report')
    p_rep.add_argument('--dir', required=True, help='Run directory')

    args = parser.parse_args()
    dispatch = {
        'verify': cmd_verify,
        'report': cmd_report,
    }
    dispatch[args.command](args)


if __name__ == '__main__':
    main()