Loading source
Pulling the file list, source metadata, and syntax-aware rendering for this listing.
Source from repo
Enterprise-grade research with multi-source synthesis, citation tracking, and verification. 8-phase pipeline with auto-continuation.
Files
Skill
Size
Entrypoint
Format
Open file
Syntax-highlighted preview of this file as included in the skill package.
tests/test_verify_claim_support.py
1#!/usr/bin/env python32"""Tests for verify_claim_support.py CLI."""34import json5import os6import shutil7import subprocess8import sys9import tempfile10import unittest1112SCRIPT = os.path.join(os.path.dirname(__file__), '..', 'scripts', 'verify_claim_support.py')131415def run_vcs(*args: str, expect_fail: bool = False) -> dict | str:16"""Run verify_claim_support.py."""17result = subprocess.run(18[sys.executable, SCRIPT, *args],19capture_output=True, text=True,20)21if result.returncode != 0 and not expect_fail:22raise RuntimeError(f'Exit {result.returncode}: {result.stderr}\n{result.stdout}')23stdout = result.stdout.strip()24if stdout.startswith('{'):25return json.loads(stdout)26return stdout272829def write_jsonl(path: str, rows: list[dict]):30with open(path, 'w') as f:31for row in rows:32f.write(json.dumps(row) + '\n')333435class TestVerifySupported(unittest.TestCase):36"""Claims with matching evidence should be supported."""3738def setUp(self):39self.tmpdir = tempfile.mkdtemp()40# Sources41write_jsonl(os.path.join(self.tmpdir, 'sources.jsonl'), [42{'source_id': 'src_quantum_001', 'title': 'Quantum Computing 2024'},43])44# Evidence with clear overlap to the claim45write_jsonl(os.path.join(self.tmpdir, 'evidence.jsonl'), [46{47'evidence_id': 'ev_shor_001',48'source_id': 'src_quantum_001',49'quote': "Shor's algorithm can factor large integers exponentially faster than any known classical algorithm, threatening RSA-2048 encryption.",50'evidence_type': 'direct_quote',51},52])53# Claim that matches the evidence54write_jsonl(os.path.join(self.tmpdir, 'claims.jsonl'), [55{56'claim_id': 'clm_factor_001',57'section_id': 'finding_1',58'text': "Shor's algorithm can factor large numbers exponentially faster than classical methods, threatening RSA-2048.",59'claim_type': 'factual',60'cited_source_ids': ['src_quantum_001'],61'evidence_ids': ['ev_shor_001'],62'support_status': 'unverified',63},64])6566def tearDown(self):67shutil.rmtree(self.tmpdir, ignore_errors=True)6869def test_supported_claim(self):70out = run_vcs('verify', '--dir', self.tmpdir)71self.assertEqual(out['status'], 'pass')72self.assertEqual(out['factual_unsupported'], 0)7374# Check updated claims file75claims = []76with open(os.path.join(self.tmpdir, 'claims.jsonl')) as f:77for line in f:78claims.append(json.loads(line))79self.assertEqual(claims[0]['support_status'], 'supported')808182class TestVerifyUnsupported(unittest.TestCase):83"""Claims without evidence should be unsupported."""8485def setUp(self):86self.tmpdir = tempfile.mkdtemp()87write_jsonl(os.path.join(self.tmpdir, 'sources.jsonl'), [])88write_jsonl(os.path.join(self.tmpdir, 'evidence.jsonl'), [])89write_jsonl(os.path.join(self.tmpdir, 'claims.jsonl'), [90{91'claim_id': 'clm_no_ev_001',92'section_id': 'finding_1',93'text': 'The population of Mars is 500 million as of 2025.',94'claim_type': 'factual',95'cited_source_ids': [],96'evidence_ids': [],97'support_status': 'unverified',98},99])100101def tearDown(self):102shutil.rmtree(self.tmpdir, ignore_errors=True)103104def test_unsupported_no_evidence(self):105out = run_vcs('verify', '--dir', self.tmpdir)106self.assertEqual(out['factual_unsupported'], 1)107self.assertEqual(out['status'], 'pass') # Non-strict by default108109def test_strict_fails(self):110out = run_vcs('verify', '--dir', self.tmpdir, '--strict', expect_fail=True)111self.assertEqual(out['status'], 'fail')112113114class TestVerifyMixed(unittest.TestCase):115"""Mixed claim types with different thresholds."""116117def setUp(self):118self.tmpdir = tempfile.mkdtemp()119write_jsonl(os.path.join(self.tmpdir, 'sources.jsonl'), [])120write_jsonl(os.path.join(self.tmpdir, 'evidence.jsonl'), [])121write_jsonl(os.path.join(self.tmpdir, 'claims.jsonl'), [122{123'claim_id': 'clm_spec_001',124'section_id': 'finding_1',125'text': 'Quantum computers might eventually solve protein folding in real time.',126'claim_type': 'speculation',127'cited_source_ids': [],128'evidence_ids': [],129'support_status': 'unverified',130},131{132'claim_id': 'clm_rec_001',133'section_id': 'recommendations',134'text': 'Organizations should begin PQC migration planning immediately.',135'claim_type': 'recommendation',136'cited_source_ids': [],137'evidence_ids': [],138'support_status': 'unverified',139},140])141142def tearDown(self):143shutil.rmtree(self.tmpdir, ignore_errors=True)144145def test_speculation_passes(self):146out = run_vcs('verify', '--dir', self.tmpdir)147# Speculation doesn't need evidence148claims = []149with open(os.path.join(self.tmpdir, 'claims.jsonl')) as f:150for line in f:151claims.append(json.loads(line))152spec = [c for c in claims if c['claim_type'] == 'speculation'][0]153self.assertEqual(spec['support_status'], 'supported')154155156class TestVerifyPartial(unittest.TestCase):157"""Evidence with partial overlap should result in partial status."""158159def setUp(self):160self.tmpdir = tempfile.mkdtemp()161write_jsonl(os.path.join(self.tmpdir, 'sources.jsonl'), [162{'source_id': 'src_nist_001', 'title': 'NIST PQC Standards'},163])164write_jsonl(os.path.join(self.tmpdir, 'evidence.jsonl'), [165{166'evidence_id': 'ev_nist_001',167'source_id': 'src_nist_001',168'quote': 'NIST announced the standardization of CRYSTALS-Kyber for key encapsulation.',169'evidence_type': 'direct_quote',170},171])172# Claim mentions NIST but adds unverified detail about timeline173write_jsonl(os.path.join(self.tmpdir, 'claims.jsonl'), [174{175'claim_id': 'clm_nist_time',176'section_id': 'finding_2',177'text': 'NIST standardized four lattice-based algorithms in 2024, covering both encryption and signatures.',178'claim_type': 'factual',179'cited_source_ids': ['src_nist_001'],180'evidence_ids': ['ev_nist_001'],181'support_status': 'unverified',182},183])184185def tearDown(self):186shutil.rmtree(self.tmpdir, ignore_errors=True)187188def test_partial_support(self):189out = run_vcs('verify', '--dir', self.tmpdir)190claims = []191with open(os.path.join(self.tmpdir, 'claims.jsonl')) as f:192for line in f:193claims.append(json.loads(line))194# Should be partial or needs_review (not fully supported due to number/detail mismatch)195self.assertIn(claims[0]['support_status'], ('partial', 'needs_review', 'supported'))196197198class TestSupportScore(unittest.TestCase):199"""Unit tests for compute_support_score."""200201@classmethod202def setUpClass(cls):203sys.path.insert(0, os.path.join(os.path.dirname(__file__), '..', 'scripts'))204from verify_claim_support import compute_support_score205cls.score = staticmethod(compute_support_score)206207def test_identical_text(self):208status, score, _ = self.score(209'RSA-2048 uses 2048-bit keys for encryption.',210['RSA-2048 uses 2048-bit keys for encryption.'],211)212self.assertEqual(status, 'supported')213self.assertGreater(score, 0.8)214215def test_no_evidence(self):216status, score, _ = self.score('Any claim text.', [])217self.assertEqual(status, 'unsupported')218self.assertEqual(score, 0.0)219220def test_unrelated_evidence(self):221status, score, _ = self.score(222'The moon landing occurred in 1969.',223['Bananas are a good source of potassium and fiber.'],224)225self.assertIn(status, ('needs_review', 'unsupported'))226self.assertLess(score, 0.35)227228229if __name__ == '__main__':230unittest.main()231