Loading source
Pulling the file list, source metadata, and syntax-aware rendering for this listing.
Source from repo
Enterprise-grade research with multi-source synthesis, citation tracking, and verification. 8-phase pipeline with auto-continuation.
Files
Skill
Size
Entrypoint
Format
Open file
Syntax-highlighted preview of this file as included in the skill package.
tests/test_extract_claims.py
#!/usr/bin/env python3
"""Tests for extract_claims.py CLI."""

import json
import os
import shutil
import subprocess
import sys
import tempfile
import unittest

SCRIPT = os.path.join(os.path.dirname(__file__), '..', 'scripts', 'extract_claims.py')
FIXTURES = os.path.join(os.path.dirname(__file__), 'fixtures')


def run_ec(*args: str) -> dict | list:
    """Run extract_claims.py with args and return its parsed JSON stdout.

    The script is launched with the interpreter running the tests
    (``sys.executable``), so no separate environment is needed.

    Raises:
        RuntimeError: If the script exits with a non-zero status. The message
            carries the exit code and the captured stderr.
    """
    result = subprocess.run(
        [sys.executable, SCRIPT, *args],
        capture_output=True, text=True,
    )
    if result.returncode != 0:
        raise RuntimeError(f'Exit {result.returncode}: {result.stderr}')
    return json.loads(result.stdout)


def _make_claims_dir(case: unittest.TestCase) -> str:
    """Create a temp directory holding an empty claims.jsonl for *case*.

    Removal is registered through ``addCleanup`` rather than ``tearDown``:
    cleanups still run when a later step of ``setUp`` raises, whereas
    ``tearDown`` is skipped in that situation and the directory would leak.
    """
    tmpdir = tempfile.mkdtemp()
    case.addCleanup(shutil.rmtree, tmpdir, ignore_errors=True)
    # Create empty claims.jsonl
    with open(os.path.join(tmpdir, 'claims.jsonl'), 'w', encoding='utf-8'):
        pass
    return tmpdir


SAMPLE_REPORT = """\
---
title: Test Research Report
---

## Executive Summary

This report examines the impact of quantum computing on cryptography [1, 2]. The field has advanced significantly since 2020, with major breakthroughs in error correction.

## Introduction

Quantum computing represents a paradigm shift in computational capability. Researchers at Google demonstrated quantum supremacy in 2019 using a 53-qubit processor [3]. This milestone confirmed theoretical predictions made decades earlier.

## Finding 1

The Shor algorithm can factor large numbers exponentially faster than classical methods [4]. Current RSA-2048 encryption could be broken by a sufficiently large quantum computer. However, such machines are estimated to require millions of physical qubits [5, 6].

## Finding 2

Post-quantum cryptography standards should be adopted within the next 5 years. Organizations should consider hybrid classical-quantum approaches during the transition period. NIST has already standardized several lattice-based algorithms [7].

## Synthesis

Taken together, the evidence suggests that quantum computing poses a real but manageable threat to current cryptographic systems. The timeline for practical quantum attacks remains uncertain, but proactive migration reduces risk substantially.

## Recommendations

Organizations should begin evaluating post-quantum cryptography solutions immediately. Security teams should conduct a cryptographic inventory to identify vulnerable systems. Companies should consider implementing crypto-agility frameworks to enable rapid algorithm switching.

## Bibliography

[1] Smith et al. (2023). Quantum Computing Advances.
[2] Johnson (2024). Cryptographic Implications.
"""


class TestExtract(unittest.TestCase):
    """Exercise the ``extract`` subcommand against SAMPLE_REPORT."""

    def setUp(self):
        self.tmpdir = _make_claims_dir(self)
        # Write sample report
        self.report_path = os.path.join(self.tmpdir, 'report.md')
        with open(self.report_path, 'w', encoding='utf-8') as f:
            f.write(SAMPLE_REPORT)

    def test_extract_finds_claims(self):
        out = run_ec('extract', '--report', self.report_path, '--dir', self.tmpdir)
        self.assertEqual(out['status'], 'ok')
        self.assertGreater(out['claims_added'], 5)

    def test_extract_idempotent(self):
        # A second run over the same report must add nothing and skip
        # exactly the claims the first run added.
        out1 = run_ec('extract', '--report', self.report_path, '--dir', self.tmpdir)
        out2 = run_ec('extract', '--report', self.report_path, '--dir', self.tmpdir)
        self.assertEqual(out2['claims_added'], 0)
        self.assertEqual(out2['claims_skipped'], out1['claims_added'])

    def test_claim_types_assigned(self):
        run_ec('extract', '--report', self.report_path, '--dir', self.tmpdir)
        out = run_ec('stats', '--dir', self.tmpdir)
        # Should have at least factual and recommendation types
        self.assertIn('factual', out['by_type'])
        self.assertIn('recommendation', out['by_type'])

    def test_sections_detected(self):
        run_ec('extract', '--report', self.report_path, '--dir', self.tmpdir)
        out = run_ec('stats', '--dir', self.tmpdir)
        self.assertIn('finding_1', out['by_section'])
        self.assertIn('finding_2', out['by_section'])
        self.assertIn('recommendations', out['by_section'])


class TestAdd(unittest.TestCase):
    """Exercise the ``add`` subcommand with JSON claims passed via --json."""

    def setUp(self):
        self.tmpdir = _make_claims_dir(self)

    def test_add_and_dedup(self):
        claim = json.dumps({
            'section_id': 'finding_1',
            'text': 'Quantum computers can break RSA encryption.',
            'claim_type': 'factual',
        })
        out1 = run_ec('add', '--json', claim, '--dir', self.tmpdir)
        self.assertEqual(out1['status'], 'added')
        self.assertEqual(len(out1['claim_id']), 16)

        # Submitting the identical claim again is reported as a duplicate.
        out2 = run_ec('add', '--json', claim, '--dir', self.tmpdir)
        self.assertEqual(out2['status'], 'duplicate')

    def test_add_with_sources(self):
        claim = json.dumps({
            'section_id': 'finding_1',
            'text': 'NIST standardized CRYSTALS-Kyber in 2024.',
            'claim_type': 'factual',
            'cited_source_ids': ['abcdef0123456789'],
            'evidence_ids': ['1234567890abcdef'],
        })
        out = run_ec('add', '--json', claim, '--dir', self.tmpdir)
        self.assertEqual(out['status'], 'added')


class TestListAndStats(unittest.TestCase):
    """Exercise ``list`` and ``stats`` over four pre-seeded claims."""

    def setUp(self):
        self.tmpdir = _make_claims_dir(self)
        # Add mixed claims
        for sec, text, ctype in [
            ('finding_1', 'The sky appears blue due to Rayleigh scattering.', 'factual'),
            ('finding_1', 'Light wavelengths scatter differently in the atmosphere.', 'factual'),
            ('synthesis', 'Overall, atmospheric optics explains most visual phenomena.', 'synthesis'),
            ('recommendations', 'Researchers should investigate polarization effects further.', 'recommendation'),
        ]:
            run_ec('add', '--json', json.dumps({
                'section_id': sec, 'text': text, 'claim_type': ctype,
            }), '--dir', self.tmpdir)

    def test_list_all(self):
        out = run_ec('list', '--dir', self.tmpdir)
        self.assertEqual(out['count'], 4)

    def test_list_by_section(self):
        out = run_ec('list', '--dir', self.tmpdir, '--section', 'finding_1')
        self.assertEqual(out['count'], 2)

    def test_list_by_type(self):
        out = run_ec('list', '--dir', self.tmpdir, '--type', 'recommendation')
        self.assertEqual(out['count'], 1)

    def test_stats(self):
        out = run_ec('stats', '--dir', self.tmpdir)
        self.assertEqual(out['total'], 4)
        self.assertEqual(out['by_type']['factual'], 2)
        self.assertEqual(out['by_type']['synthesis'], 1)
        self.assertEqual(out['by_type']['recommendation'], 1)


class TestClaimID(unittest.TestCase):
    """Unit tests for compute_claim_id."""

    @classmethod
    def setUpClass(cls):
        # Make the scripts directory importable so the module under test can
        # be loaded directly instead of through the CLI.
        sys.path.insert(0, os.path.join(os.path.dirname(__file__), '..', 'scripts'))
        from extract_claims import compute_claim_id, classify_claim
        # staticmethod keeps the functions from being bound as methods, so
        # ``self`` is not passed as their first argument.
        cls.compute_id = staticmethod(compute_claim_id)
        cls.classify = staticmethod(classify_claim)

    def test_deterministic(self):
        id1 = self.compute_id('finding_1', 'Test claim.')
        id2 = self.compute_id('finding_1', 'Test claim.')
        self.assertEqual(id1, id2)

    def test_section_matters(self):
        id1 = self.compute_id('finding_1', 'Same text.')
        id2 = self.compute_id('finding_2', 'Same text.')
        self.assertNotEqual(id1, id2)

    def test_classify_recommendation(self):
        self.assertEqual(
            self.classify('Organizations should adopt PQC immediately.', 'recommendations'),
            'recommendation',
        )

    def test_classify_factual(self):
        self.assertEqual(
            self.classify('RSA-2048 uses 2048-bit keys.', 'finding_1'),
            'factual',
        )

    def test_classify_synthesis(self):
        self.assertEqual(
            self.classify('Taken together, the results indicate a clear trend.', 'synthesis'),
            'synthesis',
        )


if __name__ == '__main__':
    unittest.main()