Loading source
Pulling the file list, source metadata, and syntax-aware rendering for this listing.
Source from repo
Enterprise-grade research with multi-source synthesis, citation tracking, and verification. 8-phase pipeline with auto-continuation.
Files
Skill
Size
Entrypoint
Format
Open file
Syntax-highlighted preview of this file as included in the skill package.
scripts/validate_report.py
#!/usr/bin/env python3
"""
Report Validation Script
Ensures research reports meet quality standards before delivery
"""

import argparse
import re
import sys
from pathlib import Path
from typing import Dict, List, Optional, Tuple


class ReportValidator:
    """Validates research report quality.

    The report is read once at construction time. Each ``_check_*`` method
    inspects ``self.content``, appends human-readable messages to
    ``self.errors`` (blocking) and/or ``self.warnings`` (advisory), and
    returns ``True`` when the check passed.
    """

    # The bibliography section runs from its heading up to the next "##"
    # sequence or the end of the document. Compiled once because several
    # checks need it.
    _BIBLIOGRAPHY_RE = re.compile(
        r'## Bibliography(.*?)(?=##|\Z)', re.DOTALL | re.IGNORECASE
    )

    def __init__(self, report_path: Path):
        """Load the report at *report_path* and reset the result lists."""
        self.report_path = report_path
        self.content = self._read_report()
        self.errors: List[str] = []
        self.warnings: List[str] = []

    def _read_report(self) -> str:
        """Return the report text; print an error and exit(1) if unreadable."""
        try:
            with open(self.report_path, 'r', encoding='utf-8') as f:
                return f.read()
        except (OSError, UnicodeError) as e:
            # Only I/O and decoding failures are expected here; anything else
            # is a programming error and should surface with a traceback.
            print(f"❌ ERROR: Cannot read report: {e}")
            sys.exit(1)

    def _split_bibliography(self) -> Tuple[Optional[str], str]:
        """Split the report into (bibliography text, everything else).

        Returns ``(None, full_content)`` when there is no bibliography
        section. The body is what in-text citations must be searched in:
        searching the whole document would also match the ``[N]`` markers of
        the bibliography entries themselves.
        """
        match = self._BIBLIOGRAPHY_RE.search(self.content)
        if not match:
            return None, self.content
        # Join with a newline so text on either side of the removed section
        # cannot fuse into a spurious "[N]" token.
        body = self.content[:match.start()] + "\n" + self.content[match.end():]
        return match.group(1), body

    def validate(self) -> bool:
        """Run all validation checks.

        Prints a PASS/FAIL line per check followed by a summary, and returns
        ``True`` when no errors were recorded (warnings do not fail the run).
        """
        print(f"\n{'='*60}")
        print(f"VALIDATING REPORT: {self.report_path.name}")
        print(f"{'='*60}\n")

        checks = [
            ("Executive Summary", self._check_executive_summary),
            ("Required Sections", self._check_required_sections),
            ("Citations", self._check_citations),
            ("Bibliography", self._check_bibliography),
            ("Placeholder Text", self._check_placeholders),
            ("Content Truncation", self._check_content_truncation),
            ("Word Count", self._check_word_count),
            ("Source Count", self._check_source_count),
            ("Broken Links", self._check_broken_references),
        ]

        for check_name, check_func in checks:
            print(f"⏳ Checking: {check_name}...", end=" ")
            passed = check_func()
            if passed:
                print("✅ PASS")
            else:
                print("❌ FAIL")

        self._print_summary()

        return len(self.errors) == 0

    def _check_executive_summary(self) -> bool:
        """Check the executive summary exists; warn if outside 50-400 words.

        A missing section is an error. Length problems are warnings only, so
        the check still passes.
        """
        pattern = r'## Executive Summary(.*?)(?=##|\Z)'
        match = re.search(pattern, self.content, re.DOTALL | re.IGNORECASE)

        if not match:
            self.errors.append("Missing 'Executive Summary' section")
            return False

        summary = match.group(1).strip()
        word_count = len(summary.split())

        if word_count > 400:
            self.warnings.append(
                f"Executive summary too long: {word_count} words (should be ≤400)"
            )

        if word_count < 50:
            self.warnings.append(
                f"Executive summary too short: {word_count} words (should be ≥50)"
            )

        return True

    def _has_section(self, section: str) -> bool:
        """Return True if a '##' heading line mentions *section* (any case)."""
        # Escape the name so it is always matched literally.
        return re.search(
            rf'##.*{re.escape(section)}', self.content, re.IGNORECASE
        ) is not None

    def _check_required_sections(self) -> bool:
        """Check all required sections are present.

        Missing required sections are an error; missing recommended sections
        only produce a warning (and are only examined when nothing required
        is missing).
        """
        required = [
            "Executive Summary",
            "Introduction",
            "Main Analysis",
            "Synthesis",
            "Limitations",
            "Recommendations",
            "Bibliography",
            "Methodology",
        ]

        # Recommended sections (warnings if missing, not errors)
        recommended = [
            "Counterevidence Register",
            "Claims-Evidence Table",
        ]

        missing = [s for s in required if not self._has_section(s)]
        if missing:
            self.errors.append(f"Missing sections: {', '.join(missing)}")
            return False

        missing_recommended = [s for s in recommended if not self._has_section(s)]
        if missing_recommended:
            self.warnings.append(
                f"Missing recommended sections (for academic rigor): {', '.join(missing_recommended)}"
            )

        return True

    def _check_citations(self) -> bool:
        """Check that the report body cites sources as [1], [2], ...

        Only text outside the bibliography is considered, so bibliography
        entry markers do not count as citations. No citations at all is an
        error; fewer than 10 unique citations or gaps in the numbering are
        warnings.
        """
        _, body = self._split_bibliography()
        citation_nums = sorted({int(c) for c in re.findall(r'\[(\d+)\]', body)})

        if not citation_nums:
            self.errors.append("No citations found in report")
            return False

        if len(citation_nums) < 10:
            self.warnings.append(
                f"Only {len(citation_nums)} unique sources cited (recommended: ≥10)"
            )

        # Check for consecutive citation numbers
        missing = set(range(1, citation_nums[-1] + 1)) - set(citation_nums)
        if missing:
            self.warnings.append(
                f"Non-consecutive citation numbers, missing: {sorted(missing)}"
            )

        return True

    def _check_bibliography(self) -> bool:
        """Check bibliography exists, matches citations, and is not truncated.

        Errors: missing section, truncation placeholders, no entries, gaps in
        entry numbering, or in-text citations without an entry. Entries that
        are never cited in the body produce a warning only.
        """
        bib_section, body = self._split_bibliography()

        if bib_section is None:
            self.errors.append("Missing 'Bibliography' section")
            return False

        # CRITICAL: Check for truncation placeholders (2025 CiteGuard enhancement)
        truncation_patterns = [
            (r'\[\d+-\d+\]', 'Citation range (e.g., [8-75])'),
            (r'Additional.*citations', 'Phrase "Additional citations"'),
            (r'would be included', 'Phrase "would be included"'),
            (r'\[\.\.\.continue', 'Pattern "[...continue"'),
            (r'\[Continue with', 'Pattern "[Continue with"'),
            (r'etc\.(?!\w)', 'Standalone "etc."'),
            (r'and so on', 'Phrase "and so on"'),
        ]

        for pattern_re, description in truncation_patterns:
            if re.search(pattern_re, bib_section, re.IGNORECASE):
                self.errors.append(
                    f"⚠️ CRITICAL: Bibliography contains truncation placeholder: {description}"
                )
                self.errors.append(
                    " This makes the report UNUSABLE - complete bibliography required"
                )
                return False

        # Bibliography entries are "[N]" markers at the start of a line.
        bib_nums = {
            int(n) for n in re.findall(r'^\[(\d+)\]', bib_section, re.MULTILINE)
        }

        if not bib_nums:
            self.errors.append("Bibliography has no entries")
            return False

        # Check citation number continuity (no gaps)
        missing = sorted(set(range(1, max(bib_nums) + 1)) - bib_nums)
        if missing:
            self.errors.append(f"Bibliography has gaps in numbering: missing {missing}")
            return False

        # In-text citations come from the body only; including the
        # bibliography would make every entry look "cited" by itself.
        text_nums = {int(n) for n in re.findall(r'\[(\d+)\]', body)}

        # Check all citations have bibliography entries
        missing_in_bib = text_nums - bib_nums
        if missing_in_bib:
            self.errors.append(
                f"Citations missing from bibliography: {sorted(missing_in_bib)}"
            )
            return False

        # Check for unused bibliography entries
        unused = bib_nums - text_nums
        if unused:
            self.warnings.append(f"Unused bibliography entries: {sorted(unused)}")

        return True

    def _check_placeholders(self) -> bool:
        """Check for placeholder text that shouldn't be in final report.

        Bare markers (TBD, TODO, ...) must appear as whole words so that
        ordinary text such as the Roman numeral "XXXIV" is not flagged;
        bracketed markers are matched literally. Matching is case-sensitive.
        """
        word_markers = ['TBD', 'TODO', 'FIXME', 'XXX']
        literal_markers = [
            '[citation needed]', '[needs citation]',
            '[placeholder]', '[TODO]', '[TBD]',
        ]

        found_placeholders = [
            marker for marker in word_markers
            if re.search(rf'\b{marker}\b', self.content)
        ]
        found_placeholders += [
            marker for marker in literal_markers if marker in self.content
        ]

        if found_placeholders:
            self.errors.append(
                f"Found placeholder text: {', '.join(found_placeholders)}"
            )
            return False

        return True

    def _check_content_truncation(self) -> bool:
        """Check for content truncation patterns (2025 Progressive Assembly enhancement).

        Stops at the first pattern found anywhere in the report and records
        it as an error.
        """
        truncation_patterns = [
            (r'Content continues', 'Phrase "Content continues"'),
            (r'Due to length', 'Phrase "Due to length"'),
            (r'would continue', 'Phrase "would continue"'),
            (r'\[Sections \d+-\d+', 'Pattern "[Sections X-Y"'),
            (r'Additional sections', 'Phrase "Additional sections"'),
            (r'comprehensive.*word document that continues',
             'Pattern "comprehensive...document that continues"'),
        ]

        for pattern_re, description in truncation_patterns:
            if re.search(pattern_re, self.content, re.IGNORECASE):
                self.errors.append(
                    f"⚠️ CRITICAL: Content truncation detected: {description}"
                )
                self.errors.append(
                    " Report is INCOMPLETE and UNUSABLE - regenerate with progressive assembly"
                )
                return False

        return True

    def _check_word_count(self) -> bool:
        """Check overall report length; warn (never fail) below 500 words."""
        word_count = len(self.content.split())

        if word_count < 500:
            self.warnings.append(
                f"Report is very short: {word_count} words (consider expanding)"
            )
        # No upper limit warning - progressive assembly supports unlimited lengths

        return True

    def _check_source_count(self) -> bool:
        """Check minimum source count; warn (never fail) below 10 entries."""
        bib_section, _ = self._split_bibliography()

        if bib_section is None:
            return True  # Already caught in bibliography check

        bib_entries = re.findall(r'^\[(\d+)\]', bib_section, re.MULTILINE)
        source_count = len(set(bib_entries))

        if source_count < 10:
            self.warnings.append(f"Only {source_count} sources (recommended: ≥10)")

        return True

    def _check_broken_references(self) -> bool:
        """Check for broken internal references.

        Only markdown links whose target starts with ``./`` are examined;
        targets are resolved relative to the report's directory.
        """
        # Find all markdown links [text](./path)
        internal_links = re.findall(r'\[.*?\]\((\.\/.*?)\)', self.content)
        base_dir = self.report_path.parent

        # Drop any "#anchor" suffix before testing for existence on disk.
        broken = [
            link for link in internal_links
            if not (base_dir / link.split('#')[0]).exists()
        ]

        if broken:
            self.errors.append(f"Broken internal links: {', '.join(broken)}")
            return False

        return True

    def _print_summary(self):
        """Print validation summary"""
        print(f"\n{'='*60}")
        print("VALIDATION SUMMARY")
        print(f"{'='*60}\n")

        if self.errors:
            print(f"❌ ERRORS ({len(self.errors)}):")
            for error in self.errors:
                print(f" • {error}")
            print()

        if self.warnings:
            print(f"⚠️ WARNINGS ({len(self.warnings)}):")
            for warning in self.warnings:
                print(f" • {warning}")
            print()

        if not self.errors and not self.warnings:
            print("✅ ALL CHECKS PASSED - Report meets quality standards!\n")
        elif not self.errors:
            print("✅ VALIDATION PASSED (with warnings)\n")
        else:
            print("❌ VALIDATION FAILED - Please fix errors before delivery\n")


def main():
    """Parse the command line, validate the report, exit 0 on pass else 1."""
    parser = argparse.ArgumentParser(
        description="Validate research report quality",
        formatter_class=argparse.RawDescriptionHelpFormatter,
        epilog="""
Examples:
python validate_report.py --report report.md
python validate_report.py -r ~/.claude/research_output/research_report_20251104_153045.md
"""
    )

    parser.add_argument(
        '--report', '-r',
        type=str,
        required=True,
        help='Path to research report markdown file'
    )

    args = parser.parse_args()

    # Expand "~" ourselves: the shell does not do it when the path is quoted
    # or passed by another program.
    report_path = Path(args.report).expanduser()

    if not report_path.is_file():
        print(f"❌ ERROR: Report file not found: {report_path}")
        sys.exit(1)

    validator = ReportValidator(report_path)
    passed = validator.validate()

    sys.exit(0 if passed else 1)


if __name__ == '__main__':
    main()