Loading source
Pulling the file list, source metadata, and syntax-aware rendering for this listing.
Source from repo
Enterprise-grade research with multi-source synthesis, citation tracking, and verification. 8-phase pipeline with auto-continuation.
Files
Skill
Size
Entrypoint
Format
Open file
Syntax-highlighted preview of this file as included in the skill package.
scripts/research_engine.py
#!/usr/bin/env python3
"""
Deep Research Engine — STATE SCAFFOLD (not a runtime orchestrator)

This file provides phase instruction templates and research state persistence.
It does NOT drive Claude Code — Claude is the orchestrator; this file provides
data structures and CLI utilities for state management.

For the actual research workflow, see reference/methodology.md.
For the evidence substrate, see scripts/citation_manager.py and scripts/evidence_store.py.
"""

import argparse
import json
import sys
import time
from datetime import datetime
from pathlib import Path
from typing import Dict, List, Optional, Any
from dataclasses import dataclass, asdict
from enum import Enum


class ResearchPhase(Enum):
    """Research pipeline phases"""
    SCOPE = "scope"
    PLAN = "plan"
    RETRIEVE = "retrieve"
    TRIANGULATE = "triangulate"
    SYNTHESIZE = "synthesize"
    CRITIQUE = "critique"
    REFINE = "refine"
    PACKAGE = "package"


class ResearchMode(Enum):
    """Research depth modes"""
    QUICK = "quick"          # 3 phases: scope, retrieve, package
    STANDARD = "standard"    # 6 phases: skip refine and critique
    DEEP = "deep"            # Full 8 phases
    ULTRADEEP = "ultradeep"  # 8 phases + extended iterations


@dataclass
class Source:
    """Represents a research source"""
    url: str
    title: str
    snippet: str
    retrieved_at: str
    credibility_score: float = 0.0
    source_type: str = "web"  # web, academic, documentation, code
    verification_status: str = "unverified"  # unverified, verified, conflicted

    def to_citation(self, index: int) -> str:
        """Generate citation string for bibliography entry *index*."""
        return f"[{index}] {self.title} - {self.url} (Retrieved: {self.retrieved_at})"


@dataclass
class ResearchState:
    """Maintains research state across phases"""
    query: str
    mode: ResearchMode
    phase: ResearchPhase
    scope: Dict[str, Any]
    plan: Dict[str, Any]
    sources: List[Source]
    findings: List[Dict[str, Any]]
    synthesis: Dict[str, Any]
    critique: Dict[str, Any]
    report: str
    metadata: Dict[str, Any]

    def save(self, filepath: Path) -> None:
        """Save research state to *filepath* as JSON, retrying transient I/O errors.

        Raises:
            IOError: if all retry attempts fail (original error chained as cause).
        """
        max_retries = 3
        for attempt in range(max_retries):
            try:
                with open(filepath, 'w', encoding='utf-8') as f:
                    json.dump(self._serialize(), f, indent=2)
                return  # Success
            except (IOError, OSError) as e:
                if attempt == max_retries - 1:
                    # Final attempt failed; chain the cause for debuggability.
                    raise IOError(
                        f"Failed to save state after {max_retries} attempts: {e}"
                    ) from e
                # Linear backoff before retry: 0.5s after the first failure,
                # 1.0s after the second (the third failure raises above).
                wait_time = (attempt + 1) * 0.5
                time.sleep(wait_time)

    def _serialize(self) -> dict:
        """Convert to a JSON-serializable dict (enums → their string values)."""
        return {
            'query': self.query,
            'mode': self.mode.value,
            'phase': self.phase.value,
            'scope': self.scope,
            'plan': self.plan,
            'sources': [asdict(s) for s in self.sources],
            'findings': self.findings,
            'synthesis': self.synthesis,
            'critique': self.critique,
            'report': self.report,
            'metadata': self.metadata
        }

    @classmethod
    def load(cls, filepath: Path) -> 'ResearchState':
        """Load research state from a JSON file written by :meth:`save`.

        Raises:
            OSError: if the file cannot be read.
            KeyError / ValueError: if the file is missing fields or has
                unknown mode/phase values.
        """
        with open(filepath, 'r', encoding='utf-8') as f:
            data = json.load(f)

        return cls(
            query=data['query'],
            mode=ResearchMode(data['mode']),
            phase=ResearchPhase(data['phase']),
            scope=data['scope'],
            plan=data['plan'],
            sources=[Source(**s) for s in data['sources']],
            findings=data['findings'],
            synthesis=data['synthesis'],
            critique=data['critique'],
            report=data['report'],
            metadata=data['metadata']
        )


class ResearchEngine:
    """Main research orchestration engine"""

    def __init__(self, mode: ResearchMode = ResearchMode.STANDARD):
        self.mode = mode
        self.state: Optional[ResearchState] = None
        # All state snapshots and reports land under the user's home dir.
        self.output_dir = Path.home() / ".claude" / "research_output"
        self.output_dir.mkdir(parents=True, exist_ok=True)

    def initialize_research(self, query: str) -> ResearchState:
        """Initialize a new research session for *query*, replacing any prior state."""
        self.state = ResearchState(
            query=query,
            mode=self.mode,
            phase=ResearchPhase.SCOPE,
            scope={},
            plan={},
            sources=[],
            findings=[],
            synthesis={},
            critique={},
            report="",
            metadata={
                'started_at': datetime.now().isoformat(),
                'version': '1.0'
            }
        )
        return self.state

    def get_phase_instructions(self, phase: ResearchPhase) -> str:
        """Return the instruction template for *phase* (Claude executes these)."""
        instructions = {
            ResearchPhase.SCOPE: """
# Phase 1: SCOPE

Your task: Define research boundaries and success criteria

## Execute:
1. Decompose the question into 3-5 core components
2. Identify 2-4 key stakeholder perspectives
3. Define what's IN scope and what's OUT of scope
4. List 3-5 success criteria for this research
5. Document 3-5 assumptions that need validation

## Output Format:
```json
{
"core_components": ["component1", "component2", ...],
"stakeholder_perspectives": ["perspective1", "perspective2", ...],
"in_scope": ["item1", "item2", ...],
"out_of_scope": ["item1", "item2", ...],
"success_criteria": ["criteria1", "criteria2", ...],
"assumptions": ["assumption1", "assumption2", ...]
}
```

Use extended reasoning to explore multiple framings before finalizing scope.
""",
            ResearchPhase.PLAN: """
# Phase 2: PLAN

Your task: Create intelligent research roadmap

## Execute:
1. Identify 5-10 primary sources to investigate
2. List 5-10 secondary/backup sources
3. Map knowledge dependencies (what must be understood first)
4. Create 10-15 search query variations
5. Plan triangulation approach (how to verify claims)
6. Define 3-5 quality gates

## Output Format:
```json
{
"primary_sources": ["source_type1", "source_type2", ...],
"secondary_sources": ["source_type1", "source_type2", ...],
"knowledge_dependencies": {"concept1": ["prerequisite1", "prerequisite2"], ...},
"search_queries": ["query1", "query2", ...],
"triangulation_strategy": "description of verification approach",
"quality_gates": ["gate1", "gate2", ...]
}
```

Use Graph-of-Thoughts: branch into 3-4 potential research paths, evaluate, then converge on optimal strategy.
""",
            ResearchPhase.RETRIEVE: """
# Phase 3: RETRIEVE

Your task: Systematically collect information from multiple sources

## Execute:
1. Use WebSearch with iterative query refinement (minimum 10 searches)
2. Use WebFetch to deep-dive into 5-10 most promising sources
3. Extract key passages with metadata
4. Track information gaps
5. Follow 2-3 promising tangents
6. Ensure source diversity (different domains, perspectives)

## Tools to Use:
- WebSearch: For current information and broad coverage
- WebFetch: For detailed extraction from specific URLs
- Grep/Read: For local documentation if relevant
- Task: Spawn 2-3 parallel retrieval agents for efficiency

## Output:
Store all sources with metadata. Each source should include:
- URL/location
- Title
- Key excerpts
- Relevance score
- Source type
- Retrieved timestamp

Aim for 15-30 distinct sources minimum.
""",
            ResearchPhase.TRIANGULATE: """
# Phase 4: TRIANGULATE

Your task: Validate information across multiple independent sources

## Execute:
1. List all major claims from retrieved information
2. For each claim, find 3+ independent confirmatory sources
3. Flag any contradictions or uncertainties
4. Assess source credibility (domain expertise, recency, bias)
5. Document consensus areas vs. debate areas
6. Mark verification status for each claim

## Quality Standards:
- Core claims MUST have 3+ independent sources
- Flag any single-source claims as "unverified"
- Note information recency
- Identify potential biases

## Output Format:
```json
{
"verified_claims": [
{
"claim": "statement",
"sources": ["source1", "source2", "source3"],
"confidence": "high|medium|low"
}
],
"unverified_claims": [...],
"contradictions": [
{
"topic": "what's contradicted",
"viewpoint1": {"claim": "...", "sources": [...]},
"viewpoint2": {"claim": "...", "sources": [...]}
}
]
}
```
""",
            ResearchPhase.SYNTHESIZE: """
# Phase 5: SYNTHESIZE

Your task: Connect insights and generate novel understanding

## Execute:
1. Identify 5-10 key patterns across sources
2. Map relationships between concepts
3. Generate 3-5 insights that go beyond source material
4. Create conceptual frameworks or mental models
5. Build argument structures
6. Develop evidence hierarchies

## Use Extended Reasoning:
- Explore non-obvious connections
- Consider second-order implications
- Think about what sources might be missing
- Generate novel hypotheses

## Output Format:
```json
{
"patterns": ["pattern1", "pattern2", ...],
"concept_relationships": {"concept1": ["related_to1", "related_to2"], ...},
"novel_insights": ["insight1", "insight2", ...],
"frameworks": ["framework_description1", ...],
"key_arguments": [
{
"argument": "main claim",
"supporting_evidence": ["evidence1", "evidence2"],
"strength": "strong|moderate|weak"
}
]
}
```
""",
            ResearchPhase.CRITIQUE: """
# Phase 6: CRITIQUE

Your task: Rigorously evaluate research quality

## Execute Red Team Analysis:
1. Check logical consistency
2. Verify citation completeness
3. Identify gaps or weaknesses
4. Assess balance and objectivity
5. Test alternative interpretations
6. Challenge assumptions

## Red Team Questions:
- What's missing from this research?
- What could be wrong?
- What alternative explanations exist?
- What biases might be present?
- What counterfactuals should be considered?
- What would a skeptic say?

## Output Format:
```json
{
"strengths": ["strength1", "strength2", ...],
"weaknesses": ["weakness1", "weakness2", ...],
"gaps": ["gap1", "gap2", ...],
"biases": ["bias1", "bias2", ...],
"improvements_needed": [
{
"issue": "description",
"recommendation": "how to fix",
"priority": "high|medium|low"
}
]
}
```
""",
            ResearchPhase.REFINE: """
# Phase 7: REFINE

Your task: Address gaps and strengthen weak areas

## Execute:
1. Conduct additional research for identified gaps
2. Strengthen weak arguments with more evidence
3. Add missing perspectives
4. Resolve contradictions where possible
5. Enhance clarity and structure
6. Verify all revised content

## Focus On:
- High priority improvements from critique
- Missing stakeholder perspectives
- Weak evidence chains
- Unclear explanations

## Output:
Updated findings, sources, and synthesis with improvements documented.
""",
            ResearchPhase.PACKAGE: """
# Phase 8: PACKAGE

Your task: Deliver professional, actionable research report

## Generate Complete Report:

```markdown
# Research Report: [Topic]

## Executive Summary
[3-5 key findings bullets]
[Primary recommendation]
[Confidence level: High/Medium/Low]

## Introduction
### Research Question
[Original question]

### Scope & Methodology
[What was investigated and how]

### Key Assumptions
[Important assumptions made]

## Main Analysis

### Finding 1: [Title]
[Detailed explanation with evidence]
[Citations: [1], [2], [3]]

### Finding 2: [Title]
[Detailed explanation with evidence]
[Citations: [4], [5], [6]]

[Continue for all findings...]

## Synthesis & Insights
[Patterns and connections]
[Novel insights]
[Implications]

## Limitations & Caveats
[Known gaps]
[Assumptions]
[Areas of uncertainty]

## Recommendations
[Action items]
[Next steps]
[Further research needs]

## Bibliography
[1] Source 1 full citation
[2] Source 2 full citation
...

## Appendix: Methodology
[Research process]
[Sources consulted]
[Verification approach]
```

Save report to file with timestamp.
"""
        }

        return instructions.get(phase, "No instructions available for this phase")

    def execute_phase(self, phase: ResearchPhase) -> Dict[str, Any]:
        """Display instructions for *phase* and return a phase-status record."""
        print(f"\n{'='*80}")
        print(f"PHASE {phase.value.upper()}: Starting...")
        print(f"{'='*80}\n")

        instructions = self.get_phase_instructions(phase)
        print(instructions)

        # In real usage, Claude will execute these instructions
        # This returns a structured result that Claude should populate
        result = {
            'phase': phase.value,
            'status': 'instructions_displayed',
            'timestamp': datetime.now().isoformat()
        }

        return result

    def run_pipeline(self, query: str, resume: bool = False) -> str:
        """Run the complete research pipeline.

        Args:
            query: Research question. Ignored when ``resume`` is True and a
                state is already loaded (the saved query is used instead).
            resume: When True and ``self.state`` is populated (e.g. via
                ``ResearchState.load``), continue from the saved phase rather
                than re-initializing — previously the loaded state was always
                clobbered by a fresh ``initialize_research`` call.

        Returns:
            The report file path (as a string) for this run.
        """
        if resume and self.state is not None:
            # Honor the saved session: its query and mode win over arguments.
            query = self.state.query
            self.mode = self.state.mode
        else:
            # Initialize research
            self.initialize_research(query)

        print(f"\n{'#'*80}")
        print(f"# DEEP RESEARCH ENGINE")
        print(f"# Query: {query}")
        print(f"# Mode: {self.mode.value}")
        print(f"{'#'*80}\n")

        # Determine phases based on mode
        phases = self._get_phases_for_mode()

        # When resuming, skip phases that precede the saved one.
        if resume and self.state is not None and self.state.phase in phases:
            phases = phases[phases.index(self.state.phase):]

        # Execute each phase
        for phase in phases:
            self.state.phase = phase
            result = self.execute_phase(phase)

            # Save state after each phase. The phase name is part of the
            # filename because the second-resolution timestamp alone made
            # all phases of a fast run collide into a single file.
            state_file = self.output_dir / (
                f"research_state_{datetime.now().strftime('%Y%m%d_%H%M%S')}"
                f"_{phase.value}.json"
            )
            self.state.save(state_file)
            print(f"\n✓ Phase {phase.value} complete. State saved to: {state_file}\n")

        # Generate report path
        report_file = self.output_dir / f"research_report_{datetime.now().strftime('%Y%m%d_%H%M%S')}.md"

        print(f"\n{'='*80}")
        print(f"RESEARCH PIPELINE COMPLETE")
        print(f"Report will be saved to: {report_file}")
        print(f"{'='*80}\n")

        return str(report_file)

    def _get_phases_for_mode(self) -> List[ResearchPhase]:
        """Get the ordered phase list for the engine's research mode."""
        if self.mode == ResearchMode.QUICK:
            return [
                ResearchPhase.SCOPE,
                ResearchPhase.RETRIEVE,
                ResearchPhase.PACKAGE
            ]
        elif self.mode == ResearchMode.STANDARD:
            return [
                ResearchPhase.SCOPE,
                ResearchPhase.PLAN,
                ResearchPhase.RETRIEVE,
                ResearchPhase.TRIANGULATE,
                ResearchPhase.SYNTHESIZE,
                ResearchPhase.PACKAGE
            ]
        elif self.mode == ResearchMode.DEEP:
            return list(ResearchPhase)
        elif self.mode == ResearchMode.ULTRADEEP:
            # In ultradeep, we might iterate some phases
            return list(ResearchPhase)

        return list(ResearchPhase)


def main():
    """CLI entry point"""
    parser = argparse.ArgumentParser(
        description="Deep Research Engine for Claude Code",
        formatter_class=argparse.RawDescriptionHelpFormatter,
        epilog="""
Examples:
  python research_engine.py --query "state of quantum computing 2025" --mode deep
  python research_engine.py --query "PostgreSQL vs Supabase comparison" --mode standard
  python research_engine.py -q "longevity biotech funding trends" -m ultradeep
"""
    )

    parser.add_argument(
        '--query', '-q',
        type=str,
        help='Research question or topic (required unless --resume is given)'
    )

    parser.add_argument(
        '--mode', '-m',
        type=str,
        choices=['quick', 'standard', 'deep', 'ultradeep'],
        default='standard',
        help='Research depth mode (default: standard)'
    )

    parser.add_argument(
        '--resume',
        type=str,
        help='Resume from saved state file'
    )

    args = parser.parse_args()

    # --query was previously always required, which made --resume unusable
    # on its own; now it is required only for fresh runs.
    if not args.query and not args.resume:
        parser.error('--query is required unless --resume is given')

    # Initialize engine
    mode = ResearchMode(args.mode)
    engine = ResearchEngine(mode=mode)

    resuming = False
    if args.resume:
        # Load previous state and continue from it (the saved mode takes
        # precedence over the CLI --mode default).
        state_file = Path(args.resume)
        if not state_file.exists():
            print(f"Error: State file not found: {state_file}", file=sys.stderr)
            sys.exit(1)
        engine.state = ResearchState.load(state_file)
        resuming = True
        print(f"Resumed research from: {state_file}")

    # Run pipeline
    report_path = engine.run_pipeline(args.query or "", resume=resuming)

    print(f"\nResearch complete! Report path: {report_path}")
    print(f"\nNow Claude should execute each phase using the displayed instructions.")


if __name__ == '__main__':
    main()