Loading source
Pulling the file list, source metadata, and syntax-aware rendering for this listing.
Source from repo
Enterprise-grade research with multi-source synthesis, citation tracking, and verification. 8-phase pipeline with auto-continuation.
Files
Skill
Size
Entrypoint
Format
Open file
Syntax-highlighted preview of this file as included in the skill package.
scripts/research_engine.py
#!/usr/bin/env python3
"""
Deep Research Engine — STATE SCAFFOLD (not a runtime orchestrator)

This file provides phase instruction templates and research state persistence.
It does NOT drive Claude Code — Claude is the orchestrator; this file provides
data structures and CLI utilities for state management.

For the actual research workflow, see reference/methodology.md.
For the evidence substrate, see scripts/citation_manager.py and scripts/evidence_store.py.
"""

import argparse
import json
import sys
import time
from datetime import datetime
from pathlib import Path
from typing import Dict, List, Optional, Any
from dataclasses import dataclass, asdict
from enum import Enum


class ResearchPhase(Enum):
    """Research pipeline phases"""
    SCOPE = "scope"
    PLAN = "plan"
    RETRIEVE = "retrieve"
    TRIANGULATE = "triangulate"
    SYNTHESIZE = "synthesize"
    CRITIQUE = "critique"
    REFINE = "refine"
    PACKAGE = "package"


class ResearchMode(Enum):
    """Research depth modes"""
    QUICK = "quick"          # 3 phases: scope, retrieve, package
    STANDARD = "standard"    # 6 phases: skip refine and critique
    DEEP = "deep"            # Full 8 phases
    ULTRADEEP = "ultradeep"  # 8 phases + extended iterations


@dataclass
class Source:
    """Represents a research source"""
    url: str
    title: str
    snippet: str
    retrieved_at: str
    credibility_score: float = 0.0
    source_type: str = "web"  # web, academic, documentation, code
    verification_status: str = "unverified"  # unverified, verified, conflicted

    def to_citation(self, index: int) -> str:
        """Generate citation string for bibliography entry *index*."""
        return f"[{index}] {self.title} - {self.url} (Retrieved: {self.retrieved_at})"


@dataclass
class ResearchState:
    """Maintains research state across phases"""
    query: str
    mode: ResearchMode
    phase: ResearchPhase
    scope: Dict[str, Any]
    plan: Dict[str, Any]
    sources: List[Source]
    findings: List[Dict[str, Any]]
    synthesis: Dict[str, Any]
    critique: Dict[str, Any]
    report: str
    metadata: Dict[str, Any]

    def save(self, filepath: Path) -> None:
        """Save research state to *filepath* as JSON, retrying transient I/O errors.

        Raises:
            IOError: if all retry attempts fail (original error chained as cause).
        """
        max_retries = 3
        for attempt in range(max_retries):
            try:
                with open(filepath, 'w', encoding='utf-8') as f:
                    json.dump(self._serialize(), f, indent=2)
                return  # Success
            except (IOError, OSError) as e:
                if attempt == max_retries - 1:
                    # Final attempt failed; chain the cause for debuggability.
                    raise IOError(
                        f"Failed to save state after {max_retries} attempts: {e}"
                    ) from e
                # Linear backoff before retry: 0.5s after the first failure,
                # 1.0s after the second (the third failure raises above).
                wait_time = (attempt + 1) * 0.5
                time.sleep(wait_time)

    def _serialize(self) -> dict:
        """Convert to a JSON-serializable dict (enums → their string values)."""
        return {
            'query': self.query,
            'mode': self.mode.value,
            'phase': self.phase.value,
            'scope': self.scope,
            'plan': self.plan,
            'sources': [asdict(s) for s in self.sources],
            'findings': self.findings,
            'synthesis': self.synthesis,
            'critique': self.critique,
            'report': self.report,
            'metadata': self.metadata
        }

    @classmethod
    def load(cls, filepath: Path) -> 'ResearchState':
        """Load research state from a JSON file written by :meth:`save`.

        Raises:
            OSError: if the file cannot be read.
            KeyError / ValueError: if the file is missing fields or has
                unknown mode/phase values.
        """
        with open(filepath, 'r', encoding='utf-8') as f:
            data = json.load(f)

        return cls(
            query=data['query'],
            mode=ResearchMode(data['mode']),
            phase=ResearchPhase(data['phase']),
            scope=data['scope'],
            plan=data['plan'],
            sources=[Source(**s) for s in data['sources']],
            findings=data['findings'],
            synthesis=data['synthesis'],
            critique=data['critique'],
            report=data['report'],
            metadata=data['metadata']
        )


class ResearchEngine:
    """Main research orchestration engine"""

    def __init__(self, mode: ResearchMode = ResearchMode.STANDARD):
        self.mode = mode
        self.state: Optional[ResearchState] = None
        # All state snapshots and reports land under the user's home dir.
        self.output_dir = Path.home() / ".claude" / "research_output"
        self.output_dir.mkdir(parents=True, exist_ok=True)

    def initialize_research(self, query: str) -> ResearchState:
        """Initialize a new research session for *query*, replacing any prior state."""
        self.state = ResearchState(
            query=query,
            mode=self.mode,
            phase=ResearchPhase.SCOPE,
            scope={},
            plan={},
            sources=[],
            findings=[],
            synthesis={},
            critique={},
            report="",
            metadata={
                'started_at': datetime.now().isoformat(),
                'version': '1.0'
            }
        )
        return self.state

    def get_phase_instructions(self, phase: ResearchPhase) -> str:
        """Return the instruction template for *phase* (Claude executes these)."""
        instructions = {
            ResearchPhase.SCOPE: """
# Phase 1: SCOPE

Your task: Define research boundaries and success criteria

## Execute:
1. Decompose the question into 3-5 core components
2. Identify 2-4 key stakeholder perspectives
3. Define what's IN scope and what's OUT of scope
4. List 3-5 success criteria for this research
5. Document 3-5 assumptions that need validation

## Output Format:
```json
{
"core_components": ["component1", "component2", ...],
"stakeholder_perspectives": ["perspective1", "perspective2", ...],
"in_scope": ["item1", "item2", ...],
"out_of_scope": ["item1", "item2", ...],
"success_criteria": ["criteria1", "criteria2", ...],
"assumptions": ["assumption1", "assumption2", ...]
}
```

Use extended reasoning to explore multiple framings before finalizing scope.
""",
            ResearchPhase.PLAN: """
# Phase 2: PLAN

Your task: Create intelligent research roadmap

## Execute:
1. Identify 5-10 primary sources to investigate
2. List 5-10 secondary/backup sources
3. Map knowledge dependencies (what must be understood first)
4. Create 10-15 search query variations
5. Plan triangulation approach (how to verify claims)
6. Define 3-5 quality gates

## Output Format:
```json
{
"primary_sources": ["source_type1", "source_type2", ...],
"secondary_sources": ["source_type1", "source_type2", ...],
"knowledge_dependencies": {"concept1": ["prerequisite1", "prerequisite2"], ...},
"search_queries": ["query1", "query2", ...],
"triangulation_strategy": "description of verification approach",
"quality_gates": ["gate1", "gate2", ...]
}
```

Use Graph-of-Thoughts: branch into 3-4 potential research paths, evaluate, then converge on optimal strategy.
""",
            ResearchPhase.RETRIEVE: """
# Phase 3: RETRIEVE

Your task: Systematically collect information from multiple sources

## Execute:
1. Use WebSearch with iterative query refinement (minimum 10 searches)
2. Use WebFetch to deep-dive into 5-10 most promising sources
3. Extract key passages with metadata
4. Track information gaps
5. Follow 2-3 promising tangents
6. Ensure source diversity (different domains, perspectives)

## Tools to Use:
- WebSearch: For current information and broad coverage
- WebFetch: For detailed extraction from specific URLs
- Grep/Read: For local documentation if relevant
- Task: Spawn 2-3 parallel retrieval agents for efficiency

## Output:
Store all sources with metadata. Each source should include:
- URL/location
- Title
- Key excerpts
- Relevance score
- Source type
- Retrieved timestamp

Aim for 15-30 distinct sources minimum.
""",
            ResearchPhase.TRIANGULATE: """
# Phase 4: TRIANGULATE

Your task: Validate information across multiple independent sources

## Execute:
1. List all major claims from retrieved information
2. For each claim, find 3+ independent confirmatory sources
3. Flag any contradictions or uncertainties
4. Assess source credibility (domain expertise, recency, bias)
5. Document consensus areas vs. debate areas
6. Mark verification status for each claim

## Quality Standards:
- Core claims MUST have 3+ independent sources
- Flag any single-source claims as "unverified"
- Note information recency
- Identify potential biases

## Output Format:
```json
{
"verified_claims": [
{
"claim": "statement",
"sources": ["source1", "source2", "source3"],
"confidence": "high|medium|low"
}
],
"unverified_claims": [...],
"contradictions": [
{
"topic": "what's contradicted",
"viewpoint1": {"claim": "...", "sources": [...]},
"viewpoint2": {"claim": "...", "sources": [...]}
}
]
}
```
""",
            ResearchPhase.SYNTHESIZE: """
# Phase 5: SYNTHESIZE

Your task: Connect insights and generate novel understanding

## Execute:
1. Identify 5-10 key patterns across sources
2. Map relationships between concepts
3. Generate 3-5 insights that go beyond source material
4. Create conceptual frameworks or mental models
5. Build argument structures
6. Develop evidence hierarchies

## Use Extended Reasoning:
- Explore non-obvious connections
- Consider second-order implications
- Think about what sources might be missing
- Generate novel hypotheses

## Output Format:
```json
{
"patterns": ["pattern1", "pattern2", ...],
"concept_relationships": {"concept1": ["related_to1", "related_to2"], ...},
"novel_insights": ["insight1", "insight2", ...],
"frameworks": ["framework_description1", ...],
"key_arguments": [
{
"argument": "main claim",
"supporting_evidence": ["evidence1", "evidence2"],
"strength": "strong|moderate|weak"
}
]
}
```
""",
            ResearchPhase.CRITIQUE: """
# Phase 6: CRITIQUE

Your task: Rigorously evaluate research quality

## Execute Red Team Analysis:
1. Check logical consistency
2. Verify citation completeness
3. Identify gaps or weaknesses
4. Assess balance and objectivity
5. Test alternative interpretations
6. Challenge assumptions

## Red Team Questions:
- What's missing from this research?
- What could be wrong?
- What alternative explanations exist?
- What biases might be present?
- What counterfactuals should be considered?
- What would a skeptic say?

## Output Format:
```json
{
"strengths": ["strength1", "strength2", ...],
"weaknesses": ["weakness1", "weakness2", ...],
"gaps": ["gap1", "gap2", ...],
"biases": ["bias1", "bias2", ...],
"improvements_needed": [
{
"issue": "description",
"recommendation": "how to fix",
"priority": "high|medium|low"
}
]
}
```
""",
            ResearchPhase.REFINE: """
# Phase 7: REFINE

Your task: Address gaps and strengthen weak areas

## Execute:
1. Conduct additional research for identified gaps
2. Strengthen weak arguments with more evidence
3. Add missing perspectives
4. Resolve contradictions where possible
5. Enhance clarity and structure
6. Verify all revised content

## Focus On:
- High priority improvements from critique
- Missing stakeholder perspectives
- Weak evidence chains
- Unclear explanations

## Output:
Updated findings, sources, and synthesis with improvements documented.
""",
            ResearchPhase.PACKAGE: """
# Phase 8: PACKAGE

Your task: Deliver professional, actionable research report

## Generate Complete Report:

```markdown
# Research Report: [Topic]

## Executive Summary
[3-5 key findings bullets]
[Primary recommendation]
[Confidence level: High/Medium/Low]

## Introduction
### Research Question
[Original question]

### Scope & Methodology
[What was investigated and how]

### Key Assumptions
[Important assumptions made]

## Main Analysis

### Finding 1: [Title]
[Detailed explanation with evidence]
[Citations: [1], [2], [3]]

### Finding 2: [Title]
[Detailed explanation with evidence]
[Citations: [4], [5], [6]]

[Continue for all findings...]

## Synthesis & Insights
[Patterns and connections]
[Novel insights]
[Implications]

## Limitations & Caveats
[Known gaps]
[Assumptions]
[Areas of uncertainty]

## Recommendations
[Action items]
[Next steps]
[Further research needs]

## Bibliography
[1] Source 1 full citation
[2] Source 2 full citation
...

## Appendix: Methodology
[Research process]
[Sources consulted]
[Verification approach]
```

Save report to file with timestamp.
"""
        }

        return instructions.get(phase, "No instructions available for this phase")

    def execute_phase(self, phase: ResearchPhase) -> Dict[str, Any]:
        """Display instructions for *phase* and return a phase-status record."""
        print(f"\n{'='*80}")
        print(f"PHASE {phase.value.upper()}: Starting...")
        print(f"{'='*80}\n")

        instructions = self.get_phase_instructions(phase)
        print(instructions)

        # In real usage, Claude will execute these instructions
        # This returns a structured result that Claude should populate
        result = {
            'phase': phase.value,
            'status': 'instructions_displayed',
            'timestamp': datetime.now().isoformat()
        }

        return result

    def run_pipeline(self, query: str, resume: bool = False) -> str:
        """Run the complete research pipeline.

        Args:
            query: Research question. Ignored when ``resume`` is True and a
                state is already loaded (the saved query is used instead).
            resume: When True and ``self.state`` is populated (e.g. via
                ``ResearchState.load``), continue from the saved phase rather
                than re-initializing — previously the loaded state was always
                clobbered by a fresh ``initialize_research`` call.

        Returns:
            The report file path (as a string) for this run.
        """
        if resume and self.state is not None:
            # Honor the saved session: its query and mode win over arguments.
            query = self.state.query
            self.mode = self.state.mode
        else:
            # Initialize research
            self.initialize_research(query)

        print(f"\n{'#'*80}")
        print(f"# DEEP RESEARCH ENGINE")
        print(f"# Query: {query}")
        print(f"# Mode: {self.mode.value}")
        print(f"{'#'*80}\n")

        # Determine phases based on mode
        phases = self._get_phases_for_mode()

        # When resuming, skip phases that precede the saved one.
        if resume and self.state is not None and self.state.phase in phases:
            phases = phases[phases.index(self.state.phase):]

        # Execute each phase
        for phase in phases:
            self.state.phase = phase
            result = self.execute_phase(phase)

            # Save state after each phase. The phase name is part of the
            # filename because the second-resolution timestamp alone made
            # all phases of a fast run collide into a single file.
            state_file = self.output_dir / (
                f"research_state_{datetime.now().strftime('%Y%m%d_%H%M%S')}"
                f"_{phase.value}.json"
            )
            self.state.save(state_file)
            print(f"\n✓ Phase {phase.value} complete. State saved to: {state_file}\n")

        # Generate report path
        report_file = self.output_dir / f"research_report_{datetime.now().strftime('%Y%m%d_%H%M%S')}.md"

        print(f"\n{'='*80}")
        print(f"RESEARCH PIPELINE COMPLETE")
        print(f"Report will be saved to: {report_file}")
        print(f"{'='*80}\n")

        return str(report_file)

    def _get_phases_for_mode(self) -> List[ResearchPhase]:
        """Get the ordered phase list for the engine's research mode."""
        if self.mode == ResearchMode.QUICK:
            return [
                ResearchPhase.SCOPE,
                ResearchPhase.RETRIEVE,
                ResearchPhase.PACKAGE
            ]
        elif self.mode == ResearchMode.STANDARD:
            return [
                ResearchPhase.SCOPE,
                ResearchPhase.PLAN,
                ResearchPhase.RETRIEVE,
                ResearchPhase.TRIANGULATE,
                ResearchPhase.SYNTHESIZE,
                ResearchPhase.PACKAGE
            ]
        elif self.mode == ResearchMode.DEEP:
            return list(ResearchPhase)
        elif self.mode == ResearchMode.ULTRADEEP:
            # In ultradeep, we might iterate some phases
            return list(ResearchPhase)

        return list(ResearchPhase)


def main():
    """CLI entry point"""
    parser = argparse.ArgumentParser(
        description="Deep Research Engine for Claude Code",
        formatter_class=argparse.RawDescriptionHelpFormatter,
        epilog="""
Examples:
  python research_engine.py --query "state of quantum computing 2025" --mode deep
  python research_engine.py --query "PostgreSQL vs Supabase comparison" --mode standard
  python research_engine.py -q "longevity biotech funding trends" -m ultradeep
"""
    )

    parser.add_argument(
        '--query', '-q',
        type=str,
        help='Research question or topic (required unless --resume is given)'
    )

    parser.add_argument(
        '--mode', '-m',
        type=str,
        choices=['quick', 'standard', 'deep', 'ultradeep'],
        default='standard',
        help='Research depth mode (default: standard)'
    )

    parser.add_argument(
        '--resume',
        type=str,
        help='Resume from saved state file'
    )

    args = parser.parse_args()

    # --query was previously always required, which made --resume unusable
    # on its own; now it is required only for fresh runs.
    if not args.query and not args.resume:
        parser.error('--query is required unless --resume is given')

    # Initialize engine
    mode = ResearchMode(args.mode)
    engine = ResearchEngine(mode=mode)

    resuming = False
    if args.resume:
        # Load previous state and continue from it (the saved mode takes
        # precedence over the CLI --mode default).
        state_file = Path(args.resume)
        if not state_file.exists():
            print(f"Error: State file not found: {state_file}", file=sys.stderr)
            sys.exit(1)
        engine.state = ResearchState.load(state_file)
        resuming = True
        print(f"Resumed research from: {state_file}")

    # Run pipeline
    report_path = engine.run_pipeline(args.query or "", resume=resuming)

    print(f"\nResearch complete! Report path: {report_path}")
    print(f"\nNow Claude should execute each phase using the displayed instructions.")


if __name__ == '__main__':
    main()