Loading source
Pulling the file list, source metadata, and syntax-aware rendering for this listing.
Source from repo
A comprehensive collection of Agent Skills for context engineering, multi-agent architectures, and production agent systems.
Files
Skill
Size
Entrypoint
Format
Open file
Syntax-highlighted preview of this file as included in the skill package.
examples/interleaved-thinking/reasoning_trace_optimizer/loop.py
1"""2OptimizationLoop: Orchestrates the full capture → analyze → improve → re-run cycle.34This is the main entry point for automated prompt optimization,5running iterative improvements until convergence or max iterations.6"""78import json9from dataclasses import dataclass, field10from datetime import datetime11from pathlib import Path12from typing import Any, Callable1314from rich.console import Console15from rich.panel import Panel16from rich.progress import Progress, SpinnerColumn, TextColumn17from rich.table import Table1819from reasoning_trace_optimizer.analyzer import TraceAnalyzer, format_analysis_report20from reasoning_trace_optimizer.capture import TraceCapture, format_trace_for_display21from reasoning_trace_optimizer.models import (22AnalysisResult,23LoopIteration,24LoopResult,25OptimizationResult,26ReasoningTrace,27)28from reasoning_trace_optimizer.optimizer import PromptOptimizer, format_optimization_report293031console = Console()323334@dataclass35class LoopConfig:36"""Configuration for the optimization loop."""3738max_iterations: int = 539convergence_threshold: float = 3.0 # Stop if improvement < this %40min_score_threshold: float = 75.0 # Stop if score >= this (realistic for complex tasks)41regression_threshold: float = 8.0 # Rollback if score drops by this much4243# Scoring weights44success_weight: float = 0.445score_weight: float = 0.446error_weight: float = 0.24748# Optimization behavior49use_best_prompt: bool = True # Use best performing prompt, not final50max_prompt_growth: float = 5.0 # Max ratio of new prompt length to original5152# Output options53save_artifacts: bool = True54artifacts_dir: str = "./optimization_artifacts"55verbose: bool = True565758class OptimizationLoop:59"""60Orchestrates the full optimization cycle.6162Runs iterative loops of:631. Execute agent with current prompt642. Capture reasoning trace653. Analyze trace for issues664. Generate optimized prompt675. Repeat until convergence6869Example:70```python71loop = OptimizationLoop()72result = loop.run(73task="Search for Python tutorials and summarize them",74initial_prompt="You are a helpful research assistant.",75tools=[search_tool],76tool_executor=execute_search77)7879print(f"Improved from {result.initial_score} to {result.final_score}")80print(f"Final prompt:\\n{result.final_prompt}")81```82"""8384def __init__(85self,86config: LoopConfig | None = None,87api_key: str | None = None,88base_url: str = "https://api.minimax.io/anthropic",89model: str = "MiniMax-M2.1",90):91"""92Initialize the optimization loop.9394Args:95config: Loop configuration96api_key: MiniMax API key97base_url: API endpoint98model: Model to use for all components99"""100self.config = config or LoopConfig()101102# Initialize components with same configuration103self.capture = TraceCapture(api_key=api_key, base_url=base_url, model=model)104self.analyzer = TraceAnalyzer(api_key=api_key, base_url=base_url, model=model)105self.optimizer = PromptOptimizer(api_key=api_key, base_url=base_url, model=model)106107# Create artifacts directory108if self.config.save_artifacts:109Path(self.config.artifacts_dir).mkdir(parents=True, exist_ok=True)110111def run(112self,113task: str,114initial_prompt: str,115tools: list[dict[str, Any]] | None = None,116tool_executor: Callable[[str, dict], str] | None = None,117on_iteration: Callable[[LoopIteration], None] | None = None,118) -> LoopResult:119"""120Run the full optimization loop.121122Args:123task: The task to optimize for124initial_prompt: Starting system prompt125tools: Tool definitions for the agent126tool_executor: Function to execute tool calls127on_iteration: Optional callback after each iteration128129Returns:130LoopResult with all iterations and final optimized prompt131"""132result = LoopResult(task=task, final_prompt=initial_prompt)133current_prompt = initial_prompt134135# Track best performing iteration136best_score = 0.0137best_prompt = initial_prompt138best_iteration = 0139consecutive_regressions = 0140141if self.config.verbose:142console.print(Panel(143f"[bold]Starting Optimization Loop[/bold]\n\n"144f"Task: {task}\n"145f"Max Iterations: {self.config.max_iterations}\n"146f"Convergence Threshold: {self.config.convergence_threshold}%",147title="Reasoning Trace Optimizer"148))149150with Progress(151SpinnerColumn(),152TextColumn("[progress.description]{task.description}"),153console=console,154disable=not self.config.verbose,155) as progress:156157for i in range(self.config.max_iterations):158task_id = progress.add_task(f"Iteration {i + 1}/{self.config.max_iterations}", total=4)159160# Step 1: Capture trace161progress.update(task_id, description=f"[cyan]Iteration {i + 1}: Capturing trace...")162trace = self.capture.run(163task=task,164system_prompt=current_prompt,165tools=tools,166tool_executor=tool_executor,167)168progress.advance(task_id)169170# Step 2: Analyze trace171progress.update(task_id, description=f"[cyan]Iteration {i + 1}: Analyzing trace...")172analysis = self.analyzer.analyze(trace)173progress.advance(task_id)174175# Calculate iteration score176iteration_score = self._calculate_score(trace, analysis)177178# Record initial score179if i == 0:180result.initial_score = iteration_score181best_score = iteration_score182best_prompt = current_prompt183184# Step 3: Check convergence185should_continue, reason = self._check_convergence(186iteration=i,187score=iteration_score,188prev_score=result.iterations[-1].analysis.overall_score if result.iterations else 0,189best_score=best_score,190consecutive_regressions=consecutive_regressions,191)192193# Step 4: Optimize if continuing194optimization = None195if should_continue:196progress.update(task_id, description=f"[cyan]Iteration {i + 1}: Optimizing prompt...")197optimization = self.optimizer.optimize(198original_prompt=current_prompt,199analysis=analysis,200trace=trace,201)202203# Check for excessive prompt growth204new_prompt = optimization.optimized_prompt205if len(new_prompt) > len(initial_prompt) * self.config.max_prompt_growth:206if self.config.verbose:207console.print(f"[yellow]Warning: Prompt grew too large ({len(new_prompt)} chars), limiting growth[/yellow]")208# Keep the current prompt instead of the bloated one209new_prompt = current_prompt210211current_prompt = new_prompt212progress.advance(task_id)213214# Track best performing iteration AFTER optimization215# This ensures we capture the optimized prompt, not the input prompt216if iteration_score > best_score:217best_score = iteration_score218# Use the optimized prompt if available, otherwise the current prompt219if optimization and optimization.optimized_prompt != initial_prompt:220best_prompt = optimization.optimized_prompt221else:222best_prompt = current_prompt223best_iteration = i + 1224consecutive_regressions = 0225elif iteration_score < best_score - self.config.regression_threshold:226consecutive_regressions += 1227if self.config.verbose:228console.print(f"[yellow]Warning: Score regressed from {best_score:.1f} to {iteration_score:.1f}[/yellow]")229230# Record iteration231iteration = LoopIteration(232iteration=i + 1,233trace=trace,234analysis=analysis,235optimization=optimization,236task_completed=trace.success or False,237error_count=len([tc for tc in trace.tool_calls if not tc.success]),238token_usage=trace.total_tokens,239)240result.iterations.append(iteration)241242# Callback243if on_iteration:244on_iteration(iteration)245246# Print iteration summary247if self.config.verbose:248self._print_iteration_summary(iteration)249250# Save artifacts251if self.config.save_artifacts:252self._save_iteration_artifacts(iteration, i + 1)253254# Check if we should stop255if not should_continue:256if self.config.verbose:257console.print(f"\n[green]Stopping: {reason}[/green]")258result.converged = True259break260261progress.remove_task(task_id)262263# Finalize result - use best prompt if configured264if self.config.use_best_prompt and best_score > result.iterations[-1].analysis.overall_score:265result.final_prompt = best_prompt266result.final_score = best_score267if self.config.verbose:268console.print(f"[green]Using best prompt from iteration {best_iteration} (score: {best_score:.1f})[/green]")269else:270result.final_prompt = current_prompt271result.final_score = result.iterations[-1].analysis.overall_score if result.iterations else 0272273result.total_iterations = len(result.iterations)274result.improvement_percentage = (275(result.final_score - result.initial_score) / max(result.initial_score, 1) * 100276)277278# Warn if prompt was never successfully optimized279if result.final_prompt == initial_prompt:280if self.config.verbose:281console.print(282"[yellow]Warning: Final prompt unchanged from initial. "283"Optimization may have failed to parse model responses.[/yellow]"284)285# Check if any iteration actually produced a different prompt286any_optimized = any(287i.optimization and i.optimization.optimized_prompt != initial_prompt288for i in result.iterations289if i.optimization290)291if not any_optimized:292console.print(293"[yellow]No successful prompt optimizations were extracted. "294"Check artifacts for raw optimizer responses.[/yellow]"295)296297# Print final summary298if self.config.verbose:299self._print_final_summary(result)300301# Save final artifacts302if self.config.save_artifacts:303self._save_final_artifacts(result)304305return result306307def run_single(308self,309task: str,310prompt: str,311tools: list[dict[str, Any]] | None = None,312tool_executor: Callable[[str, dict], str] | None = None,313) -> tuple[ReasoningTrace, AnalysisResult]:314"""315Run a single capture + analysis cycle (no optimization).316317Useful for debugging or when you just want analysis without318automatic optimization.319320Returns:321Tuple of (trace, analysis)322"""323trace = self.capture.run(324task=task,325system_prompt=prompt,326tools=tools,327tool_executor=tool_executor,328)329analysis = self.analyzer.analyze(trace)330return trace, analysis331332def _calculate_score(333self,334trace: ReasoningTrace,335analysis: AnalysisResult,336) -> float:337"""Calculate weighted score from trace and analysis."""338success_score = 100 if trace.success else 0339error_penalty = len([tc for tc in trace.tool_calls if not tc.success]) * 10340341weighted = (342success_score * self.config.success_weight343+ analysis.overall_score * self.config.score_weight344- error_penalty * self.config.error_weight345)346347return max(0, min(100, weighted))348349def _check_convergence(350self,351iteration: int,352score: float,353prev_score: float,354best_score: float = 0.0,355consecutive_regressions: int = 0,356) -> tuple[bool, str]:357"""Check if optimization should continue."""358# Check score threshold359if score >= self.config.min_score_threshold:360return False, f"Score {score:.1f} >= threshold {self.config.min_score_threshold}"361362# Check for consecutive regressions (stop if we've regressed twice in a row)363if consecutive_regressions >= 2:364return False, f"Consecutive regressions detected (best was {best_score:.1f})"365366# Check improvement threshold (after first iteration)367if iteration > 0:368improvement = score - prev_score369if abs(improvement) < self.config.convergence_threshold and score >= prev_score:370return False, f"Converged (improvement {improvement:.1f}% < threshold)"371372# Check max iterations373if iteration >= self.config.max_iterations - 1:374return False, f"Reached max iterations ({self.config.max_iterations})"375376return True, ""377378def _print_iteration_summary(self, iteration: LoopIteration) -> None:379"""Print summary of an iteration."""380table = Table(title=f"Iteration {iteration.iteration} Summary")381table.add_column("Metric", style="cyan")382table.add_column("Value", style="green")383384table.add_row("Task Completed", "Yes" if iteration.task_completed else "No")385table.add_row("Overall Score", f"{iteration.analysis.overall_score:.1f}/100")386table.add_row("Patterns Found", str(len(iteration.analysis.patterns)))387table.add_row("Tool Errors", str(iteration.error_count))388table.add_row("Token Usage", str(iteration.token_usage))389390if iteration.optimization:391table.add_row(392"Predicted Improvement",393f"{iteration.optimization.predicted_improvement}%"394)395396console.print(table)397398def _print_final_summary(self, result: LoopResult) -> None:399"""Print final optimization summary."""400console.print("\n")401panel_content = (402f"[bold]Iterations:[/bold] {result.total_iterations}\n"403f"[bold]Converged:[/bold] {'Yes' if result.converged else 'No'}\n"404f"[bold]Initial Score:[/bold] {result.initial_score:.1f}\n"405f"[bold]Final Score:[/bold] {result.final_score:.1f}\n"406f"[bold]Improvement:[/bold] {result.improvement_percentage:+.1f}%"407)408console.print(Panel(panel_content, title="[green]Optimization Complete[/green]"))409410def _save_iteration_artifacts(self, iteration: LoopIteration, num: int) -> None:411"""Save iteration artifacts to disk."""412base_path = Path(self.config.artifacts_dir) / f"iteration_{num}"413base_path.mkdir(exist_ok=True)414415# Save trace416with open(base_path / "trace.txt", "w") as f:417f.write(format_trace_for_display(iteration.trace))418419# Save analysis420with open(base_path / "analysis.txt", "w") as f:421f.write(format_analysis_report(iteration.analysis))422423# Save optimization if present424if iteration.optimization:425with open(base_path / "optimization.txt", "w") as f:426f.write(format_optimization_report(iteration.optimization))427428with open(base_path / "optimized_prompt.txt", "w") as f:429f.write(iteration.optimization.optimized_prompt)430431def _save_final_artifacts(self, result: LoopResult) -> None:432"""Save final optimization artifacts."""433base_path = Path(self.config.artifacts_dir)434435# Save final prompt436with open(base_path / "final_prompt.txt", "w") as f:437f.write(result.final_prompt)438439# Save summary JSON440summary = {441"task": result.task,442"total_iterations": result.total_iterations,443"converged": result.converged,444"initial_score": result.initial_score,445"final_score": result.final_score,446"improvement_percentage": result.improvement_percentage,447"timestamp": datetime.now().isoformat(),448}449with open(base_path / "summary.json", "w") as f:450json.dump(summary, f, indent=2)451452453def run_quick_optimization(454task: str,455initial_prompt: str,456tools: list[dict[str, Any]] | None = None,457tool_executor: Callable[[str, dict], str] | None = None,458max_iterations: int = 3,459) -> str:460"""461Quick helper function for one-shot optimization.462463Returns the optimized prompt directly.464"""465config = LoopConfig(max_iterations=max_iterations, verbose=False)466loop = OptimizationLoop(config=config)467result = loop.run(task, initial_prompt, tools, tool_executor)468return result.final_prompt469