Source from repo

Agent Skills for Context Engineering

A comprehensive collection of Agent Skills for context engineering, multi-agent architectures, and production agent systems.

muratcankoylanGitHub muratcankoylanSource repo Original GitHub link

Files

241

Skill

n/a

Size

2.6 MB

Entrypoint

SKILL.md

Format

git-repo

Open file

examples/interleaved-thinking/reasoning_trace_optimizer/loop.py

Syntax-highlighted preview of this file as included in the skill package.

Rendered Source

code469 linesFree

examples/interleaved-thinking/reasoning_trace_optimizer/loop.py

1"""
2OptimizationLoop: Orchestrates the full capture → analyze → improve → re-run cycle.
3 
4This is the main entry point for automated prompt optimization,
5running iterative improvements until convergence or max iterations.
6"""
7 
8import json
9from dataclasses import dataclass, field
10from datetime import datetime
11from pathlib import Path
12from typing import Any, Callable
13 
14from rich.console import Console
15from rich.panel import Panel
16from rich.progress import Progress, SpinnerColumn, TextColumn
17from rich.table import Table
18 
19from reasoning_trace_optimizer.analyzer import TraceAnalyzer, format_analysis_report
20from reasoning_trace_optimizer.capture import TraceCapture, format_trace_for_display
21from reasoning_trace_optimizer.models import (
22    AnalysisResult,
23    LoopIteration,
24    LoopResult,
25    OptimizationResult,
26    ReasoningTrace,
27)
28from reasoning_trace_optimizer.optimizer import PromptOptimizer, format_optimization_report
29 
30 
31console = Console()
32 
33 
34@dataclass
35class LoopConfig:
36    """Configuration for the optimization loop."""
37 
38    max_iterations: int = 5
39    convergence_threshold: float = 3.0  # Stop if improvement < this %
40    min_score_threshold: float = 75.0  # Stop if score >= this (realistic for complex tasks)
41    regression_threshold: float = 8.0  # Rollback if score drops by this much
42 
43    # Scoring weights
44    success_weight: float = 0.4
45    score_weight: float = 0.4
46    error_weight: float = 0.2
47 
48    # Optimization behavior
49    use_best_prompt: bool = True  # Use best performing prompt, not final
50    max_prompt_growth: float = 5.0  # Max ratio of new prompt length to original
51 
52    # Output options
53    save_artifacts: bool = True
54    artifacts_dir: str = "./optimization_artifacts"
55    verbose: bool = True
56 
57 
58class OptimizationLoop:
59    """
60    Orchestrates the full optimization cycle.
61 
62    Runs iterative loops of:
63    1. Execute agent with current prompt
64    2. Capture reasoning trace
65    3. Analyze trace for issues
66    4. Generate optimized prompt
67    5. Repeat until convergence
68 
69    Example:
70        ```python
71        loop = OptimizationLoop()
72        result = loop.run(
73            task="Search for Python tutorials and summarize them",
74            initial_prompt="You are a helpful research assistant.",
75            tools=[search_tool],
76            tool_executor=execute_search
77        )
78 
79        print(f"Improved from {result.initial_score} to {result.final_score}")
80        print(f"Final prompt:\\n{result.final_prompt}")
81        ```
82    """
83 
84    def __init__(
85        self,
86        config: LoopConfig | None = None,
87        api_key: str | None = None,
88        base_url: str = "https://api.minimax.io/anthropic",
89        model: str = "MiniMax-M2.1",
90    ):
91        """
92        Initialize the optimization loop.
93 
94        Args:
95            config: Loop configuration
96            api_key: MiniMax API key
97            base_url: API endpoint
98            model: Model to use for all components
99        """
100        self.config = config or LoopConfig()
101 
102        # Initialize components with same configuration
103        self.capture = TraceCapture(api_key=api_key, base_url=base_url, model=model)
104        self.analyzer = TraceAnalyzer(api_key=api_key, base_url=base_url, model=model)
105        self.optimizer = PromptOptimizer(api_key=api_key, base_url=base_url, model=model)
106 
107        # Create artifacts directory
108        if self.config.save_artifacts:
109            Path(self.config.artifacts_dir).mkdir(parents=True, exist_ok=True)
110 
111    def run(
112        self,
113        task: str,
114        initial_prompt: str,
115        tools: list[dict[str, Any]] | None = None,
116        tool_executor: Callable[[str, dict], str] | None = None,
117        on_iteration: Callable[[LoopIteration], None] | None = None,
118    ) -> LoopResult:
119        """
120        Run the full optimization loop.
121 
122        Args:
123            task: The task to optimize for
124            initial_prompt: Starting system prompt
125            tools: Tool definitions for the agent
126            tool_executor: Function to execute tool calls
127            on_iteration: Optional callback after each iteration
128 
129        Returns:
130            LoopResult with all iterations and final optimized prompt
131        """
132        result = LoopResult(task=task, final_prompt=initial_prompt)
133        current_prompt = initial_prompt
134 
135        # Track best performing iteration
136        best_score = 0.0
137        best_prompt = initial_prompt
138        best_iteration = 0
139        consecutive_regressions = 0
140 
141        if self.config.verbose:
142            console.print(Panel(
143                f"[bold]Starting Optimization Loop[/bold]\n\n"
144                f"Task: {task}\n"
145                f"Max Iterations: {self.config.max_iterations}\n"
146                f"Convergence Threshold: {self.config.convergence_threshold}%",
147                title="Reasoning Trace Optimizer"
148            ))
149 
150        with Progress(
151            SpinnerColumn(),
152            TextColumn("[progress.description]{task.description}"),
153            console=console,
154            disable=not self.config.verbose,
155        ) as progress:
156 
157            for i in range(self.config.max_iterations):
158                task_id = progress.add_task(f"Iteration {i + 1}/{self.config.max_iterations}", total=4)
159 
160                # Step 1: Capture trace
161                progress.update(task_id, description=f"[cyan]Iteration {i + 1}: Capturing trace...")
162                trace = self.capture.run(
163                    task=task,
164                    system_prompt=current_prompt,
165                    tools=tools,
166                    tool_executor=tool_executor,
167                )
168                progress.advance(task_id)
169 
170                # Step 2: Analyze trace
171                progress.update(task_id, description=f"[cyan]Iteration {i + 1}: Analyzing trace...")
172                analysis = self.analyzer.analyze(trace)
173                progress.advance(task_id)
174 
175                # Calculate iteration score
176                iteration_score = self._calculate_score(trace, analysis)
177 
178                # Record initial score
179                if i == 0:
180                    result.initial_score = iteration_score
181                    best_score = iteration_score
182                    best_prompt = current_prompt
183 
184                # Step 3: Check convergence
185                should_continue, reason = self._check_convergence(
186                    iteration=i,
187                    score=iteration_score,
188                    prev_score=result.iterations[-1].analysis.overall_score if result.iterations else 0,
189                    best_score=best_score,
190                    consecutive_regressions=consecutive_regressions,
191                )
192 
193                # Step 4: Optimize if continuing
194                optimization = None
195                if should_continue:
196                    progress.update(task_id, description=f"[cyan]Iteration {i + 1}: Optimizing prompt...")
197                    optimization = self.optimizer.optimize(
198                        original_prompt=current_prompt,
199                        analysis=analysis,
200                        trace=trace,
201                    )
202 
203                    # Check for excessive prompt growth
204                    new_prompt = optimization.optimized_prompt
205                    if len(new_prompt) > len(initial_prompt) * self.config.max_prompt_growth:
206                        if self.config.verbose:
207                            console.print(f"[yellow]Warning: Prompt grew too large ({len(new_prompt)} chars), limiting growth[/yellow]")
208                        # Keep the current prompt instead of the bloated one
209                        new_prompt = current_prompt
210 
211                    current_prompt = new_prompt
212                    progress.advance(task_id)
213 
214                # Track best performing iteration AFTER optimization
215                # This ensures we capture the optimized prompt, not the input prompt
216                if iteration_score > best_score:
217                    best_score = iteration_score
218                    # Use the optimized prompt if available, otherwise the current prompt
219                    if optimization and optimization.optimized_prompt != initial_prompt:
220                        best_prompt = optimization.optimized_prompt
221                    else:
222                        best_prompt = current_prompt
223                    best_iteration = i + 1
224                    consecutive_regressions = 0
225                elif iteration_score < best_score - self.config.regression_threshold:
226                    consecutive_regressions += 1
227                    if self.config.verbose:
228                        console.print(f"[yellow]Warning: Score regressed from {best_score:.1f} to {iteration_score:.1f}[/yellow]")
229 
230                # Record iteration
231                iteration = LoopIteration(
232                    iteration=i + 1,
233                    trace=trace,
234                    analysis=analysis,
235                    optimization=optimization,
236                    task_completed=trace.success or False,
237                    error_count=len([tc for tc in trace.tool_calls if not tc.success]),
238                    token_usage=trace.total_tokens,
239                )
240                result.iterations.append(iteration)
241 
242                # Callback
243                if on_iteration:
244                    on_iteration(iteration)
245 
246                # Print iteration summary
247                if self.config.verbose:
248                    self._print_iteration_summary(iteration)
249 
250                # Save artifacts
251                if self.config.save_artifacts:
252                    self._save_iteration_artifacts(iteration, i + 1)
253 
254                # Check if we should stop
255                if not should_continue:
256                    if self.config.verbose:
257                        console.print(f"\n[green]Stopping: {reason}[/green]")
258                    result.converged = True
259                    break
260 
261                progress.remove_task(task_id)
262 
263        # Finalize result - use best prompt if configured
264        if self.config.use_best_prompt and best_score > result.iterations[-1].analysis.overall_score:
265            result.final_prompt = best_prompt
266            result.final_score = best_score
267            if self.config.verbose:
268                console.print(f"[green]Using best prompt from iteration {best_iteration} (score: {best_score:.1f})[/green]")
269        else:
270            result.final_prompt = current_prompt
271            result.final_score = result.iterations[-1].analysis.overall_score if result.iterations else 0
272 
273        result.total_iterations = len(result.iterations)
274        result.improvement_percentage = (
275            (result.final_score - result.initial_score) / max(result.initial_score, 1) * 100
276        )
277 
278        # Warn if prompt was never successfully optimized
279        if result.final_prompt == initial_prompt:
280            if self.config.verbose:
281                console.print(
282                    "[yellow]Warning: Final prompt unchanged from initial. "
283                    "Optimization may have failed to parse model responses.[/yellow]"
284                )
285            # Check if any iteration actually produced a different prompt
286            any_optimized = any(
287                i.optimization and i.optimization.optimized_prompt != initial_prompt
288                for i in result.iterations
289                if i.optimization
290            )
291            if not any_optimized:
292                console.print(
293                    "[yellow]No successful prompt optimizations were extracted. "
294                    "Check artifacts for raw optimizer responses.[/yellow]"
295                )
296 
297        # Print final summary
298        if self.config.verbose:
299            self._print_final_summary(result)
300 
301        # Save final artifacts
302        if self.config.save_artifacts:
303            self._save_final_artifacts(result)
304 
305        return result
306 
307    def run_single(
308        self,
309        task: str,
310        prompt: str,
311        tools: list[dict[str, Any]] | None = None,
312        tool_executor: Callable[[str, dict], str] | None = None,
313    ) -> tuple[ReasoningTrace, AnalysisResult]:
314        """
315        Run a single capture + analysis cycle (no optimization).
316 
317        Useful for debugging or when you just want analysis without
318        automatic optimization.
319 
320        Returns:
321            Tuple of (trace, analysis)
322        """
323        trace = self.capture.run(
324            task=task,
325            system_prompt=prompt,
326            tools=tools,
327            tool_executor=tool_executor,
328        )
329        analysis = self.analyzer.analyze(trace)
330        return trace, analysis
331 
332    def _calculate_score(
333        self,
334        trace: ReasoningTrace,
335        analysis: AnalysisResult,
336    ) -> float:
337        """Calculate weighted score from trace and analysis."""
338        success_score = 100 if trace.success else 0
339        error_penalty = len([tc for tc in trace.tool_calls if not tc.success]) * 10
340 
341        weighted = (
342            success_score * self.config.success_weight
343            + analysis.overall_score * self.config.score_weight
344            - error_penalty * self.config.error_weight
345        )
346 
347        return max(0, min(100, weighted))
348 
349    def _check_convergence(
350        self,
351        iteration: int,
352        score: float,
353        prev_score: float,
354        best_score: float = 0.0,
355        consecutive_regressions: int = 0,
356    ) -> tuple[bool, str]:
357        """Check if optimization should continue."""
358        # Check score threshold
359        if score >= self.config.min_score_threshold:
360            return False, f"Score {score:.1f} >= threshold {self.config.min_score_threshold}"
361 
362        # Check for consecutive regressions (stop if we've regressed twice in a row)
363        if consecutive_regressions >= 2:
364            return False, f"Consecutive regressions detected (best was {best_score:.1f})"
365 
366        # Check improvement threshold (after first iteration)
367        if iteration > 0:
368            improvement = score - prev_score
369            if abs(improvement) < self.config.convergence_threshold and score >= prev_score:
370                return False, f"Converged (improvement {improvement:.1f}% < threshold)"
371 
372        # Check max iterations
373        if iteration >= self.config.max_iterations - 1:
374            return False, f"Reached max iterations ({self.config.max_iterations})"
375 
376        return True, ""
377 
378    def _print_iteration_summary(self, iteration: LoopIteration) -> None:
379        """Print summary of an iteration."""
380        table = Table(title=f"Iteration {iteration.iteration} Summary")
381        table.add_column("Metric", style="cyan")
382        table.add_column("Value", style="green")
383 
384        table.add_row("Task Completed", "Yes" if iteration.task_completed else "No")
385        table.add_row("Overall Score", f"{iteration.analysis.overall_score:.1f}/100")
386        table.add_row("Patterns Found", str(len(iteration.analysis.patterns)))
387        table.add_row("Tool Errors", str(iteration.error_count))
388        table.add_row("Token Usage", str(iteration.token_usage))
389 
390        if iteration.optimization:
391            table.add_row(
392                "Predicted Improvement",
393                f"{iteration.optimization.predicted_improvement}%"
394            )
395 
396        console.print(table)
397 
398    def _print_final_summary(self, result: LoopResult) -> None:
399        """Print final optimization summary."""
400        console.print("\n")
401        panel_content = (
402            f"[bold]Iterations:[/bold] {result.total_iterations}\n"
403            f"[bold]Converged:[/bold] {'Yes' if result.converged else 'No'}\n"
404            f"[bold]Initial Score:[/bold] {result.initial_score:.1f}\n"
405            f"[bold]Final Score:[/bold] {result.final_score:.1f}\n"
406            f"[bold]Improvement:[/bold] {result.improvement_percentage:+.1f}%"
407        )
408        console.print(Panel(panel_content, title="[green]Optimization Complete[/green]"))
409 
410    def _save_iteration_artifacts(self, iteration: LoopIteration, num: int) -> None:
411        """Save iteration artifacts to disk."""
412        base_path = Path(self.config.artifacts_dir) / f"iteration_{num}"
413        base_path.mkdir(exist_ok=True)
414 
415        # Save trace
416        with open(base_path / "trace.txt", "w") as f:
417            f.write(format_trace_for_display(iteration.trace))
418 
419        # Save analysis
420        with open(base_path / "analysis.txt", "w") as f:
421            f.write(format_analysis_report(iteration.analysis))
422 
423        # Save optimization if present
424        if iteration.optimization:
425            with open(base_path / "optimization.txt", "w") as f:
426                f.write(format_optimization_report(iteration.optimization))
427 
428            with open(base_path / "optimized_prompt.txt", "w") as f:
429                f.write(iteration.optimization.optimized_prompt)
430 
431    def _save_final_artifacts(self, result: LoopResult) -> None:
432        """Save final optimization artifacts."""
433        base_path = Path(self.config.artifacts_dir)
434 
435        # Save final prompt
436        with open(base_path / "final_prompt.txt", "w") as f:
437            f.write(result.final_prompt)
438 
439        # Save summary JSON
440        summary = {
441            "task": result.task,
442            "total_iterations": result.total_iterations,
443            "converged": result.converged,
444            "initial_score": result.initial_score,
445            "final_score": result.final_score,
446            "improvement_percentage": result.improvement_percentage,
447            "timestamp": datetime.now().isoformat(),
448        }
449        with open(base_path / "summary.json", "w") as f:
450            json.dump(summary, f, indent=2)
451 
452 
453def run_quick_optimization(
454    task: str,
455    initial_prompt: str,
456    tools: list[dict[str, Any]] | None = None,
457    tool_executor: Callable[[str, dict], str] | None = None,
458    max_iterations: int = 3,
459) -> str:
460    """
461    Quick helper function for one-shot optimization.
462 
463    Returns the optimized prompt directly.
464    """
465    config = LoopConfig(max_iterations=max_iterations, verbose=False)
466    loop = OptimizationLoop(config=config)
467    result = loop.run(task, initial_prompt, tools, tool_executor)
468    return result.final_prompt
469

Marketplace

Source from repo

Agent Skills for Context Engineering

A comprehensive collection of Agent Skills for context engineering, multi-agent architectures, and production agent systems.

muratcankoylanGitHub muratcankoylanSource repo Original GitHub link

Files

241

Skill

n/a

Size

2.6 MB

Entrypoint

SKILL.md

Format

git-repo

Open file

examples/interleaved-thinking/reasoning_trace_optimizer/loop.py

Syntax-highlighted preview of this file as included in the skill package.

Rendered Source

code469 linesFree

examples/interleaved-thinking/reasoning_trace_optimizer/loop.py

1"""
2OptimizationLoop: Orchestrates the full capture → analyze → improve → re-run cycle.
3 
4This is the main entry point for automated prompt optimization,
5running iterative improvements until convergence or max iterations.
6"""
7 
8import json
9from dataclasses import dataclass, field
10from datetime import datetime
11from pathlib import Path
12from typing import Any, Callable
13 
14from rich.console import Console
15from rich.panel import Panel
16from rich.progress import Progress, SpinnerColumn, TextColumn
17from rich.table import Table
18 
19from reasoning_trace_optimizer.analyzer import TraceAnalyzer, format_analysis_report
20from reasoning_trace_optimizer.capture import TraceCapture, format_trace_for_display
21from reasoning_trace_optimizer.models import (
22    AnalysisResult,
23    LoopIteration,
24    LoopResult,
25    OptimizationResult,
26    ReasoningTrace,
27)
28from reasoning_trace_optimizer.optimizer import PromptOptimizer, format_optimization_report
29 
30 
31console = Console()
32 
33 
34@dataclass
35class LoopConfig:
36    """Configuration for the optimization loop."""
37 
38    max_iterations: int = 5
39    convergence_threshold: float = 3.0  # Stop if improvement < this %
40    min_score_threshold: float = 75.0  # Stop if score >= this (realistic for complex tasks)
41    regression_threshold: float = 8.0  # Rollback if score drops by this much
42 
43    # Scoring weights
44    success_weight: float = 0.4
45    score_weight: float = 0.4
46    error_weight: float = 0.2
47 
48    # Optimization behavior
49    use_best_prompt: bool = True  # Use best performing prompt, not final
50    max_prompt_growth: float = 5.0  # Max ratio of new prompt length to original
51 
52    # Output options
53    save_artifacts: bool = True
54    artifacts_dir: str = "./optimization_artifacts"
55    verbose: bool = True
56 
57 
58class OptimizationLoop:
59    """
60    Orchestrates the full optimization cycle.
61 
62    Runs iterative loops of:
63    1. Execute agent with current prompt
64    2. Capture reasoning trace
65    3. Analyze trace for issues
66    4. Generate optimized prompt
67    5. Repeat until convergence
68 
69    Example:
70        ```python
71        loop = OptimizationLoop()
72        result = loop.run(
73            task="Search for Python tutorials and summarize them",
74            initial_prompt="You are a helpful research assistant.",
75            tools=[search_tool],
76            tool_executor=execute_search
77        )
78 
79        print(f"Improved from {result.initial_score} to {result.final_score}")
80        print(f"Final prompt:\\n{result.final_prompt}")
81        ```
82    """
83 
84    def __init__(
85        self,
86        config: LoopConfig | None = None,
87        api_key: str | None = None,
88        base_url: str = "https://api.minimax.io/anthropic",
89        model: str = "MiniMax-M2.1",
90    ):
91        """
92        Initialize the optimization loop.
93 
94        Args:
95            config: Loop configuration
96            api_key: MiniMax API key
97            base_url: API endpoint
98            model: Model to use for all components
99        """
100        self.config = config or LoopConfig()
101 
102        # Initialize components with same configuration
103        self.capture = TraceCapture(api_key=api_key, base_url=base_url, model=model)
104        self.analyzer = TraceAnalyzer(api_key=api_key, base_url=base_url, model=model)
105        self.optimizer = PromptOptimizer(api_key=api_key, base_url=base_url, model=model)
106 
107        # Create artifacts directory
108        if self.config.save_artifacts:
109            Path(self.config.artifacts_dir).mkdir(parents=True, exist_ok=True)
110 
111    def run(
112        self,
113        task: str,
114        initial_prompt: str,
115        tools: list[dict[str, Any]] | None = None,
116        tool_executor: Callable[[str, dict], str] | None = None,
117        on_iteration: Callable[[LoopIteration], None] | None = None,
118    ) -> LoopResult:
119        """
120        Run the full optimization loop.
121 
122        Args:
123            task: The task to optimize for
124            initial_prompt: Starting system prompt
125            tools: Tool definitions for the agent
126            tool_executor: Function to execute tool calls
127            on_iteration: Optional callback after each iteration
128 
129        Returns:
130            LoopResult with all iterations and final optimized prompt
131        """
132        result = LoopResult(task=task, final_prompt=initial_prompt)
133        current_prompt = initial_prompt
134 
135        # Track best performing iteration
136        best_score = 0.0
137        best_prompt = initial_prompt
138        best_iteration = 0
139        consecutive_regressions = 0
140 
141        if self.config.verbose:
142            console.print(Panel(
143                f"[bold]Starting Optimization Loop[/bold]\n\n"
144                f"Task: {task}\n"
145                f"Max Iterations: {self.config.max_iterations}\n"
146                f"Convergence Threshold: {self.config.convergence_threshold}%",
147                title="Reasoning Trace Optimizer"
148            ))
149 
150        with Progress(
151            SpinnerColumn(),
152            TextColumn("[progress.description]{task.description}"),
153            console=console,
154            disable=not self.config.verbose,
155        ) as progress:
156 
157            for i in range(self.config.max_iterations):
158                task_id = progress.add_task(f"Iteration {i + 1}/{self.config.max_iterations}", total=4)
159 
160                # Step 1: Capture trace
161                progress.update(task_id, description=f"[cyan]Iteration {i + 1}: Capturing trace...")
162                trace = self.capture.run(
163                    task=task,
164                    system_prompt=current_prompt,
165                    tools=tools,
166                    tool_executor=tool_executor,
167                )
168                progress.advance(task_id)
169 
170                # Step 2: Analyze trace
171                progress.update(task_id, description=f"[cyan]Iteration {i + 1}: Analyzing trace...")
172                analysis = self.analyzer.analyze(trace)
173                progress.advance(task_id)
174 
175                # Calculate iteration score
176                iteration_score = self._calculate_score(trace, analysis)
177 
178                # Record initial score
179                if i == 0:
180                    result.initial_score = iteration_score
181                    best_score = iteration_score
182                    best_prompt = current_prompt
183 
184                # Step 3: Check convergence
185                should_continue, reason = self._check_convergence(
186                    iteration=i,
187                    score=iteration_score,
188                    prev_score=result.iterations[-1].analysis.overall_score if result.iterations else 0,
189                    best_score=best_score,
190                    consecutive_regressions=consecutive_regressions,
191                )
192 
193                # Step 4: Optimize if continuing
194                optimization = None
195                if should_continue:
196                    progress.update(task_id, description=f"[cyan]Iteration {i + 1}: Optimizing prompt...")
197                    optimization = self.optimizer.optimize(
198                        original_prompt=current_prompt,
199                        analysis=analysis,
200                        trace=trace,
201                    )
202 
203                    # Check for excessive prompt growth
204                    new_prompt = optimization.optimized_prompt
205                    if len(new_prompt) > len(initial_prompt) * self.config.max_prompt_growth:
206                        if self.config.verbose:
207                            console.print(f"[yellow]Warning: Prompt grew too large ({len(new_prompt)} chars), limiting growth[/yellow]")
208                        # Keep the current prompt instead of the bloated one
209                        new_prompt = current_prompt
210 
211                    current_prompt = new_prompt
212                    progress.advance(task_id)
213 
214                # Track best performing iteration AFTER optimization
215                # This ensures we capture the optimized prompt, not the input prompt
216                if iteration_score > best_score:
217                    best_score = iteration_score
218                    # Use the optimized prompt if available, otherwise the current prompt
219                    if optimization and optimization.optimized_prompt != initial_prompt:
220                        best_prompt = optimization.optimized_prompt
221                    else:
222                        best_prompt = current_prompt
223                    best_iteration = i + 1
224                    consecutive_regressions = 0
225                elif iteration_score < best_score - self.config.regression_threshold:
226                    consecutive_regressions += 1
227                    if self.config.verbose:
228                        console.print(f"[yellow]Warning: Score regressed from {best_score:.1f} to {iteration_score:.1f}[/yellow]")
229 
230                # Record iteration
231                iteration = LoopIteration(
232                    iteration=i + 1,
233                    trace=trace,
234                    analysis=analysis,
235                    optimization=optimization,
236                    task_completed=trace.success or False,
237                    error_count=len([tc for tc in trace.tool_calls if not tc.success]),
238                    token_usage=trace.total_tokens,
239                )
240                result.iterations.append(iteration)
241 
242                # Callback
243                if on_iteration:
244                    on_iteration(iteration)
245 
246                # Print iteration summary
247                if self.config.verbose:
248                    self._print_iteration_summary(iteration)
249 
250                # Save artifacts
251                if self.config.save_artifacts:
252                    self._save_iteration_artifacts(iteration, i + 1)
253 
254                # Check if we should stop
255                if not should_continue:
256                    if self.config.verbose:
257                        console.print(f"\n[green]Stopping: {reason}[/green]")
258                    result.converged = True
259                    break
260 
261                progress.remove_task(task_id)
262 
263        # Finalize result - use best prompt if configured
264        if self.config.use_best_prompt and best_score > result.iterations[-1].analysis.overall_score:
265            result.final_prompt = best_prompt
266            result.final_score = best_score
267            if self.config.verbose:
268                console.print(f"[green]Using best prompt from iteration {best_iteration} (score: {best_score:.1f})[/green]")
269        else:
270            result.final_prompt = current_prompt
271            result.final_score = result.iterations[-1].analysis.overall_score if result.iterations else 0
272 
273        result.total_iterations = len(result.iterations)
274        result.improvement_percentage = (
275            (result.final_score - result.initial_score) / max(result.initial_score, 1) * 100
276        )
277 
278        # Warn if prompt was never successfully optimized
279        if result.final_prompt == initial_prompt:
280            if self.config.verbose:
281                console.print(
282                    "[yellow]Warning: Final prompt unchanged from initial. "
283                    "Optimization may have failed to parse model responses.[/yellow]"
284                )
285            # Check if any iteration actually produced a different prompt
286            any_optimized = any(
287                i.optimization and i.optimization.optimized_prompt != initial_prompt
288                for i in result.iterations
289                if i.optimization
290            )
291            if not any_optimized:
292                console.print(
293                    "[yellow]No successful prompt optimizations were extracted. "
294                    "Check artifacts for raw optimizer responses.[/yellow]"
295                )
296 
297        # Print final summary
298        if self.config.verbose:
299            self._print_final_summary(result)
300 
301        # Save final artifacts
302        if self.config.save_artifacts:
303            self._save_final_artifacts(result)
304 
305        return result
306 
307    def run_single(
308        self,
309        task: str,
310        prompt: str,
311        tools: list[dict[str, Any]] | None = None,
312        tool_executor: Callable[[str, dict], str] | None = None,
313    ) -> tuple[ReasoningTrace, AnalysisResult]:
314        """
315        Run a single capture + analysis cycle (no optimization).
316 
317        Useful for debugging or when you just want analysis without
318        automatic optimization.
319 
320        Returns:
321            Tuple of (trace, analysis)
322        """
323        trace = self.capture.run(
324            task=task,
325            system_prompt=prompt,
326            tools=tools,
327            tool_executor=tool_executor,
328        )
329        analysis = self.analyzer.analyze(trace)
330        return trace, analysis
331 
332    def _calculate_score(
333        self,
334        trace: ReasoningTrace,
335        analysis: AnalysisResult,
336    ) -> float:
337        """Calculate weighted score from trace and analysis."""
338        success_score = 100 if trace.success else 0
339        error_penalty = len([tc for tc in trace.tool_calls if not tc.success]) * 10
340 
341        weighted = (
342            success_score * self.config.success_weight
343            + analysis.overall_score * self.config.score_weight
344            - error_penalty * self.config.error_weight
345        )
346 
347        return max(0, min(100, weighted))
348 
349    def _check_convergence(
350        self,
351        iteration: int,
352        score: float,
353        prev_score: float,
354        best_score: float = 0.0,
355        consecutive_regressions: int = 0,
356    ) -> tuple[bool, str]:
357        """Check if optimization should continue."""
358        # Check score threshold
359        if score >= self.config.min_score_threshold:
360            return False, f"Score {score:.1f} >= threshold {self.config.min_score_threshold}"
361 
362        # Check for consecutive regressions (stop if we've regressed twice in a row)
363        if consecutive_regressions >= 2:
364            return False, f"Consecutive regressions detected (best was {best_score:.1f})"
365 
366        # Check improvement threshold (after first iteration)
367        if iteration > 0:
368            improvement = score - prev_score
369            if abs(improvement) < self.config.convergence_threshold and score >= prev_score:
370                return False, f"Converged (improvement {improvement:.1f}% < threshold)"
371 
372        # Check max iterations
373        if iteration >= self.config.max_iterations - 1:
374            return False, f"Reached max iterations ({self.config.max_iterations})"
375 
376        return True, ""
377 
378    def _print_iteration_summary(self, iteration: LoopIteration) -> None:
379        """Print summary of an iteration."""
380        table = Table(title=f"Iteration {iteration.iteration} Summary")
381        table.add_column("Metric", style="cyan")
382        table.add_column("Value", style="green")
383 
384        table.add_row("Task Completed", "Yes" if iteration.task_completed else "No")
385        table.add_row("Overall Score", f"{iteration.analysis.overall_score:.1f}/100")
386        table.add_row("Patterns Found", str(len(iteration.analysis.patterns)))
387        table.add_row("Tool Errors", str(iteration.error_count))
388        table.add_row("Token Usage", str(iteration.token_usage))
389 
390        if iteration.optimization:
391            table.add_row(
392                "Predicted Improvement",
393                f"{iteration.optimization.predicted_improvement}%"
394            )
395 
396        console.print(table)
397 
398    def _print_final_summary(self, result: LoopResult) -> None:
399        """Print final optimization summary."""
400        console.print("\n")
401        panel_content = (
402            f"[bold]Iterations:[/bold] {result.total_iterations}\n"
403            f"[bold]Converged:[/bold] {'Yes' if result.converged else 'No'}\n"
404            f"[bold]Initial Score:[/bold] {result.initial_score:.1f}\n"
405            f"[bold]Final Score:[/bold] {result.final_score:.1f}\n"
406            f"[bold]Improvement:[/bold] {result.improvement_percentage:+.1f}%"
407        )
408        console.print(Panel(panel_content, title="[green]Optimization Complete[/green]"))
409 
410    def _save_iteration_artifacts(self, iteration: LoopIteration, num: int) -> None:
411        """Save iteration artifacts to disk."""
412        base_path = Path(self.config.artifacts_dir) / f"iteration_{num}"
413        base_path.mkdir(exist_ok=True)
414 
415        # Save trace
416        with open(base_path / "trace.txt", "w") as f:
417            f.write(format_trace_for_display(iteration.trace))
418 
419        # Save analysis
420        with open(base_path / "analysis.txt", "w") as f:
421            f.write(format_analysis_report(iteration.analysis))
422 
423        # Save optimization if present
424        if iteration.optimization:
425            with open(base_path / "optimization.txt", "w") as f:
426                f.write(format_optimization_report(iteration.optimization))
427 
428            with open(base_path / "optimized_prompt.txt", "w") as f:
429                f.write(iteration.optimization.optimized_prompt)
430 
431    def _save_final_artifacts(self, result: LoopResult) -> None:
432        """Save final optimization artifacts."""
433        base_path = Path(self.config.artifacts_dir)
434 
435        # Save final prompt
436        with open(base_path / "final_prompt.txt", "w") as f:
437            f.write(result.final_prompt)
438 
439        # Save summary JSON
440        summary = {
441            "task": result.task,
442            "total_iterations": result.total_iterations,
443            "converged": result.converged,
444            "initial_score": result.initial_score,
445            "final_score": result.final_score,
446            "improvement_percentage": result.improvement_percentage,
447            "timestamp": datetime.now().isoformat(),
448        }
449        with open(base_path / "summary.json", "w") as f:
450            json.dump(summary, f, indent=2)
451 
452 
453def run_quick_optimization(
454    task: str,
455    initial_prompt: str,
456    tools: list[dict[str, Any]] | None = None,
457    tool_executor: Callable[[str, dict], str] | None = None,
458    max_iterations: int = 3,
459) -> str:
460    """
461    Quick helper function for one-shot optimization.
462 
463    Returns the optimized prompt directly.
464    """
465    config = LoopConfig(max_iterations=max_iterations, verbose=False)
466    loop = OptimizationLoop(config=config)
467    result = loop.run(task, initial_prompt, tools, tool_executor)
468    return result.final_prompt
469

Agent Skills for Context Engineering

examples/interleaved-thinking/reasoning_trace_optimizer/loop.py

Preparing the source view

Agent Skills for Context Engineering

examples/interleaved-thinking/reasoning_trace_optimizer/loop.py