Loading source
Pulling the file list, source metadata, and syntax-aware rendering for this listing.
Source from repo
A comprehensive collection of Agent Skills for context engineering, multi-agent architectures, and production agent systems.
Files
Skill
Size
Entrypoint
Format
Open file
Syntax-highlighted preview of this file as included in the skill package.
examples/interleaved-thinking/reasoning_trace_optimizer/optimizer.py
"""
PromptOptimizer: Generates improved prompts based on trace analysis.

Uses M2.1 to synthesize analysis results into concrete prompt improvements,
with full reasoning transparency via interleaved thinking.
"""

import json
import os
import re
from typing import Any

import anthropic

from reasoning_trace_optimizer.models import (
    AnalysisResult,
    OptimizationResult,
    PromptDiff,
    ReasoningTrace,
)


OPTIMIZER_SYSTEM_PROMPT = """You are an expert prompt engineer specializing in AI agent optimization.

Your task is to improve agent prompts based on reasoning trace analysis.
You have access to:
1. The original prompt that was used
2. Analysis of how the agent reasoned (its thinking trace)
3. Detected patterns and issues
4. Specific recommendations

Your goal is to create an IMPROVED prompt that:
- Addresses identified weaknesses
- Maintains existing strengths
- Prevents detected failure patterns
- Improves clarity and specificity

When optimizing, consider:
- Adding explicit guardrails for common failure modes
- Clarifying ambiguous instructions
- Adding examples for complex behaviors
- Restructuring for better context positioning
- Adding validation steps where missing

Provide the optimized prompt with clear explanations of changes."""


OPTIMIZATION_PROMPT_TEMPLATE = """Optimize the following agent prompt based on trace analysis:

## Original Task
{task}

## Original System Prompt
```
{original_prompt}
```

## Analysis Results

### Overall Score: {overall_score}/100

### Detected Patterns
{patterns}

### Weaknesses
{weaknesses}

### Recommendations
{recommendations}

### Analyzer's Reasoning
{analyzer_thinking}

---

Provide your optimization as JSON:
```json
{{
"optimized_prompt": "<the full improved prompt>",
"diffs": [
{{
"section": "<which part changed, e.g., 'instructions', 'guardrails', 'examples'>",
"original": "<original text or 'N/A' if new>",
"optimized": "<new/changed text>",
"reason": "<why this change helps>"
}}
],
"key_changes": [
"<summary of major change 1>",
"<summary of major change 2>"
],
"predicted_improvement": 15,
"confidence": 0.75
}}
```

Think carefully about what changes will have the biggest impact on agent performance."""


# Captures the string value of an "optimized_prompt" key. The group accepts
# backslash escapes (e.g. \" and \n) so the match does not end at the first
# escaped quote inside the prompt text.
_OPTIMIZED_PROMPT_FIELD = re.compile(
    r'"optimized_prompt"\s*:\s*"((?:[^"\\]|\\.)+)"',
    re.DOTALL,
)

# Fenced block that is untagged or tagged as text/markdown.
_PROSE_CODE_BLOCK = re.compile(r"```(?:text|markdown)?\n(.*?)```", re.DOTALL)


def _extract_json_text(response_text: str) -> str:
    """Return the body of the first fenced block, preferring a ```json fence.

    When the text contains no fence at all it is returned unchanged so the
    caller can attempt to parse it as raw JSON.
    """
    for fence in ("```json", "```"):
        if fence in response_text:
            return response_text.split(fence, 1)[1].split("```", 1)[0]
    return response_text


def _unescape_json_string(raw: str) -> str:
    """Decode the inside of a JSON string literal (without its quotes).

    ``strict=False`` lets literal newlines/tabs through, which is a common
    reason the surrounding document failed to parse in the first place. If
    the escapes are not valid JSON, fall back to unescaping only ``\\n`` and
    ``\\"``.
    """
    try:
        return json.loads(f'"{raw}"', strict=False)
    except json.JSONDecodeError:
        return raw.replace("\\n", "\n").replace('\\"', '"')


def _truncate(text: str, limit: int = 100) -> str:
    """Shorten *text* to *limit* characters, adding '...' only if cut."""
    if len(text) <= limit:
        return text
    return f"{text[:limit]}..."


class PromptOptimizer:
    """
    Optimizes agent prompts based on reasoning trace analysis.

    Uses M2.1's interleaved thinking to generate thoughtful improvements
    with full transparency into the optimization reasoning.

    Example:
        ```python
        optimizer = PromptOptimizer()
        result = optimizer.optimize(
            original_prompt=system_prompt,
            analysis=analysis_result,
            trace=reasoning_trace
        )

        print(f"Predicted improvement: {result.predicted_improvement}%")
        print(f"New prompt:\\n{result.optimized_prompt}")
        ```
    """

    def __init__(
        self,
        api_key: str | None = None,
        base_url: str = "https://api.minimax.io/anthropic",
        model: str = "MiniMax-M2.1",
    ):
        """
        Initialize PromptOptimizer with M2.1 configuration.

        Args:
            api_key: MiniMax API key. When omitted (or empty), the
                ANTHROPIC_API_KEY environment variable is used instead.
            base_url: API endpoint
            model: Model for optimization
        """
        self.model = model
        self.client = anthropic.Anthropic(
            api_key=api_key or os.environ.get("ANTHROPIC_API_KEY"),
            base_url=base_url,
        )

    def optimize(
        self,
        original_prompt: str,
        analysis: AnalysisResult,
        trace: ReasoningTrace | None = None,
        max_tokens: int = 8192,
    ) -> OptimizationResult:
        """
        Generate an optimized prompt based on analysis.

        Args:
            original_prompt: The original system prompt to improve
            analysis: Analysis results from TraceAnalyzer
            trace: Optional original trace for additional context
            max_tokens: Maximum tokens for response

        Returns:
            OptimizationResult with new prompt and change details
        """
        # Format analysis for prompt
        patterns_text = self._format_patterns(analysis)
        weaknesses_text = "\n".join(f"- {w}" for w in analysis.weaknesses)
        recommendations_text = "\n".join(
            f"- {r}" for r in analysis.recommendations
        )

        # Only the first 2000 characters of the analyzer's reasoning are sent.
        if analysis.analyzer_thinking:
            analyzer_thinking = analysis.analyzer_thinking[:2000]
        else:
            analyzer_thinking = "Not available"

        prompt = OPTIMIZATION_PROMPT_TEMPLATE.format(
            task=trace.task if trace else "Unknown task",
            original_prompt=original_prompt,
            overall_score=analysis.overall_score,
            patterns=patterns_text,
            weaknesses=weaknesses_text or "None identified",
            recommendations=recommendations_text or "None provided",
            analyzer_thinking=analyzer_thinking,
        )

        # Call M2.1 for optimization
        response = self.client.messages.create(
            model=self.model,
            max_tokens=max_tokens,
            system=OPTIMIZER_SYSTEM_PROMPT,
            messages=[{"role": "user", "content": prompt}],
        )

        # Collect every thinking/text block; keeping only the last one would
        # drop content whenever the response is split across several blocks.
        thinking_parts = []
        text_parts = []
        for block in response.content:
            if block.type == "thinking":
                thinking_parts.append(block.thinking)
            elif block.type == "text":
                text_parts.append(block.text)

        # Parse the response
        result = self._parse_optimization_response(
            "\n".join(text_parts), original_prompt
        )
        result.optimizer_thinking = "\n\n".join(thinking_parts)

        return result

    def optimize_iterative(
        self,
        original_prompt: str,
        analyses: list[AnalysisResult],
        traces: list[ReasoningTrace],
        max_tokens: int = 8192,
    ) -> OptimizationResult:
        """
        Optimize based on multiple analysis iterations.

        Synthesizes patterns across multiple runs for more robust improvements.

        Args:
            original_prompt: The original system prompt
            analyses: List of analysis results from multiple runs
            traces: Corresponding reasoning traces; only the first one (if
                any) is forwarded as task context
            max_tokens: Maximum tokens for response

        Returns:
            OptimizationResult incorporating learnings from all iterations
        """
        # Aggregate patterns across all analyses
        all_patterns = []
        all_weaknesses = []
        all_recommendations = []
        for analysis in analyses:
            all_patterns.extend(analysis.patterns)
            all_weaknesses.extend(analysis.weaknesses)
            all_recommendations.extend(analysis.recommendations)

        if analyses:
            avg_score = sum(a.overall_score for a in analyses) / len(analyses)
        else:
            avg_score = 0.0

        # Create aggregated analysis. dict.fromkeys deduplicates while keeping
        # first-seen order, so the generated prompt is stable between runs
        # (a plain set would scramble the ordering).
        aggregated = AnalysisResult(
            trace_id="aggregated",
            patterns=all_patterns,
            overall_score=avg_score,
            weaknesses=list(dict.fromkeys(all_weaknesses)),
            recommendations=list(dict.fromkeys(all_recommendations)),
        )

        # Optimize based on aggregated analysis
        return self.optimize(
            original_prompt=original_prompt,
            analysis=aggregated,
            trace=traces[0] if traces else None,
            max_tokens=max_tokens,
        )

    def suggest_tool_improvements(
        self,
        tools: list[dict[str, Any]],
        analysis: AnalysisResult,
        trace: ReasoningTrace,
    ) -> dict[str, str]:
        """
        Suggest improvements for tool definitions based on analysis.

        Args:
            tools: Original tool definitions
            analysis: Analysis results
            trace: Original reasoning trace (currently unused)

        Returns:
            Dict mapping tool names to suggested description improvements.
            Empty when no tool-related patterns were detected or when the
            response contains no JSON object.
        """
        tool_issues = [
            p for p in analysis.patterns
            if p.type.value in ("tool_confusion", "tool_misuse")
        ]

        if not tool_issues:
            return {}

        prompt = f"""Based on these tool usage issues:

{self._format_patterns_for_tools(tool_issues)}

And the original tool definitions:
{json.dumps(tools, indent=2)}

Suggest improved tool descriptions. Respond as JSON:
```json
{{
"tool_name": "improved description that addresses the confusion"
}}
```"""

        response = self.client.messages.create(
            model=self.model,
            max_tokens=2048,
            messages=[{"role": "user", "content": prompt}],
        )

        for block in response.content:
            if block.type != "text":
                continue
            try:
                suggestions = json.loads(_extract_json_text(block.text))
            except json.JSONDecodeError:
                # Best effort: try the next text block, if any.
                continue
            # Honour the declared return type; ignore lists/strings/etc.
            if isinstance(suggestions, dict):
                return suggestions

        return {}

    def _format_patterns(self, analysis: AnalysisResult) -> str:
        """Format patterns for optimization prompt."""
        if not analysis.patterns:
            return "No significant patterns detected."

        # At most two evidence items per pattern are included.
        return "\n\n".join(
            f"[{p.severity.value.upper()}] {p.type.value}\n"
            f" Description: {p.description}\n"
            f" Evidence: {', '.join(p.evidence[:2])}\n"
            f" Suggestion: {p.suggestion}"
            for p in analysis.patterns
        )

    def _format_patterns_for_tools(self, patterns: list) -> str:
        """Format tool-related patterns."""
        return "\n".join(
            f"- {p.type.value}: {p.description}" for p in patterns
        )

    def _parse_optimization_response(
        self,
        response_text: str,
        original_prompt: str,
    ) -> OptimizationResult:
        """Parse the JSON optimization response with fallback extraction.

        Args:
            response_text: Raw text returned by the model
            original_prompt: Prompt to fall back to when nothing usable
                can be extracted

        Returns:
            OptimizationResult. When the response is not a parseable JSON
            object, the prompt is recovered via _fallback_extract_prompt
            (confidence 0.3) or left as the original prompt, and key_changes
            records the failure.
        """
        result = OptimizationResult(
            original_prompt=original_prompt,
            optimized_prompt=original_prompt,  # Default to original if parsing fails
        )

        try:
            data = json.loads(_extract_json_text(response_text))
            # Valid JSON that is not an object (list, string, ...) is as
            # unusable as malformed JSON; route it to the same fallback.
            if not isinstance(data, dict):
                raise TypeError("optimization response is not a JSON object")
        except (json.JSONDecodeError, TypeError) as e:
            # Fallback: try to extract optimized_prompt directly from response
            extracted_prompt = self._fallback_extract_prompt(response_text)
            if extracted_prompt and extracted_prompt != original_prompt:
                result.optimized_prompt = extracted_prompt
                result.key_changes = [f"JSON parsing failed ({type(e).__name__}), extracted prompt via fallback"]
                result.confidence = 0.3  # Lower confidence for fallback extraction
            else:
                result.key_changes = [f"Optimization parsing failed ({type(e).__name__}) - using original prompt"]
            return result

        # Keep the original prompt if the model returned null/empty/non-text.
        optimized = data.get("optimized_prompt")
        if isinstance(optimized, str) and optimized.strip():
            result.optimized_prompt = optimized
        result.predicted_improvement = data.get("predicted_improvement", 0)
        result.confidence = data.get("confidence", 0.5)
        result.key_changes = data.get("key_changes") or []

        # Parse diffs, skipping entries that are not JSON objects
        for d in data.get("diffs") or []:
            if not isinstance(d, dict):
                continue
            result.diffs.append(
                PromptDiff(
                    section=d.get("section", "unknown"),
                    original=d.get("original", ""),
                    optimized=d.get("optimized", ""),
                    reason=d.get("reason", ""),
                )
            )

        return result

    def _fallback_extract_prompt(self, response_text: str) -> str | None:
        """
        Fallback method to extract optimized prompt when JSON parsing fails.

        Tries multiple strategies to find the prompt content.

        Args:
            response_text: Raw text returned by the model

        Returns:
            The extracted prompt, or None when no strategy finds one
        """
        # Strategy 1: Look for "optimized_prompt": "..." pattern
        match = _OPTIMIZED_PROMPT_FIELD.search(response_text)
        if match:
            return _unescape_json_string(match.group(1))

        # Strategy 2: Look for content between specific markers
        markers = [
            ('## Optimized Prompt', '##'),
            ('**Optimized Prompt**', '**'),
            ('OPTIMIZED PROMPT:', '\n\n'),
            ('Here is the improved prompt:', '\n\n---'),
        ]

        for start_marker, end_marker in markers:
            _, found, after = response_text.partition(start_marker)
            if not found:
                continue
            remaining = after.strip()
            if end_marker in remaining:
                extracted = remaining.split(end_marker, 1)[0].strip()
                if len(extracted) > 50:  # Minimum length check
                    return extracted

        # Strategy 3: Look for a substantial code block that might be the prompt
        for block in _PROSE_CODE_BLOCK.findall(response_text):
            # Skip JSON blocks, look for prose blocks that could be prompts
            if not block.strip().startswith('{') and len(block) > 100:
                return block.strip()

        return None


def format_optimization_report(result: OptimizationResult) -> str:
    """Format an optimization result as a human-readable report.

    Args:
        result: The optimization result to render

    Returns:
        Multi-line report: header, predicted improvement and confidence,
        key changes, per-diff details (before/after text shortened to 100
        characters), then the full optimized prompt.
    """
    lines = [
        "=" * 60,
        "PROMPT OPTIMIZATION REPORT",
        "=" * 60,
        "",
        f"Predicted Improvement: {result.predicted_improvement}%",
        f"Confidence: {result.confidence * 100:.0f}%",
        "",
    ]

    if result.key_changes:
        lines.append("Key Changes:")
        lines.extend(f" - {change}" for change in result.key_changes)
        lines.append("")

    if result.diffs:
        lines.append("Detailed Changes:")
        for diff in result.diffs:
            lines.append(f"\n [{diff.section}]")
            # 'N/A' marks a newly added section with no prior text.
            if diff.original and diff.original != "N/A":
                lines.append(f" Before: {_truncate(diff.original)}")
            lines.append(f" After: {_truncate(diff.optimized)}")
            lines.append(f" Reason: {diff.reason}")

    lines.extend([
        "",
        "=" * 60,
        "OPTIMIZED PROMPT",
        "=" * 60,
        result.optimized_prompt,
    ])

    return "\n".join(lines)