Source from repo

Agent Skills for Context Engineering

A comprehensive collection of Agent Skills for context engineering, multi-agent architectures, and production agent systems.

muratcankoylan · GitHub: muratcankoylan · Source repo · Original GitHub link

Files

241

Skill

n/a

Size

2.6 MB

Entrypoint

SKILL.md

Format

git-repo

Open file

examples/interleaved-thinking/reasoning_trace_optimizer/optimizer.py

Syntax-highlighted preview of this file as included in the skill package.

Rendered Source

code · 450 lines · Free

examples/interleaved-thinking/reasoning_trace_optimizer/optimizer.py

1"""
2PromptOptimizer: Generates improved prompts based on trace analysis.
3 
4Uses M2.1 to synthesize analysis results into concrete prompt improvements,
5with full reasoning transparency via interleaved thinking.
6"""
7 
8import json
9import os
10from typing import Any
11 
12import anthropic
13 
14from reasoning_trace_optimizer.models import (
15    AnalysisResult,
16    OptimizationResult,
17    PromptDiff,
18    ReasoningTrace,
19)
20 
21 
# System prompt sent as the `system=` argument of the optimization request in
# PromptOptimizer.optimize(). It frames the model as a prompt engineer and
# lists what an improved prompt should achieve. The text is sent verbatim, so
# any edit here changes model behavior.
OPTIMIZER_SYSTEM_PROMPT = """You are an expert prompt engineer specializing in AI agent optimization.

Your task is to improve agent prompts based on reasoning trace analysis.
You have access to:
1. The original prompt that was used
2. Analysis of how the agent reasoned (its thinking trace)
3. Detected patterns and issues
4. Specific recommendations

Your goal is to create an IMPROVED prompt that:
- Addresses identified weaknesses
- Maintains existing strengths
- Prevents detected failure patterns
- Improves clarity and specificity

When optimizing, consider:
- Adding explicit guardrails for common failure modes
- Clarifying ambiguous instructions
- Adding examples for complex behaviors
- Restructuring for better context positioning
- Adding validation steps where missing

Provide the optimized prompt with clear explanations of changes."""
45 
46 
# User-message template filled with str.format() in PromptOptimizer.optimize().
# Placeholders: task, original_prompt, overall_score, patterns, weaknesses,
# recommendations, analyzer_thinking.
# The JSON example uses doubled braces ({{ and }}) so that str.format() emits
# them as literal single braces instead of treating them as placeholders.
# The keys shown in the JSON example are the ones read back by
# PromptOptimizer._parse_optimization_response().
OPTIMIZATION_PROMPT_TEMPLATE = """Optimize the following agent prompt based on trace analysis:

## Original Task
{task}

## Original System Prompt
```
{original_prompt}
```

## Analysis Results

### Overall Score: {overall_score}/100

### Detected Patterns
{patterns}

### Weaknesses
{weaknesses}

### Recommendations
{recommendations}

### Analyzer's Reasoning
{analyzer_thinking}

---

Provide your optimization as JSON:
```json
{{
    "optimized_prompt": "<the full improved prompt>",
    "diffs": [
        {{
            "section": "<which part changed, e.g., 'instructions', 'guardrails', 'examples'>",
            "original": "<original text or 'N/A' if new>",
            "optimized": "<new/changed text>",
            "reason": "<why this change helps>"
        }}
    ],
    "key_changes": [
        "<summary of major change 1>",
        "<summary of major change 2>"
    ],
    "predicted_improvement": 15,
    "confidence": 0.75
}}
```

Think carefully about what changes will have the biggest impact on agent performance."""
97 
98 
class PromptOptimizer:
    """
    Optimizes agent prompts based on reasoning trace analysis.

    Uses M2.1's interleaved thinking to generate thoughtful improvements
    with full transparency into the optimization reasoning.

    Example:
        ```python
        optimizer = PromptOptimizer()
        result = optimizer.optimize(
            original_prompt=system_prompt,
            analysis=analysis_result,
            trace=reasoning_trace
        )

        print(f"Predicted improvement: {result.predicted_improvement}%")
        print(f"New prompt:\\n{result.optimized_prompt}")
        ```
    """

    def __init__(
        self,
        api_key: str | None = None,
        base_url: str = "https://api.minimax.io/anthropic",
        model: str = "MiniMax-M2.1",
    ):
        """
        Initialize PromptOptimizer with M2.1 configuration.

        Args:
            api_key: API key for the endpoint. When omitted or empty, the
                ANTHROPIC_API_KEY environment variable is used instead.
            base_url: Base URL handed to the anthropic client
            model: Model for optimization
        """
        self.model = model
        self.client = anthropic.Anthropic(
            api_key=api_key or os.environ.get("ANTHROPIC_API_KEY"),
            base_url=base_url,
        )

    def optimize(
        self,
        original_prompt: str,
        analysis: AnalysisResult,
        trace: ReasoningTrace | None = None,
        max_tokens: int = 8192,
    ) -> OptimizationResult:
        """
        Generate an optimized prompt based on analysis.

        Args:
            original_prompt: The original system prompt to improve
            analysis: Analysis results from TraceAnalyzer
            trace: Optional original trace; only its ``task`` is used
            max_tokens: Maximum tokens for response

        Returns:
            OptimizationResult with new prompt and change details. When the
            response cannot be parsed the result falls back to the original
            prompt (see ``_parse_optimization_response``).
        """
        # Format analysis for prompt
        patterns_text = self._format_patterns(analysis)
        weaknesses_text = "\n".join(f"- {w}" for w in analysis.weaknesses)
        recommendations_text = "\n".join(f"- {r}" for r in analysis.recommendations)

        prompt = OPTIMIZATION_PROMPT_TEMPLATE.format(
            task=trace.task if trace else "Unknown task",
            original_prompt=original_prompt,
            overall_score=analysis.overall_score,
            patterns=patterns_text,
            weaknesses=weaknesses_text or "None identified",
            recommendations=recommendations_text or "None provided",
            # Cap the analyzer's reasoning so it cannot dominate the prompt.
            analyzer_thinking=analysis.analyzer_thinking[:2000] if analysis.analyzer_thinking else "Not available",
        )

        # Call M2.1 for optimization
        response = self.client.messages.create(
            model=self.model,
            max_tokens=max_tokens,
            system=OPTIMIZER_SYSTEM_PROMPT,
            messages=[{"role": "user", "content": prompt}],
        )

        # A response may carry several thinking/text blocks; keep all of them
        # rather than only the last one so no reasoning or output is dropped.
        thinking_parts: list[str] = []
        text_parts: list[str] = []
        for block in response.content:
            if block.type == "thinking":
                thinking_parts.append(block.thinking)
            elif block.type == "text":
                text_parts.append(block.text)

        # Parse the response
        result = self._parse_optimization_response("\n".join(text_parts), original_prompt)
        result.optimizer_thinking = "\n\n".join(thinking_parts)

        return result

    def optimize_iterative(
        self,
        original_prompt: str,
        analyses: list[AnalysisResult],
        traces: list[ReasoningTrace],
    ) -> OptimizationResult:
        """
        Optimize based on multiple analysis iterations.

        Synthesizes patterns across multiple runs for more robust improvements.

        Args:
            original_prompt: The original system prompt
            analyses: List of analysis results from multiple runs
            traces: Corresponding reasoning traces; only the first one is
                forwarded to ``optimize`` (for its task description)

        Returns:
            OptimizationResult incorporating learnings from all iterations
        """
        # Aggregate patterns across all analyses
        all_patterns = []
        all_weaknesses = []
        all_recommendations = []
        total_score = 0

        for analysis in analyses:
            all_patterns.extend(analysis.patterns)
            all_weaknesses.extend(analysis.weaknesses)
            all_recommendations.extend(analysis.recommendations)
            total_score += analysis.overall_score

        # An empty list of analyses yields a score of 0.0 instead of dividing by zero.
        avg_score = total_score / len(analyses) if analyses else 0.0

        # Create aggregated analysis. dict.fromkeys() removes duplicates while
        # keeping first-seen order, so the generated prompt is deterministic
        # (a plain set() would shuffle the bullets between runs).
        aggregated = AnalysisResult(
            trace_id="aggregated",
            patterns=all_patterns,
            overall_score=avg_score,
            weaknesses=list(dict.fromkeys(all_weaknesses)),
            recommendations=list(dict.fromkeys(all_recommendations)),
        )

        # Optimize based on aggregated analysis
        return self.optimize(
            original_prompt=original_prompt,
            analysis=aggregated,
            trace=traces[0] if traces else None,
        )

    def suggest_tool_improvements(
        self,
        tools: list[dict[str, Any]],
        analysis: AnalysisResult,
        trace: ReasoningTrace,
    ) -> dict[str, str]:
        """
        Suggest improvements for tool definitions based on analysis.

        Args:
            tools: Original tool definitions
            analysis: Analysis results
            trace: Original reasoning trace (currently unused; kept so the
                signature stays stable for callers)

        Returns:
            Dict mapping tool names to suggested description improvements.
            Empty when no tool-related pattern was detected or when no text
            block of the response contains a JSON object.
        """
        tool_issues = [
            p for p in analysis.patterns
            if p.type.value in ("tool_confusion", "tool_misuse")
        ]

        # No tool-related findings: skip the API call entirely.
        if not tool_issues:
            return {}

        prompt = f"""Based on these tool usage issues:

{self._format_patterns_for_tools(tool_issues)}

And the original tool definitions:
{json.dumps(tools, indent=2)}

Suggest improved tool descriptions. Respond as JSON:
```json
{{
    "tool_name": "improved description that addresses the confusion"
}}
```"""

        response = self.client.messages.create(
            model=self.model,
            max_tokens=2048,
            messages=[{"role": "user", "content": prompt}],
        )

        for block in response.content:
            if block.type != "text":
                continue
            try:
                return self._load_json_object(block.text)
            except (json.JSONDecodeError, TypeError):
                # Best effort: this block held no usable JSON object, so try
                # the next text block before giving up.
                continue

        return {}

    def _format_patterns(self, analysis: AnalysisResult) -> str:
        """Format patterns for optimization prompt.

        Each pattern becomes a four-line entry (severity/type, description,
        at most two evidence items, suggestion); entries are separated by a
        blank line.
        """
        if not analysis.patterns:
            return "No significant patterns detected."

        parts = []
        for p in analysis.patterns:
            parts.append(
                f"[{p.severity.value.upper()}] {p.type.value}\n"
                f"  Description: {p.description}\n"
                f"  Evidence: {', '.join(p.evidence[:2])}\n"
                f"  Suggestion: {p.suggestion}"
            )
        return "\n\n".join(parts)

    def _format_patterns_for_tools(self, patterns: list) -> str:
        """Format tool-related patterns as one ``- type: description`` bullet per line."""
        return "\n".join(
            f"- {p.type.value}: {p.description}" for p in patterns
        )

    @staticmethod
    def _extract_json_text(response_text: str) -> str:
        """Return the contents of the first fenced block, or the whole text if unfenced.

        A fence tagged ``json`` wins over an untagged fence.
        """
        if "```json" in response_text:
            return response_text.split("```json", 1)[1].split("```", 1)[0]
        if "```" in response_text:
            return response_text.split("```", 2)[1]
        return response_text

    @staticmethod
    def _load_json_object(response_text: str) -> dict[str, Any]:
        """Parse a JSON object out of a model response.

        Raises:
            json.JSONDecodeError: No parsable JSON was found.
            TypeError: JSON was found but its top level is not an object.
        """
        try:
            data = json.loads(PromptOptimizer._extract_json_text(response_text))
        except json.JSONDecodeError:
            # Fence splitting cuts the payload short when a JSON string itself
            # contains a code fence (common for prompts). Retry by decoding
            # one JSON value starting at the first opening brace.
            brace = response_text.find("{")
            if brace == -1:
                raise
            data, _ = json.JSONDecoder().raw_decode(response_text, brace)

        if not isinstance(data, dict):
            raise TypeError(f"expected a JSON object, got {type(data).__name__}")
        return data

    def _parse_optimization_response(
        self,
        response_text: str,
        original_prompt: str,
    ) -> OptimizationResult:
        """Parse the JSON optimization response with fallback extraction.

        Args:
            response_text: Text of the model response
            original_prompt: Prompt being optimized; used as the default for
                ``optimized_prompt`` whenever nothing usable can be parsed

        Returns:
            OptimizationResult populated from the JSON payload. Fields that
            are missing or have an unexpected JSON type keep safe defaults.
            When no JSON object can be parsed, the prompt is recovered with
            ``_fallback_extract_prompt`` (confidence 0.3) or left unchanged,
            and ``key_changes`` records what happened.
        """
        result = OptimizationResult(
            original_prompt=original_prompt,
            optimized_prompt=original_prompt,  # Default to original if parsing fails
        )

        try:
            data = self._load_json_object(response_text)
        except (json.JSONDecodeError, TypeError) as e:
            # Fallback: try to extract optimized_prompt directly from response
            extracted_prompt = self._fallback_extract_prompt(response_text)
            if extracted_prompt and extracted_prompt != original_prompt:
                result.optimized_prompt = extracted_prompt
                result.key_changes = [f"JSON parsing failed ({type(e).__name__}), extracted prompt via fallback"]
                result.confidence = 0.3  # Lower confidence for fallback extraction
            else:
                result.key_changes = [f"Optimization parsing failed ({type(e).__name__}) - using original prompt"]
            return result

        # A null or non-string prompt must not replace the original: later
        # consumers join and slice this value as a string.
        optimized = data.get("optimized_prompt")
        if isinstance(optimized, str) and optimized:
            result.optimized_prompt = optimized

        # Numeric fields are formatted arithmetically by the report, so fall
        # back to the defaults when the model returned something else.
        predicted = data.get("predicted_improvement", 0)
        result.predicted_improvement = predicted if isinstance(predicted, (int, float)) else 0
        confidence = data.get("confidence", 0.5)
        result.confidence = confidence if isinstance(confidence, (int, float)) else 0.5

        key_changes = data.get("key_changes", [])
        result.key_changes = key_changes if isinstance(key_changes, list) else []

        # Parse diffs, ignoring entries that are not JSON objects
        diffs = data.get("diffs", [])
        for d in diffs if isinstance(diffs, list) else []:
            if not isinstance(d, dict):
                continue
            result.diffs.append(
                PromptDiff(
                    section=d.get("section", "unknown"),
                    original=d.get("original", ""),
                    optimized=d.get("optimized", ""),
                    reason=d.get("reason", ""),
                )
            )

        return result

    def _fallback_extract_prompt(self, response_text: str) -> str | None:
        """
        Fallback method to extract optimized prompt when JSON parsing fails.

        Tries multiple strategies to find the prompt content, in order:
        the ``"optimized_prompt"`` JSON string, text between known section
        markers, then the first sizeable non-JSON fenced block.

        Returns:
            The extracted prompt, or None when no strategy matches.
        """
        import re

        # Strategy 1: Look for "optimized_prompt": "..." pattern. The value
        # pattern steps over backslash escapes so an escaped quote (\") does
        # not end the match early.
        match = re.search(
            r'"optimized_prompt"\s*:\s*"((?:[^"\\]|\\.)*)"',
            response_text,
            re.DOTALL,
        )
        if match and match.group(1):
            raw = match.group(1)
            try:
                # Decode every JSON escape; strict=False tolerates raw
                # newlines that a sloppy response may contain.
                return json.loads(f'"{raw}"', strict=False)
            except json.JSONDecodeError:
                # Invalid escape sequence: unescape the common cases by hand.
                return raw.replace('\\n', '\n').replace('\\"', '"')

        # Strategy 2: Look for content between specific markers
        markers = [
            ('## Optimized Prompt', '##'),
            ('**Optimized Prompt**', '**'),
            ('OPTIMIZED PROMPT:', '\n\n'),
            ('Here is the improved prompt:', '\n\n---'),
        ]

        for start_marker, end_marker in markers:
            if start_marker in response_text:
                start_idx = response_text.find(start_marker) + len(start_marker)
                remaining = response_text[start_idx:].strip()
                if end_marker in remaining:
                    end_idx = remaining.find(end_marker)
                    extracted = remaining[:end_idx].strip()
                    if len(extracted) > 50:  # Minimum length check
                        return extracted

        # Strategy 3: Look for a substantial code block that might be the prompt
        code_blocks = re.findall(r'```(?:text|markdown)?\n(.*?)```', response_text, re.DOTALL)
        for block in code_blocks:
            # Skip JSON blocks, look for prose blocks that could be prompts
            if not block.strip().startswith('{') and len(block) > 100:
                return block.strip()

        return None
412 
413 
def format_optimization_report(result: OptimizationResult) -> str:
    """Format an optimization result as a human-readable report.

    Args:
        result: Optimization result to render. Reads predicted_improvement,
            confidence, key_changes, diffs and optimized_prompt.

    Returns:
        A multi-line string: header with predicted improvement and
        confidence, optional "Key Changes" and "Detailed Changes" sections,
        then the full optimized prompt. Diff texts are previewed at up to
        100 characters.
    """

    def _preview(text: str, limit: int = 100) -> str:
        """Shorten text to limit characters, marking a cut with an ellipsis."""
        # Only add "..." when something was actually cut off; otherwise the
        # report would suggest missing text that does not exist.
        return text if len(text) <= limit else f"{text[:limit]}..."

    lines = [
        "=" * 60,
        "PROMPT OPTIMIZATION REPORT",
        "=" * 60,
        "",
        f"Predicted Improvement: {result.predicted_improvement}%",
        f"Confidence: {result.confidence * 100:.0f}%",
        "",
    ]

    if result.key_changes:
        lines.append("Key Changes:")
        lines.extend(f"  - {change}" for change in result.key_changes)
        lines.append("")

    if result.diffs:
        lines.append("Detailed Changes:")
        for diff in result.diffs:
            lines.append(f"\n  [{diff.section}]")
            # "N/A" marks a newly added section with no original text.
            if diff.original and diff.original != "N/A":
                lines.append(f"    Before: {_preview(diff.original)}")
            lines.append(f"    After: {_preview(diff.optimized)}")
            lines.append(f"    Reason: {diff.reason}")

    lines.extend([
        "",
        "=" * 60,
        "OPTIMIZED PROMPT",
        "=" * 60,
        result.optimized_prompt,
    ])

    return "\n".join(lines)
450

Loading source

Preparing the source view

Pulling the file list, source metadata, and syntax-aware rendering for this listing.

Marketplace

Source from repo

Agent Skills for Context Engineering

A comprehensive collection of Agent Skills for context engineering, multi-agent architectures, and production agent systems.

muratcankoylan · GitHub: muratcankoylan · Source repo · Original GitHub link

Files

241

Skill

n/a

Size

2.6 MB

Entrypoint

SKILL.md

Format

git-repo

Open file

examples/interleaved-thinking/reasoning_trace_optimizer/optimizer.py

Syntax-highlighted preview of this file as included in the skill package.

Rendered Source

code · 450 lines · Free

examples/interleaved-thinking/reasoning_trace_optimizer/optimizer.py

1"""
2PromptOptimizer: Generates improved prompts based on trace analysis.
3 
4Uses M2.1 to synthesize analysis results into concrete prompt improvements,
5with full reasoning transparency via interleaved thinking.
6"""
7 
8import json
9import os
10from typing import Any
11 
12import anthropic
13 
14from reasoning_trace_optimizer.models import (
15    AnalysisResult,
16    OptimizationResult,
17    PromptDiff,
18    ReasoningTrace,
19)
20 
21 
# System prompt sent as the `system=` argument of the optimization request in
# PromptOptimizer.optimize(). It frames the model as a prompt engineer and
# lists what an improved prompt should achieve. The text is sent verbatim, so
# any edit here changes model behavior.
OPTIMIZER_SYSTEM_PROMPT = """You are an expert prompt engineer specializing in AI agent optimization.

Your task is to improve agent prompts based on reasoning trace analysis.
You have access to:
1. The original prompt that was used
2. Analysis of how the agent reasoned (its thinking trace)
3. Detected patterns and issues
4. Specific recommendations

Your goal is to create an IMPROVED prompt that:
- Addresses identified weaknesses
- Maintains existing strengths
- Prevents detected failure patterns
- Improves clarity and specificity

When optimizing, consider:
- Adding explicit guardrails for common failure modes
- Clarifying ambiguous instructions
- Adding examples for complex behaviors
- Restructuring for better context positioning
- Adding validation steps where missing

Provide the optimized prompt with clear explanations of changes."""
45 
46 
# User-message template filled with str.format() in PromptOptimizer.optimize().
# Placeholders: task, original_prompt, overall_score, patterns, weaknesses,
# recommendations, analyzer_thinking.
# The JSON example uses doubled braces ({{ and }}) so that str.format() emits
# them as literal single braces instead of treating them as placeholders.
# The keys shown in the JSON example are the ones read back by
# PromptOptimizer._parse_optimization_response().
OPTIMIZATION_PROMPT_TEMPLATE = """Optimize the following agent prompt based on trace analysis:

## Original Task
{task}

## Original System Prompt
```
{original_prompt}
```

## Analysis Results

### Overall Score: {overall_score}/100

### Detected Patterns
{patterns}

### Weaknesses
{weaknesses}

### Recommendations
{recommendations}

### Analyzer's Reasoning
{analyzer_thinking}

---

Provide your optimization as JSON:
```json
{{
    "optimized_prompt": "<the full improved prompt>",
    "diffs": [
        {{
            "section": "<which part changed, e.g., 'instructions', 'guardrails', 'examples'>",
            "original": "<original text or 'N/A' if new>",
            "optimized": "<new/changed text>",
            "reason": "<why this change helps>"
        }}
    ],
    "key_changes": [
        "<summary of major change 1>",
        "<summary of major change 2>"
    ],
    "predicted_improvement": 15,
    "confidence": 0.75
}}
```

Think carefully about what changes will have the biggest impact on agent performance."""
97 
98 
class PromptOptimizer:
    """
    Optimizes agent prompts based on reasoning trace analysis.

    Uses M2.1's interleaved thinking to generate thoughtful improvements
    with full transparency into the optimization reasoning.

    Example:
        ```python
        optimizer = PromptOptimizer()
        result = optimizer.optimize(
            original_prompt=system_prompt,
            analysis=analysis_result,
            trace=reasoning_trace
        )

        print(f"Predicted improvement: {result.predicted_improvement}%")
        print(f"New prompt:\\n{result.optimized_prompt}")
        ```
    """

    def __init__(
        self,
        api_key: str | None = None,
        base_url: str = "https://api.minimax.io/anthropic",
        model: str = "MiniMax-M2.1",
    ):
        """
        Initialize PromptOptimizer with M2.1 configuration.

        Args:
            api_key: API key for the endpoint. When omitted or empty, the
                ANTHROPIC_API_KEY environment variable is used instead.
            base_url: Base URL handed to the anthropic client
            model: Model for optimization
        """
        self.model = model
        self.client = anthropic.Anthropic(
            api_key=api_key or os.environ.get("ANTHROPIC_API_KEY"),
            base_url=base_url,
        )

    def optimize(
        self,
        original_prompt: str,
        analysis: AnalysisResult,
        trace: ReasoningTrace | None = None,
        max_tokens: int = 8192,
    ) -> OptimizationResult:
        """
        Generate an optimized prompt based on analysis.

        Args:
            original_prompt: The original system prompt to improve
            analysis: Analysis results from TraceAnalyzer
            trace: Optional original trace; only its ``task`` is used
            max_tokens: Maximum tokens for response

        Returns:
            OptimizationResult with new prompt and change details. When the
            response cannot be parsed the result falls back to the original
            prompt (see ``_parse_optimization_response``).
        """
        # Format analysis for prompt
        patterns_text = self._format_patterns(analysis)
        weaknesses_text = "\n".join(f"- {w}" for w in analysis.weaknesses)
        recommendations_text = "\n".join(f"- {r}" for r in analysis.recommendations)

        prompt = OPTIMIZATION_PROMPT_TEMPLATE.format(
            task=trace.task if trace else "Unknown task",
            original_prompt=original_prompt,
            overall_score=analysis.overall_score,
            patterns=patterns_text,
            weaknesses=weaknesses_text or "None identified",
            recommendations=recommendations_text or "None provided",
            # Cap the analyzer's reasoning so it cannot dominate the prompt.
            analyzer_thinking=analysis.analyzer_thinking[:2000] if analysis.analyzer_thinking else "Not available",
        )

        # Call M2.1 for optimization
        response = self.client.messages.create(
            model=self.model,
            max_tokens=max_tokens,
            system=OPTIMIZER_SYSTEM_PROMPT,
            messages=[{"role": "user", "content": prompt}],
        )

        # A response may carry several thinking/text blocks; keep all of them
        # rather than only the last one so no reasoning or output is dropped.
        thinking_parts: list[str] = []
        text_parts: list[str] = []
        for block in response.content:
            if block.type == "thinking":
                thinking_parts.append(block.thinking)
            elif block.type == "text":
                text_parts.append(block.text)

        # Parse the response
        result = self._parse_optimization_response("\n".join(text_parts), original_prompt)
        result.optimizer_thinking = "\n\n".join(thinking_parts)

        return result

    def optimize_iterative(
        self,
        original_prompt: str,
        analyses: list[AnalysisResult],
        traces: list[ReasoningTrace],
    ) -> OptimizationResult:
        """
        Optimize based on multiple analysis iterations.

        Synthesizes patterns across multiple runs for more robust improvements.

        Args:
            original_prompt: The original system prompt
            analyses: List of analysis results from multiple runs
            traces: Corresponding reasoning traces; only the first one is
                forwarded to ``optimize`` (for its task description)

        Returns:
            OptimizationResult incorporating learnings from all iterations
        """
        # Aggregate patterns across all analyses
        all_patterns = []
        all_weaknesses = []
        all_recommendations = []
        total_score = 0

        for analysis in analyses:
            all_patterns.extend(analysis.patterns)
            all_weaknesses.extend(analysis.weaknesses)
            all_recommendations.extend(analysis.recommendations)
            total_score += analysis.overall_score

        # An empty list of analyses yields a score of 0.0 instead of dividing by zero.
        avg_score = total_score / len(analyses) if analyses else 0.0

        # Create aggregated analysis. dict.fromkeys() removes duplicates while
        # keeping first-seen order, so the generated prompt is deterministic
        # (a plain set() would shuffle the bullets between runs).
        aggregated = AnalysisResult(
            trace_id="aggregated",
            patterns=all_patterns,
            overall_score=avg_score,
            weaknesses=list(dict.fromkeys(all_weaknesses)),
            recommendations=list(dict.fromkeys(all_recommendations)),
        )

        # Optimize based on aggregated analysis
        return self.optimize(
            original_prompt=original_prompt,
            analysis=aggregated,
            trace=traces[0] if traces else None,
        )

    def suggest_tool_improvements(
        self,
        tools: list[dict[str, Any]],
        analysis: AnalysisResult,
        trace: ReasoningTrace,
    ) -> dict[str, str]:
        """
        Suggest improvements for tool definitions based on analysis.

        Args:
            tools: Original tool definitions
            analysis: Analysis results
            trace: Original reasoning trace (currently unused; kept so the
                signature stays stable for callers)

        Returns:
            Dict mapping tool names to suggested description improvements.
            Empty when no tool-related pattern was detected or when no text
            block of the response contains a JSON object.
        """
        tool_issues = [
            p for p in analysis.patterns
            if p.type.value in ("tool_confusion", "tool_misuse")
        ]

        # No tool-related findings: skip the API call entirely.
        if not tool_issues:
            return {}

        prompt = f"""Based on these tool usage issues:

{self._format_patterns_for_tools(tool_issues)}

And the original tool definitions:
{json.dumps(tools, indent=2)}

Suggest improved tool descriptions. Respond as JSON:
```json
{{
    "tool_name": "improved description that addresses the confusion"
}}
```"""

        response = self.client.messages.create(
            model=self.model,
            max_tokens=2048,
            messages=[{"role": "user", "content": prompt}],
        )

        for block in response.content:
            if block.type != "text":
                continue
            try:
                return self._load_json_object(block.text)
            except (json.JSONDecodeError, TypeError):
                # Best effort: this block held no usable JSON object, so try
                # the next text block before giving up.
                continue

        return {}

    def _format_patterns(self, analysis: AnalysisResult) -> str:
        """Format patterns for optimization prompt.

        Each pattern becomes a four-line entry (severity/type, description,
        at most two evidence items, suggestion); entries are separated by a
        blank line.
        """
        if not analysis.patterns:
            return "No significant patterns detected."

        parts = []
        for p in analysis.patterns:
            parts.append(
                f"[{p.severity.value.upper()}] {p.type.value}\n"
                f"  Description: {p.description}\n"
                f"  Evidence: {', '.join(p.evidence[:2])}\n"
                f"  Suggestion: {p.suggestion}"
            )
        return "\n\n".join(parts)

    def _format_patterns_for_tools(self, patterns: list) -> str:
        """Format tool-related patterns as one ``- type: description`` bullet per line."""
        return "\n".join(
            f"- {p.type.value}: {p.description}" for p in patterns
        )

    @staticmethod
    def _extract_json_text(response_text: str) -> str:
        """Return the contents of the first fenced block, or the whole text if unfenced.

        A fence tagged ``json`` wins over an untagged fence.
        """
        if "```json" in response_text:
            return response_text.split("```json", 1)[1].split("```", 1)[0]
        if "```" in response_text:
            return response_text.split("```", 2)[1]
        return response_text

    @staticmethod
    def _load_json_object(response_text: str) -> dict[str, Any]:
        """Parse a JSON object out of a model response.

        Raises:
            json.JSONDecodeError: No parsable JSON was found.
            TypeError: JSON was found but its top level is not an object.
        """
        try:
            data = json.loads(PromptOptimizer._extract_json_text(response_text))
        except json.JSONDecodeError:
            # Fence splitting cuts the payload short when a JSON string itself
            # contains a code fence (common for prompts). Retry by decoding
            # one JSON value starting at the first opening brace.
            brace = response_text.find("{")
            if brace == -1:
                raise
            data, _ = json.JSONDecoder().raw_decode(response_text, brace)

        if not isinstance(data, dict):
            raise TypeError(f"expected a JSON object, got {type(data).__name__}")
        return data

    def _parse_optimization_response(
        self,
        response_text: str,
        original_prompt: str,
    ) -> OptimizationResult:
        """Parse the JSON optimization response with fallback extraction.

        Args:
            response_text: Text of the model response
            original_prompt: Prompt being optimized; used as the default for
                ``optimized_prompt`` whenever nothing usable can be parsed

        Returns:
            OptimizationResult populated from the JSON payload. Fields that
            are missing or have an unexpected JSON type keep safe defaults.
            When no JSON object can be parsed, the prompt is recovered with
            ``_fallback_extract_prompt`` (confidence 0.3) or left unchanged,
            and ``key_changes`` records what happened.
        """
        result = OptimizationResult(
            original_prompt=original_prompt,
            optimized_prompt=original_prompt,  # Default to original if parsing fails
        )

        try:
            data = self._load_json_object(response_text)
        except (json.JSONDecodeError, TypeError) as e:
            # Fallback: try to extract optimized_prompt directly from response
            extracted_prompt = self._fallback_extract_prompt(response_text)
            if extracted_prompt and extracted_prompt != original_prompt:
                result.optimized_prompt = extracted_prompt
                result.key_changes = [f"JSON parsing failed ({type(e).__name__}), extracted prompt via fallback"]
                result.confidence = 0.3  # Lower confidence for fallback extraction
            else:
                result.key_changes = [f"Optimization parsing failed ({type(e).__name__}) - using original prompt"]
            return result

        # A null or non-string prompt must not replace the original: later
        # consumers join and slice this value as a string.
        optimized = data.get("optimized_prompt")
        if isinstance(optimized, str) and optimized:
            result.optimized_prompt = optimized

        # Numeric fields are formatted arithmetically by the report, so fall
        # back to the defaults when the model returned something else.
        predicted = data.get("predicted_improvement", 0)
        result.predicted_improvement = predicted if isinstance(predicted, (int, float)) else 0
        confidence = data.get("confidence", 0.5)
        result.confidence = confidence if isinstance(confidence, (int, float)) else 0.5

        key_changes = data.get("key_changes", [])
        result.key_changes = key_changes if isinstance(key_changes, list) else []

        # Parse diffs, ignoring entries that are not JSON objects
        diffs = data.get("diffs", [])
        for d in diffs if isinstance(diffs, list) else []:
            if not isinstance(d, dict):
                continue
            result.diffs.append(
                PromptDiff(
                    section=d.get("section", "unknown"),
                    original=d.get("original", ""),
                    optimized=d.get("optimized", ""),
                    reason=d.get("reason", ""),
                )
            )

        return result

    def _fallback_extract_prompt(self, response_text: str) -> str | None:
        """
        Fallback method to extract optimized prompt when JSON parsing fails.

        Tries multiple strategies to find the prompt content, in order:
        the ``"optimized_prompt"`` JSON string, text between known section
        markers, then the first sizeable non-JSON fenced block.

        Returns:
            The extracted prompt, or None when no strategy matches.
        """
        import re

        # Strategy 1: Look for "optimized_prompt": "..." pattern. The value
        # pattern steps over backslash escapes so an escaped quote (\") does
        # not end the match early.
        match = re.search(
            r'"optimized_prompt"\s*:\s*"((?:[^"\\]|\\.)*)"',
            response_text,
            re.DOTALL,
        )
        if match and match.group(1):
            raw = match.group(1)
            try:
                # Decode every JSON escape; strict=False tolerates raw
                # newlines that a sloppy response may contain.
                return json.loads(f'"{raw}"', strict=False)
            except json.JSONDecodeError:
                # Invalid escape sequence: unescape the common cases by hand.
                return raw.replace('\\n', '\n').replace('\\"', '"')

        # Strategy 2: Look for content between specific markers
        markers = [
            ('## Optimized Prompt', '##'),
            ('**Optimized Prompt**', '**'),
            ('OPTIMIZED PROMPT:', '\n\n'),
            ('Here is the improved prompt:', '\n\n---'),
        ]

        for start_marker, end_marker in markers:
            if start_marker in response_text:
                start_idx = response_text.find(start_marker) + len(start_marker)
                remaining = response_text[start_idx:].strip()
                if end_marker in remaining:
                    end_idx = remaining.find(end_marker)
                    extracted = remaining[:end_idx].strip()
                    if len(extracted) > 50:  # Minimum length check
                        return extracted

        # Strategy 3: Look for a substantial code block that might be the prompt
        code_blocks = re.findall(r'```(?:text|markdown)?\n(.*?)```', response_text, re.DOTALL)
        for block in code_blocks:
            # Skip JSON blocks, look for prose blocks that could be prompts
            if not block.strip().startswith('{') and len(block) > 100:
                return block.strip()

        return None
412 
413 
def format_optimization_report(result: OptimizationResult) -> str:
    """Format an optimization result as a human-readable report.

    Args:
        result: Optimization result to render. Reads predicted_improvement,
            confidence, key_changes, diffs and optimized_prompt.

    Returns:
        A multi-line string: header with predicted improvement and
        confidence, optional "Key Changes" and "Detailed Changes" sections,
        then the full optimized prompt. Diff texts are previewed at up to
        100 characters.
    """

    def _preview(text: str, limit: int = 100) -> str:
        """Shorten text to limit characters, marking a cut with an ellipsis."""
        # Only add "..." when something was actually cut off; otherwise the
        # report would suggest missing text that does not exist.
        return text if len(text) <= limit else f"{text[:limit]}..."

    lines = [
        "=" * 60,
        "PROMPT OPTIMIZATION REPORT",
        "=" * 60,
        "",
        f"Predicted Improvement: {result.predicted_improvement}%",
        f"Confidence: {result.confidence * 100:.0f}%",
        "",
    ]

    if result.key_changes:
        lines.append("Key Changes:")
        lines.extend(f"  - {change}" for change in result.key_changes)
        lines.append("")

    if result.diffs:
        lines.append("Detailed Changes:")
        for diff in result.diffs:
            lines.append(f"\n  [{diff.section}]")
            # "N/A" marks a newly added section with no original text.
            if diff.original and diff.original != "N/A":
                lines.append(f"    Before: {_preview(diff.original)}")
            lines.append(f"    After: {_preview(diff.optimized)}")
            lines.append(f"    Reason: {diff.reason}")

    lines.extend([
        "",
        "=" * 60,
        "OPTIMIZED PROMPT",
        "=" * 60,
        result.optimized_prompt,
    ])

    return "\n".join(lines)
450