Source from repo
Agent Skills for Context Engineering

A comprehensive collection of Agent Skills for context engineering, multi-agent architectures, and production agent systems.
muratcankoylan · GitHub: muratcankoylan · Source repo · Original GitHub link
Files
241
Skill
n/a
Size
2.6 MB
Entrypoint
SKILL.md
Format
git-repo
Open file
skills/context-optimization/scripts/compaction.py

Syntax-highlighted preview of this file as included in the skill package.
Rendered Source
code · 563 lines · Free
skills/context-optimization/scripts/compaction.py
1"""
2Context Optimization Utilities — compaction, masking, budgeting, and cache optimization.
3 
4Public API
5----------
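Functions:
    estimate_token_count(text) -> int
    estimate_message_tokens(messages) -> int
    categorize_messages(messages) -> dict
    summarize_content(content, category, max_length) -> str
    summarize_tool_output(content, max_length) -> str
    summarize_conversation(content, max_length) -> str
    summarize_document(content, max_length) -> str
    summarize_general(content, max_length) -> str
    design_stable_prompt(template, dynamic_values) -> str
    calculate_cache_metrics(requests, cache) -> dict
    generate_cache_recommendations(hits, misses) -> list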
13 
14Classes:
15    ObservationStore   — Store and mask verbose tool outputs with retrievable references.
16    ContextBudget      — Token budget allocation and optimization trigger detection.
17 
18PRODUCTION NOTES:
19- Token estimation uses simplified heuristics (~4 chars/token for English).
20  Production systems should use model-specific tokenizers:
21  - OpenAI: tiktoken library
22  - Anthropic: anthropic tokenizer
23  - Local models: HuggingFace tokenizers
24 
25- Summarization functions use simple heuristics for demonstration.
26  Production systems should use:
27  - LLM-based summarization for high-quality compression
28  - Domain-specific summarization models
29  - Schema-based summarization for structured outputs
30 
31- Cache metrics are illustrative. Production systems should integrate
32  with actual inference infrastructure metrics.
33"""
34 
35from typing import List, Dict, Optional, Tuple
36import hashlib
37import re
38import time
39 
# Explicit export list: these are the names a star-import of this module
# exposes. Every entry is a function or class defined further down.
__all__ = [
    "estimate_token_count",
    "estimate_message_tokens",
    "categorize_messages",
    "summarize_content",
    "summarize_tool_output",
    "summarize_conversation",
    "summarize_document",
    "summarize_general",
    "ObservationStore",
    "ContextBudget",
    "design_stable_prompt",
    "calculate_cache_metrics",
    "generate_cache_recommendations",
]
55 
56 
57# ---------------------------------------------------------------------------
58# Token estimation
59# ---------------------------------------------------------------------------
60 
def estimate_token_count(text: str) -> int:
    """
    Approximate how many tokens ``text`` occupies.

    Use when: a cheap budget check is enough and running a real,
    model-specific tokenizer is unavailable or too slow for the hot path.

    Heuristic: one token per four characters, computed with integer
    division, so the result rounds down (a string shorter than four
    characters counts as zero tokens).

    WARNING: This is only a rough estimate. Real tokenization depends on:
    - The model (each vendor ships a different tokenizer)
    - The content type (code is usually denser in tokens than prose)
    - The language (non-English text can need 2-3x more tokens per char)

    Production usage::

        import tiktoken
        enc = tiktoken.encoding_for_model("gpt-4")
        token_count = len(enc.encode(text))
    """
    chars_per_token = 4
    char_total = len(text)
    return char_total // chars_per_token
82 
83 
def estimate_message_tokens(messages: List[Dict[str, str]]) -> int:
    """
    Estimate token count for a message list.

    Use when: checking whether the current conversation is approaching
    the context budget threshold before deciding to compact or mask.

    Each message contributes the estimate for its ``"content"`` plus a
    flat 10-token allowance for role/formatting overhead. A missing or
    ``None`` content counts as empty text; non-string content (for
    example a list of content blocks) is measured by its string form so
    it is not silently counted as near-zero.
    """
    per_message_overhead = 10  # flat allowance for role/formatting tokens
    total = 0
    for msg in messages:
        content = msg.get("content")
        if content is None:
            # Key present with an explicit None would otherwise crash len().
            content = ""
        elif not isinstance(content, str):
            content = str(content)
        total += estimate_token_count(content) + per_message_overhead
    return total
98 
99 
100# ---------------------------------------------------------------------------
101# Compaction functions
102# ---------------------------------------------------------------------------
103 
def categorize_messages(messages: List[Dict]) -> Dict[str, List[Dict]]:
    """
    Categorize messages for selective compaction.

    Use when: preparing to compact context and needing to apply different
    summarization strategies per category (tool outputs first, then old
    conversation turns, then retrieved documents — never the system prompt).

    Classification rules, checked in this order:
    1. ``role == "system"``                                   -> "system_prompt"
    2. ``role == "tool"`` or ``type`` containing "tool_use"
       or "tool_result"                                       -> "tool_output"
    3. ``role == "user"``                                     -> "conversation"
    4. "retrieved" present in ``tags``                        -> "retrieved_document"
    5. anything else                                          -> "other"

    A missing role defaults to "user". Missing, ``None`` or non-string
    ``type`` values and missing/``None`` ``tags`` are treated as empty.
    The "tool_definition" bucket is always returned but never populated
    by these rules.

    Returns a dict mapping category name to a list of shallow message
    copies, each with an added ``"category"`` key; the input messages are
    not modified.
    """
    categories: Dict[str, List[Dict]] = {
        "system_prompt": [],
        "tool_definition": [],
        "tool_output": [],
        "conversation": [],
        "retrieved_document": [],
        "other": [],
    }

    for msg in messages:
        role = msg.get("role", "user")

        # Normalise optional fields so an explicit None cannot break `in`.
        msg_type = msg.get("type")
        if not isinstance(msg_type, str):
            msg_type = ""
        tags = msg.get("tags") or []

        if role == "system":
            category = "system_prompt"
        elif (
            role == "tool"
            or "tool_use" in msg_type
            or "tool_result" in msg_type
        ):
            # Covers tool-call blocks as well as the messages that carry
            # the tool's actual result.
            category = "tool_output"
        elif role == "user":
            category = "conversation"
        elif "retrieved" in tags:
            category = "retrieved_document"
        else:
            category = "other"

        categories[category].append({**msg, "category": category})

    return categories
139 
140 
def summarize_content(content: str, category: str, max_length: int = 500) -> str:
    """
    Route ``content`` to the summarizer that matches ``category``.

    Use when: compacting context and the summarization strategy should
    depend on what kind of content is being compressed.

    "tool_output" goes to summarize_tool_output, "conversation" to
    summarize_conversation, "retrieved_document" to summarize_document;
    every other category falls back to summarize_general. ``max_length``
    is passed through unchanged.
    """
    if category == "tool_output":
        summarizer = summarize_tool_output
    elif category == "conversation":
        summarizer = summarize_conversation
    elif category == "retrieved_document":
        summarizer = summarize_document
    else:
        summarizer = summarize_general

    return summarizer(content, max_length)
157 
158 
def summarize_tool_output(content: str, max_length: int = 500) -> str:
    """
    Summarize tool output by extracting metrics and key findings.

    Use when: a tool output has served its immediate purpose and needs
    to be compacted while preserving actionable data points.

    Metrics are ``name: number`` pairs. The number must start and end
    with a digit and may contain ``.`` or ``,`` between digit groups, so
    punctuation-only values ("Loading: ...") are ignored and trailing
    sentence punctuation ("total: 42.") is not captured. Key findings are
    the first three lines mentioning one of a fixed set of keywords.

    Returns the two parts joined by " | ", or "[Tool output summarized]"
    when neither is found, truncated to ``max_length`` characters.
    """
    # Numbers with context; at least one digit is required in the value.
    metrics = re.findall(r'(\w+):\s*(\d+(?:[.,]\d+)*)', content)

    # Lines mentioning an outcome-related keyword (case-insensitive).
    keywords = ("result", "found", "total", "success", "error", "value")
    findings = []
    for line in content.split('\n'):
        lowered = line.lower()
        if any(kw in lowered for kw in keywords):
            findings.append(line.strip())

    summary_parts = []
    if metrics:
        pairs = ', '.join(f'{name}={value}' for name, value in metrics)
        summary_parts.append(f"Metrics: {pairs}")
    if findings:
        summary_parts.append("Key findings: " + "; ".join(findings[:3]))

    result = " | ".join(summary_parts) if summary_parts else "[Tool output summarized]"
    return result[:max_length]
184 
185 
def summarize_conversation(content: str, max_length: int = 500) -> str:
    """
    Summarize conversational content by extracting decisions and questions.

    Use when: older conversation turns need compaction and the key
    decisions/commitments must survive while filler is removed.

    Decisions are the text following "decided", "decision", "chose" or
    "chosen" (case-insensitive) up to the next period; at most five are
    kept. Open questions are whole sentences that end in "?" or contain
    the word "question" followed by a colon or whitespace
    (case-insensitive); at most three are kept. If neither is found, the
    first three sentences longer than 20 characters are used instead.

    Returns the parts joined by " | ", or "[Conversation summarized]"
    when nothing could be extracted, truncated to ``max_length``.
    """
    decisions = re.findall(
        r'(?i)(?:decided|decision|chose|chosen)[:\s]+([^.]+)', content
    )

    # Split into sentences/lines first so the question itself is captured,
    # not whatever text happens to follow a question mark.
    questions = []
    for fragment in re.split(r'(?<=[.?!])\s+|\n+', content):
        sentence = fragment.strip()
        if not sentence:
            continue
        if sentence.endswith('?') or re.search(r'(?i)\bquestion[:\s]', sentence):
            questions.append(sentence)

    summary_parts = []
    if decisions:
        decision_texts = [d.strip() for d in decisions[:5]]
        summary_parts.append(f"Decisions: {'; '.join(decision_texts)}")
    if questions:
        summary_parts.append(f"Open questions: {'; '.join(questions[:3])}")

    if not summary_parts:
        # Fallback: keep the first few substantive sentences.
        sentences = [s.strip() for s in content.split('.') if len(s.strip()) > 20]
        if sentences:
            summary_parts.append('. '.join(sentences[:3]) + '.')

    result = " | ".join(summary_parts) if summary_parts else "[Conversation summarized]"
    return result[:max_length]
212 
213 
def summarize_document(content: str, max_length: int = 500) -> str:
    """
    Summarize document content using lead-paragraph extraction.

    Use when: a retrieved document has been consumed for reasoning and
    only a brief reference needs to remain in context.

    Takes the first non-blank paragraph (paragraphs are separated by a
    blank line), keeps at most its first two sentences, and truncates the
    result to ``max_length`` characters. Returns "[Document summarized]"
    when the content has no non-blank paragraph at all.
    """
    # str.split always yields at least one element, so emptiness has to be
    # checked on the stripped paragraphs rather than on the list itself.
    lead = next(
        (para.strip() for para in content.split('\n\n') if para.strip()),
        "",
    )
    if not lead:
        return "[Document summarized]"

    sentences = lead.split('. ')
    if len(sentences) > 2:
        lead = '. '.join(sentences[:2]) + '.'
    return lead[:max_length]
229 
230 
def summarize_general(content: str, max_length: int = 500) -> str:
    """
    Fallback summarizer: plain truncation.

    Use when: the content belongs to no specific category and cutting it
    off with an ellipsis is good enough.

    Content of at most ``max_length`` characters is returned unchanged.
    Longer content is cut to ``max_length`` characters and "..." is
    appended, so the returned string can be three characters longer than
    ``max_length``.
    """
    if len(content) <= max_length:
        return content

    head = content[:max_length]
    return head + "..."
239 
240 
241# ---------------------------------------------------------------------------
242# Observation masking
243# ---------------------------------------------------------------------------
244 
class ObservationStore:
    """
    Store and mask verbose tool outputs with retrievable references.

    Use when: tool outputs dominate context (>50% of tokens) and older
    observations have already served their reasoning purpose. Stores the
    full content in memory and replaces it with a compact reference
    containing a key-point summary.

    The store keeps at most ``max_size`` observations; once the limit is
    exceeded the oldest stored entry is evicted and can no longer be
    retrieved.

    Example::

        store = ObservationStore(max_size=500)
        masked, ref_id = store.mask(long_tool_output, max_length=200)
        # masked: "[Obs:a1b2c3d4 elided. Key: ... Full content retrievable.]"
        # Later retrieval:
        original = store.retrieve(ref_id)
    """

    def __init__(self, max_size: int = 1000) -> None:
        # ref_id -> {"content", "metadata", "stored_at", "last_accessed"}
        self.observations: Dict[str, Dict] = {}
        # ref_ids in insertion order; index 0 is the next eviction candidate
        self.order: List[str] = []
        self.max_size = max_size
        # Monotonic counter mixed into every ref id so two stores in the
        # same clock tick cannot hash to the same input.
        self._sequence = 0

    def store(self, content: str, metadata: Optional[Dict] = None) -> str:
        """
        Store an observation and return its reference ID.

        ``metadata`` is kept alongside the content (an empty dict when
        omitted). If the store then holds more than ``max_size`` entries,
        the oldest ones are evicted.
        """
        ref_id = self._generate_ref_id(content)
        now = time.time()

        self.observations[ref_id] = {
            "content": content,
            "metadata": metadata or {},
            "stored_at": now,
            "last_accessed": now,
        }
        self.order.append(ref_id)

        # Evict oldest entries while over the limit.
        while len(self.order) > self.max_size:
            oldest = self.order.pop(0)
            self.observations.pop(oldest, None)

        return ref_id

    def retrieve(self, ref_id: str) -> Optional[str]:
        """
        Return the stored content for ``ref_id``, or None if it is unknown
        or has been evicted. A successful lookup refreshes the entry's
        ``last_accessed`` timestamp.
        """
        entry = self.observations.get(ref_id)
        if entry is None:
            return None
        entry["last_accessed"] = time.time()
        return entry["content"]

    def mask(self, content: str, max_length: int = 200) -> Tuple[str, Optional[str]]:
        """
        Mask observation if longer than max_length.

        Use when: deciding per-observation whether to keep inline or
        replace with a compact reference. Returns (masked_content, ref_id)
        where ref_id is None if the content was short enough to keep.
        """
        if len(content) <= max_length:
            return content, None

        ref_id = self.store(content)
        key_point = self._extract_key_point(content)
        masked = f"[Obs:{ref_id} elided. Key: {key_point}. Full content retrievable.]"
        return masked, ref_id

    def _generate_ref_id(self, content: str) -> str:
        """
        Generate a reference ID (8 hex chars) not currently in the store.

        The ID is a truncated MD5 of the content prefix, the current time
        and a per-instance sequence number; MD5 is used only as a cheap
        fingerprint here. The loop retries in the unlikely case that the
        truncated digest matches an ID that is still stored, which would
        otherwise overwrite that entry and corrupt eviction bookkeeping.
        """
        while True:
            self._sequence += 1
            hash_input = f"{content[:100]}{time.time()}{self._sequence}"
            ref_id = hashlib.md5(hash_input.encode()).hexdigest()[:8]
            if ref_id not in self.observations:
                return ref_id

    def _extract_key_point(self, content: str) -> str:
        """
        Extract a short key point from an observation.

        Uses the first line longer than 20 characters; if there is none,
        the text up to the first ". ". The result is cut to 50 characters
        and always suffixed with "...".
        """
        long_lines = [line for line in content.split('\n') if len(line) > 20]
        if long_lines:
            head = long_lines[0]
        else:
            # split() always returns at least one element, so this is the
            # final fallback.
            head = content.split('. ')[0]
        return head[:50] + "..."
324 
325 
326# ---------------------------------------------------------------------------
327# Context budget management
328# ---------------------------------------------------------------------------
329 
class ContextBudget:
    """
    Token budget allocation and optimization trigger detection.

    Use when: building an agent loop that needs to monitor context usage
    across categories and trigger compaction/masking at the right thresholds
    rather than waiting until the window overflows.

    ``total_limit`` is the full context window in tokens. ``reserved``
    tokens (default 5000) are held back as a buffer, so allocations are
    only accepted up to ``total_limit - reserved``.

    Example::

        budget = ContextBudget(total_limit=128_000)
        budget.allocate("system_prompt", 1500)
        budget.allocate("tool_definitions", 3000)
        # ... after each agent turn:
        should_act, reasons = budget.should_optimize(current_usage)
        if should_act:
            # apply masking or compaction based on reasons
            pass
    """

    def __init__(self, total_limit: int, reserved: int = 5000) -> None:
        self.total_limit = total_limit
        # Tokens allocated so far, per known category.
        self.allocated: Dict[str, int] = {
            "system_prompt": 0,
            "tool_definitions": 0,
            "retrieved_docs": 0,
            "message_history": 0,
            "tool_outputs": 0,
            "other": 0,
        }
        self.reserved = reserved  # Reserved buffer
        self.reservation_limit = total_limit - self.reserved

    def _utilization(self, used: float) -> float:
        """
        Return ``used`` as a fraction of ``total_limit``.

        A non-positive ``total_limit`` cannot be divided by: any positive
        usage is then reported as infinite utilization, zero usage as 0.0.
        """
        if self.total_limit > 0:
            return used / self.total_limit
        return float("inf") if used > 0 else 0.0

    def allocate(self, category: str, amount: int) -> bool:
        """
        Allocate budget to category. Returns True on success, False if
        the allocation would exceed the reservation limit.

        An unrecognised ``category`` is booked under "other". On failure
        nothing is changed.
        """
        if category not in self.allocated:
            category = "other"

        proposed = sum(self.allocated.values()) + amount
        if proposed > self.reservation_limit:
            return False

        self.allocated[category] += amount
        return True

    def remaining(self) -> int:
        """Get remaining unallocated budget (reservation limit minus allocations)."""
        return self.reservation_limit - sum(self.allocated.values())

    def get_usage(self) -> Dict[str, object]:
        """
        Get current usage breakdown.

        Use when: logging or displaying context budget state for
        monitoring dashboards or debug output.

        ``utilization_ratio`` is measured against ``total_limit`` (not
        the reservation limit); ``by_category`` is a copy of the
        allocation table.
        """
        total = sum(self.allocated.values())
        return {
            "total_used": total,
            "total_limit": self.total_limit,
            "remaining": self.remaining(),
            "by_category": dict(self.allocated),
            "utilization_ratio": self._utilization(total),
        }

    def should_optimize(
        self, current_usage: int, metrics: Optional[Dict[str, float]] = None
    ) -> Tuple[bool, List[Tuple[str, object]]]:
        """
        Determine if optimization should trigger.

        Use when: called at the end of each agent loop iteration to
        decide whether to apply compaction, masking, or both before
        the next model call.

        Triggers:
        - "high_utilization" when ``current_usage / total_limit`` > 0.8
          (the reason carries the utilization value)
        - "attention_degradation" when metrics["attention_degradation"] > 0.3
        - "quality_degradation" when metrics["quality_score"] < 0.8

        Returns (should_optimize, list_of_reasons).
        """
        reasons: List[Tuple[str, object]] = []

        # Check utilization
        utilization = self._utilization(current_usage)
        if utilization > 0.8:
            reasons.append(("high_utilization", utilization))

        # Check degradation metrics if provided
        if metrics:
            if metrics.get("attention_degradation", 0) > 0.3:
                reasons.append(("attention_degradation", True))
            if metrics.get("quality_score", 1.0) < 0.8:
                reasons.append(("quality_degradation", True))

        return len(reasons) > 0, reasons
428 
429 
430# ---------------------------------------------------------------------------
431# Cache optimization
432# ---------------------------------------------------------------------------
433 
def design_stable_prompt(template: str, dynamic_values: Optional[Dict] = None) -> str:
    """
    Stabilize a prompt template for maximum KV-cache hit rate.

    Use when: constructing system prompts or few-shot prefixes that will
    be reused across many requests. Replaces dynamic content with stable
    placeholders so the prefix text stays constant between requests.

    Replacements, applied in this order:
    - ISO dates (``YYYY-MM-DD``), together with an immediately following
      time-of-day (``T`` or space, ``HH:MM``, optional seconds, fraction
      and ``Z``/UTC offset)                      -> ``[DATE_STABLE]``
    - ``Session <digits>``                        -> ``Session [STABLE]``
    - ``<digits>/<digits>`` counters              -> ``[COUNTER_STABLE]``

    ``dynamic_values`` is accepted for interface compatibility but is not
    read by this function.

    Returns the stabilized template.
    """
    result = template

    # Replace dates and full timestamps. The time part is consumed as well,
    # otherwise "…T10:30:00Z" would remain and still change per request.
    # The digit look-arounds stop the date from matching inside a longer
    # digit run.
    timestamp_pattern = (
        r'(?<!\d)\d{4}-\d{2}-\d{2}(?!\d)'
        r'(?:[T ]\d{2}:\d{2}(?::\d{2}(?:\.\d+)?)?(?:Z|[+-]\d{2}:?\d{2})?)?'
    )
    result = re.sub(timestamp_pattern, '[DATE_STABLE]', result)

    # Replace session IDs
    session_pattern = r'Session \d+'
    result = re.sub(session_pattern, 'Session [STABLE]', result)

    # Replace counters
    counter_pattern = r'\d+/\d+'
    result = re.sub(counter_pattern, '[COUNTER_STABLE]', result)

    return result
458 
459 
def calculate_cache_metrics(
    requests: List[Dict], cache: Dict[str, Dict]
) -> Dict[str, object]:
    """
    Calculate KV-cache hit metrics for a request sequence.

    Use when: evaluating whether prompt restructuring improved cache
    utilization. Feed in the request log and current cache state to
    get hit/miss rates and actionable recommendations.

    Each request contributes its ``token_count`` tokens. If its
    ``prefix_hash`` is a key of ``cache``, the entry's ``hit_ratio``
    (clamped to 0..1, default 0) of those tokens count as hits and the
    rest as misses; otherwise all of them are misses.

    Returns a dict with ``hit_rate`` (hits / (hits + misses), 0 when
    there is no traffic), ``cache_hits``, ``cache_misses`` and
    ``recommendations``.
    """
    hits = 0
    misses = 0

    for req in requests:
        prefix = req.get("prefix_hash", "")
        token_count = req.get("token_count", 0)

        if prefix in cache:
            ratio = min(1.0, max(0.0, cache[prefix].get("hit_ratio", 0)))
            cached_tokens = token_count * ratio
            hits += cached_tokens
            # The uncached share of a partially cached request is a miss;
            # dropping it would overstate the hit rate.
            misses += token_count - cached_tokens
        else:
            misses += token_count

    total = hits + misses

    return {
        "hit_rate": hits / total if total > 0 else 0,
        "cache_hits": hits,
        "cache_misses": misses,
        "recommendations": generate_cache_recommendations(hits, misses),
    }


def generate_cache_recommendations(hits: float, misses: float) -> List[str]:
    """
    Generate recommendations for cache optimization based on hit/miss ratio.

    Use when: cache metrics indicate sub-optimal hit rates and concrete
    next steps are needed.

    The hit rate is ``hits / (hits + misses)`` and is treated as 0 when
    both are zero. Below 0.5 the prompt-stability advice is included;
    below 0.8 the grouping/formatting advice is included. At 0.8 or above
    the list is empty.
    """
    recommendations: List[str] = []

    observed = hits + misses
    hit_rate = hits / observed if observed > 0 else 0

    if hit_rate < 0.5:
        recommendations.append("Consider stabilizing system prompts")
        recommendations.append("Reduce variation in request prefixes")

    if hit_rate < 0.8:
        recommendations.append("Group similar requests together")
        recommendations.append("Use consistent formatting across requests")

    return recommendations
512 
513 
514# ---------------------------------------------------------------------------
515# Demo / smoke test
516# ---------------------------------------------------------------------------
517 
if __name__ == "__main__":
    # Smoke test: exercises each public helper once and prints the result.
    print("=== Context Optimization Utilities — Demo ===\n")

    # 1. Token estimation on a repeated pangram
    sample_text = "The quick brown fox jumps over the lazy dog. " * 20
    tokens = estimate_token_count(sample_text)
    print(f"1. Token estimate for {len(sample_text)}-char text: ~{tokens} tokens\n")

    # 2. Observation masking: long output is replaced by a reference
    item_lines = "\n".join(f"  Item {i}: value={i*10}" for i in range(20))
    long_output = (
        "Result: 42 items found\n"
        "Total processing time: 3.2s\n"
        "Details:\n"
    ) + item_lines
    store = ObservationStore(max_size=100)
    masked, ref_id = store.mask(long_output, max_length=100)
    print(f"2. Masked observation:\n   {masked}")
    print(f"   Ref ID: {ref_id}")
    retrieved = store.retrieve(ref_id)
    print(f"   Retrievable: {retrieved is not None}\n")

    # 3. Context budget: allocate a few categories, then check the trigger
    budget = ContextBudget(total_limit=128_000)
    for bucket, amount in (
        ("system_prompt", 1500),
        ("tool_definitions", 3000),
        ("message_history", 95_000),
    ):
        budget.allocate(bucket, amount)
    usage = budget.get_usage()
    print(f"3. Budget utilization: {usage['utilization_ratio']:.1%}")
    simulated_usage = int(128_000 * 0.85)
    should_opt, reasons = budget.should_optimize(current_usage=simulated_usage)
    print(f"   Should optimize: {should_opt}, reasons: {reasons}\n")

    # 4. Cache-stable prompt: dynamic fragments become placeholders
    raw_prompt = "Session 42 started on 2025-12-20. Progress: 3/10 tasks."
    stable = design_stable_prompt(raw_prompt)
    print(f"4. Original prompt:  {raw_prompt}")
    print(f"   Stabilized:       {stable}\n")

    # 5. Category-aware summarization of a small tool output
    tool_out = "count: 150\nstatus: success\nFound 3 errors in module A."
    summary = summarize_content(tool_out, "tool_output", max_length=200)
    print(f"5. Tool output summary: {summary}\n")

    print("=== Demo complete ===")
563
Preparing the source view

Agent Skills for Context Engineering

skills/context-optimization/scripts/compaction.py