Loading source
Pulling the file list, source metadata, and syntax-aware rendering for this listing.
Source from repo
A comprehensive collection of Agent Skills for context engineering, multi-agent architectures, and production agent systems.
Files
Skill
Size
Entrypoint
Format
Open file
Syntax-highlighted preview of this file as included in the skill package.
skills/context-optimization/scripts/compaction.py
"""
Context Optimization Utilities — compaction, masking, budgeting, and cache optimization.

Public API
----------
Functions:
    estimate_token_count(text) -> int
    estimate_message_tokens(messages) -> int
    categorize_messages(messages) -> dict
    summarize_content(content, category, max_length) -> str
    design_stable_prompt(template, dynamic_values) -> str
    calculate_cache_metrics(requests, cache) -> dict

Classes:
    ObservationStore — Store and mask verbose tool outputs with retrievable references.
    ContextBudget — Token budget allocation and optimization trigger detection.

PRODUCTION NOTES:
- Token estimation uses simplified heuristics (~4 chars/token for English).
  Production systems should use model-specific tokenizers:
  - OpenAI: tiktoken library
  - Anthropic: anthropic tokenizer
  - Local models: HuggingFace tokenizers

- Summarization functions use simple heuristics for demonstration.
  Production systems should use:
  - LLM-based summarization for high-quality compression
  - Domain-specific summarization models
  - Schema-based summarization for structured outputs

- Cache metrics are illustrative. Production systems should integrate
  with actual inference infrastructure metrics.
"""

from typing import List, Dict, Optional, Tuple
import hashlib
import re
import time

__all__ = [
    "estimate_token_count",
    "estimate_message_tokens",
    "categorize_messages",
    "summarize_content",
    "summarize_tool_output",
    "summarize_conversation",
    "summarize_document",
    "summarize_general",
    "ObservationStore",
    "ContextBudget",
    "design_stable_prompt",
    "calculate_cache_metrics",
    "generate_cache_recommendations",
]


# ---------------------------------------------------------------------------
# Token estimation
# ---------------------------------------------------------------------------

def estimate_token_count(text: str) -> int:
    """
    Estimate token count for text.

    Use when: a quick token budget check is needed and a model-specific
    tokenizer is unavailable or too slow for the hot path.

    Uses approximation: ~4 characters per token for English. The division
    floors, so text shorter than four characters estimates to 0.

    WARNING: This is a rough estimate. Actual tokenization varies by:
    - Model (GPT-5.2, Claude 4.5, Gemini 3 have different tokenizers)
    - Content type (code typically has higher token density)
    - Language (non-English may have 2-3x higher token/char ratio)

    Production usage::

        import tiktoken
        enc = tiktoken.encoding_for_model("gpt-4")
        token_count = len(enc.encode(text))
    """
    return len(text) // 4


def estimate_message_tokens(messages: List[Dict[str, str]]) -> int:
    """
    Estimate token count for a message list.

    Use when: checking whether the current conversation is approaching
    the context budget threshold before deciding to compact or mask.

    Each message contributes the estimate for its ``"content"`` value plus
    a flat 10-token overhead for role/formatting. A missing or ``None``
    content counts as empty; non-string content is measured through its
    ``str()`` form so that structured payloads do not raise.
    """
    total = 0
    for msg in messages:
        content = msg.get("content") or ""
        if not isinstance(content, str):
            content = str(content)
        total += estimate_token_count(content)
        # Add overhead for role/formatting
        total += 10
    return total


# ---------------------------------------------------------------------------
# Compaction functions
# ---------------------------------------------------------------------------

def categorize_messages(messages: List[Dict]) -> Dict[str, List[Dict]]:
    """
    Categorize messages for selective compaction.

    Use when: preparing to compact context and needing to apply different
    summarization strategies per category (tool outputs first, then old
    conversation turns, then retrieved documents — never the system prompt).

    Rules are evaluated in this order, first match wins:

    1. ``role == "system"``                -> ``system_prompt``
    2. ``"tool_use"`` appears in ``type``   -> ``tool_output``
    3. ``role == "user"`` (also the default when ``role`` is absent)
                                           -> ``conversation``
    4. ``"retrieved"`` is in ``tags``       -> ``retrieved_document``
    5. ``role == "assistant"``             -> ``conversation``
    6. anything else                       -> ``other``

    Returns a dict mapping category name to list of messages. Every
    returned message is a shallow copy of the input with an added
    ``"category"`` key; the inputs are not mutated. The ``tool_definition``
    bucket is always present but no rule here populates it.
    """
    categories: Dict[str, List[Dict]] = {
        "system_prompt": [],
        "tool_definition": [],
        "tool_output": [],
        "conversation": [],
        "retrieved_document": [],
        "other": [],
    }

    for msg in messages:
        role = msg.get("role", "user")
        # ``or`` guards against explicit None values, which would otherwise
        # raise TypeError on the membership tests below.
        msg_type = msg.get("type") or ""
        tags = msg.get("tags") or []

        if role == "system":
            category = "system_prompt"
        elif "tool_use" in msg_type:
            category = "tool_output"
        elif role == "user":
            category = "conversation"
        elif "retrieved" in tags:
            category = "retrieved_document"
        elif role == "assistant":
            # Assistant turns are conversation too; checked after the
            # "retrieved" tag so tagged assistant messages keep their bucket.
            category = "conversation"
        else:
            category = "other"

        categories[category].append({**msg, "category": category})

    return categories


def summarize_content(content: str, category: str, max_length: int = 500) -> str:
    """
    Summarize content for compaction, dispatching by category.

    Use when: compacting context and needing category-aware summarization
    (tool outputs get metric extraction, conversations get decision
    extraction, documents get lead-paragraph extraction). Any other
    category falls back to :func:`summarize_general`.
    """
    if category == "tool_output":
        return summarize_tool_output(content, max_length)
    if category == "conversation":
        return summarize_conversation(content, max_length)
    if category == "retrieved_document":
        return summarize_document(content, max_length)
    return summarize_general(content, max_length)


def summarize_tool_output(content: str, max_length: int = 500) -> str:
    """
    Summarize tool output by extracting metrics and key findings.

    Use when: a tool output has served its immediate purpose and needs
    to be compacted while preserving actionable data points.

    Returns ``"Metrics: ... | Key findings: ..."`` (either part may be
    absent), or ``"[Tool output summarized]"`` when nothing matched. The
    result is cut to ``max_length`` characters.
    """
    # Look for metrics (numbers with context), e.g. "count: 150"
    metrics = re.findall(r'(\w+):\s*([\d.,]+)', content)

    # Look for key findings (lines with important keywords)
    keywords = ["result", "found", "total", "success", "error", "value"]
    findings = []
    for line in content.split('\n'):
        lowered = line.lower()
        if any(kw in lowered for kw in keywords):
            findings.append(line.strip())

    summary_parts = []
    if metrics:
        summary_parts.append(f"Metrics: {', '.join([f'{k}={v}' for k, v in metrics])}")
    if findings:
        # Only the first three findings are kept to bound the summary size.
        summary_parts.append("Key findings: " + "; ".join(findings[:3]))

    result = " | ".join(summary_parts) if summary_parts else "[Tool output summarized]"
    return result[:max_length]


def summarize_conversation(content: str, max_length: int = 500) -> str:
    """
    Summarize conversational content by extracting decisions and questions.

    Use when: older conversation turns need compaction and the key
    decisions/commitments must survive while filler is removed.

    Keeps up to five decision fragments and three question fragments. When
    neither pattern matches, falls back to the first three sentences longer
    than 20 characters, and finally to ``"[Conversation summarized]"``.
    The result is cut to ``max_length`` characters.
    """
    decisions = re.findall(r'(?i)(?:decided|decision|chose|chosen)[:\s]+([^.]+)', content)
    questions = re.findall(r'(?:\?|question)[:\s]+([^.]+)', content)

    summary_parts = []
    if decisions:
        decision_texts = [d.strip() for d in decisions[:5]]
        summary_parts.append(f"Decisions: {'; '.join(decision_texts)}")
    if questions:
        question_texts = [q.strip() for q in questions[:3]]
        summary_parts.append(f"Open questions: {'; '.join(question_texts)}")

    if not summary_parts:
        # Fallback: extract the first few substantive sentences
        sentences = [s.strip() for s in content.split('.') if len(s.strip()) > 20]
        if sentences:
            summary_parts.append('. '.join(sentences[:3]) + '.')

    result = " | ".join(summary_parts) if summary_parts else "[Conversation summarized]"
    return result[:max_length]


def summarize_document(content: str, max_length: int = 500) -> str:
    """
    Summarize document content using lead-paragraph extraction.

    Use when: a retrieved document has been consumed for reasoning and
    only a brief reference needs to remain in context.

    Takes the first non-empty paragraph (paragraphs are separated by a
    blank line), keeps at most its first two sentences, and cuts the result
    to ``max_length`` characters. Returns ``"[Document summarized]"`` when
    the content has no non-empty paragraph.
    """
    for paragraph in content.split('\n\n'):
        lead = paragraph.strip()
        if not lead:
            # Skip leading blank paragraphs instead of returning "".
            continue
        sentences = lead.split('. ')
        if len(sentences) > 2:
            lead = '. '.join(sentences[:2]) + '.'
        return lead[:max_length]
    return "[Document summarized]"


def summarize_general(content: str, max_length: int = 500) -> str:
    """
    General-purpose summarization via truncation.

    Use when: content does not fit a specific category and a simple
    truncation with ellipsis is acceptable.

    Content no longer than ``max_length`` is returned unchanged. Longer
    content is cut to ``max_length`` characters and ``"..."`` is appended,
    so a truncated result is ``max_length + 3`` characters long.
    """
    if len(content) > max_length:
        return content[:max_length] + "..."
    return content


# ---------------------------------------------------------------------------
# Observation masking
# ---------------------------------------------------------------------------

class ObservationStore:
    """
    Store and mask verbose tool outputs with retrievable references.

    Use when: tool outputs dominate context (>50% of tokens) and older
    observations have already served their reasoning purpose. Stores the
    full content externally and replaces it with a compact reference
    containing a key-point summary.

    The store is bounded: once more than ``max_size`` observations are
    held, the oldest ones (by insertion order) are evicted.

    Example::

        store = ObservationStore(max_size=500)
        masked, ref_id = store.mask(long_tool_output, max_length=200)
        # masked: "[Obs:a1b2c3d4 elided. Key: ... Full content retrievable.]"
        # Later retrieval:
        original = store.retrieve(ref_id)
    """

    def __init__(self, max_size: int = 1000) -> None:
        # ref_id -> {"content", "metadata", "stored_at", "last_accessed"}
        self.observations: Dict[str, Dict] = {}
        # ref_ids in insertion order; index 0 is the next eviction candidate
        self.order: List[str] = []
        self.max_size = max_size

    def store(self, content: str, metadata: Optional[Dict] = None) -> str:
        """
        Store observation and return reference ID.

        The returned ID is guaranteed not to clash with any observation
        currently held, so an existing entry is never overwritten.
        """
        ref_id = self._generate_ref_id(content)
        # The ID is only 8 hex chars derived from a content prefix and the
        # clock, so identical content stored within one clock tick (or a
        # plain hash collision) can repeat. Re-salt until the ID is free;
        # a duplicate would overwrite data and corrupt eviction order.
        attempt = 0
        while ref_id in self.observations:
            attempt += 1
            ref_id = self._generate_ref_id(f"{attempt}:{content}")

        now = time.time()
        self.observations[ref_id] = {
            "content": content,
            "metadata": metadata or {},
            "stored_at": now,
            "last_accessed": now,
        }
        self.order.append(ref_id)

        # Evict oldest while over limit (also copes with max_size being
        # lowered after construction).
        while len(self.order) > self.max_size:
            oldest = self.order.pop(0)
            self.observations.pop(oldest, None)

        return ref_id

    def retrieve(self, ref_id: str) -> Optional[str]:
        """
        Retrieve observation by reference ID.

        Returns the stored content and refreshes its ``last_accessed``
        timestamp, or ``None`` when the ID is unknown or was evicted.
        """
        entry = self.observations.get(ref_id)
        if entry is None:
            return None
        entry["last_accessed"] = time.time()
        return entry["content"]

    def mask(self, content: str, max_length: int = 200) -> Tuple[str, Optional[str]]:
        """
        Mask observation if longer than max_length.

        Use when: deciding per-observation whether to keep inline or
        replace with a compact reference. Returns (masked_content, ref_id)
        where ref_id is None if the content was short enough to keep.
        """
        if len(content) <= max_length:
            return content, None

        ref_id = self.store(content)
        key_point = self._extract_key_point(content)
        masked = f"[Obs:{ref_id} elided. Key: {key_point}. Full content retrievable.]"
        return masked, ref_id

    def _generate_ref_id(self, content: str) -> str:
        """
        Generate a short reference ID.

        Hashes the first 100 characters of ``content`` together with the
        current time and keeps the first 8 hex digits. MD5 is used purely
        as a fingerprint here, not for security.
        """
        hash_input = f"{content[:100]}{time.time()}"
        return hashlib.md5(hash_input.encode()).hexdigest()[:8]

    def _extract_key_point(self, content: str) -> str:
        """
        Extract key point from observation.

        Returns the first 50 characters of the first line longer than 20
        characters, or failing that of the text before the first ``". "``,
        always followed by ``"..."``.
        """
        for line in content.split('\n'):
            if len(line) > 20:
                return line[:50] + "..."
        # str.split always yields at least one element, so this is the
        # final fallback (it covers short and empty content alike).
        return content.split('. ')[0][:50] + "..."


# ---------------------------------------------------------------------------
# Context budget management
# ---------------------------------------------------------------------------

class ContextBudget:
    """
    Token budget allocation and optimization trigger detection.

    Use when: building an agent loop that needs to monitor context usage
    across categories and trigger compaction/masking at the right thresholds
    rather than waiting until the window overflows.

    Example::

        budget = ContextBudget(total_limit=128_000)
        budget.allocate("system_prompt", 1500)
        budget.allocate("tool_definitions", 3000)
        # ... after each agent turn:
        should_act, reasons = budget.should_optimize(current_usage)
        if should_act:
            # apply masking or compaction based on reasons
            pass
    """

    def __init__(self, total_limit: int, reserved: int = 5000) -> None:
        """
        Create a budget.

        Args:
            total_limit: Size of the context window in tokens.
            reserved: Tokens held back as a safety buffer and never handed
                out by :meth:`allocate`. Defaults to 5000.
        """
        self.total_limit = total_limit
        self.allocated: Dict[str, int] = {
            "system_prompt": 0,
            "tool_definitions": 0,
            "retrieved_docs": 0,
            "message_history": 0,
            "tool_outputs": 0,
            "other": 0,
        }
        self.reserved = reserved  # Reserved buffer
        # Ceiling for the sum of all allocations.
        self.reservation_limit = total_limit - self.reserved

    def allocate(self, category: str, amount: int) -> bool:
        """
        Allocate budget to category. Returns True on success, False if
        the allocation would exceed the reservation limit.

        Unknown category names are accounted under ``"other"``.
        """
        if category not in self.allocated:
            category = "other"

        proposed = sum(self.allocated.values()) + amount
        if proposed > self.reservation_limit:
            return False

        self.allocated[category] += amount
        return True

    def remaining(self) -> int:
        """Get remaining unallocated budget."""
        return self.reservation_limit - sum(self.allocated.values())

    def get_usage(self) -> Dict[str, object]:
        """
        Get current usage breakdown.

        Use when: logging or displaying context budget state for
        monitoring dashboards or debug output.

        Returns a dict with ``total_used``, ``total_limit``, ``remaining``,
        ``by_category`` (a copy of the allocation table) and
        ``utilization_ratio`` (``total_used / total_limit``).
        """
        total = sum(self.allocated.values())
        return {
            "total_used": total,
            "total_limit": self.total_limit,
            "remaining": self.remaining(),
            "by_category": dict(self.allocated),
            "utilization_ratio": self._utilization(total),
        }

    def should_optimize(
        self, current_usage: int, metrics: Optional[Dict[str, float]] = None
    ) -> Tuple[bool, List[Tuple[str, object]]]:
        """
        Determine if optimization should trigger.

        Use when: called at the end of each agent loop iteration to
        decide whether to apply compaction, masking, or both before
        the next model call.

        Triggers are: utilization above 0.8, ``attention_degradation``
        above 0.3, and ``quality_score`` below 0.8 (the latter two only
        when ``metrics`` is supplied).

        Returns (should_optimize, list_of_reasons).
        """
        reasons: List[Tuple[str, object]] = []

        # Check utilization
        utilization = self._utilization(current_usage)
        if utilization > 0.8:
            reasons.append(("high_utilization", utilization))

        # Check degradation metrics if provided
        if metrics:
            if metrics.get("attention_degradation", 0) > 0.3:
                reasons.append(("attention_degradation", True))
            if metrics.get("quality_score", 1.0) < 0.8:
                reasons.append(("quality_degradation", True))

        return len(reasons) > 0, reasons

    def _utilization(self, used: float) -> float:
        """Return ``used / total_limit`` without dividing by zero."""
        if self.total_limit > 0:
            return used / self.total_limit
        # A non-positive window has no capacity: any usage is over budget.
        return float("inf") if used > 0 else 0.0


# ---------------------------------------------------------------------------
# Cache optimization
# ---------------------------------------------------------------------------

def design_stable_prompt(template: str, dynamic_values: Optional[Dict] = None) -> str:
    """
    Stabilize a prompt template for maximum KV-cache hit rate.

    Use when: constructing system prompts or few-shot prefixes that will
    be reused across many requests. Replaces dynamic content (timestamps,
    session IDs, counters) with stable placeholders so the prefix hash
    remains constant.

    Substitutions, applied in this order:

    - ``YYYY-MM-DD`` dates   -> ``[DATE_STABLE]``
    - ``Session <digits>``   -> ``Session [STABLE]``
    - ``<digits>/<digits>``  -> ``[COUNTER_STABLE]``

    NOTE: ``dynamic_values`` is accepted for API compatibility but is not
    consulted by the current implementation.
    """
    result = template

    # Replace timestamps
    result = re.sub(r'\d{4}-\d{2}-\d{2}', '[DATE_STABLE]', result)

    # Replace session IDs
    result = re.sub(r'Session \d+', 'Session [STABLE]', result)

    # Replace counters
    result = re.sub(r'\d+/\d+', '[COUNTER_STABLE]', result)

    return result


def calculate_cache_metrics(
    requests: List[Dict], cache: Dict[str, Dict]
) -> Dict[str, object]:
    """
    Calculate KV-cache hit metrics for a request sequence.

    Use when: evaluating whether prompt restructuring improved cache
    utilization. Feed in the request log and current cache state to
    get hit/miss rates and actionable recommendations.

    Each request is read for ``prefix_hash`` and ``token_count``. When the
    prefix is present in ``cache``, that entry's ``hit_ratio`` (clamped to
    0..1) of the tokens counts as hits and the rest as misses; otherwise
    all of the request's tokens are misses.

    Returns a dict with ``hit_rate``, ``cache_hits``, ``cache_misses``
    (token-weighted, so they may be floats) and ``recommendations``.
    """
    hits = 0
    misses = 0

    for req in requests:
        prefix = req.get("prefix_hash", "")
        token_count = req.get("token_count", 0)

        if prefix in cache:
            ratio = cache[prefix].get("hit_ratio", 0) or 0
            ratio = min(max(ratio, 0), 1)
            hit_tokens = token_count * ratio
            hits += hit_tokens
            # The uncached share of the request must count as misses,
            # otherwise the hit rate is overstated.
            misses += token_count - hit_tokens
        else:
            misses += token_count

    total = hits + misses

    return {
        "hit_rate": hits / total if total > 0 else 0,
        "cache_hits": hits,
        "cache_misses": misses,
        "recommendations": generate_cache_recommendations(hits, misses),
    }


def generate_cache_recommendations(hits: float, misses: float) -> List[str]:
    """
    Generate recommendations for cache optimization based on hit/miss ratio.

    Use when: cache metrics indicate sub-optimal hit rates and concrete
    next steps are needed.

    A hit rate below 0.5 yields the prompt-stabilization advice; a hit
    rate below 0.8 (which includes the former case) adds the grouping and
    formatting advice. With no traffic at all the hit rate is taken as 0.
    """
    recommendations: List[str] = []

    total = hits + misses
    hit_rate = hits / total if total > 0 else 0

    if hit_rate < 0.5:
        recommendations.append("Consider stabilizing system prompts")
        recommendations.append("Reduce variation in request prefixes")

    if hit_rate < 0.8:
        recommendations.append("Group similar requests together")
        recommendations.append("Use consistent formatting across requests")

    return recommendations


# ---------------------------------------------------------------------------
# Demo / smoke test
# ---------------------------------------------------------------------------

def _demo() -> None:
    """Exercise each utility once and print the results."""
    print("=== Context Optimization Utilities — Demo ===\n")

    # 1. Token estimation
    sample_text = "The quick brown fox jumps over the lazy dog. " * 20
    tokens = estimate_token_count(sample_text)
    print(f"1. Token estimate for {len(sample_text)}-char text: ~{tokens} tokens\n")

    # 2. Observation masking
    store = ObservationStore(max_size=100)
    long_output = (
        "Result: 42 items found\n"
        "Total processing time: 3.2s\n"
        "Details:\n" + "\n".join([f" Item {i}: value={i*10}" for i in range(20)])
    )
    masked, ref_id = store.mask(long_output, max_length=100)
    print(f"2. Masked observation:\n {masked}")
    print(f" Ref ID: {ref_id}")
    retrieved = store.retrieve(ref_id)
    print(f" Retrievable: {retrieved is not None}\n")

    # 3. Context budget
    budget = ContextBudget(total_limit=128_000)
    budget.allocate("system_prompt", 1500)
    budget.allocate("tool_definitions", 3000)
    budget.allocate("message_history", 95_000)
    usage = budget.get_usage()
    print(f"3. Budget utilization: {usage['utilization_ratio']:.1%}")
    should_opt, reasons = budget.should_optimize(
        current_usage=int(128_000 * 0.85)
    )
    print(f" Should optimize: {should_opt}, reasons: {reasons}\n")

    # 4. Cache-stable prompt
    raw_prompt = "Session 42 started on 2025-12-20. Progress: 3/10 tasks."
    stable = design_stable_prompt(raw_prompt)
    print(f"4. Original prompt: {raw_prompt}")
    print(f" Stabilized: {stable}\n")

    # 5. Summarization
    tool_out = "count: 150\nstatus: success\nFound 3 errors in module A."
    summary = summarize_content(tool_out, "tool_output", max_length=200)
    print(f"5. Tool output summary: {summary}\n")

    print("=== Demo complete ===")


if __name__ == "__main__":
    _demo()