Source from repo
Agent Skills for Context Engineering

A comprehensive collection of Agent Skills for context engineering, multi-agent architectures, and production agent systems.
muratcankoylan · GitHub: muratcankoylan · Source repo · Original GitHub link
Files
241
Skill
n/a
Size
2.6 MB
Entrypoint
SKILL.md
Format
git-repo
Open file
skills/context-fundamentals/scripts/context_manager.py

Syntax-highlighted preview of this file as included in the skill package.
Rendered Source
code · 534 lines · Free
skills/context-fundamentals/scripts/context_manager.py
1"""
2Context Management Utilities for Agent Systems.
3 
4Public API
5----------
6Functions:
7    estimate_token_count     — Rough token estimate from text (demo only).
8    estimate_message_tokens  — Token estimate for a message list.
9    count_tokens_by_type     — Break down token usage by context component.
10    truncate_context         — Trim a context string to a token budget.
11    truncate_messages        — Trim message history while preserving structure.
12    validate_context_structure — Detect empty, oversized, or duplicate sections.
13    build_agent_context      — Assemble an optimized context dict from parts.
14 
15Classes:
16    ContextBuilder               — Priority-aware context assembly with budgets.
17    ProgressiveDisclosureManager — Lazy file loading with caching.
18 
19Usage
20-----
21Import individual utilities or use `build_agent_context` as the high-level
22entry point:
23 
24    from context_manager import build_agent_context
25    result = build_agent_context(
26        task="Refactor auth module",
27        system_prompt="You are a senior Python engineer.",
28        documents=["# Auth module docs ..."],
29    )
30    print(result["usage_report"])
31 
32Run this module directly (`python context_manager.py`) for an interactive demo
33that builds a sample context and prints the usage report.
34 
35Note: Token estimation in this module uses a character-ratio heuristic. For
36production systems, replace `estimate_token_count` with a real tokenizer
37(tiktoken for OpenAI, Anthropic's token-counting API, etc.).
38"""
39 
40from __future__ import annotations
41 
42import hashlib
43from typing import Any, Dict, List, Optional
44 
# Public API: the names exported by ``from context_manager import *``.
# Functions are listed first, then classes.
__all__ = [
    "estimate_token_count",
    "estimate_message_tokens",
    "count_tokens_by_type",
    "truncate_context",
    "truncate_messages",
    "validate_context_structure",
    "build_agent_context",
    "ContextBuilder",
    "ProgressiveDisclosureManager",
]
56 
57 
58# ---------------------------------------------------------------------------
59# Token estimation
60# ---------------------------------------------------------------------------
61 
def estimate_token_count(text: str) -> int:
    """Approximate the number of tokens occupied by *text*.

    Applies the rule of thumb that one token is about four characters of
    English prose; the result is rounded down, so strings shorter than four
    characters count as zero tokens.

    Use when: logging or doing quick budget checks during development.
    Do NOT use this for hard budget enforcement — code, URLs, and
    non-English text tokenize at very different ratios.

    WARNING: Production systems must use a real tokenizer:
    - OpenAI models  → ``tiktoken``
    - Anthropic      → Anthropic token-counting API
    - Others         → provider-specific tokenizer
    """
    chars_per_token = 4
    character_count = len(text)
    return character_count // chars_per_token
77 
78 
def estimate_message_tokens(messages: List[Dict[str, Any]]) -> int:
    """Estimate total tokens across a list of chat messages.

    Use when: deciding whether to trigger compaction on message history.

    Each message contributes the estimate for its ``content`` plus a flat
    10 tokens of role/formatting overhead. A message whose ``content`` is
    missing or ``None`` contributes only the overhead. Non-string content
    (for example a list of content blocks) is estimated from its ``str()``
    form rather than from its element count.

    Args:
        messages: Chat messages as dicts; only the ``content`` key is read.

    Returns:
        The summed estimate; ``0`` for an empty list.
    """
    per_message_overhead = 10  # Overhead for role/formatting
    total = 0
    for msg in messages:
        content = msg.get("content")
        if content is None:
            # A present-but-null content would otherwise crash in len().
            content = ""
        elif not isinstance(content, str):
            # len() of a list would count blocks, not characters.
            content = str(content)
        total += estimate_token_count(content) + per_message_overhead
    return total
92 
93 
def count_tokens_by_type(context: Dict[str, Any]) -> Dict[str, int]:
    """Report estimated token usage per context component.

    Use when: profiling where tokens are spent so the most expensive
    component can be compressed first.

    Keys read from *context*: ``system`` (text), ``tools`` (iterable; each
    item is measured via ``str()``), ``documents`` (iterable of text) and
    ``messages`` (message list). Keys that are absent leave their bucket
    at zero.

    The returned dict always contains ``system_prompt``,
    ``tool_definitions``, ``retrieved_documents``, ``message_history``,
    ``tool_outputs`` and ``other``; this function never assigns the last
    two, so they are always ``0``.
    """
    usage: Dict[str, int] = dict.fromkeys(
        (
            "system_prompt",
            "tool_definitions",
            "retrieved_documents",
            "message_history",
            "tool_outputs",
            "other",
        ),
        0,
    )

    if "system" in context:
        usage["system_prompt"] = estimate_token_count(context["system"])

    if "tools" in context:
        usage["tool_definitions"] += sum(
            estimate_token_count(str(tool_spec))
            for tool_spec in context["tools"]
        )

    if "documents" in context:
        usage["retrieved_documents"] += sum(
            estimate_token_count(document)
            for document in context["documents"]
        )

    if "messages" in context:
        usage["message_history"] = estimate_message_tokens(
            context["messages"]
        )

    return usage
127 
128 
129# ---------------------------------------------------------------------------
130# Context Builder
131# ---------------------------------------------------------------------------
132 
class ContextBuilder:
    """Build context with priority-aware budget management.

    Use when: assembling context from multiple sources (system prompt,
    retrieved documents, task description) and enforcing a token ceiling.
    Higher-priority sections are kept first when the budget is tight.

    Example::

        builder = ContextBuilder(context_limit=80_000)
        builder.add_section("system", prompt, priority=10)
        builder.add_section("task", task_text, priority=9)
        built = builder.build()
    """

    def __init__(self, context_limit: int = 100_000) -> None:
        """Create an empty builder with *context_limit* as default budget."""
        self.context_limit: int = context_limit
        # name -> {"content", "priority", "category", "tokens"}
        self.sections: Dict[str, Dict[str, Any]] = {}
        # Section names in first-insertion order; used to break priority ties.
        self.order: List[str] = []

    def add_section(
        self,
        name: str,
        content: str,
        priority: int = 0,
        category: str = "other",
    ) -> None:
        """Add or replace a named section.

        Higher *priority* values are kept first when the budget is tight.
        Replacing an existing section keeps its original position in the
        insertion order. The token estimate is computed once, here.
        """
        if name not in self.sections:
            self.order.append(name)

        self.sections[name] = {
            "content": content,
            "priority": priority,
            "category": category,
            "tokens": estimate_token_count(content),
        }

    def build(self, max_tokens: Optional[int] = None) -> str:
        """Assemble the context string within the token budget.

        Sections are visited in descending priority order (ties keep
        insertion order). A section that does not fit in the remaining
        budget is skipped, and later, smaller sections may still be
        included. The ``"\\n\\n"`` separators are not counted against the
        budget.

        Args:
            max_tokens: Budget for this call. ``None`` (default) uses
                ``context_limit``; an explicit ``0`` is honoured as a zero
                budget rather than being treated as "not provided".

        Returns:
            The included sections' content joined by blank lines.
        """
        # `max_tokens or ...` would silently turn 0 into the full limit.
        limit = self.context_limit if max_tokens is None else max_tokens

        by_priority = sorted(
            self.order,
            key=lambda n: self.sections[n]["priority"],
            reverse=True,
        )

        context_parts: List[str] = []
        current_tokens = 0

        for name in by_priority:
            section = self.sections[name]
            section_tokens = section["tokens"]

            if current_tokens + section_tokens <= limit:
                context_parts.append(section["content"])
                current_tokens += section_tokens

        return "\n\n".join(context_parts)

    def get_usage_report(self) -> Dict[str, Any]:
        """Return a summary of current context utilization.

        Use when: logging context composition during development or
        deciding whether to trigger compaction.

        Totals cover every registered section, including ones that
        ``build`` would drop for lack of budget. Keys: ``total_tokens``,
        ``limit``, ``utilization`` (fraction of ``context_limit``; ``0``
        when the limit is falsy), ``by_section`` and ``status``.
        """
        total = sum(s["tokens"] for s in self.sections.values())
        return {
            "total_tokens": total,
            "limit": self.context_limit,
            "utilization": total / self.context_limit if self.context_limit else 0,
            "by_section": {
                name: s["tokens"] for name, s in self.sections.items()
            },
            "status": self._get_status(total),
        }

    def _get_status(self, total: int) -> str:
        """Return 'critical', 'warning', or 'healthy' based on utilization.

        Above 90% of ``context_limit`` is critical, above 70% is warning.
        """
        ratio = total / self.context_limit if self.context_limit else 0
        if ratio > 0.9:
            return "critical"
        if ratio > 0.7:
            return "warning"
        return "healthy"
229 
230 
231# ---------------------------------------------------------------------------
232# Context Truncation
233# ---------------------------------------------------------------------------
234 
def truncate_context(
    context: str,
    max_tokens: int,
    preserve_start: bool = True,
) -> str:
    """Truncate *context* to approximately *max_tokens*.

    Use when: a single large text block must fit a hard budget and
    semantic chunking is not available.

    The budget is measured in whitespace-delimited words, not in the
    character-ratio tokens used by the estimators in this module, so treat
    the result as approximate.

    Args:
        context: Text to trim.
        max_tokens: Maximum number of words to keep. Zero or a negative
            value yields ``""`` when truncation is needed.
        preserve_start: ``True`` (default) keeps the beginning (system
            prompts, top-of-file content); ``False`` keeps the end (most
            recent information).

    Returns:
        *context* unchanged when it already fits. Otherwise the kept words
        with their original interior whitespace (newlines, indentation)
        intact and surrounding whitespace stripped.
    """
    if len(context.split()) <= max_tokens:
        return context

    # Guard explicitly: a `[-0:]` slice would keep everything, not nothing.
    if max_tokens <= 0:
        return ""

    if preserve_start:
        # With maxsplit, the last element is the untouched remainder, i.e. a
        # suffix of `context`; everything before it is the part to keep.
        remainder = context.split(None, max_tokens)[-1]
        kept = context[: len(context) - len(remainder)]
    else:
        # rsplit mirrors this: the first element is an untouched prefix.
        remainder = context.rsplit(None, max_tokens)[0]
        kept = context[len(remainder):]

    return kept.strip()
259 
260 
def truncate_messages(
    messages: List[Dict[str, Any]],
    max_tokens: int,
) -> List[Dict[str, Any]]:
    """Truncate message history while preserving structural integrity.

    Use when: message history exceeds budget and compaction has not yet
    been implemented.

    Strategy:
    1. Always keep the system prompt (if several messages have role
       ``"system"``, the last one wins).
    2. Keep any existing summary message (a message with a truthy
       ``is_summary`` key; again the last one wins).
    3. Fill the remaining budget with the most recent messages, walking
       backwards and stopping at the first message that does not fit, so
       the kept history is one contiguous tail with no gaps.

    All costs are measured with ``estimate_message_tokens``, so the
    per-message overhead is charged to the pinned and the recent messages
    alike.

    Args:
        messages: Chat messages in chronological order.
        max_tokens: Total budget for the returned list.

    Returns:
        A new list ordered as system prompt, summary, then the kept recent
        messages in their original order. The pinned messages are always
        returned, even when they alone exceed *max_tokens*.
    """
    system_prompt: Optional[Dict[str, Any]] = None
    summary: Optional[Dict[str, Any]] = None
    recent_messages: List[Dict[str, Any]] = []

    for msg in messages:
        if msg.get("role") == "system":
            system_prompt = msg
        elif msg.get("is_summary"):
            summary = msg
        else:
            recent_messages.append(msg)

    pinned = [m for m in (system_prompt, summary) if m is not None]
    available = max_tokens - estimate_message_tokens(pinned)

    if estimate_message_tokens(recent_messages) > available:
        kept_newest_first: List[Dict[str, Any]] = []
        used = 0
        for msg in reversed(recent_messages):
            cost = estimate_message_tokens([msg])
            if used + cost > available:
                # Stop instead of skipping: keeping older messages past a
                # dropped one would leave a hole in the conversation.
                break
            kept_newest_first.append(msg)
            used += cost
        # Collected newest-first; restore chronological order in one pass.
        recent_messages = kept_newest_first[::-1]

    return pinned + recent_messages
314 
315 
316# ---------------------------------------------------------------------------
317# Context Validation
318# ---------------------------------------------------------------------------
319 
def validate_context_structure(context: Dict[str, Any]) -> Dict[str, Any]:
    """Check a context dict for common structural problems.

    Use when: testing context assembly before sending it to the model.

    Checks, in order:
    1. Empty sections — values that are ``None`` or an empty string
       (empty lists are not flagged).
    2. Excessive length — the summed estimate over ``str(value)`` of every
       section exceeds 80,000 tokens.
    3. Missing recommended sections — ``system`` and ``task``.
    4. Potential duplicates — two sections whose first 1000 characters
       (of ``str(value)``) are identical.

    Returns a dict with ``valid`` (bool, true only when no issue was
    found), ``issues`` (list) and ``recommendations`` (list). Duplicate
    findings add an issue but no recommendation.
    """
    issues: List[str] = []
    recommendations: List[str] = []

    # 1. Empty sections (lists are skipped: no retrieved documents is fine).
    for name, value in context.items():
        is_blank_text = isinstance(value, str) and not value
        if value is None or is_blank_text:
            issues.append(f"Empty {name} section")
            recommendations.append(f"Remove or populate {name}")

    # 2. Overall size.
    total_tokens = 0
    for value in context.values():
        total_tokens += estimate_token_count(str(value))
    if total_tokens > 80_000:
        issues.append(
            f"Context length ({total_tokens} tokens) exceeds recommended limit"
        )
        recommendations.append("Consider context compaction or partitioning")

    # 3. Recommended sections.
    for required in ("system", "task"):
        if required in context:
            continue
        issues.append(f"Missing recommended section: {required}")
        recommendations.append(
            f"Add {required} section with relevant information"
        )

    # 4. Duplicate content, compared by a digest of the first 1000 chars.
    fingerprints: set[str] = set()
    for name, value in context.items():
        prefix = str(value)[:1000]
        digest = hashlib.md5(prefix.encode()).hexdigest()
        if digest in fingerprints:
            issues.append(f"Potential duplicate content in {name}")
        else:
            fingerprints.add(digest)

    return {
        "valid": not issues,
        "issues": issues,
        "recommendations": recommendations,
    }
371 
372 
373# ---------------------------------------------------------------------------
374# Progressive Disclosure
375# ---------------------------------------------------------------------------
376 
class ProgressiveDisclosureManager:
    """Lazy loader for progressive disclosure of file-based context.

    Use when: an agent has access to many reference files but should
    only pay the token cost for files that the current task actually
    needs. Summaries are loaded first; detail files are loaded on demand
    and cached for the session.

    Files are read as UTF-8 text. Paths are passed to ``open()`` exactly
    as given; ``base_dir`` is stored on the instance but is not joined
    onto them. Summary and detail files share one cache keyed by path.

    Example::

        pdm = ProgressiveDisclosureManager(base_dir="docs")
        overview = pdm.load_summary("docs/api_summary.md")
        # ... later, when detail is needed ...
        detail = pdm.load_detail("docs/api/endpoints.md")
    """

    def __init__(self, base_dir: str = ".") -> None:
        """Create a manager with an empty cache."""
        self.base_dir: str = base_dir
        # path -> file content, filled on first successful read.
        self.loaded_files: Dict[str, str] = {}

    def _read_cached(self, path: str, force: bool = False) -> str:
        """Return the content of *path*, reading from disk at most once.

        A missing file yields ``""`` and is not cached, so a later call
        retries the read. Other I/O errors propagate.
        """
        if not force and path in self.loaded_files:
            return self.loaded_files[path]
        try:
            # Explicit encoding: the default depends on the platform locale.
            with open(path, "r", encoding="utf-8") as f:
                content = f.read()
        except FileNotFoundError:
            return ""
        self.loaded_files[path] = content
        return content

    def load_summary(self, summary_path: str) -> str:
        """Load a summary file, returning cached content if available.

        Returns ``""`` when the file does not exist.
        """
        return self._read_cached(summary_path)

    def load_detail(self, detail_path: str, force: bool = False) -> str:
        """Load a detail file on demand.

        Set *force* to ``True`` to bypass the cache and re-read the file
        (useful when the underlying file may have changed). Returns ``""``
        when the file does not exist.
        """
        return self._read_cached(detail_path, force=force)

    def get_contextual_info(self, reference: Dict[str, Any]) -> str:
        """Return summary or detail based on the reference's flags.

        Use when: a reference dict carries both ``summary_path`` and
        ``detail_path`` and the caller sets ``need_detail=True`` only
        when full content is required.

        Falls back to the summary when detail is requested but no
        ``detail_path`` is given, and returns ``""`` when neither path
        applies.
        """
        summary_path = reference.get("summary_path")
        detail_path = reference.get("detail_path")
        need_detail = reference.get("need_detail", False)

        if need_detail and detail_path:
            return self.load_detail(detail_path)
        if summary_path:
            return self.load_summary(summary_path)
        return ""
442 
443 
444# ---------------------------------------------------------------------------
445# High-level entry point
446# ---------------------------------------------------------------------------
447 
def build_agent_context(
    task: str,
    system_prompt: str,
    documents: Optional[List[str]] = None,
    context_limit: int = 80_000,
) -> Dict[str, Any]:
    """Assemble and validate the context for one agent task.

    Use when: preparing context for a single inference call. The system
    prompt (priority 10), the task description (priority 9) and each
    optional document (priority 5, named ``document_<index>``) are
    registered with a ``ContextBuilder`` capped at *context_limit*.
    Validation runs on the raw inputs, not on the assembled string.

    Returns a dict with keys ``context`` (str), ``usage_report`` (dict),
    and ``validation`` (dict).
    """
    docs = documents or []

    builder = ContextBuilder(context_limit=context_limit)
    # Highest priority first: the system prompt, then the task itself.
    builder.add_section("system", system_prompt, priority=10, category="system")
    builder.add_section("task", task, priority=9, category="task")
    # Retrieved documents all share one, lower, priority.
    for index, document in enumerate(docs):
        builder.add_section(
            f"document_{index}",
            document,
            priority=5,
            category="retrieved",
        )

    validation = validate_context_structure(
        {
            "system": system_prompt,
            "task": task,
            "documents": docs,
        }
    )

    assembled = builder.build()
    usage = builder.get_usage_report()
    return {
        "context": assembled,
        "usage_report": usage,
        "validation": validation,
    }
494 
495 
496# ---------------------------------------------------------------------------
497# Demo
498# ---------------------------------------------------------------------------
499 
if __name__ == "__main__":
    # Demo: build a small sample context, then print its usage report and
    # the outcome of structural validation.
    print("=== Context Manager Demo ===\n")

    demo_result = build_agent_context(
        task="Refactor the authentication module to use OAuth 2.0.",
        system_prompt=(
            "You are a senior Python engineer. Follow PEP 8, use type hints, "
            "and write docstrings for all public functions."
        ),
        documents=[
            "# OAuth 2.0 Reference\nThe OAuth 2.0 authorization framework...",
            "# Current Auth Module\ndef login(user, password): ...",
        ],
    )

    usage = demo_result["usage_report"]
    print(f"Total tokens : {usage['total_tokens']}")
    print(f"Utilization  : {usage['utilization']:.1%}")
    print(f"Status       : {usage['status']}")
    print("\nBreakdown by section:")
    for section_name, token_count in usage["by_section"].items():
        print(f"  {section_name:20s} : {token_count:,} tokens")

    check = demo_result["validation"]
    if not check["valid"]:
        print("\nValidation   : FAILED")
        for problem in check["issues"]:
            print(f"  - {problem}")
    else:
        print("\nValidation   : PASSED")
534
Preparing the source view

Agent Skills for Context Engineering

skills/context-fundamentals/scripts/context_manager.py