Loading source
Pulling the file list, source metadata, and syntax-aware rendering for this listing.
Source from bundle
Telegram MTProto MCP server with userbot watcher, chat/DM parser and context builders
Files
Skill
Size
Entrypoint
Format
Open file
Syntax-highlighted preview of this file as included in the skill package.
context_builder.py
"""
context_builder.py — builds per-chat and per-person summaries from messages.jsonl
WITHOUT an LLM — plain statistics and top words only.

Run periodically: python3 context_builder.py
"""

import json
import os
from pathlib import Path
from collections import defaultdict, Counter
from datetime import datetime, timezone

CONTEXT_DIR = Path(__file__).parent / "context"
MESSAGES_FILE = CONTEXT_DIR / "messages.jsonl"
CHATS_DIR = CONTEXT_DIR / "chats"
PEOPLE_DIR = CONTEXT_DIR / "people"

# Output directories are created at import time (kept as in the original module).
CHATS_DIR.mkdir(parents=True, exist_ok=True)
PEOPLE_DIR.mkdir(parents=True, exist_ok=True)

OWNER_ID = int(os.getenv("TELEGRAM_OWNER_ID", "0"))
MAX_SAMPLE = 20  # number of most recent messages included in a summary

# Minimum number of messages before a chat / person gets its own summary file.
# Shared by the file-writing loops and the summary.md index so they cannot drift.
_MIN_CHAT_MSGS = 3
_MIN_PERSON_MSGS = 2

# Words shorter than this are ignored by top_words().
_MIN_WORD_LEN = 4
# NOTE: with _MIN_WORD_LEN == 4 only "клав" can actually be hit; the shorter
# entries are already removed by the length filter. Kept as-is from the original.
_STOP_WORDS = frozenset(
    {"клав", "это", "что", "как", "для", "все", "там", "так", "тут", "нет", "ну"}
)
# Punctuation trimmed from both ends of every token.
_STRIP_CHARS = ".,!?;:\"'()[]{}—-"


def load_messages(path=None) -> list:
    """Read a JSON-lines file and return the decoded message dicts.

    Args:
        path: Optional file to read; defaults to MESSAGES_FILE.

    Returns:
        A list of dicts in file order. An empty list if the file is missing.
        Blank lines are ignored. Lines that are not valid JSON, or that decode
        to something other than a JSON object, are skipped and counted; the
        count is printed once at the end.
    """
    source = MESSAGES_FILE if path is None else Path(path)
    if not source.exists():
        return []

    msgs = []
    skipped = 0
    with open(source, encoding="utf-8") as f:
        for line in f:
            line = line.strip()
            if not line:
                continue
            try:
                record = json.loads(line)
            except json.JSONDecodeError:
                skipped += 1
                continue
            # Every consumer calls .get() on a record, so only objects are usable.
            if isinstance(record, dict):
                msgs.append(record)
            else:
                skipped += 1

    if skipped:
        print(f"[context_builder] Пропущено некорректных строк: {skipped}")
    return msgs


def top_words(texts: list, n=10) -> list:
    """Return the ``n`` most frequent words across ``texts``.

    Each text is lower-cased and split on whitespace; surrounding punctuation
    is stripped from every token. Tokens shorter than four characters and
    stop-words are dropped. Non-string items in ``texts`` are ignored.
    Ties keep first-seen order.
    """
    counts = Counter()
    for t in texts:
        if not isinstance(t, str):
            continue
        for token in t.lower().split():
            w = token.strip(_STRIP_CHARS)
            if len(w) >= _MIN_WORD_LEN and w not in _STOP_WORDS:
                counts[w] += 1
    return [w for w, _ in counts.most_common(n)]


def _now_stamp() -> str:
    """Current UTC time formatted for the "updated" header lines."""
    return datetime.now(timezone.utc).strftime("%Y-%m-%d %H:%M UTC")


def _clip(value, limit, default="") -> str:
    """Return ``value`` as a string cut to ``limit`` chars, or ``default`` if falsy."""
    text = str(value) if value else default
    return text[:limit]


def _label(m, key, default="?") -> str:
    """Return ``m[key]`` as a string, or ``default`` when missing, null or empty."""
    return str(m.get(key) or default)


def _fmt_ts(m) -> str:
    """Return the message's ``ts`` cut to 16 chars with 'T' replaced by a space."""
    return _clip(m.get("ts"), 16).replace("T", " ")


def _keywords_line(msgs) -> str:
    """Comma-joined top words over all non-empty texts, or an em dash if none."""
    texts = [m.get("text") for m in msgs if m.get("text")]
    return ", ".join(top_words(texts)) or "—"


def build_chat_md(chat_id, title, chat_type, msgs) -> str:
    """Render the Markdown summary for one chat.

    The document contains a header, the five most active senders, the top
    keywords, and the last MAX_SAMPLE messages. Missing or null ``ts``,
    ``text`` and ``sender_name`` fields are tolerated.
    """
    updated = _now_stamp()
    senders = Counter(_label(m, "sender_name") for m in msgs)

    lines = [
        f"# Чат: {title}",
        f"**ID:** {chat_id} | **Тип:** {chat_type} | **Сообщений в базе:** {len(msgs)}",
        f"**Обновлено:** {updated}",
        "",
        "## Активные участники",
    ]
    lines.extend(f"- {name}: {count} сообщ." for name, count in senders.most_common(5))

    lines += ["", "## Ключевые слова", _keywords_line(msgs), "", "## Последние сообщения"]
    for m in msgs[-MAX_SAMPLE:]:
        arrow = "→" if m.get("direction") == "out" else "←"
        name = _label(m, "sender_name")
        text = _clip(m.get("text"), 100, "[медиа]")
        lines.append(f"[{_fmt_ts(m)}] {arrow} {name}: {text}")

    return "\n".join(lines)


def build_person_md(user_id, name, msgs_all) -> str:
    """Render the Markdown summary for one person.

    The document contains a header, the chats the person appears in (most
    frequent first), the top keywords, and the last MAX_SAMPLE messages with
    an optional quoted reply snippet. Missing or null fields are tolerated.
    """
    updated = _now_stamp()
    chats_seen = Counter(_label(m, "chat_title") for m in msgs_all)

    lines = [
        f"# {name} (user_id: {user_id})",
        f"**Сообщений:** {len(msgs_all)} | **Обновлено:** {updated}",
        "",
        "## Чаты где встречается",
    ]
    lines.extend(f"- {chat}: {count} сообщ." for chat, count in chats_seen.most_common())

    lines += ["", "## Ключевые слова", _keywords_line(msgs_all), "", "## Последние сообщения"]
    for m in msgs_all[-MAX_SAMPLE:]:
        chat = _label(m, "chat_title")
        text = _clip(m.get("text"), 100, "[медиа]")
        quoted = m.get("reply_to_text")
        reply = f' ↩ "{_clip(quoted, 40)}"' if quoted else ""
        lines.append(f"[{_fmt_ts(m)}][{chat}]: {text}{reply}")

    return "\n".join(lines)


def _group_by_chat(msgs) -> dict:
    """Group messages by ``chat_id``; title and type come from the latest message."""
    chats = defaultdict(lambda: {"title": "", "type": "", "msgs": []})
    for m in msgs:
        cid = m.get("chat_id")
        entry = chats[cid]
        entry["title"] = m.get("chat_title") or str(cid)
        entry["type"] = m.get("chat_type") or ""
        entry["msgs"].append(m)
    return chats


def _group_by_sender(msgs) -> dict:
    """Group messages by ``sender_id``, skipping the owner and messages with no sender."""
    people = defaultdict(lambda: {"name": "", "msgs": []})
    for m in msgs:
        sid = m.get("sender_id")
        if not sid or sid == OWNER_ID:
            continue
        entry = people[sid]
        entry["name"] = m.get("sender_name") or str(sid)
        entry["msgs"].append(m)
    return people


def _write_summary(total, chats, people) -> None:
    """Write summary.md: an index linking to every chat and person file."""
    with open(CONTEXT_DIR / "summary.md", "w", encoding="utf-8") as f:
        f.write("# Контекст Март 7\n")
        f.write(f"*Обновлено: {_now_stamp()}*\n\n")
        f.write(f"**Всего записей:** {total}\n\n")
        f.write("## Чаты\n")
        for cid, info in chats.items():
            f.write(f"- [{info['title']}](chats/{cid}.md) — {len(info['msgs'])} сообщ.\n")
        f.write("\n## Люди\n")
        for uid, info in people.items():
            f.write(f"- [{info['name']}](people/{uid}.md) — {len(info['msgs'])} сообщ.\n")


def main():
    """Load all messages and regenerate chat, person and index Markdown files."""
    print("[context_builder] Загружаю сообщения...")
    msgs = load_messages()
    if not msgs:
        print("[context_builder] Нет сообщений.")
        return
    print(f"[context_builder] Загружено {len(msgs)} записей")

    # Filter once so the written files and the summary.md index always agree.
    chats = {
        cid: info
        for cid, info in _group_by_chat(msgs).items()
        if len(info["msgs"]) >= _MIN_CHAT_MSGS
    }
    people = {
        uid: info
        for uid, info in _group_by_sender(msgs).items()
        if len(info["msgs"]) >= _MIN_PERSON_MSGS
    }

    # Per-chat summaries
    for cid, info in chats.items():
        print(f"[context_builder] Чат: {info['title']}")
        md = build_chat_md(cid, info["title"], info["type"], info["msgs"])
        (CHATS_DIR / f"{cid}.md").write_text(md, encoding="utf-8")

    # Per-person summaries
    for uid, info in people.items():
        print(f"[context_builder] Человек: {info['name']}")
        md = build_person_md(uid, info["name"], info["msgs"])
        (PEOPLE_DIR / f"{uid}.md").write_text(md, encoding="utf-8")

    # Overall summary.md index
    _write_summary(len(msgs), chats, people)

    print("[context_builder] Готово!")


if __name__ == "__main__":
    main()