Loading source
Pulling the file list, source metadata, and syntax-aware rendering for this listing.
Source from repo
Reviews, improves, and writes SwiftUI code following state management, view composition, performance, and iOS 26+ Liquid Glass best practices.
Files
Skill
Size
Entrypoint
Format
Open file
Syntax-highlighted preview of this file as included in the skill package.
scripts/instruments_parser/xml_utils.py
1"""Streaming XML helpers for xctrace export output.23Instruments XML deduplicates repeated values with `id`/`ref` attributes that4can span the whole document, so we stream rows with iterparse while keeping5a global id cache for later ref lookups.6"""7from __future__ import annotations89import xml.etree.ElementTree as ET10from collections.abc import Iterator11from dataclasses import dataclass121314@dataclass(frozen=True)15class Column:16mnemonic: str # e.g. "time", "weight", "stack"17engineering_type: str # e.g. "sample-time", "weight", "tagged-backtrace"181920class RowStream:21"""Iterate <row> elements of a single <table> schema export.2223Yields `dict[str, Element]` keyed by column mnemonic. Elements inside a24yielded row are live ET elements (rooted in the id cache where applicable25so ref resolution via `resolve()` remains valid after the row is yielded).26"""2728def __init__(self, xml_bytes: bytes):29self._xml = xml_bytes30self.columns: list[Column] = []31self._id_cache: dict[str, ET.Element] = {}3233def resolve(self, element: ET.Element) -> ET.Element:34"""If the element is a ref, return the referenced element; else self."""35ref = element.get("ref")36if ref is None:37return element38target = self._id_cache.get(ref)39if target is None:40return element # unresolved; return the ref element itself41return target4243def __iter__(self) -> Iterator[dict[str, ET.Element]]:44# iterparse fires `end` events once an element is fully parsed, so ids45# are visible to descendants via the cache. We only need `end` events;46# row bodies are reconstructed from the end element itself in _row_dict.47#48# NOTE: we intentionally don't call `elem.clear()` after yielding a row.49# Instruments' XML is a single shared doc where any row can `ref` an50# `id` defined earlier (threads, processes, stacks, metadata), and51# clearing would break those later lookups. The tradeoff is peak RAM52# ≈ document size. That's fine for typical traces up to a few hundred53# MB; very large exports may need a smarter pass that first indexes54# referenced ids and only retains those.55schema_seen = False5657context = ET.iterparse(_bytes_to_file(self._xml), events=("end",))58for _event, elem in context:59eid = elem.get("id")60if eid is not None:61self._id_cache[eid] = elem6263if elem.tag == "schema" and not schema_seen:64self.columns = _parse_columns(elem)65schema_seen = True66continue6768if elem.tag == "row":69yield _row_dict(elem, self.columns)70# Do not clear elem — children referenced via id may still be needed.717273def _parse_columns(schema_el: ET.Element) -> list[Column]:74cols: list[Column] = []75for col in schema_el.findall("col"):76mnemonic = (col.findtext("mnemonic") or "").strip()77etype = (col.findtext("engineering-type") or "").strip()78if mnemonic:79cols.append(Column(mnemonic=mnemonic, engineering_type=etype))80return cols818283def _row_dict(row_el: ET.Element, cols: list[Column]) -> dict[str, ET.Element]:84# Row children map positionally to columns. <sentinel/> marks a missing85# optional value for that column.86result: dict[str, ET.Element] = {}87children = list(row_el)88for idx, child in enumerate(children):89if idx >= len(cols):90break91if child.tag == "sentinel":92continue93result[cols[idx].mnemonic] = child94return result959697def _bytes_to_file(data: bytes):98import io99return io.BytesIO(data)100101102# --- Extraction helpers ---------------------------------------------------103104def int_text(elem: ET.Element | None) -> int | None:105if elem is None or elem.text is None:106return None107try:108return int(elem.text)109except ValueError:110return None111112113def str_text(elem: ET.Element | None) -> str | None:114if elem is None or elem.text is None:115return None116return elem.text117118119def fmt_attr(elem: ET.Element | None) -> str | None:120"""Return the human-readable `fmt` attribute if present."""121if elem is None:122return None123return elem.get("fmt")124125126def extract_thread(thread_el: ET.Element, stream: RowStream) -> dict:127"""Parse a <thread> element into name, tid, process dict.128129Handles ref-style threads by resolving through the stream's id cache.130"""131resolved = stream.resolve(thread_el)132name = resolved.get("fmt", "")133tid_el = resolved.find("tid")134process_el = resolved.find("process")135process = extract_process(process_el, stream) if process_el is not None else None136return {137"name": name,138"tid": int_text(tid_el),139"process": process,140"is_main": name.startswith("Main Thread") if name else False,141}142143144def extract_process(process_el: ET.Element, stream: RowStream) -> dict:145resolved = stream.resolve(process_el)146name = resolved.get("fmt", "")147pid_el = resolved.find("pid")148return {149"name": _clean_process_name(name),150"pid": int_text(pid_el),151}152153154def _clean_process_name(fmt: str) -> str:155# "NowPlaying Gigs (28401)" -> "NowPlaying Gigs"156if " (" in fmt and fmt.endswith(")"):157return fmt.rsplit(" (", 1)[0]158return fmt159160161def extract_backtrace(162bt_el: ET.Element, stream: RowStream, max_frames: int = 20163) -> list[dict]:164"""Return a list of frame dicts from a <tagged-backtrace> or <backtrace>.165166Frames are ordered leaf-first (top of stack first), matching Instruments'167display order.168"""169resolved = stream.resolve(bt_el)170inner = resolved.find("backtrace")171if inner is None:172inner = resolved173frames: list[dict] = []174for frame_el in inner.findall("frame"):175f = stream.resolve(frame_el)176frames.append({177"name": f.get("name") or "",178"addr": f.get("addr") or "",179})180if len(frames) >= max_frames:181break182return frames183184185def top_symbol(frames: list[dict]) -> str:186"""Pick the leaf symbol, falling back to addr if unsymbolicated."""187if not frames:188return "<empty-stack>"189first = frames[0]190return first.get("name") or first.get("addr") or "<unknown>"191192193def first_present(row: dict, *keys: str) -> ET.Element | None:194"""Return the first row column whose key exists.195196`row[key] or row[other_key]` is unsafe here: Element is falsy when it has197no children (a common case for leaf <event-time>, <start-time>, etc.), so198`or` short-circuits past valid leaf elements. This walks keys explicitly.199"""200for key in keys:201el = row.get(key)202if el is not None:203return el204return None205206207def in_window(time_ns: int | None, window: tuple[int, int] | None) -> bool:208"""Return True if time_ns is inside [start, end] (inclusive), or window is None."""209if window is None:210return True211if time_ns is None:212return False213start, end = window214return start <= time_ns <= end215216217def event_overlaps_window(218start_ns: int, end_ns: int, window: tuple[int, int] | None219) -> bool:220"""Return True if [start, end] overlaps [window.start, window.end]."""221if window is None:222return True223w_start, w_end = window224return not (end_ns < w_start or start_ns > w_end)225