Loading source
Pulling the file list, source metadata, and syntax-aware rendering for this listing.
Source from repo
Create, read, edit, and format Excel (.xlsx) spreadsheets with formulas, color coding, and financial model standards
Files
Skill
Size
Entrypoint
Format
Open file
Syntax-highlighted preview of this file as included in the skill package.
scripts/office/unpack.py
"""Unpack Office files (DOCX, PPTX, XLSX) for editing.

Extracts the ZIP archive, pretty-prints XML files, and optionally:
- Merges adjacent runs with identical formatting (DOCX only)
- Simplifies adjacent tracked changes from same author (DOCX only)

Usage:
    python unpack.py <office_file> <output_dir> [options]

Examples:
    python unpack.py document.docx unpacked/
    python unpack.py presentation.pptx unpacked/
    python unpack.py document.docx unpacked/ --merge-runs false
"""

import argparse
import sys
import zipfile
from pathlib import Path

import defusedxml.minidom

from helpers.merge_runs import merge_runs as do_merge_runs
from helpers.simplify_redlines import simplify_redlines as do_simplify_redlines

# Curly quotes that get rewritten as XML hexadecimal numeric character
# references (the `&#x....;` form). A reference is equivalent XML for the same
# character, so document content is unchanged. The values are derived from the
# code points rather than typed out, so the table cannot silently degrade into
# an identity mapping if this file passes through something that decodes
# entities.
_SMART_QUOTE_CHARS = "\u201c\u201d\u2018\u2019"
SMART_QUOTE_REPLACEMENTS = {
    char: f"&#x{ord(char):04X};" for char in _SMART_QUOTE_CHARS
}


def unpack(
    input_file: str,
    output_directory: str,
    merge_runs: bool = True,
    simplify_redlines: bool = True,
) -> tuple[None, str]:
    """Extract an Office file into a directory and normalise its XML parts.

    Steps, in order: extract the ZIP archive, pretty-print every ``*.xml`` and
    ``*.rels`` file found under the output directory, run the DOCX-only
    clean-ups (when enabled), then escape smart quotes in those same files.

    Args:
        input_file: Path to a ``.docx``, ``.pptx`` or ``.xlsx`` file.
        output_directory: Directory to extract into; created if missing.
        merge_runs: For ``.docx`` only, call the run-merging helper.
        simplify_redlines: For ``.docx`` only, call the tracked-change
            simplification helper.

    Returns:
        ``(None, message)``. ``message`` starts with ``"Error"`` on failure
        and with ``"Unpacked"`` on success. This function does not raise for
        ordinary failures; they are reported through the message.
    """
    input_path = Path(input_file)
    output_path = Path(output_directory)
    suffix = input_path.suffix.lower()

    if not input_path.exists():
        return None, f"Error: {input_file} does not exist"

    if suffix not in {".docx", ".pptx", ".xlsx"}:
        return None, f"Error: {input_file} must be a .docx, .pptx, or .xlsx file"

    try:
        output_path.mkdir(parents=True, exist_ok=True)

        with zipfile.ZipFile(input_path, "r") as zf:
            zf.extractall(output_path)

        # Collected once, right after extraction; the same list is reused for
        # the smart-quote pass below.
        xml_files = list(output_path.rglob("*.xml")) + list(output_path.rglob("*.rels"))
        for xml_file in xml_files:
            _pretty_print_xml(xml_file)

        message = f"Unpacked {input_file} ({len(xml_files)} XML files)"

        if suffix == ".docx":
            # Each helper returns a 2-tuple; only the first element (a count)
            # is used here.
            if simplify_redlines:
                simplify_count, _ = do_simplify_redlines(str(output_path))
                message += f", simplified {simplify_count} tracked changes"

            if merge_runs:
                merge_count, _ = do_merge_runs(str(output_path))
                message += f", merged {merge_count} runs"

        for xml_file in xml_files:
            _escape_smart_quotes(xml_file)

        return None, message

    except zipfile.BadZipFile:
        return None, f"Error: {input_file} is not a valid Office file"
    except Exception as e:
        # Top-level boundary: convert any other failure into an error message.
        return None, f"Error unpacking: {e}"


def _pretty_print_xml(xml_file: Path) -> None:
    """Re-serialise ``xml_file`` in place with indentation, best-effort.

    A file that cannot be read, parsed or written is left as it was.
    """
    try:
        content = xml_file.read_text(encoding="utf-8")
        dom = defusedxml.minidom.parseString(content)
        xml_file.write_bytes(dom.toprettyxml(indent=" ", encoding="utf-8"))
    except Exception:
        # Deliberate best-effort: one unparseable part must not abort the
        # whole unpack.
        pass


def _escape_smart_quotes(xml_file: Path) -> None:
    """Replace curly quotes in ``xml_file`` with numeric character references.

    Uses ``SMART_QUOTE_REPLACEMENTS``. Best-effort: a file that cannot be read
    or written as UTF-8 is left as it was.
    """
    try:
        content = xml_file.read_text(encoding="utf-8")
        for char, entity in SMART_QUOTE_REPLACEMENTS.items():
            content = content.replace(char, entity)
        xml_file.write_text(content, encoding="utf-8")
    except Exception:
        # Deliberate best-effort, mirroring _pretty_print_xml.
        pass


if __name__ == "__main__":
    parser = argparse.ArgumentParser(
        description="Unpack an Office file (DOCX, PPTX, XLSX) for editing"
    )
    parser.add_argument("input_file", help="Office file to unpack")
    parser.add_argument("output_directory", help="Output directory")
    parser.add_argument(
        "--merge-runs",
        # Only the literal "true" (any case) enables the option.
        type=lambda x: x.lower() == "true",
        default=True,
        metavar="true|false",
        help="Merge adjacent runs with identical formatting (DOCX only, default: true)",
    )
    parser.add_argument(
        "--simplify-redlines",
        type=lambda x: x.lower() == "true",
        default=True,
        metavar="true|false",
        help="Merge adjacent tracked changes from same author (DOCX only, default: true)",
    )
    args = parser.parse_args()

    _, message = unpack(
        args.input_file,
        args.output_directory,
        merge_runs=args.merge_runs,
        simplify_redlines=args.simplify_redlines,
    )
    print(message)

    # Failure messages always start with "Error". A substring test would also
    # fire on a successful unpack whose input path contains "Error", because
    # the success message embeds that path.
    if message.startswith("Error"):
        sys.exit(1)