Loading source
Pulling the file list, source metadata, and syntax-aware rendering for this listing.
Source from repo
Create, edit, and inspect PowerPoint presentations with professional design and automated visual QA
Files
Skill
Size
Entrypoint
Format
Open file
Syntax-highlighted preview of this file as included in the skill package.
scripts/office/validate.py
1"""2Command line tool to validate Office document XML files against XSD schemas and tracked changes.34Usage:5python validate.py <path> [--original <original_file>] [--auto-repair] [--author NAME]67The first argument can be either:8- An unpacked directory containing the Office document XML files9- A packed Office file (.docx/.pptx/.xlsx) which will be unpacked to a temp directory1011Auto-repair fixes:12- paraId/durableId values that exceed OOXML limits13- Missing xml:space="preserve" on w:t elements with whitespace14"""1516import argparse17import sys18import tempfile19import zipfile20from pathlib import Path2122from validators import DOCXSchemaValidator, PPTXSchemaValidator, RedliningValidator232425def main():26parser = argparse.ArgumentParser(description="Validate Office document XML files")27parser.add_argument(28"path",29help="Path to unpacked directory or packed Office file (.docx/.pptx/.xlsx)",30)31parser.add_argument(32"--original",33required=False,34default=None,35help="Path to original file (.docx/.pptx/.xlsx). If omitted, all XSD errors are reported and redlining validation is skipped.",36)37parser.add_argument(38"-v",39"--verbose",40action="store_true",41help="Enable verbose output",42)43parser.add_argument(44"--auto-repair",45action="store_true",46help="Automatically repair common issues (hex IDs, whitespace preservation)",47)48parser.add_argument(49"--author",50default="Claude",51help="Author name for redlining validation (default: Claude)",52)53args = parser.parse_args()5455path = Path(args.path)56assert path.exists(), f"Error: {path} does not exist"5758original_file = None59if args.original:60original_file = Path(args.original)61assert original_file.is_file(), f"Error: {original_file} is not a file"62assert original_file.suffix.lower() in [".docx", ".pptx", ".xlsx"], (63f"Error: {original_file} must be a .docx, .pptx, or .xlsx file"64)6566file_extension = (original_file or path).suffix.lower()67assert file_extension in [".docx", ".pptx", ".xlsx"], (68f"Error: Cannot determine file type from {path}. Use --original or provide a .docx/.pptx/.xlsx file."69)7071if path.is_file() and path.suffix.lower() in [".docx", ".pptx", ".xlsx"]:72temp_dir = tempfile.mkdtemp()73with zipfile.ZipFile(path, "r") as zf:74zf.extractall(temp_dir)75unpacked_dir = Path(temp_dir)76else:77assert path.is_dir(), f"Error: {path} is not a directory or Office file"78unpacked_dir = path7980match file_extension:81case ".docx":82validators = [83DOCXSchemaValidator(unpacked_dir, original_file, verbose=args.verbose),84]85if original_file:86validators.append(87RedliningValidator(unpacked_dir, original_file, verbose=args.verbose, author=args.author)88)89case ".pptx":90validators = [91PPTXSchemaValidator(unpacked_dir, original_file, verbose=args.verbose),92]93case _:94print(f"Error: Validation not supported for file type {file_extension}")95sys.exit(1)9697if args.auto_repair:98total_repairs = sum(v.repair() for v in validators)99if total_repairs:100print(f"Auto-repaired {total_repairs} issue(s)")101102success = all(v.validate() for v in validators)103104if success:105print("All validations PASSED!")106107sys.exit(0 if success else 1)108109110if __name__ == "__main__":111main()112