Loading source
Pulling the file list, source metadata, and syntax-aware rendering for this listing.
Source from repo
Create, read, edit, and format Excel (.xlsx) spreadsheets with formulas, color coding, and financial model standards
Files
Skill
Size
Entrypoint
Format
Open file
Syntax-highlighted preview of this file as included in the skill package.
scripts/office/validate.py
1"""2Command line tool to validate Office document XML files against XSD schemas and tracked changes.34Usage:5python validate.py <path> [--original <original_file>] [--auto-repair] [--author NAME]67The first argument can be either:8- An unpacked directory containing the Office document XML files9- A packed Office file (.docx/.pptx/.xlsx) which will be unpacked to a temp directory1011Auto-repair fixes:12- paraId/durableId values that exceed OOXML limits13- Missing xml:space="preserve" on w:t elements with whitespace14"""1516import argparse17import sys18import tempfile19import zipfile20from pathlib import Path2122from validators import DOCXSchemaValidator, PPTXSchemaValidator, RedliningValidator232425def main():26parser = argparse.ArgumentParser(description="Validate Office document XML files")27parser.add_argument(28"path",29help="Path to unpacked directory or packed Office file (.docx/.pptx/.xlsx)",30)31parser.add_argument(32"--original",33required=False,34default=None,35help="Path to original file (.docx/.pptx/.xlsx). If omitted, all XSD errors are reported and redlining validation is skipped.",36)37parser.add_argument(38"-v",39"--verbose",40action="store_true",41help="Enable verbose output",42)43parser.add_argument(44"--auto-repair",45action="store_true",46help="Automatically repair common issues (hex IDs, whitespace preservation)",47)48parser.add_argument(49"--author",50default="Claude",51help="Author name for redlining validation (default: Claude)",52)53args = parser.parse_args()5455path = Path(args.path)56assert path.exists(), f"Error: {path} does not exist"5758original_file = None59if args.original:60original_file = Path(args.original)61assert original_file.is_file(), f"Error: {original_file} is not a file"62assert original_file.suffix.lower() in [".docx", ".pptx", ".xlsx"], (63f"Error: {original_file} must be a .docx, .pptx, or .xlsx file"64)6566file_extension = (original_file or path).suffix.lower()67assert file_extension in [".docx", ".pptx", ".xlsx"], (68f"Error: Cannot determine file type from {path}. Use --original or provide a .docx/.pptx/.xlsx file."69)7071if path.is_file() and path.suffix.lower() in [".docx", ".pptx", ".xlsx"]:72temp_dir = tempfile.mkdtemp()73with zipfile.ZipFile(path, "r") as zf:74zf.extractall(temp_dir)75unpacked_dir = Path(temp_dir)76else:77assert path.is_dir(), f"Error: {path} is not a directory or Office file"78unpacked_dir = path7980match file_extension:81case ".docx":82validators = [83DOCXSchemaValidator(unpacked_dir, original_file, verbose=args.verbose),84]85if original_file:86validators.append(87RedliningValidator(unpacked_dir, original_file, verbose=args.verbose, author=args.author)88)89case ".pptx":90validators = [91PPTXSchemaValidator(unpacked_dir, original_file, verbose=args.verbose),92]93case _:94print(f"Error: Validation not supported for file type {file_extension}")95sys.exit(1)9697if args.auto_repair:98total_repairs = sum(v.repair() for v in validators)99if total_repairs:100print(f"Auto-repaired {total_repairs} issue(s)")101102success = all(v.validate() for v in validators)103104if success:105print("All validations PASSED!")106107sys.exit(0 if success else 1)108109110if __name__ == "__main__":111main()112