Loading source
Pulling the file list, source metadata, and syntax-aware rendering for this listing.
Source from repo
Create, read, edit, and format Excel (.xlsx) spreadsheets with formulas, color coding, and financial model standards
Files
Skill
Size
Entrypoint
Format
Open file
Syntax-highlighted preview of this file as included in the skill package.
scripts/office/validators/pptx.py
"""
Validator for PowerPoint presentation XML files against XSD schemas.
"""

import re

from .base import BaseSchemaValidator


class PPTXSchemaValidator(BaseSchemaValidator):
    """Schema and consistency validator for an unpacked .pptx package.

    Adds PresentationML-specific checks (UUID-like IDs, slide layout
    references, duplicate layout relationships, shared notes slides) on top
    of the checks inherited from ``BaseSchemaValidator``.

    The methods here read ``self.xml_files``, ``self.unpacked_dir``,
    ``self.verbose``, ``self.PACKAGE_RELATIONSHIPS_NAMESPACE`` and
    ``self.OFFICE_RELATIONSHIPS_NAMESPACE``; none of them are defined in this
    module, so they are presumably provided by the base class.
    """

    PRESENTATIONML_NAMESPACE = (
        "http://schemas.openxmlformats.org/presentationml/2006/main"
    )

    # Lower-cased ID element name -> relationship type name.
    # NOTE(review): not referenced in this module; presumably consumed by the
    # base class when it validates relationship IDs -- confirm.
    ELEMENT_RELATIONSHIP_TYPES = {
        "sldid": "slide",
        "sldmasterid": "slidemaster",
        "notesmasterid": "notesmaster",
        "sldlayoutid": "slidelayout",
        "themeid": "theme",
        "tablestyleid": "tablestyles",
    }

    def validate(self):
        """Run every validation check and report the overall outcome.

        Returns:
            False immediately if ``validate_xml()`` fails. Otherwise True only
            when every remaining check returns a truthy value.
        """
        if not self.validate_xml():
            return False

        # Order is preserved from the original implementation. Every check is
        # executed even after a failure so that all problems get reported in
        # a single run (deliberately no short-circuiting).
        checks = (
            self.validate_namespaces,
            self.validate_unique_ids,
            self.validate_uuid_ids,
            self.validate_file_references,
            self.validate_slide_layout_ids,
            self.validate_content_types,
            self.validate_against_xsd,
            self.validate_notes_slide_references,
            self.validate_all_relationship_ids,
            self.validate_no_duplicate_slide_layouts,
        )

        all_valid = True
        for check in checks:
            if not check():
                all_valid = False

        return all_valid

    def validate_uuid_ids(self):
        """Check that ID attributes shaped like UUIDs contain only hex digits.

        Scans every attribute whose local name (case-insensitive) ends in
        ``id`` in each file of ``self.xml_files``. Values that
        ``_looks_like_uuid`` accepts but that do not match the UUID pattern
        are reported, as are files that cannot be parsed.

        Returns:
            True when no problems were found, False otherwise (after printing
            the collected errors).
        """
        import lxml.etree

        errors = []
        uuid_pattern = re.compile(
            r"^[\{\(]?[0-9A-Fa-f]{8}-?[0-9A-Fa-f]{4}-?[0-9A-Fa-f]{4}-?[0-9A-Fa-f]{4}-?[0-9A-Fa-f]{12}[\}\)]?$"
        )

        for xml_file in self.xml_files:
            try:
                root = lxml.etree.parse(str(xml_file)).getroot()

                for elem in root.iter():
                    for attr, value in elem.attrib.items():
                        # Drop the "{namespace}" prefix lxml puts on qualified
                        # attribute names before comparing.
                        attr_name = attr.split("}")[-1].lower()
                        # endswith("id") also covers the exact name "id".
                        if not attr_name.endswith("id"):
                            continue
                        if not self._looks_like_uuid(value):
                            continue
                        if uuid_pattern.match(value):
                            continue
                        errors.append(
                            f" {xml_file.relative_to(self.unpacked_dir)}: "
                            f"Line {elem.sourceline}: ID '{value}' appears to be a UUID but contains invalid hex characters"
                        )

            # Best-effort boundary: any failure on one file (including XML
            # syntax errors) is recorded and the scan moves on.
            except Exception as e:
                errors.append(
                    f" {xml_file.relative_to(self.unpacked_dir)}: Error: {e}"
                )

        if errors:
            print(f"FAILED - Found {len(errors)} UUID ID validation errors:")
            for error in errors:
                print(error)
            return False

        if self.verbose:
            print("PASSED - All UUID-like IDs contain valid hex values")
        return True

    def _looks_like_uuid(self, value):
        """Return True if ``value`` has the overall shape of a UUID.

        Leading/trailing braces and parentheses are stripped and all hyphens
        removed; the remainder must be exactly 32 alphanumeric characters.
        Non-hex letters are accepted on purpose so that the caller can flag
        them as invalid.
        """
        clean_value = value.strip("{}()").replace("-", "")
        return len(clean_value) == 32 and all(c.isalnum() for c in clean_value)

    def validate_slide_layout_ids(self):
        """Check that each ``sldLayoutId`` in a slide master resolves.

        For every ``ppt/slideMasters/*.xml`` file, the ``r:id`` of each
        ``sldLayoutId`` element must match the ``Id`` of a relationship in
        the master's ``_rels/<name>.rels`` file whose ``Type`` contains
        ``slideLayout``. A missing ``.rels`` file is reported as an error.

        Returns:
            True when there are no slide masters or no problems were found,
            False otherwise (after printing the collected errors).
        """
        import lxml.etree

        errors = []

        slide_masters = list(self.unpacked_dir.glob("ppt/slideMasters/*.xml"))

        if not slide_masters:
            if self.verbose:
                print("PASSED - No slide masters found")
            return True

        for slide_master in slide_masters:
            try:
                root = lxml.etree.parse(str(slide_master)).getroot()

                rels_file = slide_master.parent / "_rels" / f"{slide_master.name}.rels"

                if not rels_file.exists():
                    errors.append(
                        f" {slide_master.relative_to(self.unpacked_dir)}: "
                        f"Missing relationships file: {rels_file.relative_to(self.unpacked_dir)}"
                    )
                    continue

                rels_root = lxml.etree.parse(str(rels_file)).getroot()

                # Relationship Ids that point at slide layouts.
                valid_layout_rids = {
                    rel.get("Id")
                    for rel in rels_root.findall(
                        f".//{{{self.PACKAGE_RELATIONSHIPS_NAMESPACE}}}Relationship"
                    )
                    if "slideLayout" in rel.get("Type", "")
                }

                for sld_layout_id in root.findall(
                    f".//{{{self.PRESENTATIONML_NAMESPACE}}}sldLayoutId"
                ):
                    r_id = sld_layout_id.get(
                        f"{{{self.OFFICE_RELATIONSHIPS_NAMESPACE}}}id"
                    )
                    layout_id = sld_layout_id.get("id")

                    # Elements without an r:id are not reported here.
                    if r_id and r_id not in valid_layout_rids:
                        errors.append(
                            f" {slide_master.relative_to(self.unpacked_dir)}: "
                            f"Line {sld_layout_id.sourceline}: sldLayoutId with id='{layout_id}' "
                            f"references r:id='{r_id}' which is not found in slide layout relationships"
                        )

            # Best-effort boundary: record the failure and keep checking the
            # remaining slide masters.
            except Exception as e:
                errors.append(
                    f" {slide_master.relative_to(self.unpacked_dir)}: Error: {e}"
                )

        if errors:
            print(f"FAILED - Found {len(errors)} slide layout ID validation errors:")
            for error in errors:
                print(error)
            print(
                "Remove invalid references or add missing slide layouts to the relationships file."
            )
            return False

        if self.verbose:
            print("PASSED - All slide layout IDs reference valid slide layouts")
        return True

    def validate_no_duplicate_slide_layouts(self):
        """Check that no slide has more than one slideLayout relationship.

        Inspects every ``ppt/slides/_rels/*.xml.rels`` file and reports those
        containing more than one relationship whose ``Type`` contains
        ``slideLayout``. Slides with zero such relationships are not flagged
        by this check.

        Returns:
            True when no problems were found, False otherwise (after printing
            the collected errors).
        """
        import lxml.etree

        errors = []
        slide_rels_files = list(self.unpacked_dir.glob("ppt/slides/_rels/*.xml.rels"))

        for rels_file in slide_rels_files:
            try:
                root = lxml.etree.parse(str(rels_file)).getroot()

                layout_rels = [
                    rel
                    for rel in root.findall(
                        f".//{{{self.PACKAGE_RELATIONSHIPS_NAMESPACE}}}Relationship"
                    )
                    if "slideLayout" in rel.get("Type", "")
                ]

                if len(layout_rels) > 1:
                    errors.append(
                        f" {rels_file.relative_to(self.unpacked_dir)}: has {len(layout_rels)} slideLayout references"
                    )

            except Exception as e:
                errors.append(
                    f" {rels_file.relative_to(self.unpacked_dir)}: Error: {e}"
                )

        if errors:
            print("FAILED - Found slides with duplicate slideLayout references:")
            for error in errors:
                print(error)
            return False

        if self.verbose:
            print("PASSED - All slides have exactly one slideLayout reference")
        return True

    def validate_notes_slide_references(self):
        """Check that no notes slide is referenced by more than one slide.

        Collects, from every ``ppt/slides/_rels/*.xml.rels`` file, the
        ``Target`` of each relationship whose ``Type`` contains
        ``notesSlide`` (with ``../`` segments removed) and reports targets
        shared by several slides. Relationship files that cannot be parsed
        are reported as well.

        Returns:
            True when there are no slide relationship files or no problems
            were found, False otherwise (after printing the collected errors).
        """
        import lxml.etree

        errors = []
        # Number of distinct problems. Tracked explicitly because ``errors``
        # also holds per-file detail lines that must not be counted, and the
        # message prefix is not a reliable way to tell the two apart.
        issue_count = 0
        # Normalized notes slide target -> [(slide name, rels file), ...]
        notes_slide_references = {}

        slide_rels_files = list(self.unpacked_dir.glob("ppt/slides/_rels/*.xml.rels"))

        if not slide_rels_files:
            if self.verbose:
                print("PASSED - No slide relationship files found")
            return True

        for rels_file in slide_rels_files:
            try:
                root = lxml.etree.parse(str(rels_file)).getroot()

                for rel in root.findall(
                    f".//{{{self.PACKAGE_RELATIONSHIPS_NAMESPACE}}}Relationship"
                ):
                    if "notesSlide" not in rel.get("Type", ""):
                        continue
                    target = rel.get("Target", "")
                    if not target:
                        continue

                    normalized_target = target.replace("../", "")
                    # "slide1.xml.rels" -> stem "slide1.xml" -> "slide1"
                    slide_name = rels_file.stem.replace(".xml", "")

                    notes_slide_references.setdefault(normalized_target, []).append(
                        (slide_name, rels_file)
                    )

            # Best-effort boundary: record the failure and keep scanning the
            # remaining relationship files.
            except Exception as e:
                issue_count += 1
                errors.append(
                    f" {rels_file.relative_to(self.unpacked_dir)}: Error: {e}"
                )

        for target, references in notes_slide_references.items():
            if len(references) > 1:
                issue_count += 1
                slide_names = [ref[0] for ref in references]
                errors.append(
                    f" Notes slide '{target}' is referenced by multiple slides: {', '.join(slide_names)}"
                )
                # Detail lines: one per offending relationships file.
                for _slide_name, rels_file in references:
                    errors.append(f" - {rels_file.relative_to(self.unpacked_dir)}")

        if errors:
            print(
                f"FAILED - Found {issue_count} notes slide reference validation errors:"
            )
            for error in errors:
                print(error)
            print("Each slide may optionally have its own notes slide file.")
            return False

        if self.verbose:
            print("PASSED - All notes slide references are unique")
        return True


if __name__ == "__main__":
    raise RuntimeError("This module should not be run directly.")