Loading source
Pulling the file list, source metadata, and syntax-aware rendering for this listing.
Source from repo
Create, read, edit, and format Excel (.xlsx) spreadsheets with formulas, color coding, and financial model standards
Files
Skill
Size
Entrypoint
Format
Open file
Syntax-highlighted preview of this file as included in the skill package.
scripts/office/validators/redlining.py
"""
Validator for tracked changes in Word documents.
"""

import subprocess
import tempfile
import xml.etree.ElementTree as ET
import zipfile
from pathlib import Path


class RedliningValidator:
    """Check that one author's edits to a Word document are all tracked changes.

    The validator compares ``word/document.xml`` from an unpacked (edited)
    document against the same part inside the original ``.docx``. Both trees
    have the author's ``<w:ins>`` elements removed and the author's ``<w:del>``
    elements unwrapped (deleted text restored); the remaining paragraph text
    must then be identical.
    """

    def __init__(self, unpacked_dir, original_docx, verbose=False, author="Claude"):
        """Store the paths and options used by :meth:`validate`.

        Args:
            unpacked_dir: Directory containing the edited, unpacked document
                (``word/document.xml`` is looked up beneath it).
            original_docx: Path to the original ``.docx`` archive.
            verbose: When true, ``PASSED`` messages are printed as well.
            author: Value of the ``w:author`` attribute identifying the
                tracked changes to validate.
        """
        self.unpacked_dir = Path(unpacked_dir)
        self.original_docx = Path(original_docx)
        self.verbose = verbose
        self.author = author
        self.namespaces = {
            "w": "http://schemas.openxmlformats.org/wordprocessingml/2006/main"
        }

    def repair(self) -> int:
        """Perform no repair; always returns 0."""
        return 0

    def validate(self):
        """Validate that the author's edits are fully covered by tracked changes.

        Failure reasons are printed to stdout. If the edited document holds no
        ``<w:ins>``/``<w:del>`` attributed to the author, validation passes
        immediately without comparing against the original.

        Returns:
            bool: ``True`` when validation passes, ``False`` otherwise.
        """
        modified_file = self.unpacked_dir / "word" / "document.xml"
        if not modified_file.exists():
            print(f"FAILED - Modified document.xml not found at {modified_file}")
            return False

        # Parse the edited document once. A malformed or unreadable file is a
        # validation failure, reported here instead of being silently ignored.
        try:
            modified_root = ET.parse(modified_file).getroot()
        except (ET.ParseError, OSError) as e:
            print(f"FAILED - Error parsing XML files: {e}")
            return False

        if not self._has_author_changes(modified_root):
            if self.verbose:
                print(f"PASSED - No tracked changes by {self.author} found.")
            return True

        original_root = self._load_original_root()
        if original_root is None:
            return False

        self._remove_author_tracked_changes(original_root)
        self._remove_author_tracked_changes(modified_root)

        modified_text = self._extract_text_content(modified_root)
        original_text = self._extract_text_content(original_root)

        if modified_text != original_text:
            print(self._generate_detailed_diff(original_text, modified_text))
            return False

        if self.verbose:
            print(f"PASSED - All changes by {self.author} are properly tracked")
        return True

    def _has_author_changes(self, root) -> bool:
        """Return whether any ``<w:ins>``/``<w:del>`` below *root* is by the author."""
        author_attr = f"{{{self.namespaces['w']}}}author"
        return any(
            elem.get(author_attr) == self.author
            for path in (".//w:del", ".//w:ins")
            for elem in root.iterfind(path, self.namespaces)
        )

    def _load_original_root(self):
        """Parse ``word/document.xml`` straight out of the original ``.docx``.

        Only the one needed member is read; nothing is extracted to disk.

        Returns:
            The root element, or ``None`` after printing a ``FAILED`` message
            when the archive or the document part cannot be read or parsed.
        """
        try:
            with zipfile.ZipFile(self.original_docx, "r") as archive:
                # ZipFile.open raises KeyError when the member is absent.
                with archive.open("word/document.xml") as handle:
                    return ET.parse(handle).getroot()
        except KeyError:
            print(
                f"FAILED - Original document.xml not found in {self.original_docx}"
            )
        except ET.ParseError as e:
            print(f"FAILED - Error parsing XML files: {e}")
        except Exception as e:
            # Corrupt or unsupported archives surface as several unrelated
            # exception types (BadZipFile, OSError, zlib errors, ...); every
            # one of them is reported as a validation failure.
            print(f"FAILED - Error unpacking original docx: {e}")
        return None

    def _generate_detailed_diff(self, original_text, modified_text):
        """Build the multi-line failure message shown when the texts differ.

        The message lists likely causes and the expected redlining patterns,
        followed by a git word diff when one can be produced.
        """
        error_parts = [
            f"FAILED - Document text doesn't match after removing {self.author}'s tracked changes",
            "",
            "Likely causes:",
            " 1. Modified text inside another author's <w:ins> or <w:del> tags",
            " 2. Made edits without proper tracked changes",
            " 3. Didn't nest <w:del> inside <w:ins> when deleting another's insertion",
            "",
            "For pre-redlined documents, use correct patterns:",
            " - To reject another's INSERTION: Nest <w:del> inside their <w:ins>",
            " - To restore another's DELETION: Add new <w:ins> AFTER their <w:del>",
            "",
        ]

        git_diff = self._get_git_word_diff(original_text, modified_text)
        if git_diff:
            error_parts.extend(["Differences:", "============", git_diff])
        else:
            error_parts.append("Unable to generate word diff (git not available)")

        return "\n".join(error_parts)

    def _get_git_word_diff(self, original_text, modified_text):
        """Return a ``git diff --word-diff`` rendering of the two texts.

        A character-level diff (``--word-diff-regex=.``) is tried first and a
        plain word-level diff second. This is best-effort diagnostics.

        Returns:
            The diff body as a string, or ``None`` when git is unavailable,
            fails, or produces no diff content.
        """
        attempts = (["--word-diff-regex=."], [])

        try:
            with tempfile.TemporaryDirectory() as temp_dir:
                temp_path = Path(temp_dir)

                original_file = temp_path / "original.txt"
                modified_file = temp_path / "modified.txt"

                original_file.write_text(original_text, encoding="utf-8")
                modified_file.write_text(modified_text, encoding="utf-8")

                for extra_args in attempts:
                    # No check=True: `git diff --no-index` exits with 1
                    # whenever the files differ, which is the expected case.
                    result = subprocess.run(
                        [
                            "git",
                            "diff",
                            "--word-diff=plain",
                            *extra_args,
                            "-U0",
                            "--no-index",
                            str(original_file),
                            str(modified_file),
                        ],
                        capture_output=True,
                        # The inputs were written as UTF-8, so decode the
                        # output the same way regardless of the locale; never
                        # fail on a byte sequence split by the diff.
                        encoding="utf-8",
                        errors="replace",
                    )
                    content = self._diff_body(result.stdout)
                    if content:
                        return content
        except (OSError, subprocess.SubprocessError, UnicodeError):
            # git missing, temp files unusable, or text not encodable: the
            # caller falls back to a generic message.
            return None

        return None

    @staticmethod
    def _diff_body(diff_output) -> str:
        """Return the non-blank lines that follow the first ``@@`` hunk header.

        Hunk header lines themselves are dropped; an empty string is returned
        when there is no hunk.
        """
        content_lines = []
        in_content = False
        for line in diff_output.split("\n"):
            if line.startswith("@@"):
                in_content = True
                continue
            if in_content and line.strip():
                content_lines.append(line)
        return "\n".join(content_lines)

    def _remove_author_tracked_changes(self, root):
        """Undo the author's tracked changes in *root*, in place.

        The author's ``<w:ins>`` elements are removed with their content. The
        author's ``<w:del>`` elements are replaced by their children, with
        every ``<w:delText>`` inside renamed to ``<w:t>`` so the deleted text
        counts as ordinary text again.
        """
        w_ns = self.namespaces["w"]
        ins_tag = f"{{{w_ns}}}ins"
        del_tag = f"{{{w_ns}}}del"
        author_attr = f"{{{w_ns}}}author"
        deltext_tag = f"{{{w_ns}}}delText"
        t_tag = f"{{{w_ns}}}t"

        # Only the children of the element currently yielded by iter() are
        # changed, i.e. before the traversal descends into them.
        for parent in root.iter():
            authored_insertions = [
                child
                for child in parent
                if child.tag == ins_tag and child.get(author_attr) == self.author
            ]
            for elem in authored_insertions:
                parent.remove(elem)

        for parent in root.iter():
            authored_deletions = [
                (index, child)
                for index, child in enumerate(parent)
                if child.tag == del_tag and child.get(author_attr) == self.author
            ]

            # Work back to front so the recorded indices of earlier siblings
            # stay valid while later ones are expanded.
            for del_index, del_elem in reversed(authored_deletions):
                for elem in del_elem.iter():
                    if elem.tag == deltext_tag:
                        elem.tag = t_tag

                for child in reversed(list(del_elem)):
                    parent.insert(del_index, child)
                parent.remove(del_elem)

    def _extract_text_content(self, root) -> str:
        """Return the text of every non-empty paragraph, one paragraph per line.

        For each ``<w:p>`` below *root*, the text of all descendant ``<w:t>``
        elements is concatenated; paragraphs with no text are skipped.
        """
        p_tag = f"{{{self.namespaces['w']}}}p"
        t_tag = f"{{{self.namespaces['w']}}}t"

        paragraphs = []
        for p_elem in root.findall(f".//{p_tag}"):
            paragraph_text = "".join(
                t_elem.text for t_elem in p_elem.findall(f".//{t_tag}") if t_elem.text
            )
            if paragraph_text:
                paragraphs.append(paragraph_text)

        return "\n".join(paragraphs)


if __name__ == "__main__":
    raise RuntimeError("This module should not be run directly.")