Loading source
Pulling the file list, source metadata, and syntax-aware rendering for this listing.
Source from repo
Read, create, merge, split, watermark, encrypt, OCR, and fill PDF files using Python and CLI tools
Files
Skill
Size
Entrypoint
Format
Open file
Syntax-highlighted preview of this file as included in the skill package.
scripts/check_bounding_boxes.py
"""Check the label/entry bounding boxes listed in a ``fields.json`` file.

Each form field contributes two rectangles (its label box and its entry box).
The check reports every pair of rectangles on the same page that overlap, and
every entry box that is shorter than the font size of the text destined for it.
"""
from dataclasses import dataclass
import json
import sys

# Once the message list reaches this length (the initial "Read N fields" line
# counts too), a final abort notice is appended and checking stops.
_MAX_MESSAGES = 20
_ABORT_MESSAGE = "Aborting further checks; fix bounding boxes and try again"
# Font size used when a field's "entry_text" does not specify "font_size".
_DEFAULT_FONT_SIZE = 14


@dataclass
class RectAndField:
    """One bounding box together with the form field it was taken from."""

    # Four numbers: indices 0 and 2 bound the horizontal extent,
    # indices 1 and 3 bound the vertical extent.
    rect: list[float]
    # Either "label" or "entry", depending on which box of the field this is.
    rect_type: str
    # The field's dict exactly as loaded from the JSON document.
    field: dict


def _rects_intersect(r1, r2) -> bool:
    """Return True when the two rectangles overlap with non-zero area.

    Rectangles that only share an edge or a corner are not intersecting.
    """
    disjoint_horizontal = r1[0] >= r2[2] or r1[2] <= r2[0]
    disjoint_vertical = r1[1] >= r2[3] or r1[3] <= r2[1]
    return not (disjoint_horizontal or disjoint_vertical)


def _abort_if_full(messages: list[str]) -> bool:
    """Append the abort notice and return True once the message cap is hit."""
    if len(messages) >= _MAX_MESSAGES:
        messages.append(_ABORT_MESSAGE)
        return True
    return False


def get_bounding_box_messages(fields_json_stream) -> list[str]:
    """Validate the bounding boxes described by a JSON stream.

    Args:
        fields_json_stream: A readable file-like object containing a JSON
            object with a ``"form_fields"`` list. Every field must provide
            ``"label_bounding_box"``, ``"entry_bounding_box"``,
            ``"page_number"`` and ``"description"``; ``"entry_text"`` (a dict
            with an optional ``"font_size"``) is optional.

    Returns:
        Human-readable messages: a "Read N fields" line first, then one
        ``FAILURE: ...`` line per problem found, or a single ``SUCCESS: ...``
        line when nothing is wrong. Reporting stops early, with an abort
        notice, once the message cap is reached.

    Raises:
        json.JSONDecodeError: If the stream does not contain valid JSON.
        KeyError: If a required key is missing.
    """
    messages = []
    fields = json.load(fields_json_stream)
    form_fields = fields["form_fields"]
    messages.append(f"Read {len(form_fields)} fields")

    # Flatten to one record per rectangle: label first, then entry, per field.
    rects_and_fields = []
    for field in form_fields:
        rects_and_fields.append(RectAndField(field["label_bounding_box"], "label", field))
        rects_and_fields.append(RectAndField(field["entry_bounding_box"], "entry", field))

    has_error = False
    for index, first in enumerate(rects_and_fields):
        # Compare against later rectangles only, so each pair is seen once.
        for second in rects_and_fields[index + 1:]:
            if first.field["page_number"] != second.field["page_number"]:
                continue
            if not _rects_intersect(first.rect, second.rect):
                continue
            has_error = True
            if first.field is second.field:
                # Same field: its own label and entry boxes collide.
                messages.append(
                    f"FAILURE: intersection between label and entry bounding boxes for `{first.field['description']}` ({first.rect}, {second.rect})"
                )
            else:
                messages.append(
                    f"FAILURE: intersection between {first.rect_type} bounding box for `{first.field['description']}` ({first.rect}) and {second.rect_type} bounding box for `{second.field['description']}` ({second.rect})"
                )
            if _abort_if_full(messages):
                return messages

        # Height check applies only to entry boxes that will receive text.
        if first.rect_type == "entry" and "entry_text" in first.field:
            font_size = first.field["entry_text"].get("font_size", _DEFAULT_FONT_SIZE)
            entry_height = first.rect[3] - first.rect[1]
            if entry_height < font_size:
                has_error = True
                messages.append(
                    f"FAILURE: entry bounding box height ({entry_height}) for `{first.field['description']}` is too short for the text content (font size: {font_size}). Increase the box height or decrease the font size."
                )
                if _abort_if_full(messages):
                    return messages

    if not has_error:
        messages.append("SUCCESS: All bounding boxes are valid")
    return messages


if __name__ == "__main__":
    if len(sys.argv) != 2:
        print("Usage: check_bounding_boxes.py [fields.json]")
        sys.exit(1)
    # JSON text is UTF-8; do not depend on the platform's default encoding.
    with open(sys.argv[1], encoding="utf-8") as f:
        messages = get_bounding_box_messages(f)
    for msg in messages:
        print(msg)