Loading source
Pulling the file list, source metadata, and syntax-aware rendering for this listing.
Source from repo
Read, create, merge, split, watermark, encrypt, OCR, and fill PDF files using Python and CLI tools
Files
Skill
Size
Entrypoint
Format
Open file
Syntax-highlighted preview of this file as included in the skill package.
scripts/fill_pdf_form_with_annotations.py
"""Fill a PDF form by adding FreeText annotations described in a JSON file."""

import json
import sys

from pypdf import PdfReader, PdfWriter
from pypdf.annotations import FreeText


def transform_from_image_coords(bbox, image_width, image_height, pdf_width, pdf_height):
    """Scale an image-space box to the PDF page size and flip its y axis.

    Args:
        bbox: Sequence indexed as ``[x0, y0, x1, y1]`` in image pixels.
            Presumably y grows downward from the top edge of the image --
            the code subtracts the scaled y values from the page height.
        image_width: Width of the image the box was measured on.
        image_height: Height of the image the box was measured on.
        pdf_width: Width of the target PDF page.
        pdf_height: Height of the target PDF page.

    Returns:
        A ``(left, bottom, right, top)`` tuple in page units, with y measured
        up from the bottom edge of the page.
    """
    x_scale = pdf_width / image_width
    y_scale = pdf_height / image_height

    left = bbox[0] * x_scale
    right = bbox[2] * x_scale

    # Flip the vertical axis: the input's "top" (bbox[1]) becomes the larger
    # y value once measured from the bottom of the page.
    top = pdf_height - (bbox[1] * y_scale)
    bottom = pdf_height - (bbox[3] * y_scale)

    return left, bottom, right, top


def transform_from_pdf_coords(bbox, pdf_height):
    """Flip the y axis of a box that is already expressed in page units.

    Args:
        bbox: Sequence indexed as ``[x0, y0, x1, y1]``; x values are passed
            through unchanged, y values are subtracted from ``pdf_height``.
        pdf_height: Height of the PDF page.

    Returns:
        A ``(left, bottom, right, top)`` tuple with y measured up from the
        bottom edge of the page.
    """
    left = bbox[0]
    right = bbox[2]

    pypdf_top = pdf_height - bbox[1]
    pypdf_bottom = pdf_height - bbox[3]

    return left, pypdf_bottom, right, pypdf_top


def fill_pdf_form(input_pdf_path, fields_json_path, output_pdf_path):
    """Copy a PDF and add one FreeText annotation per non-empty form field.

    The JSON file must contain ``"form_fields"`` and ``"pages"`` lists. Each
    field supplies ``"page_number"`` (1-based), ``"entry_bounding_box"`` and
    optionally ``"entry_text"`` with ``"text"``, ``"font"``, ``"font_size"``
    and ``"font_color"``. Each page entry supplies ``"page_number"`` and
    either ``"pdf_width"`` (box already in page units) or ``"image_width"`` /
    ``"image_height"`` (box measured on a rendered image of the page).

    Args:
        input_pdf_path: Path of the PDF to read.
        fields_json_path: Path of the JSON file describing the fields.
        output_pdf_path: Path the annotated PDF is written to.

    Raises:
        ValueError: If a field with text refers to a page that is missing
            from ``"pages"`` or does not exist in the PDF.
    """
    # JSON is UTF-8 by specification; do not depend on the platform default.
    with open(fields_json_path, "r", encoding="utf-8") as f:
        fields_data = json.load(f)

    reader = PdfReader(input_pdf_path)
    writer = PdfWriter()
    writer.append(reader)

    # Page sizes keyed by 1-based page number, matching the JSON numbering.
    pdf_dimensions = {}
    for index, page in enumerate(reader.pages, start=1):
        mediabox = page.mediabox
        pdf_dimensions[index] = (float(mediabox.width), float(mediabox.height))

    annotations = []
    for field in fields_data["form_fields"]:
        # Decide whether there is anything to draw before doing any page
        # lookup or geometry, so empty fields never fail on missing metadata.
        entry_text = field.get("entry_text")
        if not isinstance(entry_text, dict):
            continue
        text = entry_text.get("text")
        if not text:
            continue

        page_num = field["page_number"]

        page_info = next(
            (p for p in fields_data["pages"] if p["page_number"] == page_num),
            None,
        )
        if page_info is None:
            raise ValueError(f"No entry in 'pages' for page_number {page_num!r}")
        if page_num not in pdf_dimensions:
            raise ValueError(
                f"page_number {page_num!r} is not a page of {input_pdf_path}"
            )
        pdf_width, pdf_height = pdf_dimensions[page_num]

        if "pdf_width" in page_info:
            transformed_entry_box = transform_from_pdf_coords(
                field["entry_bounding_box"],
                pdf_height,
            )
        else:
            transformed_entry_box = transform_from_image_coords(
                field["entry_bounding_box"],
                page_info["image_width"],
                page_info["image_height"],
                pdf_width,
                pdf_height,
            )

        font_name = entry_text.get("font", "Arial")
        font_size = f"{entry_text.get('font_size', 14)}pt"
        font_color = entry_text.get("font_color", "000000")

        annotation = FreeText(
            text=text,
            rect=transformed_entry_box,
            font=font_name,
            font_size=font_size,
            font_color=font_color,
            border_color=None,
            background_color=None,
        )
        annotations.append(annotation)
        # The writer indexes pages from 0; the JSON numbers them from 1.
        writer.add_annotation(page_number=page_num - 1, annotation=annotation)

    with open(output_pdf_path, "wb") as output:
        writer.write(output)

    print(f"Successfully filled PDF form and saved to {output_pdf_path}")
    print(f"Added {len(annotations)} text annotations")


if __name__ == "__main__":
    if len(sys.argv) != 4:
        print("Usage: fill_pdf_form_with_annotations.py [input pdf] [fields.json] [output pdf]")
        sys.exit(1)
    input_pdf = sys.argv[1]
    fields_json = sys.argv[2]
    output_pdf = sys.argv[3]

    fill_pdf_form(input_pdf, fields_json, output_pdf)