Loading source
Pulling the file list, source metadata, and syntax-aware rendering for this listing.
Source from repo
Read, create, merge, split, watermark, encrypt, OCR, and fill PDF files using Python and CLI tools
Files
Skill
Size
Entrypoint
Format
Open file
Syntax-highlighted preview of this file as included in the skill package.
scripts/fill_fillable_fields.py
import json
import sys

from pypdf import PdfReader, PdfWriter

from extract_form_field_info import get_field_info


def fill_pdf_fields(input_pdf_path: str, fields_json_path: str, output_pdf_path: str):
    """Fill the form fields of a PDF from a JSON description and save a copy.

    The JSON file must contain a list of objects. Each object needs a
    ``"field_id"`` and a 1-based ``"page"``; objects that also carry a
    ``"value"`` are written into the form, the others are only validated.

    Every entry is checked against the fields reported by ``get_field_info``
    before anything is written. All problems are printed, and if at least one
    was found the process exits with status 1 without creating the output.

    Args:
        input_pdf_path: Path of the PDF whose form is filled.
        fields_json_path: Path of the JSON file describing the values.
        output_pdf_path: Path the filled PDF is written to.
    """
    # JSON text is UTF-8; do not let the platform locale pick the codec,
    # otherwise non-ASCII values break on systems with a non-UTF-8 default.
    with open(fields_json_path, encoding="utf-8") as f:
        fields = json.load(f)

    # page number -> {field_id: value}, only for entries that carry a value.
    fields_by_page = {}
    for field in fields:
        if "value" in field:
            field_id = field["field_id"]
            page = field["page"]
            fields_by_page.setdefault(page, {})[field_id] = field["value"]

    reader = PdfReader(input_pdf_path)

    # Validate every entry first so that all errors are reported in one run.
    has_error = False
    field_info = get_field_info(reader)
    fields_by_ids = {f["field_id"]: f for f in field_info}
    for field in fields:
        existing_field = fields_by_ids.get(field["field_id"])
        if not existing_field:
            has_error = True
            print(f"ERROR: `{field['field_id']}` is not a valid field ID")
        elif field["page"] != existing_field["page"]:
            has_error = True
            print(f"ERROR: Incorrect page number for `{field['field_id']}` (got {field['page']}, expected {existing_field['page']})")
        elif "value" in field:
            err = validation_error_for_field_value(existing_field, field["value"])
            if err:
                print(err)
                has_error = True
    if has_error:
        sys.exit(1)

    writer = PdfWriter(clone_from=reader)
    for page, field_values in fields_by_page.items():
        # Page numbers in the JSON are 1-based; writer.pages is 0-based.
        writer.update_page_form_field_values(writer.pages[page - 1], field_values, auto_regenerate=False)

    # NOTE(review): presumably asks PDF viewers to render the field
    # appearances themselves - confirm against the pypdf documentation.
    writer.set_need_appearances_writer(True)

    with open(output_pdf_path, "wb") as f:
        writer.write(f)


def validation_error_for_field_value(field_info, field_value):
    """Return an error message if ``field_value`` is not allowed for the field.

    Args:
        field_info: Field description with at least ``"type"`` and
            ``"field_id"``, plus the type-specific keys read below
            (``"checked_value"``/``"unchecked_value"``, ``"radio_options"``
            or ``"choice_options"``).
        field_value: The value that is about to be written.

    Returns:
        An ``"ERROR: ..."`` string when the value is rejected, otherwise
        ``None``. Field types other than checkbox, radio_group and choice
        are not checked.
    """
    field_type = field_info["type"]
    field_id = field_info["field_id"]
    if field_type == "checkbox":
        checked_val = field_info["checked_value"]
        unchecked_val = field_info["unchecked_value"]
        if field_value != checked_val and field_value != unchecked_val:
            return f'ERROR: Invalid value "{field_value}" for checkbox field "{field_id}". The checked value is "{checked_val}" and the unchecked value is "{unchecked_val}"'
    elif field_type == "radio_group":
        option_values = [opt["value"] for opt in field_info["radio_options"]]
        if field_value not in option_values:
            return f'ERROR: Invalid value "{field_value}" for radio group field "{field_id}". Valid values are: {option_values}'
    elif field_type == "choice":
        choice_values = [opt["value"] for opt in field_info["choice_options"]]
        if field_value not in choice_values:
            return f'ERROR: Invalid value "{field_value}" for choice field "{field_id}". Valid values are: {choice_values}'
    return None


def monkeypatch_pydpf_method():
    """Wrap ``DictionaryObject.get_inherited`` to flatten paired ``Opt`` lists.

    After this call, looking up ``FieldDictionaryAttributes.Opt`` returns only
    the first element of each entry whenever the stored value is a list made
    up exclusively of two-element lists. Every other lookup is unchanged.

    The replacement is installed on the class, so it affects the whole
    process.
    """
    from pypdf.generic import DictionaryObject
    from pypdf.constants import FieldDictionaryAttributes

    original_get_inherited = DictionaryObject.get_inherited

    def patched_get_inherited(self, key: str, default=None):
        result = original_get_inherited(self, key, default)
        if key == FieldDictionaryAttributes.Opt:
            # NOTE(review): presumably pypdf expects a flat list of strings
            # here when filling choice fields - confirm for the version used.
            if isinstance(result, list) and all(isinstance(v, list) and len(v) == 2 for v in result):
                result = [r[0] for r in result]
        return result

    DictionaryObject.get_inherited = patched_get_inherited


if __name__ == "__main__":
    if len(sys.argv) != 4:
        print("Usage: fill_fillable_fields.py [input pdf] [field_values.json] [output pdf]")
        sys.exit(1)
    # The patch is installed only for command-line runs, before any filling.
    monkeypatch_pydpf_method()
    input_pdf = sys.argv[1]
    fields_json = sys.argv[2]
    output_pdf = sys.argv[3]
    fill_pdf_fields(input_pdf, fields_json, output_pdf)