lundi 11 avril 2022

Check/uncheck checkboxes in PDF with Python - pyPDF2 and with data from excel

I have written the code below that automatically fill in one of my pdf templates, getting data form an excel sheet and saves a copy of it in one of my folders. The full code is attached below. I have removed the folders and replaced the dictionary with example texts. And yes, I am a beginner:)

import os
import pandas as pd
from PyPDF2 import PdfFileWriter, PdfFileReader
from PyPDF2.generic import BooleanObject, NameObject, IndirectObject, NumberObject

def set_need_appearances_writer(writer: PdfFileWriter):
    try:
        catalog = writer._root_object
        if "/AcroForm" not in catalog:
            writer._root_object.update({
                NameObject("/AcroForm"): IndirectObject(len(writer._objects), 0, writer)})
        need_appearances = NameObject("/NeedAppearances")
        writer._root_object["/AcroForm"][need_appearances] = BooleanObject(True)
        return writer

    except Exception as e:
        print('set_need_appearances_writer() catch : ', repr(e))
        return writer

def ReadOnlyPDFOutput(page, fields):
    for j in range(0, len(page['/Annots'])):
        writer_annot = page['/Annots'][j].getObject()
        for field in fields:
            if writer_annot.get('/T') == field:
                writer_annot.update({
                    NameObject("/Ff"): NumberObject(1),
                    NameObject("/Ff"): NameObject(1)})

if __name__ == '__main__':
    xl_filename = "Example_excel_list"
    pdf_filename = "Example_template"

    xlin = os.path.normpath(os.path.join(os.getcwd(),r'C:\.....',xl_filename))
    pdfin = os.path.normpath(os.path.join(os.getcwd(),r'C:\.....',pdf_filename))
    pdfout = os.path.normpath(os.path.join(os.getcwd(),r'C:\.....'))
    data = pd.read_excel(xlin)
    pdf = PdfFileReader(open(pdfin, "rb"), strict=False)
    
    if "/AcroForm" in pdf.trailer["/Root"]:
        pdf.trailer["/Root"]["/AcroForm"].update(
            {NameObject("/NeedAppearances"): BooleanObject(True)})
        
    pdf_fields = [str(x) for x in pdf.getFields().keys()] # List of all pdf field names
    excel_fields = data.columns.tolist()
    
    i = 0 #Filename numerical prefix
    for j, rows in data.iterrows():
        i += 1
        pdf2 = PdfFileWriter()
        set_need_appearances_writer(pdf2)
        if "/AcroForm" in pdf2._root_object:
           pdf2._root_object["/AcroForm"].update(
               {NameObject("/NeedAppearances"): BooleanObject(True)})
           
        # Below you must define the field names as keys in this dictionary
        # Field names found by running and printing line 15
        # Key = pdf_field_name : Value = csv_field_value
        field_dictionary_1 = {"Example1": str(rows['Example1']),
                            "Example2": rows['Example2'],
                            "Example3": rows['Example3'],
                            "Example4": rows['Example4'],
                            "Example5": rows['Example5'],
                            "Example6": rows['Example6'],
                            "Checkbox1": rows['Checkbox1'],
                            "Checkbox2": rows['Checkbox2'],
                            }
        
        

        temp_out_dir = os.path.normpath(os.path.join(pdfout,str(i) + 'out.pdf'))
        
        pdf2.addPage(pdf.getPage(0)) # Makes a copy of pdf template page
        pdf2.updatePageFormFieldValues(pdf2.getPage(0), field_dictionary_1) # Updates fields
        
        
       
        #Makes the pdf output file READ-ONLY
        ReadOnlyPDFOutput(pdf2.getPage(0), field_dictionary_1)
        
        outputStream = open(temp_out_dir, "wb")
        pdf2.write(outputStream) # Saves copy of enhanced template
        outputStream.close()

    print(f'Process Complete: {i} PDFs Processed!')

What I want to improve is the possibility to "check" the checkboxes instead of doing it with textboxes with the letter "x" which I have done in this code. For example, if I write "Yes" in a specific excel cell, the checkbox "checks". If nothing is displayed the checkboxes stays empty. I probably have to make a dictionary for the checkboxes itself, but i am not sure how I should proceed with the rest...

Any help would be gladly appreciated!




Aucun commentaire:

Enregistrer un commentaire