I have written the code below that automatically fill in one of my pdf templates, getting data form an excel sheet and saves a copy of it in one of my folders. The full code is attached below. I have removed the folders and replaced the dictionary with example texts. And yes, I am a beginner:)
import os
import pandas as pd
from PyPDF2 import PdfFileWriter, PdfFileReader
from PyPDF2.generic import BooleanObject, NameObject, IndirectObject, NumberObject
def set_need_appearances_writer(writer: PdfFileWriter):
try:
catalog = writer._root_object
if "/AcroForm" not in catalog:
writer._root_object.update({
NameObject("/AcroForm"): IndirectObject(len(writer._objects), 0, writer)})
need_appearances = NameObject("/NeedAppearances")
writer._root_object["/AcroForm"][need_appearances] = BooleanObject(True)
return writer
except Exception as e:
print('set_need_appearances_writer() catch : ', repr(e))
return writer
def ReadOnlyPDFOutput(page, fields):
for j in range(0, len(page['/Annots'])):
writer_annot = page['/Annots'][j].getObject()
for field in fields:
if writer_annot.get('/T') == field:
writer_annot.update({
NameObject("/Ff"): NumberObject(1),
NameObject("/Ff"): NameObject(1)})
if __name__ == '__main__':
xl_filename = "Example_excel_list"
pdf_filename = "Example_template"
xlin = os.path.normpath(os.path.join(os.getcwd(),r'C:\.....',xl_filename))
pdfin = os.path.normpath(os.path.join(os.getcwd(),r'C:\.....',pdf_filename))
pdfout = os.path.normpath(os.path.join(os.getcwd(),r'C:\.....'))
data = pd.read_excel(xlin)
pdf = PdfFileReader(open(pdfin, "rb"), strict=False)
if "/AcroForm" in pdf.trailer["/Root"]:
pdf.trailer["/Root"]["/AcroForm"].update(
{NameObject("/NeedAppearances"): BooleanObject(True)})
pdf_fields = [str(x) for x in pdf.getFields().keys()] # List of all pdf field names
excel_fields = data.columns.tolist()
i = 0 #Filename numerical prefix
for j, rows in data.iterrows():
i += 1
pdf2 = PdfFileWriter()
set_need_appearances_writer(pdf2)
if "/AcroForm" in pdf2._root_object:
pdf2._root_object["/AcroForm"].update(
{NameObject("/NeedAppearances"): BooleanObject(True)})
# Below you must define the field names as keys in this dictionary
# Field names found by running and printing line 15
# Key = pdf_field_name : Value = csv_field_value
field_dictionary_1 = {"Example1": str(rows['Example1']),
"Example2": rows['Example2'],
"Example3": rows['Example3'],
"Example4": rows['Example4'],
"Example5": rows['Example5'],
"Example6": rows['Example6'],
"Checkbox1": rows['Checkbox1'],
"Checkbox2": rows['Checkbox2'],
}
temp_out_dir = os.path.normpath(os.path.join(pdfout,str(i) + 'out.pdf'))
pdf2.addPage(pdf.getPage(0)) # Makes a copy of pdf template page
pdf2.updatePageFormFieldValues(pdf2.getPage(0), field_dictionary_1) # Updates fields
#Makes the pdf output file READ-ONLY
ReadOnlyPDFOutput(pdf2.getPage(0), field_dictionary_1)
outputStream = open(temp_out_dir, "wb")
pdf2.write(outputStream) # Saves copy of enhanced template
outputStream.close()
print(f'Process Complete: {i} PDFs Processed!')
What I want to improve is the possibility to "check" the checkboxes instead of doing it with textboxes with the letter "x" which I have done in this code. For example, if I write "Yes" in a specific excel cell, the checkbox "checks". If nothing is displayed the checkboxes stays empty. I probably have to make a dictionary for the checkboxes itself, but i am not sure how I should proceed with the rest...
Any help would be gladly appreciated!
Aucun commentaire:
Enregistrer un commentaire