I need to extract check boxes from a PDF file. There are no AcroFields or anything like that. There is a requirement to use the PDFsharp library, or some other library that is free to use. I can't use iTextSharp, because it's not free for commercial use. I found some code for extracting images from the pages, but it gives me 4 objects, and two of them are images. I assume that they may be the ticked and unticked check box images for the page, but I need to get all the references to the images on the page, because there are around 15 checkboxes on every page and I need to know which ones are ticked and which are not. The only way I see to do that is to extract all the images from the page and check whether each one is ticked or not. The code I currently use is:
// Opens the PDF at `path`, walks every page, and saves each image XObject listed in the
// page's /Resources -> /XObject dictionary to the desktop as a PNG file.
// `imageCount` is a counter declared outside this block; it numbers the exported files.
// NOTE(review): a resource dictionary names each distinct XObject once, however many times
// the page content draws it, so this loop yields the distinct images rather than one entry
// per check box on the page — confirm against the page content stream ("Do" operators).
using (PdfDocument doc = PdfReader.Open(path))
{
    foreach (PdfPage page in doc.Pages)
    {
        PdfDictionary resources = page.Elements.GetDictionary("/Resources");
        if (resources == null)
        {
            continue;
        }

        // Get external objects dictionary
        PdfDictionary xObjects = resources.Elements.GetDictionary("/XObject");
        if (xObjects == null)
        {
            continue;
        }

        // Iterate references to external objects
        foreach (PdfItem item in xObjects.Elements.Values)
        {
            PdfReference reference = item as PdfReference;
            if (reference == null)
            {
                continue;
            }

            // Is external object an image?
            PdfDictionary xObject = reference.Value as PdfDictionary;
            if (xObject == null || xObject.Elements.GetString("/Subtype") != "/Image")
            {
                continue;
            }

            // Dispose the GDI+ bitmap as soon as it has been written to disk.
            using (Bitmap bitmap = ExportImage(xObject))
            {
                // The encoder must match the ".png" extension of the target file.
                bitmap.Save($@"C:\Users\tishk\Desktop\exported{imageCount++}.png", ImageFormat.Png);
            }
        }
    }
}
}
/// <summary>
/// Converts a PDF image dictionary into a bitmap, choosing the decoder from the
/// dictionary's /Filter name.
/// </summary>
/// <param name="image">The image XObject dictionary to export.</param>
/// <returns>The bitmap produced by <c>ExportAsPngImage</c> for /FlateDecode images.</returns>
/// <exception cref="ApplicationException">The filter is anything other than /FlateDecode.</exception>
private static Bitmap ExportImage(PdfDictionary image)
{
    string filterName = image.Elements.GetName("/Filter");

    // Only Flate-compressed images are supported; reject everything else up front.
    if (filterName != "/FlateDecode")
    {
        throw new ApplicationException(filterName + " filter not implemented");
    }

    return ExportAsPngImage(image);
}
/// <summary>
/// Builds an indexed bitmap from the unfiltered sample data of a PDF image dictionary.
/// </summary>
/// <remarks>
/// Supports single-component images with 1 or 8 bits per component. Samples are copied
/// row by row because a GDI+ bitmap row (its stride) is padded, whereas the PDF sample
/// rows are assumed to be packed to whole bytes only.
/// NOTE(review): the bitmap keeps the default GDI+ palette, and /ColorSpace, /Decode and
/// /ImageMask are not consulted, so the colours may not match the PDF rendering — confirm
/// whether that matters for the caller.
/// </remarks>
/// <param name="image">The image XObject dictionary to export.</param>
/// <returns>A new bitmap that the caller owns and must dispose.</returns>
/// <exception cref="ApplicationException">
/// The stream cannot be unfiltered, the image size is not positive, the bit depth is
/// unsupported, or the amount of sample data does not match a single-component image.
/// </exception>
private static Bitmap ExportAsPngImage(PdfDictionary image)
{
    int width = image.Elements.GetInteger(PdfImage.Keys.Width);
    int height = image.Elements.GetInteger(PdfImage.Keys.Height);
    int bitsPerComponent = image.Elements.GetInteger(PdfImage.Keys.BitsPerComponent);
    if (width <= 0 || height <= 0)
    {
        throw new ApplicationException("Invalid image size " + width + "x" + height);
    }

    // Stream.Value only holds raw samples when unfiltering succeeded.
    if (!image.Stream.TryUnfilter())
    {
        throw new ApplicationException("Image stream could not be unfiltered");
    }
    byte[] decoded = image.Stream.Value;

    System.Drawing.Imaging.PixelFormat pixelFormat;
    int sourceStride; // bytes per row in the decoded PDF data
    switch (bitsPerComponent)
    {
        case 1:
            pixelFormat = System.Drawing.Imaging.PixelFormat.Format1bppIndexed;
            sourceStride = (width + 7) / 8;
            break;
        case 8:
            pixelFormat = System.Drawing.Imaging.PixelFormat.Format8bppIndexed;
            sourceStride = width;
            break;
        default:
            throw new ApplicationException(bitsPerComponent + " bits per component not implemented");
    }

    // Too little data would read past the array; several components (e.g. RGB) would not
    // fit an indexed bitmap. Both previously led to an unchecked native memory copy.
    long singleComponentSize = (long)sourceStride * height;
    long componentCount = decoded.Length / singleComponentSize;
    if (componentCount != 1)
    {
        throw new ApplicationException(
            "Image data length " + decoded.Length + " does not match a single-component " +
            width + "x" + height + " image at " + bitsPerComponent + " bits per component");
    }

    Bitmap bmp = new Bitmap(width, height, pixelFormat);
    try
    {
        BitmapData bmpData = bmp.LockBits(new Rectangle(0, 0, width, height), ImageLockMode.WriteOnly, pixelFormat);
        try
        {
            long firstRow = bmpData.Scan0.ToInt64();
            for (int row = 0; row < height; row++)
            {
                // Destination rows are bmpData.Stride bytes apart; source rows are packed.
                IntPtr destination = new IntPtr(firstRow + (long)row * bmpData.Stride);
                Marshal.Copy(decoded, row * sourceStride, destination, sourceStride);
            }
        }
        finally
        {
            bmp.UnlockBits(bmpData);
        }

        return bmp;
    }
    catch
    {
        // Do not leak the GDI+ handle when the copy fails.
        bmp.Dispose();
        throw;
    }
}
Aucun commentaire:
Enregistrer un commentaire