如何从 PDF 中提取所有值?

How do I pull all the values from a PDF?

我有一个可以打开 PDF 文件并抓取文本的工作解决方案。不幸的是,我需要的值在表单字段中。我已经尝试了几种方法来获取值,但我只能获取看似表单名称的内容。键值正确,但收到的值错误。

| Key | Value returned |
| --- | --- |
| Company Name | iText.Forms.Fields.PdfTextFormField |
| Phone Number | iText.Forms.Fields.PdfTextFormField |
| Business Contact Data | iText.Forms.Fields.PdfTextFormField |
| Name | iText.Forms.Fields.PdfTextFormField |

未返回表单字段中的值。有更好的方法吗?

using System;
using System.Collections.Generic;
using iText.Forms;
using iText.Forms.Fields;
using iText.Kernel.Pdf;

namespace ConsoleApplication1 {
    /// <summary>
    /// Holder for the PDF form-field dump routine shown in the question.
    /// </summary>
    class Class1 {      
        /// <summary>
        /// Opens the PDF at <paramref name="pdfPath"/>, walks its AcroForm fields and
        /// builds one "name: text" line per field.
        /// </summary>
        /// <param name="pdfPath">Path of the PDF to read; the same path is also used as the output file below.</param>
        /// <returns>The concatenated "name: text" lines.</returns>
        public string pdfthree(string pdfPath) {            
            PdfReader reader = new PdfReader(pdfPath);
            PdfDocument document = new PdfDocument(reader);
            // NOTE(review): the second argument (false) presumably means "do not create a form
            // when the document has none" - confirm against the iText documentation.
            PdfAcroForm acroForm = PdfAcroForm.GetAcroForm(document, false);
            // This freshly allocated dictionary is replaced two lines below without being used.
            IDictionary<string, PdfFormField> Map = new Dictionary<string, PdfFormField>();

            Map = acroForm.GetFormFields();
            // The result of this lookup is discarded.
            acroForm.GetField("Name");
            string output = "";

            foreach (String fldName in Map.Keys) {
                // ToString() is applied to the PdfFormField object itself; per the output reported
                // in the question this prints the type name (iText.Forms.Fields.PdfTextFormField),
                // not the value the field holds.
                output += fldName + ": " + Map[fldName].ToString() + "\n";
            }

            // Writes the collected text to pdfPath, i.e. the same path the reader was opened on,
            // and does so before the document and reader are closed.
            // NOTE(review): presumably a separate output file was intended - confirm.
            System.IO.File.WriteAllText(pdfPath, output);
            document.Close();
            reader.Close();
            return output;
        }
    }
}

不要调用 PdfFormField#ToString():PdfFormField 没有重写该方法,因此它只会返回对象的类型名(例如 iText.Forms.Fields.PdfTextFormField)。要获取字段中填写的值,应改为调用 PdfFormField#GetValueAsString()。

完整代码:

using System;
using System.Collections.Generic;
using iText.Forms;
using iText.Forms.Fields;
using iText.Kernel.Pdf;

namespace ConsoleApplication1 {
    /// <summary>
    /// Reads the AcroForm fields of a PDF with iText and reports their values.
    /// </summary>
    class Class1 {
        /// <summary>
        /// Builds one "name: value" line for every form field of the PDF at
        /// <paramref name="pdfPath"/>, using <c>PdfFormField.GetValueAsString()</c>
        /// to obtain the value stored in each field.
        /// </summary>
        /// <param name="pdfPath">Path of the PDF file to read. The file itself is only read.</param>
        /// <returns>
        /// The "name: value" lines, each terminated by "\n". Empty when the document has no
        /// AcroForm or the form has no fields.
        /// </returns>
        /// <remarks>
        /// The same text is also saved next to the PDF, under the same file name with a
        /// ".txt" extension. Writing it to <paramref name="pdfPath"/> itself would replace
        /// the source PDF with plain text (or fail while the reader still holds the file).
        /// </remarks>
        public string pdfthree(string pdfPath) {
            System.Text.StringBuilder output = new System.Text.StringBuilder();

            // using blocks release the file handle even when parsing throws.
            using (PdfReader reader = new PdfReader(pdfPath))
            using (PdfDocument document = new PdfDocument(reader)) {
                // With createIfNotExist == false, GetAcroForm returns null for a PDF without a form.
                PdfAcroForm acroForm = PdfAcroForm.GetAcroForm(document, false);
                if (acroForm != null) {
                    foreach (KeyValuePair<string, PdfFormField> field in acroForm.GetFormFields()) {
                        output.Append(field.Key)
                              .Append(": ")
                              .Append(field.Value.GetValueAsString())
                              .Append("\n");
                    }
                }
            }

            string result = output.ToString();

            // Written only after the document is closed, and never over the input PDF.
            System.IO.File.WriteAllText(System.IO.Path.ChangeExtension(pdfPath, ".txt"), result);
            return result;
        }
    }
}