Java: 如何使用iText从PDF文件中提取选定区域的文本?
Java: How to extract text by a selected area from a PDF file using iText?
我正在开发一个程序,用于从 PDF 文件的特定区域提取文本,使用的是 Java 和 iText 库。
现在,我可以使用以下代码通过点击区域坐标来提取数据:
import java.io.IOException;
import com.itextpdf.text.Rectangle;
import com.itextpdf.text.pdf.PdfReader;
import com.itextpdf.text.pdf.parser.FilteredTextRenderListener;
import com.itextpdf.text.pdf.parser.LocationTextExtractionStrategy;
import com.itextpdf.text.pdf.parser.PdfTextExtractor;
import com.itextpdf.text.pdf.parser.RegionTextRenderFilter;
import com.itextpdf.text.pdf.parser.RenderFilter;
import com.itextpdf.text.pdf.parser.TextExtractionStrategy;
/**
 * Prints the text found inside a rectangular region on every page of a PDF.
 *
 * <p>Created by Malek Boubakri on 03/06/2015 at 15:45.
 */
public class ExtractPageContentArea {

    /**
     * Extracts the text located inside the given region of each page and prints it to standard
     * output, one page after another.
     *
     * <p>The region is given as an origin plus a size. iText's {@code Rectangle} constructor
     * takes two corners (lower-left x/y, upper-right x/y), so the size is converted to the
     * opposite corner before the filter is built.
     *
     * @param x      x coordinate of the region's origin corner
     * @param y      y coordinate of the region's origin corner
     * @param width  horizontal extent of the region
     * @param height vertical extent of the region
     * @param pdf    path of the PDF file handed to {@code PdfReader}
     * @throws IOException if the PDF cannot be opened or parsed
     */
    public void parsePdf(float x, float y, float width, float height, String pdf)
            throws IOException {
        PdfReader reader = new PdfReader(pdf);
        try {
            // Rectangle expects corners, not a size: pass the opposite corner explicitly.
            Rectangle rect = new Rectangle(x, y, x + width, y + height);
            RenderFilter filter = new RegionTextRenderFilter(rect);
            int pageCount = reader.getNumberOfPages();
            for (int page = 1; page <= pageCount; page++) {
                // A fresh strategy per page so text from earlier pages is not carried over.
                TextExtractionStrategy strategy = new FilteredTextRenderListener(
                        new LocationTextExtractionStrategy(), filter);
                System.out.println(PdfTextExtractor.getTextFromPage(reader, page, strategy));
            }
        } finally {
            // Release the reader even when extraction fails part-way through.
            reader.close();
        }
    }
}
下面这段代码可以绘制矩形并保存所需的坐标:
import java.awt.BorderLayout;
import java.awt.Graphics;
import java.awt.Rectangle;
import java.awt.event.MouseEvent;
import java.awt.event.MouseListener;
import java.awt.event.MouseMotionListener;
import java.util.ArrayList;
import javax.swing.JFrame;
import javax.swing.JLabel;
import javax.swing.SwingConstants;
/**
 * Demo frame for drawing rectangles with the mouse.
 *
 * <p>The frame listens to its own mouse and mouse-motion events, shows the latest event position
 * and the start/end points of the current drag in labels, and outlines both the rectangle being
 * dragged and every rectangle completed so far.
 */
public class MouseTracker extends JFrame implements MouseListener, MouseMotionListener {
    private static final long serialVersionUID = 1L;

    private final JLabel mousePosition;
    // Corners of the drag in progress: (x1, y1) is the press point, (x2, y2) the latest point.
    int x1, x2, y1, y2;
    // Size of the rectangle currently being dragged; reset to zero once the drag ends.
    int w, h;
    private final JLabel recStart;
    private final JLabel recStop;
    private final JLabel cords;
    private final ArrayList< Rectangle > rectangles = new ArrayList< Rectangle >();
    // True while no drag is in progress, so paint() skips the in-progress rectangle.
    private boolean isNewRect = true;

    /** Sets up the GUI and registers the mouse event handlers. */
    public MouseTracker() {
        super( "Rectangle Drawer" );
        this.mousePosition = new JLabel();
        this.mousePosition.setHorizontalAlignment( SwingConstants.CENTER );
        getContentPane().add( this.mousePosition, BorderLayout.CENTER );
        JLabel text1 = new JLabel();
        text1.setText( "At the center the mouse pointer's coordinates will be displayed." );
        getContentPane().add( text1, BorderLayout.SOUTH );
        this.recStart = new JLabel();
        getContentPane().add( this.recStart, BorderLayout.WEST );
        this.recStop = new JLabel();
        getContentPane().add( this.recStop, BorderLayout.EAST );
        this.cords = new JLabel();
        getContentPane().add( this.cords, BorderLayout.NORTH );
        updateSizeLabel();
        addMouseListener( this ); // listens for own mouse and
        addMouseMotionListener( this ); // mouse-motion events
        setSize( 800, 600 );
        setVisible( true );
    }

    // MouseListener event handlers

    /** Handles a click (mouse released immediately after press). */
    @Override
    public void mouseClicked( final MouseEvent event ) {
        this.mousePosition.setText( "Clicked at [" + event.getX() + ", " + event.getY() + "]" );
        repaint();
    }

    /** Records the press point as the first corner of a new rectangle. */
    @Override
    public void mousePressed( final MouseEvent event ) {
        this.x1 = event.getX();
        this.y1 = event.getY();
        this.mousePosition.setText( "Pressed at [" + this.x1 + ", " + this.y1 + "]" );
        this.recStart.setText( "Start: [" + this.x1 + ", " + this.y1 + "]" );
        repaint();
    }

    /** Stores the finished rectangle and resets the drag state. */
    @Override
    public void mouseReleased( final MouseEvent event ) {
        this.x2 = event.getX();
        this.y2 = event.getY();
        this.mousePosition.setText( "Released at [" + this.x2 + ", " + this.y2 + "]" );
        this.recStop.setText( "End: [" + this.x2 + ", " + this.y2 + "]" );
        this.rectangles.add( getRectangleFromPoints() );
        this.w = this.h = this.x1 = this.y1 = this.x2 = this.y2 = 0;
        this.isNewRect = true;
        updateSizeLabel();
        repaint();
    }

    /** Builds a rectangle with non-negative size from the two drag corners, in any order. */
    private Rectangle getRectangleFromPoints() {
        int width = this.x1 - this.x2;
        int height = this.y1 - this.y2;
        int left = width < 0 ? this.x1 : this.x2;
        int top = height < 0 ? this.y1 : this.y2;
        return new Rectangle( left, top, Math.abs( width ), Math.abs( height ) );
    }

    /** Shows the current drag size in the top label. */
    private void updateSizeLabel() {
        this.cords.setText( "w = " + this.w + ", h = " + this.h );
    }

    /** Handles the mouse entering the frame. */
    @Override
    public void mouseEntered( final MouseEvent event ) {
        this.mousePosition.setText( "Mouse entered at [" + event.getX() + ", " + event.getY() + "]" );
        repaint();
    }

    /** Handles the mouse leaving the frame. */
    @Override
    public void mouseExited( final MouseEvent event ) {
        this.mousePosition.setText( "Mouse outside window" );
        repaint();
    }

    // MouseMotionListener event handlers

    /** Tracks the second corner while the user drags with a button pressed. */
    @Override
    public void mouseDragged( final MouseEvent event ) {
        this.x2 = event.getX();
        this.y2 = event.getY();
        this.mousePosition.setText( "Dragged at [" + this.x2 + ", " + this.y2 + "]" );
        // Keep w/h in step with the drag; previously they were never assigned, so the size
        // label always showed zero.
        this.w = Math.abs( this.x2 - this.x1 );
        this.h = Math.abs( this.y2 - this.y1 );
        updateSizeLabel();
        this.isNewRect = false;
        repaint();
    }

    /** Handles plain mouse movement (no button pressed). */
    @Override
    public void mouseMoved( final MouseEvent event ) {
        this.mousePosition.setText( "Moved at [" + event.getX() + ", " + event.getY() + "]" );
        repaint();
    }

    /**
     * Paints the frame, then the corner captions, the rectangle being dragged (if any) and all
     * completed rectangles. Label text is updated in the event handlers, not here, so painting
     * does not change component state.
     */
    @Override
    public void paint( final Graphics g ) {
        super.paint( g ); // clear the frame surface
        g.drawString( "Start Rec Here", this.x1, this.y1 );
        g.drawString( "End Rec Here", this.x2, this.y2 );
        if ( !this.isNewRect ) {
            Rectangle newRectangle = getRectangleFromPoints();
            g.drawRect( newRectangle.x, newRectangle.y, newRectangle.width, newRectangle.height );
        }
        for ( Rectangle rectangle : this.rectangles ) {
            g.drawRect( rectangle.x, rectangle.y, rectangle.width, rectangle.height );
        }
    }

    /** Opens the demo frame and makes closing it end the program. */
    public static void main( final String args[] ) {
        MouseTracker application = new MouseTracker();
        application.setDefaultCloseOperation( JFrame.EXIT_ON_CLOSE );
    }
}
我想使用这些坐标来指定 PDF 文件中的区域,但我真的不知道如何把这两个功能合并起来:如何将绘图区域放在文档上方,以及如何使矩形坐标与文本坐标相匹配。
如何在另一个面板上方绘制?
我应该将 PDF 转换为图像并将其放在后面吗?
如果我应该的话,请任何人推荐一个好的免费 OCR 库!
如果有任何模糊之处,请发表评论!
谁能给我指个方向?因为我真的不知道该从哪里入手。
等待您的帮助..谢谢(抱歉我的英语不好)
你有一个非常有趣的问题和一个具有挑战性的项目。此 "answer" 可能会提供一些有用的想法,但它不是一个完整的解决方案。
您可以使用所谓的 glass pane 在其他组件之上绘制。
我认为您需要决定的最重要的事情是哪些库最适合您的项目。 iText library 非常好,提供各种 pdf 功能,例如您在问题中显示的文本提取。
但是,据我所知,iText 不支持查看 pdf。您可以为此使用像 ICEpdf 这样的库(参见此示例)。如果 ICEpdf 也能支持文本提取,那就太好了,这样您就可以只使用一个库,而不必让 ICEpdf 与 iText 或 OCR 一起工作(并处理诸如在 ICEpdf 中缩放 pdf 后、在获取文本时进行补偿等问题)。
我不确定您是否可以使用 ICEpdf 提取文本,因此在下面的示例代码中目前仍使用 iText:
// File ExtractSelectionFromPdf.java
import com.itextpdf.text.Rectangle;
import com.itextpdf.text.pdf.PdfReader;
import com.itextpdf.text.pdf.parser.*;
import java.awt.Container;
import java.awt.Point;
import java.io.IOException;
import javax.swing.*;
/**
 * Demo entry point: shows a PDF in a {@link PdfViewer}, lets the user drag a rectangle on a
 * glass pane above it, and prints the text iText extracts for that rectangle.
 */
public class ExtractSelectionFromPdf {
    private static final String filePath = "[file path to a pdf file]";
    private PdfViewer pdfViewer;

    /** Starts the GUI on the Swing event dispatch thread. */
    public static void main(final String[] arguments) {
        SwingUtilities.invokeLater(() -> new ExtractSelectionFromPdf().launchGUI());
    }

    /** Builds the frame, opens the document and installs the selection glass pane. */
    private void launchGUI() {
        final JFrame frame = new JFrame("Extract selected text from a pdf");
        frame.setDefaultCloseOperation(WindowConstants.EXIT_ON_CLOSE);
        final Container contentPane = frame.getContentPane();

        pdfViewer = new PdfViewer();
        contentPane.add(pdfViewer);
        pdfViewer.openDocument(filePath);

        final CustomGlassPane customGlassPane = new CustomGlassPane(this, contentPane);
        frame.setGlassPane(customGlassPane);
        customGlassPane.setVisible(true);

        frame.setBounds(60, 10, 1800, 1000);
        frame.setVisible(true);
    }

    /**
     * Extracts and prints the text inside the rectangle spanned by the two points.
     *
     * <p>NOTE(review): the points are passed to iText unchanged. No conversion for zoom,
     * scrolling or axis orientation is applied here, so the extracted region may not match
     * what was drawn on screen; confirm and convert as needed.
     *
     * @param topLeft     first corner of the selection; ignored together with
     *                    {@code bottomRight} when either is {@code null}
     * @param bottomRight opposite corner of the selection
     */
    public void handleSelection(final Point topLeft, final Point bottomRight) {
        if (topLeft == null || bottomRight == null) {
            // Nothing was selected (e.g. a release without a recorded press).
            return;
        }
        final int width = bottomRight.x - topLeft.x;
        // Height is a difference of y coordinates (was mistakenly bottomRight.y - topLeft.x).
        final int height = bottomRight.y - topLeft.y;
        final String text = parsePdf(topLeft.x, topLeft.y, width, height, filePath);
        System.out.println("text: " + text);
    }

    /**
     * Extracts the text inside the given region of the page currently shown by the viewer.
     *
     * @param x           x coordinate of the region's origin corner
     * @param y           y coordinate of the region's origin corner
     * @param width       horizontal extent of the region
     * @param height      vertical extent of the region
     * @param pdfFilePath path of the PDF file handed to {@code PdfReader}
     * @return the extracted text, or {@code null} if reading the PDF failed with an
     *         {@code IOException}
     */
    public String parsePdf(final int x, final int y, final int width, final int height,
                           final String pdfFilePath) {
        String text = null;
        PdfReader pdfReader = null;
        try {
            pdfReader = new PdfReader(pdfFilePath);
            // The viewer's page number is incremented by one before being handed to iText.
            final int pageNumber = pdfViewer.getCurrentPageNumber() + 1;
            System.out.println("Page number: " + pageNumber);

            // iText's Rectangle takes two corners, not a size: pass the opposite corner.
            final Rectangle selection = new Rectangle(x, y, x + width, y + height);
            final RenderFilter renderFilter = new RegionTextRenderFilter(selection);
            final LocationTextExtractionStrategy delegate
                    = new LocationTextExtractionStrategy();
            final TextExtractionStrategy extractionStrategy
                    = new FilteredTextRenderListener(delegate, renderFilter);
            text = PdfTextExtractor.getTextFromPage(pdfReader, pageNumber,
                                                    extractionStrategy);
        } catch (final IOException e) {
            e.printStackTrace();
        } finally {
            // Close the reader on every path, including extraction failures.
            if (pdfReader != null) {
                pdfReader.close();
            }
        }
        return text;
    }
}
// File PdfViewer.java
import java.util.ResourceBundle;
import javax.swing.*;
import org.icepdf.ri.common.*;
import org.icepdf.ri.common.views.DocumentViewController;
import org.icepdf.ri.util.PropertiesManager;
/**
 * Panel that embeds an ICEpdf viewer inside a scroll pane whose scroll bars are always shown.
 */
public class PdfViewer extends JPanel {
    private final SwingController controller;

    /** Creates the ICEpdf controller, builds the viewer panel and adds it to this panel. */
    public PdfViewer() {
        controller = new SwingController();
        controller.setIsEmbeddedComponent(true);

        final String bundleName = PropertiesManager.DEFAULT_MESSAGE_BUNDLE;
        final ResourceBundle messageBundle = ResourceBundle.getBundle(bundleName);
        // The system properties are passed inline so that no java.util.Properties import is
        // needed; this file does not import that type.
        final PropertiesManager properties
                = new PropertiesManager(System.getProperties(), messageBundle);
        properties.set(PropertiesManager.PROPERTY_DEFAULT_ZOOM_LEVEL, "1");

        final SwingViewBuilder factory = new SwingViewBuilder(controller, properties);
        final DocumentViewController viewController
                = controller.getDocumentViewController();
        viewController.setAnnotationCallback(new MyAnnotationCallback(viewController));

        final JScrollPane scrollPane = new JScrollPane(factory.buildViewerPanel());
        final int horizontalPolicy = ScrollPaneConstants.HORIZONTAL_SCROLLBAR_ALWAYS;
        final int verticalPolicy = ScrollPaneConstants.VERTICAL_SCROLLBAR_ALWAYS;
        scrollPane.setHorizontalScrollBarPolicy(horizontalPolicy);
        scrollPane.setVerticalScrollBarPolicy(verticalPolicy);
        add(scrollPane);
    }

    /**
     * Opens the document at the given path in the embedded viewer.
     *
     * @param filePath path passed to the controller's {@code openDocument}
     */
    public void openDocument(final String filePath) {
        controller.openDocument(filePath);
    }

    /**
     * Returns the page number reported by the controller.
     *
     * @return the controller's current page number
     */
    public int getCurrentPageNumber() {
        return controller.getCurrentPageNumber();
    }
}
// File CustomGlassPane.java
import java.awt.*;
import javax.swing.JComponent;
/**
 * Glass pane that draws the outline of the current selection rectangle and routes its mouse
 * events through a {@link MouseEventsListener}.
 */
public class CustomGlassPane extends JComponent {
    private Point topLeftPoint;
    private Point bottomRightPoint;

    /**
     * Creates the glass pane and registers one listener for both mouse and mouse-motion events.
     *
     * @param extractSelectionFromPdf handed to the listener
     * @param contentPane             handed to the listener
     */
    public CustomGlassPane(final ExtractSelectionFromPdf extractSelectionFromPdf,
                           final Container contentPane) {
        final MouseEventsListener listener
                = new MouseEventsListener(extractSelectionFromPdf, this, contentPane);
        addMouseListener(listener);
        addMouseMotionListener(listener);
    }

    /**
     * Sets the two corners of the selection to draw; pass {@code null} for either corner to
     * draw nothing.
     *
     * @param topLeftPoint     first corner of the selection, or {@code null}
     * @param bottomRightPoint opposite corner of the selection, or {@code null}
     */
    public void setSelection(final Point topLeftPoint, final Point bottomRightPoint) {
        this.topLeftPoint = topLeftPoint;
        this.bottomRightPoint = bottomRightPoint;
    }

    /** Draws the selection outline in black when both corners are set. */
    @Override
    protected void paintComponent(final Graphics graphics) {
        if (topLeftPoint == null || bottomRightPoint == null) {
            return;
        }
        // Normalize the corners so the outline is also drawn when the user drags up or to
        // the left; a negative width or height would otherwise be passed to drawRect.
        final int left = Math.min(topLeftPoint.x, bottomRightPoint.x);
        final int top = Math.min(topLeftPoint.y, bottomRightPoint.y);
        final int width = Math.abs(bottomRightPoint.x - topLeftPoint.x);
        final int height = Math.abs(bottomRightPoint.y - topLeftPoint.y);
        graphics.setColor(Color.BLACK);
        graphics.drawRect(left, top, width, height);
    }
}
// File MouseEventsListener.java
import java.awt.*;
import java.awt.event.MouseEvent;
import javax.swing.SwingUtilities;
import javax.swing.event.MouseInputAdapter;
/**
 * Mouse listener for the glass pane: records the selection corners, forwards every event to
 * the component underneath, and triggers repainting and text extraction.
 */
public class MouseEventsListener extends MouseInputAdapter {
    private final ExtractSelectionFromPdf extractSelectionFromPdf;
    private final CustomGlassPane customGlassPane;
    private final Container contentPane;
    private Point topLeftPoint;
    private Point bottomRightPoint;

    /**
     * @param extractSelectionFromPdf receives the selection when the mouse is released
     * @param customGlassPane         glass pane whose events are handled and which is repainted
     * @param contentPane             container searched for the component under the pointer
     */
    public MouseEventsListener(final ExtractSelectionFromPdf extractSelectionFromPdf,
                               final CustomGlassPane customGlassPane,
                               final Container contentPane) {
        this.extractSelectionFromPdf = extractSelectionFromPdf;
        this.customGlassPane = customGlassPane;
        this.contentPane = contentPane;
    }

    /** Remembers the press position as the first corner, then forwards the event. */
    @Override
    public void mousePressed(final MouseEvent mouseEvent) {
        topLeftPoint = mouseEvent.getPoint();
        redispatchMouseEvent(mouseEvent, false, false);
    }

    /** Updates the second corner; repaints only when a first corner has been recorded. */
    @Override
    public void mouseDragged(final MouseEvent mouseEvent) {
        bottomRightPoint = mouseEvent.getPoint();
        final boolean haveStart = topLeftPoint != null;
        redispatchMouseEvent(mouseEvent, haveStart, false);
    }

    /** Fixes the second corner, then forwards, extracts and repaints. */
    @Override
    public void mouseReleased(final MouseEvent mouseEvent) {
        bottomRightPoint = mouseEvent.getPoint();
        redispatchMouseEvent(mouseEvent, true, true);
    }

    @Override
    public void mouseMoved(final MouseEvent mouseEvent) {
        redispatchMouseEvent(mouseEvent, false, false);
    }

    @Override
    public void mouseClicked(final MouseEvent mouseEvent) {
        redispatchMouseEvent(mouseEvent, false, false);
    }

    @Override
    public void mouseEntered(final MouseEvent mouseEvent) {
        redispatchMouseEvent(mouseEvent, false, false);
    }

    @Override
    public void mouseExited(final MouseEvent mouseEvent) {
        redispatchMouseEvent(mouseEvent, false, false);
    }

    /**
     * Forwards the event to the component below the glass pane and then, if requested,
     * refreshes the glass pane.
     *
     * @param mouseEvent event received by the glass pane
     * @param repaint    whether to push the current selection to the glass pane and repaint it
     * @param extract    whether to hand the selection over for extraction and clear it first;
     *                   only considered when {@code repaint} is {@code true}
     */
    private void redispatchMouseEvent(final MouseEvent mouseEvent,
                                      final boolean repaint,
                                      final boolean extract) {
        forwardToUnderlyingComponent(mouseEvent);

        if (!repaint) {
            return;
        }
        if (extract) {
            extractSelectionFromPdf.handleSelection(topLeftPoint, bottomRightPoint);
            // The selection is consumed: clearing it makes the glass pane draw nothing.
            topLeftPoint = null;
            bottomRightPoint = null;
        }
        customGlassPane.setSelection(topLeftPoint, bottomRightPoint);
        customGlassPane.repaint();
    }

    /** Re-creates the event for the deepest component under the pointer and dispatches it. */
    private void forwardToUnderlyingComponent(final MouseEvent mouseEvent) {
        final Point onGlassPane = mouseEvent.getPoint();
        final Point onContentPane
                = SwingUtilities.convertPoint(customGlassPane, onGlassPane, contentPane);
        if (onContentPane.y < 0) {
            // Pointer is above the content pane: nothing to forward to.
            return;
        }

        final Component target = SwingUtilities.getDeepestComponentAt(
                contentPane, onContentPane.x, onContentPane.y);
        if (target == null) {
            return;
        }

        final Point onTarget
                = SwingUtilities.convertPoint(customGlassPane, onGlassPane, target);
        final MouseEvent forwarded = new MouseEvent(target,
                                                    mouseEvent.getID(),
                                                    mouseEvent.getWhen(),
                                                    mouseEvent.getModifiers(),
                                                    onTarget.x,
                                                    onTarget.y,
                                                    mouseEvent.getClickCount(),
                                                    mouseEvent.isPopupTrigger());
        target.dispatchEvent(forwarded);
    }
}
上面代码的玻璃面板部分的灵感来自 GlassPaneDemo 示例。
以上代码中已知的遗留问题:
- 出于某种原因,在页面 Up/Down 和箭头 Up/Down 键起作用之前,必须单击一次 pdf 查看器的向下滚动按钮。
- 当前实际提取的文本似乎在所选矩形下方。
经过多次尝试,我确信 "GlassPane" 不是我的应用程序或任何类似应用程序的正确解决方案。因为:
它不能只覆盖在某个特定区域或组件之上。
它只能用于根窗格。
阅读 PDF 文件的最佳方法是将其转换为图片,然后在 ImagePane 上阅读。
在这种情况下不需要 OCR。
我现在正在研究另一个解决方案,一切顺利。
如果有人也在做这样的项目,请评论、标记或为问题点赞,我随时可以提供任何细节。
我正在开发一个程序,用于从 PDF 文件的特定区域提取文本,使用的是 Java 和 iText 库。
import java.io.IOException;
import com.itextpdf.text.Rectangle;
import com.itextpdf.text.pdf.PdfReader;
import com.itextpdf.text.pdf.parser.FilteredTextRenderListener;
import com.itextpdf.text.pdf.parser.LocationTextExtractionStrategy;
import com.itextpdf.text.pdf.parser.PdfTextExtractor;
import com.itextpdf.text.pdf.parser.RegionTextRenderFilter;
import com.itextpdf.text.pdf.parser.RenderFilter;
import com.itextpdf.text.pdf.parser.TextExtractionStrategy;
/**
 * Prints the text found inside a rectangular region on every page of a PDF.
 *
 * <p>Created by Malek Boubakri on 03/06/2015 at 15:45.
 */
public class ExtractPageContentArea {

    /**
     * Extracts the text located inside the given region of each page and prints it to standard
     * output, one page after another.
     *
     * <p>The region is given as an origin plus a size. iText's {@code Rectangle} constructor
     * takes two corners (lower-left x/y, upper-right x/y), so the size is converted to the
     * opposite corner before the filter is built.
     *
     * @param x      x coordinate of the region's origin corner
     * @param y      y coordinate of the region's origin corner
     * @param width  horizontal extent of the region
     * @param height vertical extent of the region
     * @param pdf    path of the PDF file handed to {@code PdfReader}
     * @throws IOException if the PDF cannot be opened or parsed
     */
    public void parsePdf(float x, float y, float width, float height, String pdf)
            throws IOException {
        PdfReader reader = new PdfReader(pdf);
        try {
            // Rectangle expects corners, not a size: pass the opposite corner explicitly.
            Rectangle rect = new Rectangle(x, y, x + width, y + height);
            RenderFilter filter = new RegionTextRenderFilter(rect);
            int pageCount = reader.getNumberOfPages();
            for (int page = 1; page <= pageCount; page++) {
                // A fresh strategy per page so text from earlier pages is not carried over.
                TextExtractionStrategy strategy = new FilteredTextRenderListener(
                        new LocationTextExtractionStrategy(), filter);
                System.out.println(PdfTextExtractor.getTextFromPage(reader, page, strategy));
            }
        } finally {
            // Release the reader even when extraction fails part-way through.
            reader.close();
        }
    }
}
下面这段代码可以绘制矩形并保存所需的坐标:
import java.awt.BorderLayout;
import java.awt.Graphics;
import java.awt.Rectangle;
import java.awt.event.MouseEvent;
import java.awt.event.MouseListener;
import java.awt.event.MouseMotionListener;
import java.util.ArrayList;
import javax.swing.JFrame;
import javax.swing.JLabel;
import javax.swing.SwingConstants;
/**
 * Demo frame for drawing rectangles with the mouse.
 *
 * <p>The frame listens to its own mouse and mouse-motion events, shows the latest event position
 * and the start/end points of the current drag in labels, and outlines both the rectangle being
 * dragged and every rectangle completed so far.
 */
public class MouseTracker extends JFrame implements MouseListener, MouseMotionListener {
    private static final long serialVersionUID = 1L;

    private final JLabel mousePosition;
    // Corners of the drag in progress: (x1, y1) is the press point, (x2, y2) the latest point.
    int x1, x2, y1, y2;
    // Size of the rectangle currently being dragged; reset to zero once the drag ends.
    int w, h;
    private final JLabel recStart;
    private final JLabel recStop;
    private final JLabel cords;
    private final ArrayList< Rectangle > rectangles = new ArrayList< Rectangle >();
    // True while no drag is in progress, so paint() skips the in-progress rectangle.
    private boolean isNewRect = true;

    /** Sets up the GUI and registers the mouse event handlers. */
    public MouseTracker() {
        super( "Rectangle Drawer" );
        this.mousePosition = new JLabel();
        this.mousePosition.setHorizontalAlignment( SwingConstants.CENTER );
        getContentPane().add( this.mousePosition, BorderLayout.CENTER );
        JLabel text1 = new JLabel();
        text1.setText( "At the center the mouse pointer's coordinates will be displayed." );
        getContentPane().add( text1, BorderLayout.SOUTH );
        this.recStart = new JLabel();
        getContentPane().add( this.recStart, BorderLayout.WEST );
        this.recStop = new JLabel();
        getContentPane().add( this.recStop, BorderLayout.EAST );
        this.cords = new JLabel();
        getContentPane().add( this.cords, BorderLayout.NORTH );
        updateSizeLabel();
        addMouseListener( this ); // listens for own mouse and
        addMouseMotionListener( this ); // mouse-motion events
        setSize( 800, 600 );
        setVisible( true );
    }

    // MouseListener event handlers

    /** Handles a click (mouse released immediately after press). */
    @Override
    public void mouseClicked( final MouseEvent event ) {
        this.mousePosition.setText( "Clicked at [" + event.getX() + ", " + event.getY() + "]" );
        repaint();
    }

    /** Records the press point as the first corner of a new rectangle. */
    @Override
    public void mousePressed( final MouseEvent event ) {
        this.x1 = event.getX();
        this.y1 = event.getY();
        this.mousePosition.setText( "Pressed at [" + this.x1 + ", " + this.y1 + "]" );
        this.recStart.setText( "Start: [" + this.x1 + ", " + this.y1 + "]" );
        repaint();
    }

    /** Stores the finished rectangle and resets the drag state. */
    @Override
    public void mouseReleased( final MouseEvent event ) {
        this.x2 = event.getX();
        this.y2 = event.getY();
        this.mousePosition.setText( "Released at [" + this.x2 + ", " + this.y2 + "]" );
        this.recStop.setText( "End: [" + this.x2 + ", " + this.y2 + "]" );
        this.rectangles.add( getRectangleFromPoints() );
        this.w = this.h = this.x1 = this.y1 = this.x2 = this.y2 = 0;
        this.isNewRect = true;
        updateSizeLabel();
        repaint();
    }

    /** Builds a rectangle with non-negative size from the two drag corners, in any order. */
    private Rectangle getRectangleFromPoints() {
        int width = this.x1 - this.x2;
        int height = this.y1 - this.y2;
        int left = width < 0 ? this.x1 : this.x2;
        int top = height < 0 ? this.y1 : this.y2;
        return new Rectangle( left, top, Math.abs( width ), Math.abs( height ) );
    }

    /** Shows the current drag size in the top label. */
    private void updateSizeLabel() {
        this.cords.setText( "w = " + this.w + ", h = " + this.h );
    }

    /** Handles the mouse entering the frame. */
    @Override
    public void mouseEntered( final MouseEvent event ) {
        this.mousePosition.setText( "Mouse entered at [" + event.getX() + ", " + event.getY() + "]" );
        repaint();
    }

    /** Handles the mouse leaving the frame. */
    @Override
    public void mouseExited( final MouseEvent event ) {
        this.mousePosition.setText( "Mouse outside window" );
        repaint();
    }

    // MouseMotionListener event handlers

    /** Tracks the second corner while the user drags with a button pressed. */
    @Override
    public void mouseDragged( final MouseEvent event ) {
        this.x2 = event.getX();
        this.y2 = event.getY();
        this.mousePosition.setText( "Dragged at [" + this.x2 + ", " + this.y2 + "]" );
        // Keep w/h in step with the drag; previously they were never assigned, so the size
        // label always showed zero.
        this.w = Math.abs( this.x2 - this.x1 );
        this.h = Math.abs( this.y2 - this.y1 );
        updateSizeLabel();
        this.isNewRect = false;
        repaint();
    }

    /** Handles plain mouse movement (no button pressed). */
    @Override
    public void mouseMoved( final MouseEvent event ) {
        this.mousePosition.setText( "Moved at [" + event.getX() + ", " + event.getY() + "]" );
        repaint();
    }

    /**
     * Paints the frame, then the corner captions, the rectangle being dragged (if any) and all
     * completed rectangles. Label text is updated in the event handlers, not here, so painting
     * does not change component state.
     */
    @Override
    public void paint( final Graphics g ) {
        super.paint( g ); // clear the frame surface
        g.drawString( "Start Rec Here", this.x1, this.y1 );
        g.drawString( "End Rec Here", this.x2, this.y2 );
        if ( !this.isNewRect ) {
            Rectangle newRectangle = getRectangleFromPoints();
            g.drawRect( newRectangle.x, newRectangle.y, newRectangle.width, newRectangle.height );
        }
        for ( Rectangle rectangle : this.rectangles ) {
            g.drawRect( rectangle.x, rectangle.y, rectangle.width, rectangle.height );
        }
    }

    /** Opens the demo frame and makes closing it end the program. */
    public static void main( final String args[] ) {
        MouseTracker application = new MouseTracker();
        application.setDefaultCloseOperation( JFrame.EXIT_ON_CLOSE );
    }
}
我想使用这些坐标来指定 PDF 文件中的区域,但我真的不知道如何把这两个功能合并起来:如何将绘图区域放在文档上方,以及如何使矩形坐标与文本坐标相匹配。
如何在另一个面板上方绘制?
我应该将 PDF 转换为图像并将其放在后面吗?
如果我应该的话,请任何人推荐一个好的免费 OCR 库!
如果有任何不清楚的地方,请发表评论!谁能给我指个方向?因为我真的不知道该从哪里入手。
等待您的帮助..谢谢(抱歉我的英语不好)
你有一个非常有趣的问题和一个具有挑战性的项目。此 "answer" 可能会提供一些有用的想法,但它不是一个完整的解决方案。
您可以使用所谓的 glass pane 在其他组件之上绘制。
我认为您需要决定的最重要的事情是哪些库最适合您的项目。 iText library 非常好,提供各种 pdf 功能,例如您在问题中显示的文本提取。
但是,据我所知,iText 不支持查看 pdf。您可以为此使用像 ICEpdf 这样的库(参见此示例)。如果 ICEpdf 也能支持文本提取,那就太好了,这样您就可以只使用一个库,而不必让 ICEpdf 与 iText 或 OCR 一起工作(并处理诸如在 ICEpdf 中缩放 pdf 后、在获取文本时进行补偿等问题)。
我不确定您是否可以使用 ICEpdf 提取文本,因此在下面的示例代码中目前仍使用 iText:
// File ExtractSelectionFromPdf.java
import com.itextpdf.text.Rectangle;
import com.itextpdf.text.pdf.PdfReader;
import com.itextpdf.text.pdf.parser.*;
import java.awt.Container;
import java.awt.Point;
import java.io.IOException;
import javax.swing.*;
/**
 * Demo entry point: shows a PDF in a {@link PdfViewer}, lets the user drag a rectangle on a
 * glass pane above it, and prints the text iText extracts for that rectangle.
 */
public class ExtractSelectionFromPdf {
    private static final String filePath = "[file path to a pdf file]";
    private PdfViewer pdfViewer;

    /** Starts the GUI on the Swing event dispatch thread. */
    public static void main(final String[] arguments) {
        SwingUtilities.invokeLater(() -> new ExtractSelectionFromPdf().launchGUI());
    }

    /** Builds the frame, opens the document and installs the selection glass pane. */
    private void launchGUI() {
        final JFrame frame = new JFrame("Extract selected text from a pdf");
        frame.setDefaultCloseOperation(WindowConstants.EXIT_ON_CLOSE);
        final Container contentPane = frame.getContentPane();

        pdfViewer = new PdfViewer();
        contentPane.add(pdfViewer);
        pdfViewer.openDocument(filePath);

        final CustomGlassPane customGlassPane = new CustomGlassPane(this, contentPane);
        frame.setGlassPane(customGlassPane);
        customGlassPane.setVisible(true);

        frame.setBounds(60, 10, 1800, 1000);
        frame.setVisible(true);
    }

    /**
     * Extracts and prints the text inside the rectangle spanned by the two points.
     *
     * <p>NOTE(review): the points are passed to iText unchanged. No conversion for zoom,
     * scrolling or axis orientation is applied here, so the extracted region may not match
     * what was drawn on screen; confirm and convert as needed.
     *
     * @param topLeft     first corner of the selection; ignored together with
     *                    {@code bottomRight} when either is {@code null}
     * @param bottomRight opposite corner of the selection
     */
    public void handleSelection(final Point topLeft, final Point bottomRight) {
        if (topLeft == null || bottomRight == null) {
            // Nothing was selected (e.g. a release without a recorded press).
            return;
        }
        final int width = bottomRight.x - topLeft.x;
        // Height is a difference of y coordinates (was mistakenly bottomRight.y - topLeft.x).
        final int height = bottomRight.y - topLeft.y;
        final String text = parsePdf(topLeft.x, topLeft.y, width, height, filePath);
        System.out.println("text: " + text);
    }

    /**
     * Extracts the text inside the given region of the page currently shown by the viewer.
     *
     * @param x           x coordinate of the region's origin corner
     * @param y           y coordinate of the region's origin corner
     * @param width       horizontal extent of the region
     * @param height      vertical extent of the region
     * @param pdfFilePath path of the PDF file handed to {@code PdfReader}
     * @return the extracted text, or {@code null} if reading the PDF failed with an
     *         {@code IOException}
     */
    public String parsePdf(final int x, final int y, final int width, final int height,
                           final String pdfFilePath) {
        String text = null;
        PdfReader pdfReader = null;
        try {
            pdfReader = new PdfReader(pdfFilePath);
            // The viewer's page number is incremented by one before being handed to iText.
            final int pageNumber = pdfViewer.getCurrentPageNumber() + 1;
            System.out.println("Page number: " + pageNumber);

            // iText's Rectangle takes two corners, not a size: pass the opposite corner.
            final Rectangle selection = new Rectangle(x, y, x + width, y + height);
            final RenderFilter renderFilter = new RegionTextRenderFilter(selection);
            final LocationTextExtractionStrategy delegate
                    = new LocationTextExtractionStrategy();
            final TextExtractionStrategy extractionStrategy
                    = new FilteredTextRenderListener(delegate, renderFilter);
            text = PdfTextExtractor.getTextFromPage(pdfReader, pageNumber,
                                                    extractionStrategy);
        } catch (final IOException e) {
            e.printStackTrace();
        } finally {
            // Close the reader on every path, including extraction failures.
            if (pdfReader != null) {
                pdfReader.close();
            }
        }
        return text;
    }
}
// File PdfViewer.java
import java.util.ResourceBundle;
import javax.swing.*;
import org.icepdf.ri.common.*;
import org.icepdf.ri.common.views.DocumentViewController;
import org.icepdf.ri.util.PropertiesManager;
/**
 * Panel that embeds an ICEpdf viewer inside a scroll pane whose scroll bars are always shown.
 */
public class PdfViewer extends JPanel {
    private final SwingController controller;

    /** Creates the ICEpdf controller, builds the viewer panel and adds it to this panel. */
    public PdfViewer() {
        controller = new SwingController();
        controller.setIsEmbeddedComponent(true);

        final String bundleName = PropertiesManager.DEFAULT_MESSAGE_BUNDLE;
        final ResourceBundle messageBundle = ResourceBundle.getBundle(bundleName);
        // The system properties are passed inline so that no java.util.Properties import is
        // needed; this file does not import that type.
        final PropertiesManager properties
                = new PropertiesManager(System.getProperties(), messageBundle);
        properties.set(PropertiesManager.PROPERTY_DEFAULT_ZOOM_LEVEL, "1");

        final SwingViewBuilder factory = new SwingViewBuilder(controller, properties);
        final DocumentViewController viewController
                = controller.getDocumentViewController();
        viewController.setAnnotationCallback(new MyAnnotationCallback(viewController));

        final JScrollPane scrollPane = new JScrollPane(factory.buildViewerPanel());
        final int horizontalPolicy = ScrollPaneConstants.HORIZONTAL_SCROLLBAR_ALWAYS;
        final int verticalPolicy = ScrollPaneConstants.VERTICAL_SCROLLBAR_ALWAYS;
        scrollPane.setHorizontalScrollBarPolicy(horizontalPolicy);
        scrollPane.setVerticalScrollBarPolicy(verticalPolicy);
        add(scrollPane);
    }

    /**
     * Opens the document at the given path in the embedded viewer.
     *
     * @param filePath path passed to the controller's {@code openDocument}
     */
    public void openDocument(final String filePath) {
        controller.openDocument(filePath);
    }

    /**
     * Returns the page number reported by the controller.
     *
     * @return the controller's current page number
     */
    public int getCurrentPageNumber() {
        return controller.getCurrentPageNumber();
    }
}
// File CustomGlassPane.java
import java.awt.*;
import javax.swing.JComponent;
/**
 * Glass pane that draws the outline of the current selection rectangle and routes its mouse
 * events through a {@link MouseEventsListener}.
 */
public class CustomGlassPane extends JComponent {
    private Point topLeftPoint;
    private Point bottomRightPoint;

    /**
     * Creates the glass pane and registers one listener for both mouse and mouse-motion events.
     *
     * @param extractSelectionFromPdf handed to the listener
     * @param contentPane             handed to the listener
     */
    public CustomGlassPane(final ExtractSelectionFromPdf extractSelectionFromPdf,
                           final Container contentPane) {
        final MouseEventsListener listener
                = new MouseEventsListener(extractSelectionFromPdf, this, contentPane);
        addMouseListener(listener);
        addMouseMotionListener(listener);
    }

    /**
     * Sets the two corners of the selection to draw; pass {@code null} for either corner to
     * draw nothing.
     *
     * @param topLeftPoint     first corner of the selection, or {@code null}
     * @param bottomRightPoint opposite corner of the selection, or {@code null}
     */
    public void setSelection(final Point topLeftPoint, final Point bottomRightPoint) {
        this.topLeftPoint = topLeftPoint;
        this.bottomRightPoint = bottomRightPoint;
    }

    /** Draws the selection outline in black when both corners are set. */
    @Override
    protected void paintComponent(final Graphics graphics) {
        if (topLeftPoint == null || bottomRightPoint == null) {
            return;
        }
        // Normalize the corners so the outline is also drawn when the user drags up or to
        // the left; a negative width or height would otherwise be passed to drawRect.
        final int left = Math.min(topLeftPoint.x, bottomRightPoint.x);
        final int top = Math.min(topLeftPoint.y, bottomRightPoint.y);
        final int width = Math.abs(bottomRightPoint.x - topLeftPoint.x);
        final int height = Math.abs(bottomRightPoint.y - topLeftPoint.y);
        graphics.setColor(Color.BLACK);
        graphics.drawRect(left, top, width, height);
    }
}
// File MouseEventsListener.java
import java.awt.*;
import java.awt.event.MouseEvent;
import javax.swing.SwingUtilities;
import javax.swing.event.MouseInputAdapter;
/**
 * Mouse listener for the glass pane: records the selection corners, forwards every event to
 * the component underneath, and triggers repainting and text extraction.
 */
public class MouseEventsListener extends MouseInputAdapter {
    private final ExtractSelectionFromPdf extractSelectionFromPdf;
    private final CustomGlassPane customGlassPane;
    private final Container contentPane;
    private Point topLeftPoint;
    private Point bottomRightPoint;

    /**
     * @param extractSelectionFromPdf receives the selection when the mouse is released
     * @param customGlassPane         glass pane whose events are handled and which is repainted
     * @param contentPane             container searched for the component under the pointer
     */
    public MouseEventsListener(final ExtractSelectionFromPdf extractSelectionFromPdf,
                               final CustomGlassPane customGlassPane,
                               final Container contentPane) {
        this.extractSelectionFromPdf = extractSelectionFromPdf;
        this.customGlassPane = customGlassPane;
        this.contentPane = contentPane;
    }

    /** Remembers the press position as the first corner, then forwards the event. */
    @Override
    public void mousePressed(final MouseEvent mouseEvent) {
        topLeftPoint = mouseEvent.getPoint();
        redispatchMouseEvent(mouseEvent, false, false);
    }

    /** Updates the second corner; repaints only when a first corner has been recorded. */
    @Override
    public void mouseDragged(final MouseEvent mouseEvent) {
        bottomRightPoint = mouseEvent.getPoint();
        final boolean haveStart = topLeftPoint != null;
        redispatchMouseEvent(mouseEvent, haveStart, false);
    }

    /** Fixes the second corner, then forwards, extracts and repaints. */
    @Override
    public void mouseReleased(final MouseEvent mouseEvent) {
        bottomRightPoint = mouseEvent.getPoint();
        redispatchMouseEvent(mouseEvent, true, true);
    }

    @Override
    public void mouseMoved(final MouseEvent mouseEvent) {
        redispatchMouseEvent(mouseEvent, false, false);
    }

    @Override
    public void mouseClicked(final MouseEvent mouseEvent) {
        redispatchMouseEvent(mouseEvent, false, false);
    }

    @Override
    public void mouseEntered(final MouseEvent mouseEvent) {
        redispatchMouseEvent(mouseEvent, false, false);
    }

    @Override
    public void mouseExited(final MouseEvent mouseEvent) {
        redispatchMouseEvent(mouseEvent, false, false);
    }

    /**
     * Forwards the event to the component below the glass pane and then, if requested,
     * refreshes the glass pane.
     *
     * @param mouseEvent event received by the glass pane
     * @param repaint    whether to push the current selection to the glass pane and repaint it
     * @param extract    whether to hand the selection over for extraction and clear it first;
     *                   only considered when {@code repaint} is {@code true}
     */
    private void redispatchMouseEvent(final MouseEvent mouseEvent,
                                      final boolean repaint,
                                      final boolean extract) {
        forwardToUnderlyingComponent(mouseEvent);

        if (!repaint) {
            return;
        }
        if (extract) {
            extractSelectionFromPdf.handleSelection(topLeftPoint, bottomRightPoint);
            // The selection is consumed: clearing it makes the glass pane draw nothing.
            topLeftPoint = null;
            bottomRightPoint = null;
        }
        customGlassPane.setSelection(topLeftPoint, bottomRightPoint);
        customGlassPane.repaint();
    }

    /** Re-creates the event for the deepest component under the pointer and dispatches it. */
    private void forwardToUnderlyingComponent(final MouseEvent mouseEvent) {
        final Point onGlassPane = mouseEvent.getPoint();
        final Point onContentPane
                = SwingUtilities.convertPoint(customGlassPane, onGlassPane, contentPane);
        if (onContentPane.y < 0) {
            // Pointer is above the content pane: nothing to forward to.
            return;
        }

        final Component target = SwingUtilities.getDeepestComponentAt(
                contentPane, onContentPane.x, onContentPane.y);
        if (target == null) {
            return;
        }

        final Point onTarget
                = SwingUtilities.convertPoint(customGlassPane, onGlassPane, target);
        final MouseEvent forwarded = new MouseEvent(target,
                                                    mouseEvent.getID(),
                                                    mouseEvent.getWhen(),
                                                    mouseEvent.getModifiers(),
                                                    onTarget.x,
                                                    onTarget.y,
                                                    mouseEvent.getClickCount(),
                                                    mouseEvent.isPopupTrigger());
        target.dispatchEvent(forwarded);
    }
}
上面代码的玻璃面板部分的灵感来自 GlassPaneDemo 示例。
以上代码中已知的遗留问题:
- 出于某种原因,在页面 Up/Down 和箭头 Up/Down 键起作用之前,必须单击一次 pdf 查看器的向下滚动按钮。
- 当前实际提取的文本似乎在所选矩形下方。
经过多次尝试,我确信 "GlassPane" 不是我的应用程序或任何类似应用程序的正确解决方案。因为:
它不能只覆盖在某个特定区域或组件之上。
它只能用于根窗格。
阅读 PDF 文件的最佳方法是将其转换为图片,然后在 ImagePane 上阅读。
在这种情况下不需要 OCR。
我现在正在研究另一个解决方案,一切顺利。如果有人也在做这样的项目,请评论、标记或为问题点赞,我随时可以提供任何细节。