如何在使用 Apache POI 读取 Word 文档时获取脚注中的超链接?
How to get footnote hyperlink while reading a Word document using Apache POI?
我正在使用 Apache POI 将 Word 文档转换为 HTML。我有一个 Word 文档,其中的脚注包含外部 hyperlink。我无法获取该 hyperlink 的 URL。这是我的代码:
// Fetch the hyperlink elements of the paragraph's underlying CTP bean and log how many there are.
List<CTHyperlink> links = paragraph.getCTP().getHyperlinkList();
log.debug("Count of hyperlinks="+links.size());
for (CTHyperlink ctHyperlink : links) {
// Id carried by the hyperlink element; it is used below as the lookup key.
String rId = ctHyperlink.getId();
log.debug("rid="+rId);
// Look the id up through the document object; the result is only null-checked and logged.
XWPFHyperlink link = document.getHyperlinkByID(rId);
if(link!=null) {
log.debug("link not NULL");
}else {
log.debug("link is NULL");
}
}
从上面的代码中可以看到,在我的例子中 hyperlinks 的数量是 2。我得到的 rId 是正确的,分别为“rId1”和“rId2”,但 link 始终为 NULL。
在 OOXML 中,我看到文档正文中的 hyperlinks 存储在包部件“/word/_rels/document.xml.rels”中,而脚注中的 hyperlinks 存储在包部件“/word/_rels/footnotes.xml.rels”中。这可能就是我的 link 变量为 NULL 的原因。但我不确定如何从脚注的关系部件中获取 hyperlink 元素。
你是对的。如果代码段中的 paragraph 位于 XWPFAbstractFootnoteEndnote 中,那么它属于包部件 /word/footnotes.xml 或 /word/endnotes.xml,而不是 /word/document.xml。而 XWPFDocument.getHyperlinkByID 只能获取存储在 /word/document.xml 中的超链接。
解决方案取决于代码段中 paragraph 的来源,而这部分代码你没有给出。但最简单的解决方案是先从 XWPFParagraph 中获取 XWPFHyperlinkRun,然后再从 XWPFHyperlinkRun 获取 XWPFHyperlink。如果 XWPFHyperlinkRun 的父包部件不是 XWPFDocument,则必须通过底层的 PackageRelationship 来完成此操作,因为到目前为止只有 XWPFDocument 才维护超链接列表。
在《Unable to read all content in order of a word document (docx) in Apache POI》中,我展示了如何遍历 Word 文档的基本示例。现在我扩展了这段代码,使其同时遍历脚注、尾注以及页眉和页脚,并处理找到的所有 XWPFHyperlinkRun。
示例:
import java.io.FileInputStream;
import org.apache.poi.xwpf.usermodel.*;
import org.openxmlformats.schemas.wordprocessingml.x2006.main.*;
import org.apache.poi.openxml4j.opc.PackageRelationship;
import java.util.List;
/**
 * Walks the content of a Word (.docx) document with Apache POI XWPF and prints what it finds.
 *
 * <p>The traversal covers body elements (paragraphs, tables, SDTs), the runs inside
 * paragraphs, and from each run the referenced footnotes/endnotes, comments and embedded
 * pictures. Headers and footers are traversed separately. For every hyperlink run the
 * target URL is resolved and printed, including hyperlinks that live in footnotes,
 * endnotes, headers and footers.
 */
public class WordTraverseAll {

    /**
     * Prints every picture followed by its picture data.
     *
     * @param pictures the pictures embedded in a run
     */
    static void traversePictures(List<XWPFPicture> pictures) throws Exception {
        for (XWPFPicture picture : pictures) {
            System.out.println(picture);
            XWPFPictureData pictureData = picture.getPictureData();
            System.out.println(pictureData);
        }
    }

    /**
     * Prints author and text of every comment referenced by the run.
     *
     * <p>References whose comment cannot be found in the document are skipped instead of
     * failing with a {@code NullPointerException}.
     *
     * @param run the run whose comment references are inspected
     */
    static void traverseComments(XWPFRun run) throws Exception {
        for (CTMarkup commentReference : run.getCTR().getCommentReferenceList()) {
            String commentId = String.valueOf(commentReference.getId().intValue());
            XWPFComment comment = run.getDocument().getCommentByID(commentId);
            if (comment == null) {
                // Dangling reference: nothing to print for it.
                continue;
            }
            System.out.println("Comment from " + comment.getAuthor() + ": " + comment.getText());
        }
    }

    /**
     * Traverses the content of every footnote and endnote referenced by the run.
     *
     * @param run the run whose footnote and endnote references are followed
     */
    static void traverseFootnotes(XWPFRun run) throws Exception {
        for (CTFtnEdnRef reference : run.getCTR().getFootnoteReferenceList()) {
            traverseNote(run.getDocument().getFootnoteByID(reference.getId().intValue()));
        }
        for (CTFtnEdnRef reference : run.getCTR().getEndnoteReferenceList()) {
            traverseNote(run.getDocument().getEndnoteByID(reference.getId().intValue()));
        }
    }

    /** Traverses the runs of all paragraphs of one footnote or endnote; a missing note is ignored. */
    private static void traverseNote(XWPFAbstractFootnoteEndnote note) throws Exception {
        if (note == null) {
            return;
        }
        for (XWPFParagraph paragraph : note.getParagraphs()) {
            traverseRunElements(paragraph.getIRuns());
        }
    }

    /**
     * Prints each run element and descends into whatever it references.
     *
     * <p>{@code XWPFFieldRun} and {@code XWPFHyperlinkRun} are tested before the general
     * {@code XWPFRun} branch so that they get their dedicated handling.
     *
     * @param runElements the run-level elements of a paragraph
     */
    static void traverseRunElements(List<IRunElement> runElements) throws Exception {
        for (IRunElement runElement : runElements) {
            if (runElement instanceof XWPFFieldRun) {
                XWPFFieldRun fieldRun = (XWPFFieldRun) runElement;
                System.out.println(fieldRun);
                traversePictures(fieldRun.getEmbeddedPictures());
            } else if (runElement instanceof XWPFHyperlinkRun) {
                XWPFHyperlinkRun hyperlinkRun = (XWPFHyperlinkRun) runElement;
                XWPFHyperlink hyperlink = resolveHyperlink(hyperlinkRun);
                System.out.print(hyperlinkRun);
                if (hyperlink != null) {
                    System.out.println("->" + hyperlink.getURL());
                } else {
                    // Always end the line so the next element is not glued to this run.
                    System.out.println();
                }
                traversePictures(hyperlinkRun.getEmbeddedPictures());
            } else if (runElement instanceof XWPFRun) {
                XWPFRun run = (XWPFRun) runElement;
                System.out.println(run);
                traverseFootnotes(run);
                traverseComments(run);
                traversePictures(run.getEmbeddedPictures());
            } else if (runElement instanceof XWPFSDT) {
                XWPFSDT sDT = (XWPFSDT) runElement;
                System.out.println(sDT);
                System.out.println(sDT.getContent());
                //ToDo: The SDT may have traversable content too.
            }
        }
    }

    /**
     * Resolves the hyperlink target of a hyperlink run.
     *
     * <p>For runs in the main document the document's own lookup is used. For runs in
     * footnotes/endnotes or headers/footers the target is read from the relationships of
     * the package part that contains the run, because the document lookup does not cover
     * those parts.
     *
     * @return the hyperlink, or {@code null} when the run has no hyperlink id, the id has
     *         no relationship in the owning part, or the owning part is of another kind
     */
    private static XWPFHyperlink resolveHyperlink(XWPFHyperlinkRun hyperlinkRun) throws Exception {
        String rId = hyperlinkRun.getHyperlinkId();
        if (rId == null) {
            // No relationship id on the run (presumably an in-document anchor link).
            return null;
        }
        Object part = hyperlinkRun.getParent().getPart();
        if (part instanceof XWPFAbstractFootnotesEndnotes || part instanceof XWPFHeaderFooter) {
            PackageRelationship rel = hyperlinkRun.getParent().getPart().getPackagePart()
                    .getRelationships().getRelationshipByID(rId);
            if (rel == null) {
                return null;
            }
            return new XWPFHyperlink(rId, rel.getTargetURI().toString());
        }
        if (part instanceof XWPFDocument) {
            return hyperlinkRun.getDocument().getHyperlinkByID(rId);
        }
        return null;
    }

    /**
     * Traverses the cells of one table row. SDT cells are only printed; regular cells
     * are traversed as nested body content.
     *
     * @param tableICells the cells of a table row
     */
    static void traverseTableCells(List<ICell> tableICells) throws Exception {
        for (ICell tableICell : tableICells) {
            if (tableICell instanceof XWPFSDTCell) {
                XWPFSDTCell sDTCell = (XWPFSDTCell) tableICell;
                System.out.println(sDTCell);
                //ToDo: The SDTCell may have traversable content too.
            } else if (tableICell instanceof XWPFTableCell) {
                XWPFTableCell tableCell = (XWPFTableCell) tableICell;
                traverseBodyElements(tableCell.getBodyElements());
            }
        }
    }

    /**
     * Traverses the cells of every given table row.
     *
     * @param tableRows the rows of a table
     */
    static void traverseTableRows(List<XWPFTableRow> tableRows) throws Exception {
        for (XWPFTableRow tableRow : tableRows) {
            traverseTableCells(tableRow.getTableICells());
        }
    }

    /**
     * Traverses body-level elements: paragraphs by their runs, tables by their rows;
     * SDTs are printed together with their content.
     *
     * @param bodyElements the body elements of a document, header, footer or table cell
     */
    static void traverseBodyElements(List<IBodyElement> bodyElements) throws Exception {
        for (IBodyElement bodyElement : bodyElements) {
            if (bodyElement instanceof XWPFParagraph) {
                XWPFParagraph paragraph = (XWPFParagraph) bodyElement;
                traverseRunElements(paragraph.getIRuns());
            } else if (bodyElement instanceof XWPFSDT) {
                XWPFSDT sDT = (XWPFSDT) bodyElement;
                System.out.println(sDT);
                System.out.println(sDT.getContent());
                //ToDo: The SDT may have traversable content too.
            } else if (bodyElement instanceof XWPFTable) {
                XWPFTable table = (XWPFTable) bodyElement;
                traverseTableRows(table.getRows());
            }
        }
    }

    /**
     * Traverses the body elements of all headers, then of all footers, of the document.
     *
     * @param document the document whose headers and footers are traversed
     */
    static void traverseHeaderFooterElements(XWPFDocument document) throws Exception {
        for (XWPFHeader header : document.getHeaderList()) {
            traverseBodyElements(header.getBodyElements());
        }
        for (XWPFFooter footer : document.getFooterList()) {
            traverseBodyElements(footer.getBodyElements());
        }
    }

    /**
     * Opens {@code WordHavingHyperlinks.docx} from the working directory and prints its
     * body content followed by its header and footer content.
     */
    public static void main(String[] args) throws Exception {
        // try-with-resources closes both the stream and the document even if traversal throws.
        try (FileInputStream in = new FileInputStream("WordHavingHyperlinks.docx");
             XWPFDocument document = new XWPFDocument(in)) {
            System.out.println("===== Document body elements =====");
            traverseBodyElements(document.getBodyElements());
            System.out.println("===== Header and footer elements =====");
            traverseHeaderFooterElements(document);
        }
    }
}
我正在使用 Apache POI 将 Word 文档转换为 HTML。我有一个 Word 文档,其中的脚注包含外部 hyperlink。我无法获取该 hyperlink 的 URL。这是我的代码:
// Fetch the hyperlink elements of the paragraph's underlying CTP bean and log how many there are.
List<CTHyperlink> links = paragraph.getCTP().getHyperlinkList();
log.debug("Count of hyperlinks="+links.size());
for (CTHyperlink ctHyperlink : links) {
// Id carried by the hyperlink element; it is used below as the lookup key.
String rId = ctHyperlink.getId();
log.debug("rid="+rId);
// Look the id up through the document object; the result is only null-checked and logged.
XWPFHyperlink link = document.getHyperlinkByID(rId);
if(link!=null) {
log.debug("link not NULL");
}else {
log.debug("link is NULL");
}
}
从上面的代码中可以看到,在我的例子中 hyperlinks 的数量是 2。我得到的 rId 是正确的,分别为“rId1”和“rId2”,但 link 始终为 NULL。
在 OOXML 中,我看到文档正文中的 hyperlinks 存储在包部件“/word/_rels/document.xml.rels”中,而脚注中的 hyperlinks 存储在包部件“/word/_rels/footnotes.xml.rels”中。这可能就是我的 link 变量为 NULL 的原因。但我不确定如何从脚注的关系部件中获取 hyperlink 元素。
你是对的。如果代码段中的 paragraph 位于 XWPFAbstractFootnoteEndnote 中,那么它属于包部件 /word/footnotes.xml 或 /word/endnotes.xml,而不是 /word/document.xml。而 XWPFDocument.getHyperlinkByID 只能获取存储在 /word/document.xml 中的超链接。
解决方案取决于代码段中 paragraph 的来源,而这部分代码你没有给出。但最简单的解决方案是先从 XWPFParagraph 中获取 XWPFHyperlinkRun,然后再从 XWPFHyperlinkRun 获取 XWPFHyperlink。如果 XWPFHyperlinkRun 的父包部件不是 XWPFDocument,则必须通过底层的 PackageRelationship 来完成此操作,因为到目前为止只有 XWPFDocument 才维护超链接列表。
在《Unable to read all content in order of a word document (docx) in Apache POI》中,我展示了如何遍历 Word 文档的基本示例。现在我扩展了这段代码,使其同时遍历脚注、尾注以及页眉和页脚,并处理找到的所有 XWPFHyperlinkRun。
示例:
import java.io.FileInputStream;
import org.apache.poi.xwpf.usermodel.*;
import org.openxmlformats.schemas.wordprocessingml.x2006.main.*;
import org.apache.poi.openxml4j.opc.PackageRelationship;
import java.util.List;
/**
 * Walks the content of a Word (.docx) document with Apache POI XWPF and prints what it finds.
 *
 * <p>The traversal covers body elements (paragraphs, tables, SDTs), the runs inside
 * paragraphs, and from each run the referenced footnotes/endnotes, comments and embedded
 * pictures. Headers and footers are traversed separately. For every hyperlink run the
 * target URL is resolved and printed, including hyperlinks that live in footnotes,
 * endnotes, headers and footers.
 */
public class WordTraverseAll {

    /**
     * Prints every picture followed by its picture data.
     *
     * @param pictures the pictures embedded in a run
     */
    static void traversePictures(List<XWPFPicture> pictures) throws Exception {
        for (XWPFPicture picture : pictures) {
            System.out.println(picture);
            XWPFPictureData pictureData = picture.getPictureData();
            System.out.println(pictureData);
        }
    }

    /**
     * Prints author and text of every comment referenced by the run.
     *
     * <p>References whose comment cannot be found in the document are skipped instead of
     * failing with a {@code NullPointerException}.
     *
     * @param run the run whose comment references are inspected
     */
    static void traverseComments(XWPFRun run) throws Exception {
        for (CTMarkup commentReference : run.getCTR().getCommentReferenceList()) {
            String commentId = String.valueOf(commentReference.getId().intValue());
            XWPFComment comment = run.getDocument().getCommentByID(commentId);
            if (comment == null) {
                // Dangling reference: nothing to print for it.
                continue;
            }
            System.out.println("Comment from " + comment.getAuthor() + ": " + comment.getText());
        }
    }

    /**
     * Traverses the content of every footnote and endnote referenced by the run.
     *
     * @param run the run whose footnote and endnote references are followed
     */
    static void traverseFootnotes(XWPFRun run) throws Exception {
        for (CTFtnEdnRef reference : run.getCTR().getFootnoteReferenceList()) {
            traverseNote(run.getDocument().getFootnoteByID(reference.getId().intValue()));
        }
        for (CTFtnEdnRef reference : run.getCTR().getEndnoteReferenceList()) {
            traverseNote(run.getDocument().getEndnoteByID(reference.getId().intValue()));
        }
    }

    /** Traverses the runs of all paragraphs of one footnote or endnote; a missing note is ignored. */
    private static void traverseNote(XWPFAbstractFootnoteEndnote note) throws Exception {
        if (note == null) {
            return;
        }
        for (XWPFParagraph paragraph : note.getParagraphs()) {
            traverseRunElements(paragraph.getIRuns());
        }
    }

    /**
     * Prints each run element and descends into whatever it references.
     *
     * <p>{@code XWPFFieldRun} and {@code XWPFHyperlinkRun} are tested before the general
     * {@code XWPFRun} branch so that they get their dedicated handling.
     *
     * @param runElements the run-level elements of a paragraph
     */
    static void traverseRunElements(List<IRunElement> runElements) throws Exception {
        for (IRunElement runElement : runElements) {
            if (runElement instanceof XWPFFieldRun) {
                XWPFFieldRun fieldRun = (XWPFFieldRun) runElement;
                System.out.println(fieldRun);
                traversePictures(fieldRun.getEmbeddedPictures());
            } else if (runElement instanceof XWPFHyperlinkRun) {
                XWPFHyperlinkRun hyperlinkRun = (XWPFHyperlinkRun) runElement;
                XWPFHyperlink hyperlink = resolveHyperlink(hyperlinkRun);
                System.out.print(hyperlinkRun);
                if (hyperlink != null) {
                    System.out.println("->" + hyperlink.getURL());
                } else {
                    // Always end the line so the next element is not glued to this run.
                    System.out.println();
                }
                traversePictures(hyperlinkRun.getEmbeddedPictures());
            } else if (runElement instanceof XWPFRun) {
                XWPFRun run = (XWPFRun) runElement;
                System.out.println(run);
                traverseFootnotes(run);
                traverseComments(run);
                traversePictures(run.getEmbeddedPictures());
            } else if (runElement instanceof XWPFSDT) {
                XWPFSDT sDT = (XWPFSDT) runElement;
                System.out.println(sDT);
                System.out.println(sDT.getContent());
                //ToDo: The SDT may have traversable content too.
            }
        }
    }

    /**
     * Resolves the hyperlink target of a hyperlink run.
     *
     * <p>For runs in the main document the document's own lookup is used. For runs in
     * footnotes/endnotes or headers/footers the target is read from the relationships of
     * the package part that contains the run, because the document lookup does not cover
     * those parts.
     *
     * @return the hyperlink, or {@code null} when the run has no hyperlink id, the id has
     *         no relationship in the owning part, or the owning part is of another kind
     */
    private static XWPFHyperlink resolveHyperlink(XWPFHyperlinkRun hyperlinkRun) throws Exception {
        String rId = hyperlinkRun.getHyperlinkId();
        if (rId == null) {
            // No relationship id on the run (presumably an in-document anchor link).
            return null;
        }
        Object part = hyperlinkRun.getParent().getPart();
        if (part instanceof XWPFAbstractFootnotesEndnotes || part instanceof XWPFHeaderFooter) {
            PackageRelationship rel = hyperlinkRun.getParent().getPart().getPackagePart()
                    .getRelationships().getRelationshipByID(rId);
            if (rel == null) {
                return null;
            }
            return new XWPFHyperlink(rId, rel.getTargetURI().toString());
        }
        if (part instanceof XWPFDocument) {
            return hyperlinkRun.getDocument().getHyperlinkByID(rId);
        }
        return null;
    }

    /**
     * Traverses the cells of one table row. SDT cells are only printed; regular cells
     * are traversed as nested body content.
     *
     * @param tableICells the cells of a table row
     */
    static void traverseTableCells(List<ICell> tableICells) throws Exception {
        for (ICell tableICell : tableICells) {
            if (tableICell instanceof XWPFSDTCell) {
                XWPFSDTCell sDTCell = (XWPFSDTCell) tableICell;
                System.out.println(sDTCell);
                //ToDo: The SDTCell may have traversable content too.
            } else if (tableICell instanceof XWPFTableCell) {
                XWPFTableCell tableCell = (XWPFTableCell) tableICell;
                traverseBodyElements(tableCell.getBodyElements());
            }
        }
    }

    /**
     * Traverses the cells of every given table row.
     *
     * @param tableRows the rows of a table
     */
    static void traverseTableRows(List<XWPFTableRow> tableRows) throws Exception {
        for (XWPFTableRow tableRow : tableRows) {
            traverseTableCells(tableRow.getTableICells());
        }
    }

    /**
     * Traverses body-level elements: paragraphs by their runs, tables by their rows;
     * SDTs are printed together with their content.
     *
     * @param bodyElements the body elements of a document, header, footer or table cell
     */
    static void traverseBodyElements(List<IBodyElement> bodyElements) throws Exception {
        for (IBodyElement bodyElement : bodyElements) {
            if (bodyElement instanceof XWPFParagraph) {
                XWPFParagraph paragraph = (XWPFParagraph) bodyElement;
                traverseRunElements(paragraph.getIRuns());
            } else if (bodyElement instanceof XWPFSDT) {
                XWPFSDT sDT = (XWPFSDT) bodyElement;
                System.out.println(sDT);
                System.out.println(sDT.getContent());
                //ToDo: The SDT may have traversable content too.
            } else if (bodyElement instanceof XWPFTable) {
                XWPFTable table = (XWPFTable) bodyElement;
                traverseTableRows(table.getRows());
            }
        }
    }

    /**
     * Traverses the body elements of all headers, then of all footers, of the document.
     *
     * @param document the document whose headers and footers are traversed
     */
    static void traverseHeaderFooterElements(XWPFDocument document) throws Exception {
        for (XWPFHeader header : document.getHeaderList()) {
            traverseBodyElements(header.getBodyElements());
        }
        for (XWPFFooter footer : document.getFooterList()) {
            traverseBodyElements(footer.getBodyElements());
        }
    }

    /**
     * Opens {@code WordHavingHyperlinks.docx} from the working directory and prints its
     * body content followed by its header and footer content.
     */
    public static void main(String[] args) throws Exception {
        // try-with-resources closes both the stream and the document even if traversal throws.
        try (FileInputStream in = new FileInputStream("WordHavingHyperlinks.docx");
             XWPFDocument document = new XWPFDocument(in)) {
            System.out.println("===== Document body elements =====");
            traverseBodyElements(document.getBodyElements());
            System.out.println("===== Header and footer elements =====");
            traverseHeaderFooterElements(document);
        }
    }
}