通过使用 poi 将内容插入新页面来合并文档
Merge documents by inserting content into a new page using poi
我有几个文档,我想将它们全部合并到一个 docx 文件中。
我的代码:
import java.io.InputStream;
import java.io.OutputStream;
import java.util.ArrayList;
import java.util.List;
import org.apache.poi.openxml4j.opc.OPCPackage;
import org.apache.poi.xwpf.usermodel.XWPFDocument;
import org.openxmlformats.schemas.wordprocessingml.x2006.main.CTBody;
/**
 * Merges several .docx documents by appending the body of every added document to the
 * first one and writing the first document to the given output stream.
 *
 * <p>NOTE: {@link #add(InputStream)} copies only the raw body XML of the later documents.
 * Whatever that XML references by ID (numbering definitions, relationship IDs of embedded
 * media, ...) is neither copied nor renumbered.
 */
public class WordMerge {
    private final OutputStream result;
    private final List<InputStream> inputs;
    private XWPFDocument first;

    /**
     * @param result stream the merged document is written to by {@link #doMerge()}
     */
    public WordMerge(OutputStream result) {
        this.result = result;
        inputs = new ArrayList<>();
    }

    /**
     * Opens the given stream as a Word document. The first successfully opened document
     * becomes the merge target; the body of every later one is appended to it.
     *
     * @param stream .docx content; it is remembered and closed by {@link #close()}
     * @throws Exception if the stream cannot be opened as a Word document
     */
    public void add(InputStream stream) throws Exception {
        inputs.add(stream);
        OPCPackage srcPackage = OPCPackage.open(stream);
        XWPFDocument srcDocument = new XWPFDocument(srcPackage);
        // Decide on "first" by the field instead of inputs.size(): the stream is registered
        // before it is opened, so after a failed first open the size no longer tells whether
        // a merge target exists.
        if (first == null) {
            first = srcDocument;
        } else {
            CTBody srcBody = srcDocument.getDocument().getBody();
            first.getDocument().addNewBody().set(srcBody);
        }
    }

    /**
     * Writes the merged document to the result stream.
     *
     * @throws IllegalStateException if no document has been added yet
     * @throws Exception if writing fails
     */
    public void doMerge() throws Exception {
        if (first == null) {
            throw new IllegalStateException("no document has been added");
        }
        first.write(result);
    }

    /**
     * Flushes and closes the result stream and closes every added input stream. All streams
     * are attempted even if one of them fails; the first failure is rethrown afterwards with
     * the later ones attached as suppressed exceptions.
     *
     * @throws Exception the first failure that occurred while flushing or closing
     */
    public void close() throws Exception {
        Exception failure = null;
        try {
            result.flush();
        } catch (Exception e) {
            failure = e;
        }
        try {
            result.close();
        } catch (Exception e) {
            failure = remember(failure, e);
        }
        for (InputStream input : inputs) {
            try {
                input.close();
            } catch (Exception e) {
                failure = remember(failure, e);
            }
        }
        if (failure != null) {
            throw failure;
        }
    }

    /** Keeps the first failure and attaches any later one to it as suppressed. */
    private static Exception remember(Exception firstFailure, Exception next) {
        if (firstFailure == null) {
            return next;
        }
        firstFailure.addSuppressed(next);
        return firstFailure;
    }
}
用法如下:
/**
 * Example usage: merges two documents into {@code /home/victor/result.docx}.
 *
 * @param args unused
 * @throws Exception if reading, merging or writing fails
 */
public static void main(String[] args) throws Exception {
    FileOutputStream faos = new FileOutputStream("/home/victor/result.docx");
    WordMerge wm = new WordMerge(faos);
    try {
        wm.add( new FileInputStream("/home/victor/001.docx") );
        wm.add( new FileInputStream("/home/victor/002.docx") );
        wm.doMerge();
    } finally {
        // release the output and input streams even when adding or merging fails
        wm.close();
    }
}
它有效,不幸的是,如果您在任何非第一个文档中都有列表,它会变得有点混乱。列表符号变为数字,更糟糕的是,有时前一个文档中的列表将在附加文档中继续。假设 doc1 有 a.b.c 列表,第二个有未排序的列表,那么后一个变成 d.e.f。 (沿袭了之前的文档格式。)
如何让每个文档合并到下一页,而不是按照上一个文档的格式?
您的代码仅将多个 CTBody 元素附加到文档中。但这不是 Word 文档的结构。“它有效”是因为 Microsoft Word 具有足够的宽容度来解释它。但是当涉及到 Word 文档结构中的引用时,它就会失败。
例如,编号定义由 ID 引用。对于不同文档中的不同定义,这可以是相同的 ID。因此,第一个文档中的 ID 1 可能指向十进制编号,而第二个文档中的 ID 1 可能指向项目符号编号。所以numIDs需要合并,而不仅仅是复制。
嵌入式媒体(例如图像)由 rID 引用,所以 CTBody 只包含 ID,媒体本身存储在文档主体之外。因此,如果文档主体引用了 rID 为 rID12 的图片,而该图片并未被存储,则文档已损坏。
许多其他文档元素也是如此。
所以那个方法根本没用。
需要遍历待追加文档的所有正文(body)元素,然后将找到的每个正文元素附加到第一个文档,并更新它的引用。
以下是展示原理的工作稿。它还没有准备好。例如,它不考虑超链接、脚注、评论、结构化文档标签等等。正如您从所需的大量代码中看到的那样,考虑所有可能的事情将是一项非常费力的任务。为了避免更多的代码,如果可能的话,我只是简单地复制底层的 XML bean。这也应该更好地形成以用于生产用途。但是从这段代码中应该可以看出原理。
当方法和变量的名称没有自我解释时,代码被注释了。
代码已经过测试,可以在当前的 Apache POI 5.1.0 下运行。以前的版本未经测试,也不应使用,因为它们对 XWPF 的支持更少。
代码需要 Apache POI FAQ 中提到的所有模式的完整 jar。
import java.io.FileInputStream;
import java.io.FileOutputStream;
import org.apache.poi.xwpf.usermodel.*;
import org.apache.poi.util.Units;
import java.util.List;
import java.util.Map;
import java.util.HashMap;
import java.math.BigInteger;
public class WordMerger {
// to handle merging numID: numID of the appended document -> numID created in the result document
private Map<BigInteger, BigInteger> numIDs = null;

/** Creates a merger whose numID mapping starts out empty. */
public WordMerger() {
    numIDs = new HashMap<>();
}
/**
 * Walks the body elements of the document to append and recreates each of them in
 * {@code resultBody}.
 *
 * <p>Paragraphs are recreated together with their runs and tables together with their rows;
 * structured document tags (SDT) are only replaced by a placeholder paragraph so far.
 *
 * @param bodyElements body elements of the source body, in document order
 * @param resultBody   body (document or table cell) that receives the copies
 * @throws Exception if copying one of the nested elements fails
 */
private void traverseBodyElements(List<IBodyElement> bodyElements, IBody resultBody) throws Exception {
    for (IBodyElement bodyElement : bodyElements) {
        if (bodyElement instanceof XWPFParagraph) {
            XWPFParagraph paragraph = (XWPFParagraph) bodyElement;
            XWPFParagraph resultParagraph = createParagraphWithPPr(paragraph, resultBody);
            // null means the target body type is not supported yet; there is nothing to copy runs into
            if (resultParagraph != null) {
                traverseRunElements(paragraph.getIRuns(), resultParagraph);
            }
        } else if (bodyElement instanceof XWPFSDT) {
            createSDT((XWPFSDT) bodyElement, resultBody);
            //ToDo: handle further issues ...
        } else if (bodyElement instanceof XWPFTable) {
            XWPFTable table = (XWPFTable) bodyElement;
            XWPFTable resultTable = createTableWithTblPrAndTblGrid(table, resultBody);
            // createTableWithTblPrAndTblGrid returns null for targets it cannot handle yet
            // (e.g. a table nested in a table cell); skip those instead of failing with a
            // NullPointerException when the rows are created.
            if (resultTable != null) {
                traverseTableRows(table.getRows(), resultTable);
            }
        }
    }
}
/**
 * Placeholder for copying a structured document tag (SDT): the SDT itself is not copied,
 * an empty paragraph is appended to the result body in its place.
 *
 * @param sDT        source SDT (not read yet)
 * @param resultBody body that receives the placeholder paragraph
 * @return always {@code null}, because no result SDT is created yet
 */
private XWPFSDT createSDT(XWPFSDT sDT, IBody resultBody) {
    //not ready yet
    //we simply add paragraphs to avoid corrupted documents
    if (resultBody instanceof XWPFDocument) {
        ((XWPFDocument) resultBody).createParagraph();
        //ToDo: handle further issues ...
    } else if (resultBody instanceof XWPFTableCell) {
        ((XWPFTableCell) resultBody).addParagraph();
        //ToDo: handle further issues ...
    } //ToDo: else others ...
    //ToDo: handle SDT properly
    return null;
}
/**
 * Appends a new paragraph to {@code resultBody} and gives it the paragraph properties of
 * the source paragraph, merging the referenced styles and numbering into the result document.
 *
 * @param paragraph  source paragraph
 * @param resultBody target body; documents and table cells are supported
 * @return the new paragraph, or {@code null} if the target body type is not supported yet
 */
private XWPFParagraph createParagraphWithPPr(XWPFParagraph paragraph, IBody resultBody) {
    XWPFParagraph resultParagraph;
    if (resultBody instanceof XWPFDocument) {
        resultParagraph = ((XWPFDocument) resultBody).createParagraph();
    } else if (resultBody instanceof XWPFTableCell) {
        resultParagraph = ((XWPFTableCell) resultBody).addParagraph();
    } else {
        //ToDo: else others ...
        return null;
    }
    resultParagraph.getCTP().setPPr(paragraph.getCTP().getPPr());//simply copy the underlying XML bean to avoid more code
    handleStyles(resultBody, paragraph);
    // The copied pPr still carries the numID of the source document. It has to be remapped
    // for paragraphs in table cells as well, not only for paragraphs of the document body.
    handleNumberings(paragraph, resultParagraph);
    //ToDo: handle further issues ...
    return resultParagraph;
}
/**
 * Merges the numbering used by the source paragraph into the result document and points the
 * result paragraph to the merged numbering.
 *
 * <p>For every source numID seen for the first time, the abstract numbering is copied into
 * the result document under a new ID, a new num is created for it and the mapping
 * source numID -> result numID is remembered in {@code numIDs}.
 *
 * @param paragraph       source paragraph
 * @param resultParagraph paragraph created in the result document
 */
private void handleNumberings(XWPFParagraph paragraph, XWPFParagraph resultParagraph) {
    BigInteger numID = paragraph.getNumID();
    if (numID == null) return;
    BigInteger resultNumID = this.numIDs.get(numID);
    if (resultNumID == null) {
        XWPFDocument document = paragraph.getDocument();
        XWPFNumbering numbering = document.createNumbering();
        XWPFNum num = numbering.getNum(numID);
        // The paragraph may reference a numID that has no definition in the source numbering;
        // there is nothing to merge then, so leave the copied paragraph properties as they are.
        if (num == null) return;
        BigInteger abstractNumID = numbering.getAbstractNumID(numID);
        XWPFAbstractNum abstractNum = numbering.getAbstractNum(abstractNumID);
        if (abstractNum == null) return;
        XWPFAbstractNum resultAbstractNum = new XWPFAbstractNum((org.openxmlformats.schemas.wordprocessingml.x2006.main.CTAbstractNum)abstractNum.getCTAbstractNum().copy());
        XWPFDocument resultDocument = resultParagraph.getDocument();
        XWPFNumbering resultNumbering = resultDocument.createNumbering();
        // the copy gets the current count of abstract numberings in the result as its new ID
        int pos = resultNumbering.getAbstractNums().size();
        resultAbstractNum.getCTAbstractNum().setAbstractNumId(BigInteger.valueOf(pos));
        BigInteger resultAbstractNumID = resultNumbering.addAbstractNum(resultAbstractNum);
        resultNumID = resultNumbering.addNum(resultAbstractNumID);
        XWPFNum resultNum = resultNumbering.getNum(resultNumID);
        resultNum.getCTNum().setLvlOverrideArray(num.getCTNum().getLvlOverrideArray());
        this.numIDs.put(numID, resultNumID);
    }
    resultParagraph.setNumID(resultNumID);
}
/**
 * Copies the style used by a source paragraph or table, together with the styles related to
 * it, into the result document unless a style with the same ID already exists there.
 *
 * @param resultBody  body the copy was added to; used to find the result document
 * @param bodyElement source paragraph or table
 */
private void handleStyles(IBody resultBody, IBodyElement bodyElement) {
    XWPFDocument document = null;
    String styleID = null;
    if (bodyElement instanceof XWPFParagraph) {
        XWPFParagraph paragraph = (XWPFParagraph) bodyElement;
        document = paragraph.getDocument();
        styleID = paragraph.getStyleID();
    } else if (bodyElement instanceof XWPFTable) {
        XWPFTable table = (XWPFTable) bodyElement;
        if (table.getPart() instanceof XWPFDocument) {
            document = (XWPFDocument) table.getPart();
            styleID = table.getStyleID();
        }
    } //ToDo: else others ...
    if (document == null || styleID == null || "".equals(styleID)) return;

    XWPFDocument resultDocument = null;
    if (resultBody instanceof XWPFDocument) {
        resultDocument = (XWPFDocument) resultBody;
    } else if (resultBody instanceof XWPFTableCell) {
        resultDocument = ((XWPFTableCell) resultBody).getXWPFDocument();
    } //ToDo: else others ...
    if (resultDocument == null) return;

    // A style ID may be referenced although the source has no styles part or no such style;
    // getUsedStyleList(null) would throw a NullPointerException in that case.
    XWPFStyles styles = document.getStyles();
    if (styles == null) return;
    XWPFStyle style = styles.getStyle(styleID);
    if (style == null) return;

    // createStyles() returns the existing styles or creates them if the result has none yet
    XWPFStyles resultStyles = resultDocument.createStyles();
    //merge each used styles, also the related ones
    for (XWPFStyle relatedStyle : styles.getUsedStyleList(style)) {
        if (resultStyles.getStyle(relatedStyle.getStyleId()) == null) {
            resultStyles.addStyle(relatedStyle);
        }
    }
}
/**
 * Appends a new, row-less table to the result document and gives it the table properties
 * and the table grid of the source table.
 *
 * @param table      source table
 * @param resultBody target body; only documents are supported so far
 * @return the new table, or {@code null} if the target body is not a document
 */
private XWPFTable createTableWithTblPrAndTblGrid(XWPFTable table, IBody resultBody) {
    if (!(resultBody instanceof XWPFDocument)) {
        //ToDo: handle stacked tables (resultBody is a XWPFTableCell)
        //ToDo: else others ...
        return null;
    }
    XWPFDocument targetDocument = (XWPFDocument) resultBody;
    XWPFTable tableCopy = targetDocument.createTable();
    // drop the row at index 0 of the freshly created table; rows are added by the caller
    tableCopy.removeRow(0);
    //simply copy the underlying XML beans to avoid more code
    tableCopy.getCTTbl().setTblPr(table.getCTTbl().getTblPr());
    tableCopy.getCTTbl().setTblGrid(table.getCTTbl().getTblGrid());
    handleStyles(targetDocument, table);
    //ToDo: handle further issues ...
    return tableCopy;
}
/**
 * Recreates every run element of a source paragraph in the result run body and copies the
 * pictures embedded in it.
 *
 * @param runElements   run elements of the source paragraph, in order
 * @param resultRunBody run body that receives the copies
 * @throws Exception if copying the pictures of a run fails
 */
private void traverseRunElements(List<IRunElement> runElements, IRunBody resultRunBody) throws Exception {
    for (IRunElement element : runElements) {
        // the more specific run types are tested before the general XWPFRun
        if (element instanceof XWPFFieldRun) {
            XWPFFieldRun sourceFieldRun = (XWPFFieldRun) element;
            XWPFFieldRun fieldRunCopy = createFieldRunWithRPr(sourceFieldRun, resultRunBody);
            traversePictures(sourceFieldRun, fieldRunCopy);
            continue;
        }
        if (element instanceof XWPFHyperlinkRun) {
            XWPFHyperlinkRun sourceLinkRun = (XWPFHyperlinkRun) element;
            XWPFHyperlinkRun linkRunCopy = createHyperlinkRunWithRPr(sourceLinkRun, resultRunBody);
            traversePictures(sourceLinkRun, linkRunCopy);
            continue;
        }
        if (element instanceof XWPFRun) {
            XWPFRun sourceRun = (XWPFRun) element;
            XWPFRun runCopy = createRunWithRPr(sourceRun, resultRunBody);
            traversePictures(sourceRun, runCopy);
            continue;
        }
        //ToDo: handle SDT (element instanceof XWPFSDT)
    }
}
/**
 * Copies the content of every text (T) element of the source run to the same position in
 * the result run.
 *
 * @param run       source run
 * @param resultRun run that receives the texts
 */
private void copyTextOfRuns(XWPFRun run, XWPFRun resultRun) {
    int textCount = run.getCTR().sizeOfTArray();
    int position = 0;
    while (position < textCount) {
        resultRun.setText(run.getText(position), position);
        position++;
    }
}
/**
 * Appends a new field run to the result paragraph and copies run properties, field
 * instruction and text of the source field run to it.
 *
 * @param fieldRun      source field run
 * @param resultRunBody target run body; only paragraphs are supported so far
 * @return the new field run, or {@code null} if the target is not a paragraph
 */
private XWPFFieldRun createFieldRunWithRPr(XWPFFieldRun fieldRun, IRunBody resultRunBody) {
    if (resultRunBody instanceof XWPFParagraph) {
        XWPFParagraph resultParagraph = (XWPFParagraph) resultRunBody;
        // createRun() yields a plain XWPFRun, so casting its result to XWPFFieldRun fails
        // with a ClassCastException; createFieldRun() creates the field run directly.
        XWPFFieldRun resultFieldRun = resultParagraph.createFieldRun();
        resultFieldRun.getCTR().setRPr(fieldRun.getCTR().getRPr());//simply copy the underlying XML bean to avoid more code
        // carry over what the field is (its instruction) and its currently displayed text
        if (fieldRun.getFieldInstruction() != null) {
            resultFieldRun.setFieldInstruction(fieldRun.getFieldInstruction());
        }
        copyTextOfRuns(fieldRun, resultFieldRun);
        //ToDo: handle field runs properly ...
        handleRunStyles(resultParagraph.getDocument(), fieldRun);
        //ToDo: handle further issues ...
        return resultFieldRun;
    } else if (resultRunBody instanceof XWPFSDT) {
        //ToDo: handle SDT
    }
    return null;
}
/**
 * Appends a new hyperlink run to the result paragraph and copies anchor, run properties and
 * text of the source hyperlink run to it.
 *
 * <p>Only the anchor (link to a location inside the document) is carried over. External
 * hyperlinks, which are referenced by a relationship ID, are not handled yet.
 *
 * @param hyperlinkRun  source hyperlink run
 * @param resultRunBody target run body; only paragraphs are supported so far
 * @return the new hyperlink run, or {@code null} if the target is not a paragraph
 */
private XWPFHyperlinkRun createHyperlinkRunWithRPr(XWPFHyperlinkRun hyperlinkRun, IRunBody resultRunBody) {
    if (resultRunBody instanceof XWPFParagraph) {
        XWPFParagraph resultParagraph = (XWPFParagraph) resultRunBody;
        org.openxmlformats.schemas.wordprocessingml.x2006.main.CTHyperlink resultCTHyperLink = resultParagraph.getCTP().addNewHyperlink();
        // Set the anchor on the hyperlink just created. Creating a second hyperlink through
        // createHyperlinkRun(anchor) would leave this one behind as an empty element and
        // would register the anchor name as an external link target.
        if (hyperlinkRun.getAnchor() != null) {
            resultCTHyperLink.setAnchor(hyperlinkRun.getAnchor());
        }
        resultCTHyperLink.addNewR();
        XWPFHyperlinkRun resultHyperlinkRun = new XWPFHyperlinkRun(resultCTHyperLink, resultCTHyperLink.getRArray(0), resultParagraph);
        resultHyperlinkRun.getCTR().setRPr(hyperlinkRun.getCTR().getRPr());//simply copy the underlying XML bean to avoid more code
        copyTextOfRuns(hyperlinkRun, resultHyperlinkRun);
        //ToDo: handle external hyperlink runs properly ...
        handleRunStyles(resultParagraph.getDocument(), hyperlinkRun);
        //ToDo: handle further issues ...
        return resultHyperlinkRun;
    } else if (resultRunBody instanceof XWPFSDT) {
        //ToDo: handle SDT
    }
    return null;
}
/**
 * Appends a new run to the result paragraph and copies run properties and text of the
 * source run to it; the run style is merged into the result document.
 *
 * @param run           source run
 * @param resultRunBody target run body; only paragraphs are supported so far
 * @return the new run, or {@code null} if the target is not a paragraph
 */
private XWPFRun createRunWithRPr(XWPFRun run, IRunBody resultRunBody) {
    if (!(resultRunBody instanceof XWPFParagraph)) {
        //ToDo: handle SDT (resultRunBody instanceof XWPFSDT)
        return null;
    }
    XWPFParagraph targetParagraph = (XWPFParagraph) resultRunBody;
    XWPFRun runCopy = targetParagraph.createRun();
    //simply copy the underlying XML bean to avoid more code
    runCopy.getCTR().setRPr(run.getCTR().getRPr());
    copyTextOfRuns(run, runCopy);
    handleRunStyles(targetParagraph.getDocument(), run);
    //ToDo: handle further issues ...
    return runCopy;
}
/**
 * Copies the character style used by a source run, together with the styles related to it,
 * into the result document unless a style with the same ID already exists there.
 *
 * @param resultBody body the copy belongs to; used to find the result document
 * @param runElement source run element
 */
private void handleRunStyles(IBody resultBody, IRunElement runElement) {
    // XWPFHyperlinkRun and XWPFFieldRun are XWPFRun subclasses, so this single test covers
    // them; separate branches for them after an XWPFRun test can never be reached.
    if (!(runElement instanceof XWPFRun)) return; //ToDo: else others ...
    XWPFRun run = (XWPFRun) runElement;
    XWPFDocument document = run.getDocument();
    String styleID = run.getStyle();
    if (document == null || styleID == null || "".equals(styleID)) return;

    XWPFDocument resultDocument = null;
    if (resultBody instanceof XWPFDocument) {
        resultDocument = (XWPFDocument) resultBody;
    } else if (resultBody instanceof XWPFTableCell) {
        resultDocument = ((XWPFTableCell) resultBody).getXWPFDocument();
    } //ToDo: else others ...
    if (resultDocument == null) return;

    // A style ID may be referenced although the source has no styles part or no such style;
    // getUsedStyleList(null) would throw a NullPointerException in that case.
    XWPFStyles styles = document.getStyles();
    if (styles == null) return;
    XWPFStyle style = styles.getStyle(styleID);
    if (style == null) return;

    // createStyles() returns the existing styles or creates them if the result has none yet
    XWPFStyles resultStyles = resultDocument.createStyles();
    //merge each used styles, also the related ones
    for (XWPFStyle relatedStyle : styles.getUsedStyleList(style)) {
        if (resultStyles.getStyle(relatedStyle.getStyleId()) == null) {
            resultStyles.addStyle(relatedStyle);
        }
    }
}
/**
 * Recreates every row of a source table, including its cells, in the result table.
 *
 * @param tableRows   rows of the source table, in order
 * @param resultTable table that receives the copies
 * @throws Exception if copying the cells of a row fails
 */
private void traverseTableRows(List<XWPFTableRow> tableRows, XWPFTable resultTable) throws Exception {
    java.util.Iterator<XWPFTableRow> rowIterator = tableRows.iterator();
    while (rowIterator.hasNext()) {
        XWPFTableRow sourceRow = rowIterator.next();
        XWPFTableRow rowCopy = createTableRowWithTrPr(sourceRow, resultTable);
        traverseTableCells(sourceRow.getTableICells(), rowCopy);
    }
}
/**
 * Appends a new, cell-less row to the result table and gives it the row properties of the
 * source row.
 *
 * @param tableRow    source row
 * @param resultTable table that receives the new row
 * @return the new row without any cells
 */
private XWPFTableRow createTableRowWithTrPr(XWPFTableRow tableRow, XWPFTable resultTable) {
    XWPFTableRow rowCopy = resultTable.createRow();
    //table row should be empty at first: remove the cells back to front
    int remaining = rowCopy.getTableCells().size();
    while (remaining > 0) {
        remaining--;
        rowCopy.removeCell(remaining);
    }
    //simply copy the underlying XML bean to avoid more code
    rowCopy.getCtRow().setTrPr(tableRow.getCtRow().getTrPr());
    //ToDo: handle further issues ...
    return rowCopy;
}
/**
 * Recreates every cell of a source row in the result row. Regular cells are copied with
 * their body elements; SDT cells are only replaced by a placeholder cell so far.
 *
 * @param tableICells    cells of the source row, in order
 * @param resultTableRow row that receives the copies
 * @throws Exception if copying the content of a cell fails
 */
private void traverseTableCells(List<ICell> tableICells, XWPFTableRow resultTableRow) throws Exception {
    for (ICell cell : tableICells) {
        if (cell instanceof XWPFSDTCell) {
            createSdtTableCell((XWPFSDTCell) cell, resultTableRow);
            //ToDo: handle further issues ...
            continue;
        }
        if (cell instanceof XWPFTableCell) {
            XWPFTableCell sourceCell = (XWPFTableCell) cell;
            XWPFTableCell cellCopy = createTableCellWithTcPr(sourceCell, resultTableRow);
            traverseBodyElements(sourceCell.getBodyElements(), cellCopy);
        }
    }
}
/**
 * Placeholder for copying an SDT table cell: the SDT cell itself is not copied, a regular
 * cell is appended to the result row in its place.
 *
 * @param sDTCell        source SDT cell (not read yet)
 * @param resultTableRow row that receives the placeholder cell
 * @return always {@code null}, because no result SDT cell is created yet
 */
private XWPFSDTCell createSdtTableCell(XWPFSDTCell sDTCell, XWPFTableRow resultTableRow) {
    //create at least a cell to avoid corrupted document
    resultTableRow.createCell();
    //ToDo: handle SDTCell properly
    //ToDo: handle further issues ...
    return null;
}
/**
 * Appends a new cell to the result row, removes the paragraph at index 0 from it and gives
 * it the cell properties of the source cell.
 *
 * @param tableCell      source cell
 * @param resultTableRow row that receives the new cell
 * @return the new cell
 */
private XWPFTableCell createTableCellWithTcPr(XWPFTableCell tableCell, XWPFTableRow resultTableRow) {
    XWPFTableCell cellCopy = resultTableRow.createCell();
    // the content is added by the caller, so the cell starts without its paragraph at index 0
    cellCopy.removeParagraph(0);
    //simply copy the underlying XML bean to avoid more code
    cellCopy.getCTTc().setTcPr(tableCell.getCTTc().getTcPr());
    //ToDo: handle further issues ...
    return cellCopy;
}
/**
 * Copies every picture embedded in the source run element to the result run element.
 *
 * @param runElement       source run element the pictures are read from
 * @param resultRunElement run element that receives the copies
 * @throws Exception declared for callers; this method does not throw on its own
 */
private void traversePictures(IRunElement runElement, IRunElement resultRunElement) throws Exception {
    List<XWPFPicture> pictures = null;
    if (runElement instanceof XWPFRun) {
        // Field runs and hyperlink runs are XWPFRun instances as well. The pictures must
        // always be read from the SOURCE run: reading them from the freshly created result
        // run yields nothing, so pictures inside hyperlink runs would never be copied.
        pictures = ((XWPFRun) runElement).getEmbeddedPictures();
    } //ToDo: handle SDT (runElement instanceof XWPFSDT)
    if (pictures == null) {
        return;
    }
    for (XWPFPicture picture : pictures) {
        XWPFPictureData pictureData = picture.getPictureData();
        createPictureWithDrawing(runElement, picture, pictureData, resultRunElement);
    }
}
/**
 * Dispatches on the type of the result run element and copies one picture from the source
 * run element into it.
 *
 * @param runElement       source run element; cast to the same type as the result element
 * @param picture          source picture
 * @param pictureData      data of the source picture
 * @param resultRunElement run element that receives the picture
 * @return the new picture, or {@code null} if the result element type is not supported
 */
private XWPFPicture createPictureWithDrawing(IRunElement runElement, XWPFPicture picture, XWPFPictureData pictureData, IRunElement resultRunElement) {
    if (resultRunElement instanceof XWPFFieldRun) {
        XWPFFieldRun sourceFieldRun = (XWPFFieldRun) runElement;
        return createPictureWithDrawing(sourceFieldRun, (XWPFFieldRun) resultRunElement, picture, pictureData);
    }
    if (resultRunElement instanceof XWPFHyperlinkRun) {
        XWPFHyperlinkRun sourceLinkRun = (XWPFHyperlinkRun) runElement;
        return createPictureWithDrawing(sourceLinkRun, (XWPFHyperlinkRun) resultRunElement, picture, pictureData);
    }
    if (resultRunElement instanceof XWPFRun) {
        XWPFRun sourceRun = (XWPFRun) runElement;
        return createPictureWithDrawing(sourceRun, (XWPFRun) resultRunElement, picture, pictureData);
    }
    //ToDo: handle SDT (resultRunElement instanceof XWPFSDT)
    return null;
}
/**
 * Adds the picture data to the result run, then replaces the generated drawing by a copy of
 * the source drawing and corrects the relationship IDs inside that copy.
 *
 * <p>Failures are printed and reported as a {@code null} result, so one broken picture does
 * not abort the whole merge.
 *
 * @param run         source run that contains the picture
 * @param resultRun   run that receives the picture
 * @param picture     source picture
 * @param pictureData data of the source picture
 * @return the new picture, or {@code null} if it could not be created
 */
private XWPFPicture createPictureWithDrawing(XWPFRun run, XWPFRun resultRun, XWPFPicture picture, XWPFPictureData pictureData) {
    // try-with-resources: the stream over the picture part was never closed before
    try (java.io.InputStream pictureStream = pictureData.getPackagePart().getInputStream()) {
        XWPFPicture resultPicture = resultRun.addPicture(
            pictureStream,
            pictureData.getPictureType(),
            pictureData.getFileName(),
            Units.pixelToEMU((int)picture.getWidth()),
            Units.pixelToEMU((int)picture.getDepth()));
        // addPicture stored the media in the result document; this is its relationship ID there
        String rId = resultPicture.getCTPicture().getBlipFill().getBlip().getEmbed();
        // NOTE(review): always uses the drawing at index 0 of both runs; a run with more than
        // one drawing is probably not handled correctly - confirm.
        resultRun.getCTR().setDrawingArray(0, run.getCTR().getDrawingArray(0));//simply copy the underlying XML bean to avoid more code
        //but then correct the rID
        String declareNameSpaces = "declare namespace a='http://schemas.openxmlformats.org/drawingml/2006/main'; ";
        org.apache.xmlbeans.XmlObject[] selectedObjects = resultRun.getCTR().getDrawingArray(0).selectPath(
            declareNameSpaces
            + "$this//a:blip");
        for (org.apache.xmlbeans.XmlObject blipObject : selectedObjects) {
            if (blipObject instanceof org.openxmlformats.schemas.drawingml.x2006.main.CTBlip) {
                org.openxmlformats.schemas.drawingml.x2006.main.CTBlip blip = (org.openxmlformats.schemas.drawingml.x2006.main.CTBlip)blipObject;
                if (blip.isSetEmbed()) blip.setEmbed(rId);
            }
        }
        //remove rIDs to external hyperlinks to avoid corrupt document
        selectedObjects = resultRun.getCTR().getDrawingArray(0).selectPath(
            declareNameSpaces
            + "$this//a:hlinkClick");
        for (org.apache.xmlbeans.XmlObject hlinkClickObject : selectedObjects) {
            if (hlinkClickObject instanceof org.openxmlformats.schemas.drawingml.x2006.main.CTHyperlink) {
                org.openxmlformats.schemas.drawingml.x2006.main.CTHyperlink hlinkClick = (org.openxmlformats.schemas.drawingml.x2006.main.CTHyperlink)hlinkClickObject;
                if (hlinkClick.isSetId()) hlinkClick.setId("");
                //ToDo: handle pictures having hyperlinks properly
            }
        }
        //ToDo: handle further issues ...
        return resultPicture;
    } catch (Exception ex) {
        ex.printStackTrace();
    }
    return null;
}
/**
 * Appends the content of the second document to the first one and writes the result to a
 * new file. The two input files are not modified.
 *
 * @param firstFilePath  path of the document the content is appended to
 * @param secondFilePath path of the document whose content is appended
 * @param resultFilePath path the merged document is written to
 * @throws Exception if a file cannot be read or written, or if copying fails
 */
public void merge(String firstFilePath, String secondFilePath, String resultFilePath) throws Exception {
    // The numID mapping belongs to one pair of documents; a mapping left over from an earlier
    // merge() call would point the new paragraphs to the wrong numbering.
    this.numIDs.clear();
    // try-with-resources closes streams and documents even when the merge fails half way
    try (FileInputStream firstIn = new FileInputStream(firstFilePath);
         FileInputStream secondIn = new FileInputStream(secondFilePath);
         XWPFDocument resultDocument = new XWPFDocument(firstIn);
         XWPFDocument documentToAppend = new XWPFDocument(secondIn)) {
        traverseBodyElements(documentToAppend.getBodyElements(), resultDocument);
        try (FileOutputStream out = new FileOutputStream(resultFilePath)) {
            resultDocument.write(out);
        }
    }
}
/**
 * Example usage: appends WordDocument2.docx to WordDocument1.docx and writes
 * WordDocumentResult.docx, all in the working directory.
 *
 * @param args unused
 * @throws Exception if merging fails
 */
public static void main(String[] args) throws Exception {
    new WordMerger().merge("./WordDocument1.docx", "./WordDocument2.docx", "./WordDocumentResult.docx");
}
}
我有几个文档,我想将它们全部合并到一个 docx 文件中。 我的代码:
import java.io.InputStream;
import java.io.OutputStream;
import java.util.ArrayList;
import java.util.List;
import org.apache.poi.openxml4j.opc.OPCPackage;
import org.apache.poi.xwpf.usermodel.XWPFDocument;
import org.openxmlformats.schemas.wordprocessingml.x2006.main.CTBody;
/**
 * Merges several .docx documents by appending the body of every added document to the
 * first one and writing the first document to the given output stream.
 *
 * <p>NOTE: {@link #add(InputStream)} copies only the raw body XML of the later documents.
 * Whatever that XML references by ID (numbering definitions, relationship IDs of embedded
 * media, ...) is neither copied nor renumbered.
 */
public class WordMerge {
    private final OutputStream result;
    private final List<InputStream> inputs;
    private XWPFDocument first;

    /**
     * @param result stream the merged document is written to by {@link #doMerge()}
     */
    public WordMerge(OutputStream result) {
        this.result = result;
        inputs = new ArrayList<>();
    }

    /**
     * Opens the given stream as a Word document. The first successfully opened document
     * becomes the merge target; the body of every later one is appended to it.
     *
     * @param stream .docx content; it is remembered and closed by {@link #close()}
     * @throws Exception if the stream cannot be opened as a Word document
     */
    public void add(InputStream stream) throws Exception {
        inputs.add(stream);
        OPCPackage srcPackage = OPCPackage.open(stream);
        XWPFDocument srcDocument = new XWPFDocument(srcPackage);
        // Decide on "first" by the field instead of inputs.size(): the stream is registered
        // before it is opened, so after a failed first open the size no longer tells whether
        // a merge target exists.
        if (first == null) {
            first = srcDocument;
        } else {
            CTBody srcBody = srcDocument.getDocument().getBody();
            first.getDocument().addNewBody().set(srcBody);
        }
    }

    /**
     * Writes the merged document to the result stream.
     *
     * @throws IllegalStateException if no document has been added yet
     * @throws Exception if writing fails
     */
    public void doMerge() throws Exception {
        if (first == null) {
            throw new IllegalStateException("no document has been added");
        }
        first.write(result);
    }

    /**
     * Flushes and closes the result stream and closes every added input stream. All streams
     * are attempted even if one of them fails; the first failure is rethrown afterwards with
     * the later ones attached as suppressed exceptions.
     *
     * @throws Exception the first failure that occurred while flushing or closing
     */
    public void close() throws Exception {
        Exception failure = null;
        try {
            result.flush();
        } catch (Exception e) {
            failure = e;
        }
        try {
            result.close();
        } catch (Exception e) {
            failure = remember(failure, e);
        }
        for (InputStream input : inputs) {
            try {
                input.close();
            } catch (Exception e) {
                failure = remember(failure, e);
            }
        }
        if (failure != null) {
            throw failure;
        }
    }

    /** Keeps the first failure and attaches any later one to it as suppressed. */
    private static Exception remember(Exception firstFailure, Exception next) {
        if (firstFailure == null) {
            return next;
        }
        firstFailure.addSuppressed(next);
        return firstFailure;
    }
}
用法如下:
/**
 * Example usage: merges two documents into {@code /home/victor/result.docx}.
 *
 * @param args unused
 * @throws Exception if reading, merging or writing fails
 */
public static void main(String[] args) throws Exception {
    FileOutputStream faos = new FileOutputStream("/home/victor/result.docx");
    WordMerge wm = new WordMerge(faos);
    try {
        wm.add( new FileInputStream("/home/victor/001.docx") );
        wm.add( new FileInputStream("/home/victor/002.docx") );
        wm.doMerge();
    } finally {
        // release the output and input streams even when adding or merging fails
        wm.close();
    }
}
它有效,不幸的是,如果您在任何非第一个文档中都有列表,它会变得有点混乱。列表符号变为数字,更糟糕的是,有时前一个文档中的列表将在附加文档中继续。假设 doc1 有 a.b.c 列表,第二个有未排序的列表,那么后一个变成 d.e.f。 (沿袭了之前的文档格式。)
如何让每个文档合并到下一页,而不是按照上一个文档的格式?
您的代码仅将多个 CTBody 元素附加到文档中。但这不是 Word 文档的结构。“它有效”是因为 Microsoft Word 具有足够的宽容度来解释它。但是当涉及到 Word 文档结构中的引用时,它就会失败。
例如,编号定义由 ID 引用。对于不同文档中的不同定义,这可以是相同的 ID。因此,第一个文档中的 ID 1 可能指向十进制编号,而第二个文档中的 ID 1 可能指向项目符号编号。所以numIDs需要合并,而不仅仅是复制。
嵌入式媒体(例如图像)由 rID 引用,所以 CTBody 只包含 ID,媒体本身存储在文档主体之外。因此,如果文档主体引用了 rID 为 rID12 的图片,而该图片并未被存储,则文档已损坏。
许多其他文档元素也是如此。
所以那个方法根本没用。
需要遍历待追加文档的所有正文(body)元素,然后将找到的每个正文元素附加到第一个文档,并更新它的引用。
以下是展示原理的工作稿。它还没有准备好。例如,它不考虑超链接、脚注、评论、结构化文档标签等等。正如您从所需的大量代码中看到的那样,考虑所有可能的事情将是一项非常费力的任务。为了避免更多的代码,如果可能的话,我只是简单地复制底层的 XML bean。这也应该更好地形成以用于生产用途。但是从这段代码中应该可以看出原理。
当方法和变量的名称没有自我解释时,代码被注释了。
代码已经过测试,可以在当前的 Apache POI 5.1.0 下运行。以前的版本未经测试,也不应使用,因为它们对 XWPF 的支持更少。
代码需要 Apache POI FAQ 中提到的所有模式的完整 jar。
import java.io.FileInputStream;
import java.io.FileOutputStream;
import org.apache.poi.xwpf.usermodel.*;
import org.apache.poi.util.Units;
import java.util.List;
import java.util.Map;
import java.util.HashMap;
import java.math.BigInteger;
public class WordMerger {
// to handle merging numID: numID of the appended document -> numID created in the result document
private Map<BigInteger, BigInteger> numIDs = null;

/** Creates a merger whose numID mapping starts out empty. */
public WordMerger() {
    numIDs = new HashMap<>();
}
/**
 * Walks the body elements of the document to append and recreates each of them in
 * {@code resultBody}.
 *
 * <p>Paragraphs are recreated together with their runs and tables together with their rows;
 * structured document tags (SDT) are only replaced by a placeholder paragraph so far.
 *
 * @param bodyElements body elements of the source body, in document order
 * @param resultBody   body (document or table cell) that receives the copies
 * @throws Exception if copying one of the nested elements fails
 */
private void traverseBodyElements(List<IBodyElement> bodyElements, IBody resultBody) throws Exception {
    for (IBodyElement bodyElement : bodyElements) {
        if (bodyElement instanceof XWPFParagraph) {
            XWPFParagraph paragraph = (XWPFParagraph) bodyElement;
            XWPFParagraph resultParagraph = createParagraphWithPPr(paragraph, resultBody);
            // null means the target body type is not supported yet; there is nothing to copy runs into
            if (resultParagraph != null) {
                traverseRunElements(paragraph.getIRuns(), resultParagraph);
            }
        } else if (bodyElement instanceof XWPFSDT) {
            createSDT((XWPFSDT) bodyElement, resultBody);
            //ToDo: handle further issues ...
        } else if (bodyElement instanceof XWPFTable) {
            XWPFTable table = (XWPFTable) bodyElement;
            XWPFTable resultTable = createTableWithTblPrAndTblGrid(table, resultBody);
            // createTableWithTblPrAndTblGrid returns null for targets it cannot handle yet
            // (e.g. a table nested in a table cell); skip those instead of failing with a
            // NullPointerException when the rows are created.
            if (resultTable != null) {
                traverseTableRows(table.getRows(), resultTable);
            }
        }
    }
}
/**
 * Placeholder for copying a structured document tag (SDT): the SDT itself is not copied,
 * an empty paragraph is appended to the result body in its place.
 *
 * @param sDT        source SDT (not read yet)
 * @param resultBody body that receives the placeholder paragraph
 * @return always {@code null}, because no result SDT is created yet
 */
private XWPFSDT createSDT(XWPFSDT sDT, IBody resultBody) {
    //not ready yet
    //we simply add paragraphs to avoid corrupted documents
    if (resultBody instanceof XWPFDocument) {
        ((XWPFDocument) resultBody).createParagraph();
        //ToDo: handle further issues ...
    } else if (resultBody instanceof XWPFTableCell) {
        ((XWPFTableCell) resultBody).addParagraph();
        //ToDo: handle further issues ...
    } //ToDo: else others ...
    //ToDo: handle SDT properly
    return null;
}
/**
 * Appends a new paragraph to {@code resultBody} and gives it the paragraph properties of
 * the source paragraph, merging the referenced styles and numbering into the result document.
 *
 * @param paragraph  source paragraph
 * @param resultBody target body; documents and table cells are supported
 * @return the new paragraph, or {@code null} if the target body type is not supported yet
 */
private XWPFParagraph createParagraphWithPPr(XWPFParagraph paragraph, IBody resultBody) {
    XWPFParagraph resultParagraph;
    if (resultBody instanceof XWPFDocument) {
        resultParagraph = ((XWPFDocument) resultBody).createParagraph();
    } else if (resultBody instanceof XWPFTableCell) {
        resultParagraph = ((XWPFTableCell) resultBody).addParagraph();
    } else {
        //ToDo: else others ...
        return null;
    }
    resultParagraph.getCTP().setPPr(paragraph.getCTP().getPPr());//simply copy the underlying XML bean to avoid more code
    handleStyles(resultBody, paragraph);
    // The copied pPr still carries the numID of the source document. It has to be remapped
    // for paragraphs in table cells as well, not only for paragraphs of the document body.
    handleNumberings(paragraph, resultParagraph);
    //ToDo: handle further issues ...
    return resultParagraph;
}
/**
 * Merges the numbering used by the source paragraph into the result document and points the
 * result paragraph to the merged numbering.
 *
 * <p>For every source numID seen for the first time, the abstract numbering is copied into
 * the result document under a new ID, a new num is created for it and the mapping
 * source numID -> result numID is remembered in {@code numIDs}.
 *
 * @param paragraph       source paragraph
 * @param resultParagraph paragraph created in the result document
 */
private void handleNumberings(XWPFParagraph paragraph, XWPFParagraph resultParagraph) {
    BigInteger numID = paragraph.getNumID();
    if (numID == null) return;
    BigInteger resultNumID = this.numIDs.get(numID);
    if (resultNumID == null) {
        XWPFDocument document = paragraph.getDocument();
        XWPFNumbering numbering = document.createNumbering();
        XWPFNum num = numbering.getNum(numID);
        // The paragraph may reference a numID that has no definition in the source numbering;
        // there is nothing to merge then, so leave the copied paragraph properties as they are.
        if (num == null) return;
        BigInteger abstractNumID = numbering.getAbstractNumID(numID);
        XWPFAbstractNum abstractNum = numbering.getAbstractNum(abstractNumID);
        if (abstractNum == null) return;
        XWPFAbstractNum resultAbstractNum = new XWPFAbstractNum((org.openxmlformats.schemas.wordprocessingml.x2006.main.CTAbstractNum)abstractNum.getCTAbstractNum().copy());
        XWPFDocument resultDocument = resultParagraph.getDocument();
        XWPFNumbering resultNumbering = resultDocument.createNumbering();
        // the copy gets the current count of abstract numberings in the result as its new ID
        int pos = resultNumbering.getAbstractNums().size();
        resultAbstractNum.getCTAbstractNum().setAbstractNumId(BigInteger.valueOf(pos));
        BigInteger resultAbstractNumID = resultNumbering.addAbstractNum(resultAbstractNum);
        resultNumID = resultNumbering.addNum(resultAbstractNumID);
        XWPFNum resultNum = resultNumbering.getNum(resultNumID);
        resultNum.getCTNum().setLvlOverrideArray(num.getCTNum().getLvlOverrideArray());
        this.numIDs.put(numID, resultNumID);
    }
    resultParagraph.setNumID(resultNumID);
}
/**
 * Copies the style used by a source paragraph or table, together with the styles related to
 * it, into the result document unless a style with the same ID already exists there.
 *
 * @param resultBody  body the copy was added to; used to find the result document
 * @param bodyElement source paragraph or table
 */
private void handleStyles(IBody resultBody, IBodyElement bodyElement) {
    XWPFDocument document = null;
    String styleID = null;
    if (bodyElement instanceof XWPFParagraph) {
        XWPFParagraph paragraph = (XWPFParagraph) bodyElement;
        document = paragraph.getDocument();
        styleID = paragraph.getStyleID();
    } else if (bodyElement instanceof XWPFTable) {
        XWPFTable table = (XWPFTable) bodyElement;
        if (table.getPart() instanceof XWPFDocument) {
            document = (XWPFDocument) table.getPart();
            styleID = table.getStyleID();
        }
    } //ToDo: else others ...
    if (document == null || styleID == null || "".equals(styleID)) return;

    XWPFDocument resultDocument = null;
    if (resultBody instanceof XWPFDocument) {
        resultDocument = (XWPFDocument) resultBody;
    } else if (resultBody instanceof XWPFTableCell) {
        resultDocument = ((XWPFTableCell) resultBody).getXWPFDocument();
    } //ToDo: else others ...
    if (resultDocument == null) return;

    // A style ID may be referenced although the source has no styles part or no such style;
    // getUsedStyleList(null) would throw a NullPointerException in that case.
    XWPFStyles styles = document.getStyles();
    if (styles == null) return;
    XWPFStyle style = styles.getStyle(styleID);
    if (style == null) return;

    // createStyles() returns the existing styles or creates them if the result has none yet
    XWPFStyles resultStyles = resultDocument.createStyles();
    //merge each used styles, also the related ones
    for (XWPFStyle relatedStyle : styles.getUsedStyleList(style)) {
        if (resultStyles.getStyle(relatedStyle.getStyleId()) == null) {
            resultStyles.addStyle(relatedStyle);
        }
    }
}
/**
 * Appends a new, row-less table to the result document and gives it the table properties
 * and the table grid of the source table.
 *
 * @param table      source table
 * @param resultBody target body; only documents are supported so far
 * @return the new table, or {@code null} if the target body is not a document
 */
private XWPFTable createTableWithTblPrAndTblGrid(XWPFTable table, IBody resultBody) {
    if (!(resultBody instanceof XWPFDocument)) {
        //ToDo: handle stacked tables (resultBody is a XWPFTableCell)
        //ToDo: else others ...
        return null;
    }
    XWPFDocument targetDocument = (XWPFDocument) resultBody;
    XWPFTable tableCopy = targetDocument.createTable();
    // drop the row at index 0 of the freshly created table; rows are added by the caller
    tableCopy.removeRow(0);
    //simply copy the underlying XML beans to avoid more code
    tableCopy.getCTTbl().setTblPr(table.getCTTbl().getTblPr());
    tableCopy.getCTTbl().setTblGrid(table.getCTTbl().getTblGrid());
    handleStyles(targetDocument, table);
    //ToDo: handle further issues ...
    return tableCopy;
}
/**
 * Recreates every run element of a source paragraph in the result run body and copies the
 * pictures embedded in it.
 *
 * @param runElements   run elements of the source paragraph, in order
 * @param resultRunBody run body that receives the copies
 * @throws Exception if copying the pictures of a run fails
 */
private void traverseRunElements(List<IRunElement> runElements, IRunBody resultRunBody) throws Exception {
    for (IRunElement element : runElements) {
        // the more specific run types are tested before the general XWPFRun
        if (element instanceof XWPFFieldRun) {
            XWPFFieldRun sourceFieldRun = (XWPFFieldRun) element;
            XWPFFieldRun fieldRunCopy = createFieldRunWithRPr(sourceFieldRun, resultRunBody);
            traversePictures(sourceFieldRun, fieldRunCopy);
            continue;
        }
        if (element instanceof XWPFHyperlinkRun) {
            XWPFHyperlinkRun sourceLinkRun = (XWPFHyperlinkRun) element;
            XWPFHyperlinkRun linkRunCopy = createHyperlinkRunWithRPr(sourceLinkRun, resultRunBody);
            traversePictures(sourceLinkRun, linkRunCopy);
            continue;
        }
        if (element instanceof XWPFRun) {
            XWPFRun sourceRun = (XWPFRun) element;
            XWPFRun runCopy = createRunWithRPr(sourceRun, resultRunBody);
            traversePictures(sourceRun, runCopy);
            continue;
        }
        //ToDo: handle SDT (element instanceof XWPFSDT)
    }
}
/**
 * Copies the content of every text (T) element of the source run to the same position in
 * the result run.
 *
 * @param run       source run
 * @param resultRun run that receives the texts
 */
private void copyTextOfRuns(XWPFRun run, XWPFRun resultRun) {
    int textCount = run.getCTR().sizeOfTArray();
    int position = 0;
    while (position < textCount) {
        resultRun.setText(run.getText(position), position);
        position++;
    }
}
/**
 * Appends a new field run to the result paragraph and copies run properties, field
 * instruction and text of the source field run to it.
 *
 * @param fieldRun      source field run
 * @param resultRunBody target run body; only paragraphs are supported so far
 * @return the new field run, or {@code null} if the target is not a paragraph
 */
private XWPFFieldRun createFieldRunWithRPr(XWPFFieldRun fieldRun, IRunBody resultRunBody) {
    if (resultRunBody instanceof XWPFParagraph) {
        XWPFParagraph resultParagraph = (XWPFParagraph) resultRunBody;
        // createRun() yields a plain XWPFRun, so casting its result to XWPFFieldRun fails
        // with a ClassCastException; createFieldRun() creates the field run directly.
        XWPFFieldRun resultFieldRun = resultParagraph.createFieldRun();
        resultFieldRun.getCTR().setRPr(fieldRun.getCTR().getRPr());//simply copy the underlying XML bean to avoid more code
        // carry over what the field is (its instruction) and its currently displayed text
        if (fieldRun.getFieldInstruction() != null) {
            resultFieldRun.setFieldInstruction(fieldRun.getFieldInstruction());
        }
        copyTextOfRuns(fieldRun, resultFieldRun);
        //ToDo: handle field runs properly ...
        handleRunStyles(resultParagraph.getDocument(), fieldRun);
        //ToDo: handle further issues ...
        return resultFieldRun;
    } else if (resultRunBody instanceof XWPFSDT) {
        //ToDo: handle SDT
    }
    return null;
}
/**
 * Appends a new hyperlink run to the result paragraph and copies anchor, run properties and
 * text of the source hyperlink run to it.
 *
 * <p>Only the anchor (link to a location inside the document) is carried over. External
 * hyperlinks, which are referenced by a relationship ID, are not handled yet.
 *
 * @param hyperlinkRun  source hyperlink run
 * @param resultRunBody target run body; only paragraphs are supported so far
 * @return the new hyperlink run, or {@code null} if the target is not a paragraph
 */
private XWPFHyperlinkRun createHyperlinkRunWithRPr(XWPFHyperlinkRun hyperlinkRun, IRunBody resultRunBody) {
    if (resultRunBody instanceof XWPFParagraph) {
        XWPFParagraph resultParagraph = (XWPFParagraph) resultRunBody;
        org.openxmlformats.schemas.wordprocessingml.x2006.main.CTHyperlink resultCTHyperLink = resultParagraph.getCTP().addNewHyperlink();
        // Set the anchor on the hyperlink just created. Creating a second hyperlink through
        // createHyperlinkRun(anchor) would leave this one behind as an empty element and
        // would register the anchor name as an external link target.
        if (hyperlinkRun.getAnchor() != null) {
            resultCTHyperLink.setAnchor(hyperlinkRun.getAnchor());
        }
        resultCTHyperLink.addNewR();
        XWPFHyperlinkRun resultHyperlinkRun = new XWPFHyperlinkRun(resultCTHyperLink, resultCTHyperLink.getRArray(0), resultParagraph);
        resultHyperlinkRun.getCTR().setRPr(hyperlinkRun.getCTR().getRPr());//simply copy the underlying XML bean to avoid more code
        copyTextOfRuns(hyperlinkRun, resultHyperlinkRun);
        //ToDo: handle external hyperlink runs properly ...
        handleRunStyles(resultParagraph.getDocument(), hyperlinkRun);
        //ToDo: handle further issues ...
        return resultHyperlinkRun;
    } else if (resultRunBody instanceof XWPFSDT) {
        //ToDo: handle SDT
    }
    return null;
}
/**
 * Appends a new run to the result paragraph and copies run properties and text of the
 * source run to it; the run style is merged into the result document.
 *
 * @param run           source run
 * @param resultRunBody target run body; only paragraphs are supported so far
 * @return the new run, or {@code null} if the target is not a paragraph
 */
private XWPFRun createRunWithRPr(XWPFRun run, IRunBody resultRunBody) {
    if (!(resultRunBody instanceof XWPFParagraph)) {
        //ToDo: handle SDT (resultRunBody instanceof XWPFSDT)
        return null;
    }
    XWPFParagraph targetParagraph = (XWPFParagraph) resultRunBody;
    XWPFRun runCopy = targetParagraph.createRun();
    //simply copy the underlying XML bean to avoid more code
    runCopy.getCTR().setRPr(run.getCTR().getRPr());
    copyTextOfRuns(run, runCopy);
    handleRunStyles(targetParagraph.getDocument(), run);
    //ToDo: handle further issues ...
    return runCopy;
}
/**
 * Copies the character style used by a source run, together with the styles related to it,
 * into the result document unless a style with the same ID already exists there.
 *
 * @param resultBody body the copy belongs to; used to find the result document
 * @param runElement source run element
 */
private void handleRunStyles(IBody resultBody, IRunElement runElement) {
    // XWPFHyperlinkRun and XWPFFieldRun are XWPFRun subclasses, so this single test covers
    // them; separate branches for them after an XWPFRun test can never be reached.
    if (!(runElement instanceof XWPFRun)) return; //ToDo: else others ...
    XWPFRun run = (XWPFRun) runElement;
    XWPFDocument document = run.getDocument();
    String styleID = run.getStyle();
    if (document == null || styleID == null || "".equals(styleID)) return;

    XWPFDocument resultDocument = null;
    if (resultBody instanceof XWPFDocument) {
        resultDocument = (XWPFDocument) resultBody;
    } else if (resultBody instanceof XWPFTableCell) {
        resultDocument = ((XWPFTableCell) resultBody).getXWPFDocument();
    } //ToDo: else others ...
    if (resultDocument == null) return;

    // A style ID may be referenced although the source has no styles part or no such style;
    // getUsedStyleList(null) would throw a NullPointerException in that case.
    XWPFStyles styles = document.getStyles();
    if (styles == null) return;
    XWPFStyle style = styles.getStyle(styleID);
    if (style == null) return;

    // createStyles() returns the existing styles or creates them if the result has none yet
    XWPFStyles resultStyles = resultDocument.createStyles();
    //merge each used styles, also the related ones
    for (XWPFStyle relatedStyle : styles.getUsedStyleList(style)) {
        if (resultStyles.getStyle(relatedStyle.getStyleId()) == null) {
            resultStyles.addStyle(relatedStyle);
        }
    }
}
/**
 * Mirrors every source table row into the result table, in list order.
 * For each source row a result row is created first, then the cells of the source row are traversed into it.
 *
 * @param tableRows   the source table rows
 * @param resultTable the table which shall receive the mirrored rows
 * @throws Exception when traversing the cells fails
 */
private void traverseTableRows(List<XWPFTableRow> tableRows, XWPFTable resultTable) throws Exception {
    for (int rowIndex = 0; rowIndex < tableRows.size(); rowIndex++) {
        XWPFTableRow sourceRow = tableRows.get(rowIndex);
        XWPFTableRow targetRow = createTableRowWithTrPr(sourceRow, resultTable);
        traverseTableCells(sourceRow.getTableICells(), targetRow);
    }
}
/**
 * Appends a new, cell-less row to the result table and copies the row properties of the source row to it.
 *
 * @param tableRow    the source table row whose properties are copied
 * @param resultTable the table which shall receive the new row
 * @return the created result row, containing no cells
 */
private XWPFTableRow createTableRowWithTrPr(XWPFTableRow tableRow, XWPFTable resultTable) {
    XWPFTableRow targetRow = resultTable.createRow();
    //the new row may come with cells already; remove them back to front so the row starts empty
    int initialCellCount = targetRow.getTableCells().size();
    for (int cellIndex = initialCellCount - 1; cellIndex >= 0; cellIndex--) {
        targetRow.removeCell(cellIndex);
    }
    //copying the underlying XML bean is the shortest way to take over all row properties
    org.openxmlformats.schemas.wordprocessingml.x2006.main.CTTrPr sourceRowProperties = tableRow.getCtRow().getTrPr();
    targetRow.getCtRow().setTrPr(sourceRowProperties);
    //ToDo: handle further issues ...
    return targetRow;
}
/**
 * Mirrors every source cell into the result row, in list order.
 * <p>
 * SDT cells only get a placeholder cell via {@code createSdtTableCell}; regular table cells are
 * created with their cell properties and their body elements are traversed into the new cell.
 *
 * @param tableICells    the source cells of one table row
 * @param resultTableRow the row which shall receive the mirrored cells
 * @throws Exception when traversing the body elements of a cell fails
 */
private void traverseTableCells(List<ICell> tableICells, XWPFTableRow resultTableRow) throws Exception {
    for (ICell sourceICell : tableICells) {
        if (sourceICell instanceof XWPFSDTCell) {
            createSdtTableCell((XWPFSDTCell)sourceICell, resultTableRow);
            //ToDo: handle further issues ...
            continue;
        }
        if (sourceICell instanceof XWPFTableCell) {
            XWPFTableCell sourceCell = (XWPFTableCell)sourceICell;
            XWPFTableCell targetCell = createTableCellWithTcPr(sourceCell, resultTableRow);
            traverseBodyElements(sourceCell.getBodyElements(), targetCell);
        }
    }
}
/**
 * Placeholder for mirroring an SDT cell: only appends a plain cell to the result row.
 *
 * @param sDTCell        the source SDT cell (its content is not copied yet)
 * @param resultTableRow the row which receives the placeholder cell
 * @return always {@code null}, since no SDT cell is created yet
 */
private XWPFSDTCell createSdtTableCell(XWPFSDTCell sDTCell, XWPFTableRow resultTableRow) {
    //a cell has to exist at this position, otherwise the document would be corrupted
    resultTableRow.createCell();
    //ToDo: handle SDTCell properly
    //ToDo: handle further issues ...
    return null;
}
/**
 * Appends a new cell to the result row, removes its paragraph at index 0 and copies
 * the cell properties of the source cell to it.
 *
 * @param tableCell      the source table cell whose properties are copied
 * @param resultTableRow the row which shall receive the new cell
 * @return the created result cell
 */
private XWPFTableCell createTableCellWithTcPr(XWPFTableCell tableCell, XWPFTableRow resultTableRow) {
    XWPFTableCell targetCell = resultTableRow.createCell();
    //drop the paragraph at index 0 so the cell content comes only from the traversal of the source cell
    targetCell.removeParagraph(0);
    //copying the underlying XML bean is the shortest way to take over all cell properties
    org.openxmlformats.schemas.wordprocessingml.x2006.main.CTTcPr sourceCellProperties = tableCell.getCTTc().getTcPr();
    targetCell.getCTTc().setTcPr(sourceCellProperties);
    //ToDo: handle further issues ...
    return targetCell;
}
/**
 * Copies all pictures embedded in a source run element into the corresponding result run element.
 * <p>
 * The pictures are always read from the source element; the result element only receives them.
 * SDT elements are not handled yet and are skipped.
 *
 * @param runElement       the source run element whose embedded pictures are read
 * @param resultRunElement the result run element which shall receive the pictures
 * @throws Exception declared for callers; picture creation itself is done by {@code createPictureWithDrawing}
 */
private void traversePictures(IRunElement runElement, IRunElement resultRunElement) throws Exception {
    List<XWPFPicture> pictures = null;
    if (runElement instanceof XWPFFieldRun) {
        pictures = ((XWPFFieldRun)runElement).getEmbeddedPictures();
    } else if (runElement instanceof XWPFHyperlinkRun) {
        //must read from the source run; the result run has no pictures yet
        pictures = ((XWPFHyperlinkRun)runElement).getEmbeddedPictures();
    } else if (runElement instanceof XWPFRun) {
        pictures = ((XWPFRun)runElement).getEmbeddedPictures();
    } else if (runElement instanceof XWPFSDT) {
        //ToDo: handle SDT
    }
    if (pictures == null) {
        return;
    }
    for (XWPFPicture picture : pictures) {
        XWPFPictureData pictureData = picture.getPictureData();
        createPictureWithDrawing(runElement, picture, pictureData, resultRunElement);
    }
}
/**
 * Dispatches picture creation according to the concrete type of the result run element.
 * <p>
 * The source and the result element are cast to the same concrete run type and handed to the
 * run-typed {@code createPictureWithDrawing} overload. SDT elements are not handled yet.
 *
 * @param runElement       the source run element containing the picture
 * @param picture          the source picture
 * @param pictureData      the binary data of the source picture
 * @param resultRunElement the result run element which shall receive the picture
 * @return the created picture, or {@code null} when the result element type is not handled
 */
private XWPFPicture createPictureWithDrawing(IRunElement runElement, XWPFPicture picture, XWPFPictureData pictureData, IRunElement resultRunElement) {
    if (resultRunElement instanceof XWPFFieldRun) {
        return createPictureWithDrawing((XWPFFieldRun)runElement, (XWPFFieldRun)resultRunElement, picture, pictureData);
    }
    if (resultRunElement instanceof XWPFHyperlinkRun) {
        return createPictureWithDrawing((XWPFHyperlinkRun)runElement, (XWPFHyperlinkRun)resultRunElement, picture, pictureData);
    }
    if (resultRunElement instanceof XWPFRun) {
        return createPictureWithDrawing((XWPFRun)runElement, (XWPFRun)resultRunElement, picture, pictureData);
    }
    //ToDo: handle SDT (resultRunElement instanceof XWPFSDT)
    return null;
}
/**
 * Adds the given source picture to the result run and takes over the source drawing.
 * <p>
 * The picture data is first added to the result run, which yields a relation ID valid in the
 * result document. Then drawing 0 of the result run is replaced by drawing 0 of the source run
 * and all embedded blip references in it are pointed to that new relation ID. Relation IDs of
 * hyperlinks on the drawing are blanked because the relations do not exist in the result document.
 * <p>
 * Failures are handled best effort: the exception is printed and {@code null} is returned.
 * NOTE(review): only drawing index 0 is handled; runs with several drawings presumably need more work.
 *
 * @param run         the source run containing the picture
 * @param resultRun   the result run which shall receive the picture
 * @param picture     the source picture (used for its size)
 * @param pictureData the binary data of the source picture
 * @return the picture added to the result run, or {@code null} when copying failed
 */
private XWPFPicture createPictureWithDrawing(XWPFRun run, XWPFRun resultRun, XWPFPicture picture, XWPFPictureData pictureData) {
    try {
        XWPFPicture resultPicture;
        //close the part's stream once the picture data has been read
        try (java.io.InputStream pictureStream = pictureData.getPackagePart().getInputStream()) {
            resultPicture = resultRun.addPicture(
                pictureStream,
                pictureData.getPictureType(),
                pictureData.getFileName(),
                Units.toEMU(picture.getWidth()),   //XWPFPicture reports its size in points
                Units.toEMU(picture.getDepth()));
        }
        String rId = resultPicture.getCTPicture().getBlipFill().getBlip().getEmbed();
        resultRun.getCTR().setDrawingArray(0, run.getCTR().getDrawingArray(0));//simply copy the underlying XML bean to avoid more code
        org.apache.xmlbeans.XmlObject resultDrawing = resultRun.getCTR().getDrawingArray(0);
        //but then correct the rID
        String declareNameSpaces = "declare namespace a='http://schemas.openxmlformats.org/drawingml/2006/main'; ";
        org.apache.xmlbeans.XmlObject[] selectedObjects = resultDrawing.selectPath(
            declareNameSpaces
            + "$this//a:blip");
        for (org.apache.xmlbeans.XmlObject blipObject : selectedObjects) {
            if (blipObject instanceof org.openxmlformats.schemas.drawingml.x2006.main.CTBlip) {
                org.openxmlformats.schemas.drawingml.x2006.main.CTBlip blip = (org.openxmlformats.schemas.drawingml.x2006.main.CTBlip)blipObject;
                if (blip.isSetEmbed()) {
                    blip.setEmbed(rId);
                }
            }
        }
        //remove rIDs to external hyperlinks to avoid corrupt document
        selectedObjects = resultDrawing.selectPath(
            declareNameSpaces
            + "$this//a:hlinkClick");
        for (org.apache.xmlbeans.XmlObject hlinkClickObject : selectedObjects) {
            if (hlinkClickObject instanceof org.openxmlformats.schemas.drawingml.x2006.main.CTHyperlink) {
                org.openxmlformats.schemas.drawingml.x2006.main.CTHyperlink hlinkClick = (org.openxmlformats.schemas.drawingml.x2006.main.CTHyperlink)hlinkClickObject;
                if (hlinkClick.isSetId()) {
                    hlinkClick.setId("");
                }
                //ToDo: handle pictures having hyperlinks properly
            }
        }
        //ToDo: handle further issues ...
        return resultPicture;
    } catch (Exception ex) {
        //best effort: a picture that cannot be copied must not abort the whole merge
        ex.printStackTrace();
    }
    return null;
}
/**
 * Appends the body elements of the second document to the first document and writes the
 * result to a new file. The two input files themselves are not modified by this method.
 * <p>
 * All streams and documents are closed when the method returns, also when it fails.
 *
 * @param firstFilePath  path of the document which forms the start of the result
 * @param secondFilePath path of the document whose body elements are appended
 * @param resultFilePath path of the file the merged document is written to
 * @throws Exception when reading, merging or writing fails
 */
public void merge(String firstFilePath, String secondFilePath, String resultFilePath) throws Exception {
    try (FileInputStream firstIn = new FileInputStream(firstFilePath);
         FileInputStream secondIn = new FileInputStream(secondFilePath);
         XWPFDocument resultDocument = new XWPFDocument(firstIn);
         XWPFDocument documentToAppend = new XWPFDocument(secondIn)) {
        traverseBodyElements(documentToAppend.getBodyElements(), resultDocument);
        try (FileOutputStream out = new FileOutputStream(resultFilePath)) {
            resultDocument.write(out);
        }
    }
}
/**
 * Example entry point: merges two fixed Word documents from the working directory into a third one.
 *
 * @param args command line arguments (not used)
 * @throws Exception when merging fails
 */
public static void main(String[] args) throws Exception {
    String firstDocumentPath = "./WordDocument1.docx";
    String secondDocumentPath = "./WordDocument2.docx";
    String mergedDocumentPath = "./WordDocumentResult.docx";
    new WordMerger().merge(firstDocumentPath, secondDocumentPath, mergedDocumentPath);
}
}