XPATH 抛出 java.lang.RuntimeException

XPATH throwing java.lang.RuntimeException

我希望程序输出 CONFIDENTIAL。

我的程序

import org.apache.xmlbeans.XmlException;
import org.apache.xmlbeans.XmlObject;
import java.io.File;
import java.io.IOException;

/**
 * Minimal reproduction: evaluates a namespace-qualified path expression against
 * {@code test.xml} through XMLBeans and prints the first selected item.
 */
public class main {
    /** Prolog that binds the {@code v} prefix to the VML namespace URI. */
    private static final String VML_NAMESPACE_PROLOG =
            "declare namespace v='urn:schemas-microsoft-com:vml' ";

    /**
     * Atomized {@code string} attribute of the {@code v:textpath} child of any
     * {@code v:shape} whose {@code id} contains {@code PowerPlusWaterMarkObject}.
     */
    private static final String WATERMARK_TEXT_QUERY =
            "data(//v:shape[contains(@id,'PowerPlusWaterMarkObject')]/v:textpath/@string)";

    /**
     * Parses {@code test.xml} from the working directory, runs the query and prints
     * the first result; parse and I/O failures are printed to standard output.
     *
     * @param args unused
     */
    public static void main(String[] args) {
        final String query = VML_NAMESPACE_PROLOG + WATERMARK_TEXT_QUERY;
        try {
            final XmlObject document = XmlObject.Factory.parse(new File("test.xml"));
            final XmlObject[] matches = document.selectPath(query);
            final XmlObject firstMatch = matches[0];
            System.out.println(firstMatch.toString());
        } catch (XmlException | IOException ex) {
            System.out.println(ex);
        }
    }
}

test.xml

<xml-fragment
        xmlns:wpc="http://schemas.microsoft.com/office/word/2010/wordprocessingCanvas" xmlns:cx="http://schemas.microsoft.com/office/drawing/2014/chartex" xmlns:cx1="http://schemas.microsoft.com/office/drawing/2015/9/8/chartex" xmlns:cx2="http://schemas.microsoft.com/office/drawing/2015/10/21/chartex" xmlns:cx3="http://schemas.microsoft.com/office/drawing/2016/5/9/chartex" xmlns:cx4="http://schemas.microsoft.com/office/drawing/2016/5/10/chartex" xmlns:cx5="http://schemas.microsoft.com/office/drawing/2016/5/11/chartex" xmlns:cx6="http://schemas.microsoft.com/office/drawing/2016/5/12/chartex" xmlns:cx7="http://schemas.microsoft.com/office/drawing/2016/5/13/chartex" xmlns:cx8="http://schemas.microsoft.com/office/drawing/2016/5/14/chartex"
        xmlns:mc="http://schemas.openxmlformats.org/markup-compatibility/2006" xmlns:aink="http://schemas.microsoft.com/office/drawing/2016/ink" xmlns:am3d="http://schemas.microsoft.com/office/drawing/2017/model3d"
        xmlns:o="urn:schemas-microsoft-com:office:office"
        xmlns:r="http://schemas.openxmlformats.org/officeDocument/2006/relationships" xmlns:m="http://schemas.openxmlformats.org/officeDocument/2006/math"
        xmlns:v="urn:schemas-microsoft-com:vml"
        xmlns:wp14="http://schemas.microsoft.com/office/word/2010/wordprocessingDrawing" xmlns:wp="http://schemas.openxmlformats.org/drawingml/2006/wordprocessingDrawing"
        xmlns:w10="urn:schemas-microsoft-com:office:word"
        xmlns:w="http://schemas.openxmlformats.org/wordprocessingml/2006/main" xmlns:w14="http://schemas.microsoft.com/office/word/2010/wordml"
        xmlns:w15="http://schemas.microsoft.com/office/word/2012/wordml" xmlns:w16cex="http://schemas.microsoft.com/office/word/2018/wordml/cex" xmlns:w16cid="http://schemas.microsoft.com/office/word/2016/wordml/cid" xmlns:w16="http://schemas.microsoft.com/office/word/2018/wordml" xmlns:w16sdtdh="http://schemas.microsoft.com/office/word/2020/wordml/sdtdatahash" xmlns:w16se="http://schemas.microsoft.com/office/word/2015/wordml/symex" xmlns:wpg="http://schemas.microsoft.com/office/word/2010/wordprocessingGroup" xmlns:wpi="http://schemas.microsoft.com/office/word/2010/wordprocessingInk" xmlns:wne="http://schemas.microsoft.com/office/word/2006/wordml" xmlns:wps="http://schemas.microsoft.com/office/word/2010/wordprocessingShape">
    <w:sdtPr>
        <w:id w:val="-1126775779"/>
        <w:docPartObj>
            <w:docPartGallery w:val="Watermarks"/>
            <w:docPartUnique/>
        </w:docPartObj>
    </w:sdtPr>
    <w:sdtContent>
        <w:p w14:paraId="41319DAD" w14:textId="4534348F" w:rsidR="006868D8" w:rsidRDefault="006868D8">
            <w:pPr>
                <w:pStyle w:val="Header"/>
            </w:pPr>
            <w:r>
                <w:rPr>
                    <w:noProof/>
                </w:rPr>
                <w:pict w14:anchorId="63C3AA3C">
                    <v:shapetype id="_x0000_t136" coordsize="21600,21600" o:spt="136" adj="10800" path="m@7,l@8,m@5,21600l@6,21600e">
                        <v:formulas>
                            <v:f eqn="sum #0 0 10800"/>
                            <v:f eqn="prod #0 2 1"/>
                            <v:f eqn="sum 21600 0 @1"/>
                            <v:f eqn="sum 0 0 @2"/>
                            <v:f eqn="sum 21600 0 @3"/>
                            <v:f eqn="if @0 @3 0"/>
                            <v:f eqn="if @0 21600 @1"/>
                            <v:f eqn="if @0 0 @2"/>
                            <v:f eqn="if @0 @4 21600"/>
                            <v:f eqn="mid @5 @6"/>
                            <v:f eqn="mid @8 @5"/>
                            <v:f eqn="mid @7 @8"/>
                            <v:f eqn="mid @6 @7"/>
                            <v:f eqn="sum @6 0 @5"/>
                        </v:formulas>
                        <v:path textpathok="t" o:connecttype="custom" o:connectlocs="@9,0;@10,10800;@11,21600;@12,10800" o:connectangles="270,180,90,0"/>
                        <v:textpath on="t" fitshape="t"/>
                        <v:handles>
                            <v:h position="#0,bottomRight" xrange="6629,14971"/>
                        </v:handles>
                        <o:lock v:ext="edit" text="t" shapetype="t"/>
                    </v:shapetype>
                    <v:shape id="PowerPlusWaterMarkObject357476642" o:spid="_x0000_s1025" type="#_x0000_t136" style="position:absolute;margin-left:0;margin-top:0;width:527.85pt;height:131.95pt;rotation:315;z-index:-251657216;mso-position-horizontal:center;mso-position-horizontal-relative:margin;mso-position-vertical:center;mso-position-vertical-relative:margin" o:allowincell="f" fillcolor="silver" stroked="f">
                        <v:fill opacity=".5"/>
                        <v:textpath style="font-family:&quot;Calibri&quot;;font-size:1pt" string="CONFIDENTIAL"/>
                        <w10:wrap anchorx="margin" anchory="margin"/>
                    </v:shape>
                </w:pict>
            </w:r>
        </w:p>
    </w:sdtContent>
</xml-fragment>

输出

/Library/Java/JavaVirtualMachines/jdk-11.0.13.jdk/Contents/Home/bin/java -javaagent:/private/var/folders/5_/5blqbh750wsc9pc01bn0lgph0000gn/T/AppTranslocation/3F078016-ECB8-463C-BEBA-5701DF97E1A5/d/IntelliJ IDEA CE.app/Contents/lib/idea_rt.jar=54386:/private/var/folders/5_/5blqbh750wsc9pc01bn0lgph0000gn/T/AppTranslocation/3F078016-ECB8-463C-BEBA-5701DF97E1A5/d/IntelliJ IDEA CE.app/Contents/bin -Dfile.encoding=UTF-8 -classpath /Users/sahibyar/Dailytasks/test-project/target/classes:/Users/sahibyar/.m2/repository/org/apache/xmlbeans/xmlbeans/3.1.0/xmlbeans-3.1.0.jar:/Users/sahibyar/.m2/repository/org/apache/xmlbeans/xmlbeans-xpath/2.4.0/xmlbeans-xpath-2.4.0.jar:/Users/sahibyar/.m2/repository/net/sf/saxon/saxon/8.5.1/saxon-8.5.1.jar:/Users/sahibyar/.m2/repository/net/sf/saxon/saxon-xpath/8.7/saxon-xpath-8.7.jar:/Users/sahibyar/.m2/repository/net/sf/saxon/saxon-dom/8.7/saxon-dom-8.7.jar main
Exception in thread "main" java.lang.RuntimeException:  Trying XBeans path engine... Trying XQRL... Trying XDK... Trying delegated path engine... FAILED on declare namespace v='urn:schemas-microsoft-com:vml' data(//v:shape[contains(@id,'PowerPlusWaterMarkObject')]/v:textpath/@string)
    at org.apache.xmlbeans.impl.store.Path.getCompiledPath(Path.java:217)
    at org.apache.xmlbeans.impl.store.Path.getCompiledPath(Path.java:143)
    at org.apache.xmlbeans.impl.store.Cursor._selectPath(Cursor.java:902)
    at org.apache.xmlbeans.impl.store.Cursor.selectPath(Cursor.java:2634)
    at org.apache.xmlbeans.impl.values.XmlObjectBase.selectPath(XmlObjectBase.java:476)
    at org.apache.xmlbeans.impl.values.XmlObjectBase.selectPath(XmlObjectBase.java:460)
    at main.main(main.java:14)

Process finished with exit code 1

pom.xml

<?xml version="1.0" encoding="UTF-8"?>
<project xmlns="http://maven.apache.org/POM/4.0.0"
         xmlns:xsi="http://www.w3.org/2001/XMLSchema-instance"
         xsi:schemaLocation="http://maven.apache.org/POM/4.0.0 http://maven.apache.org/xsd/maven-4.0.0.xsd">
    <modelVersion>4.0.0</modelVersion>

    <groupId>org.example</groupId>
    <artifactId>test-project</artifactId>
    <version>1.0-SNAPSHOT</version>
    <dependencies>
        <dependency>
            <groupId>org.apache.xmlbeans</groupId>
            <artifactId>xmlbeans</artifactId>
            <version>3.1.0</version>
        </dependency>
    </dependencies>

    <properties>
        <maven.compiler.source>11</maven.compiler.source>
        <maven.compiler.target>11</maven.compiler.target>
    </properties>

</project>

declare namespace 可以在 XQuery 中使用,但不能在 XPath 中使用。因此,要么使用 BaseX 或 Saxon 9/10 这样的 XQuery 处理器,要么查阅你所选 XPath 处理器的 API:XPath API 通常会提供把前缀绑定到命名空间 URI 的方法,这样就可以在 XPath 表达式中使用该前缀。

XMLBeans 默认不支持复杂的 XPath,包括 data() 函数和谓词过滤器等。要支持这些功能,需要类路径中存在 Saxon。

For XMLBeans 3.x.x, we need to manually download Saxon 9.0.0.4j and add it to the classpath.

For XMLBeans 5.x.x (latest), we need Saxon-HE 10.x, which can be added directly to the pom file as follows:

<dependencies>
    <!-- XMLBeans 5.x: the XML binding library whose selectPath() is used in the Java code. -->
    <dependency>
        <groupId>org.apache.xmlbeans</groupId>
        <artifactId>xmlbeans</artifactId>
        <version>5.0.1</version>
    </dependency>
    <!-- NOTE(review): presumably the logging backend used by XMLBeans 5.x; confirm it is required. -->
    <!-- NOTE(review): log4j-core 2.14.1 is within the range affected by CVE-2021-44228 (Log4Shell); -->
    <!-- consider a patched release, keeping log4j-api on the same version. -->
    <dependency>
        <groupId>org.apache.logging.log4j</groupId>
        <artifactId>log4j-core</artifactId>
        <version>2.14.1</version>
    </dependency>
    <!-- Saxon-HE 10.x: the engine XMLBeans 5.x relies on for complex XPath (data(), predicates). -->
    <dependency>
        <groupId>net.sf.saxon</groupId>
        <artifactId>Saxon-HE</artifactId>
        <version>10.6</version>
    </dependency>
</dependencies>

Source : https://xmlbeans.apache.org/documentation/conInstallGuide.html

其次,我们需要在解析之前(手动或在代码中)把根元素 <xml-fragment> 的开始和结束标记替换为其他标记名称,例如 <root>;否则 XMLBeans 会把它当作文档片段(document fragment)而不是文档(document)类型来解析,带谓词的 XPath 将无法工作。

最后,假设我们已经添加了 Saxon 并处理了 <xml-fragment> 标签,要输出 CONFIDENTIAL 这个值,只需将结果强制转换为 SimpleValue 并调用其 getStringValue() 方法。

下面是修改后的代码:

import org.apache.xmlbeans.SimpleValue;
import org.apache.xmlbeans.XmlException;
import org.apache.xmlbeans.XmlObject;
import java.io.File;
import java.io.IOException;

/**
 * Extracts the watermark text from {@code test.xml} with an XMLBeans path query
 * and prints it as a plain string.
 *
 * <p>The query uses {@code data()} and a predicate, so an external XPath/XQuery
 * engine (Saxon) must be on the classpath for {@code selectPath} to compile it.
 */
public class main {
    /**
     * Parses {@code test.xml} from the working directory, selects the
     * {@code string} attribute of the watermark's {@code v:textpath} element and
     * prints its value. Prints a diagnostic instead of failing when nothing matches.
     *
     * @param args unused
     */
    public static void main(String[] args) {
        File file = new File("test.xml");
        try {
            XmlObject xoobj = XmlObject.Factory.parse(file);
            var declareNameSpaces = "declare namespace v='urn:schemas-microsoft-com:vml' ";
            final String xpathFilter = "data(//v:shape[contains(@id,'PowerPlusWaterMarkObject')]/v:textpath/@string)";

            XmlObject[] arr = xoobj.selectPath(declareNameSpaces + xpathFilter);

            // Reading arr[0] from an empty result would throw
            // ArrayIndexOutOfBoundsException, so report the miss explicitly.
            if (arr == null || arr.length == 0) {
                System.err.println("No match for: " + xpathFilter);
                return;
            }

            // Cast to SimpleValue so the raw text is printed rather than
            // the XML serialization produced by toString().
            SimpleValue result = (SimpleValue) arr[0];
            System.out.println(result.getStringValue());

        } catch (XmlException | IOException ex) {
            System.out.println(ex);
        }
    }
}

以下 XPath 解决了我的问题。

final var xpathFilter = "*//v:shape/v:textpath/@string"