使用 xslt 根据子字符串重构 XML

Restructure XML based on substring using xslt

我有一个问题,不确定能否实现。我已经在 Stack 上搜索了相关页面,但仍然没有头绪。

我有以下输入 XML:

<?xml version="1.0" encoding="windows-1252"?>
<files count="5" filemask="*.*">
    <folder>D:\PlanetPress\PPWorkdir\debug\output210107\DAGSET20210107\
        <file>
            <filename>AB001_AA107_00024788_1_02001_2001_Brief.pdf</filename>
        </file>
        <file>
            <filename>AB001_AA107_00024788_2_02001_2001_NotaXML.pdf</filename>
        </file>
        <file>
            <filename>AB001_AA107_00024788_5_02001_02001_Prov.pdf</filename>
        </file>
        <file>
            <filename>ZZ001_AA117_00030393_1_80001__Brief.pdf</filename>
        </file>
        <file>
            <filename>ZZ001_AA117_00030393_2_80001__NotaXML.pdf</filename>
        </file>
    </folder>
</files>

我想将其更改为

<files>
    <ID>AB001_AA107_00024788
        <filename>AB001_AA107_00024788_1_02001_2001_Brief.pdf</filename>
        <filename>AB001_AA107_00024788_2_02001_2001_NotaXML.pdf</filename>
        <filename>AB001_AA107_00024788_5_02001_02001_Prov.pdf</filename>
    </ID>
    <ID>ZZ001_AA117_00030393
        <filename>ZZ001_AA117_00030393_1_80001__Brief.pdf</filename>
        <filename>ZZ001_AA117_00030393_2_80001__NotaXML.pdf</filename>
    </ID>
</files>

这个例子有5条记录,但可以是900条甚至更多。

目前我已经做到把 ID 拆分出来,使用的样式表如下:

<?xml version="1.0" encoding="UTF-8"?>
<!--
    Flattens /files/folder into a list of <attachment> elements, one per <file>.
    Each attachment carries:
      fileid   : the first 20 characters of the file name
      filename : the complete file name
    The folder element itself (including its path text) is not copied;
    every other node is passed through unchanged by the identity rule.
-->
<xsl:stylesheet xmlns:xsl="http://www.w3.org/1999/XSL/Transform" version="2.0">
    <xsl:output method="xml" version="1.0" encoding="UTF-8" indent="yes"/>

    <!-- Identity rule: copy any node that has no more specific template. -->
    <xsl:template match="@*|node()">
        <xsl:copy>
            <xsl:apply-templates select="@*|node()"/>
        </xsl:copy>
    </xsl:template>

    <!-- Replace the folder by one attachment per contained file. -->
    <xsl:template match="/files/folder">
        <xsl:apply-templates select="file" mode="attachment"/>
    </xsl:template>

    <!-- Build the attachment record for a single file. -->
    <xsl:template match="file" mode="attachment">
        <attachment>
            <fileid>
                <xsl:apply-templates select="filename/text()" mode="id-prefix"/>
            </fileid>
            <filename>
                <!-- Copies every text node of the filename child(ren) as-is. -->
                <xsl:copy-of select="filename/text()"/>
            </filename>
        </attachment>
    </xsl:template>

    <!-- Emit the 20-character prefix; separate multiple prefixes with commas. -->
    <xsl:template match="text()" mode="id-prefix">
        <xsl:value-of select="substring(., 1, 20)"/>
        <xsl:if test="position() != last()">,</xsl:if>
    </xsl:template>
</xsl:stylesheet>

这给了我

<files count="127" filemask="*.*">
    <attachment>
        <fileid>GH001_GH107_00024788</fileid>
        <filename>GH001_GH107_00024788_1_02001_2001_Brief.pdf</filename>
    </attachment>
    <attachment>
        <fileid>GH001_GH107_00024788</fileid>
        <filename>GH001_GH107_00024788_2_02001_2001_NotaXML.pdf</filename>
    </attachment>
    <attachment>
        <fileid>GH001_GH107_00024788</fileid>
        <filename>GH001_GH107_00024788_5_02001_02001_Prov.pdf</filename>
    </attachment>
    <attachment>
        <fileid>GH001_GH117_00030393</fileid>
        <filename>GH001_GH117_00030393_1_80001.pdf</filename>
    </attachment>
    <attachment>
        <fileid>GH001_GH117_00030393</fileid>
        <filename>GH001_GH117_00030393_2_80001__Notam_XML.pdf</filename>
    </attachment>

希望大家多多指教,即使用 Muenchian 分组法我也搞不定。我使用的第二个 XSLT 样式表:

<?xml version="1.0" encoding="UTF-8"?>
<!--
    Muenchian grouping of /files/attachment elements by their fileid child.
    Output: a <File> root that contains, for each distinct fileid, a <Name>
    holding the fileid followed by a <Bestanden> element listing the filename
    of every attachment that shares that fileid.
-->
<xsl:stylesheet xmlns:xsl="http://www.w3.org/1999/XSL/Transform"
                version="1.0">
    <xsl:output method="xml" indent="yes"/>

    <!-- Index every attachment by the string value of its fileid child. -->
    <xsl:key name="groups" match="/files/attachment" use="fileid"/>

    <xsl:template match="/">
        <File>
            <!-- Visit only the first attachment of each fileid group. -->
            <xsl:apply-templates
                select="/files/attachment[generate-id() = generate-id(key('groups', fileid)[1])]"/>
        </File>
    </xsl:template>

    <xsl:template match="attachment">
        <Name><xsl:value-of select="fileid"/></Name>
        <Bestanden>
            <!--
                The key is built on fileid, so the lookup value must be fileid
                as well. Looking up by filename matches no key value and
                produces an empty <Bestanden/> for every group.
            -->
            <xsl:for-each select="key('groups', fileid)">
                <Name><xsl:value-of select="filename"/></Name>
            </xsl:for-each>
        </Bestanden>
    </xsl:template>
</xsl:stylesheet>

给我.....

<File>
<Name>GH001_GH107_00024788</Name>
    <Bestanden/>
<Name>GH001_GH117_00030393</Name>
    <Bestanden/>
<Name>GH001_GH118_00024722</Name>
    <Bestanden/>
<Name>GH001_GH118_00025554</Name>
    <Bestanden/>
'''

您的样式表显示 version="2.0"。在 XSLT 2.0 中,这样做很简单:

<xsl:stylesheet xmlns:xsl="http://www.w3.org/1999/XSL/Transform"
                version="2.0">
    <!--
        Regroups all folder/file/filename elements under /files into <ID>
        elements. Filenames that share the same leading characters (see
        $id-length) end up in the same <ID>, whose text is that shared prefix.
    -->
    <xsl:output method="xml" version="1.0" encoding="UTF-8" indent="yes"/>
    <xsl:strip-space elements="*"/>

    <!-- Number of leading filename characters that form the grouping ID. -->
    <xsl:variable name="id-length" select="20"/>

    <xsl:template match="/files">
        <xsl:copy>
            <xsl:for-each-group select="folder/file/filename"
                                group-by="substring(., 1, $id-length)">
                <ID>
                    <!-- Prefix text first, then a copy of each filename in the group. -->
                    <xsl:value-of select="current-grouping-key()"/>
                    <xsl:sequence select="current-group()"/>
                </ID>
            </xsl:for-each-group>
        </xsl:copy>
    </xsl:template>

</xsl:stylesheet>