Excel XML 到 Java 对象

Excel XML to Java Object

我有一个 xml 文件要用 Excel 打开的情况。一位客户向我发送了这种类型的 xml:

<?xml version="1.0"?>
<?mso-application progid="Excel.Sheet"?>
<Workbook xmlns="urn:schemas-microsoft-com:office:spreadsheet"
 xmlns:o="urn:schemas-microsoft-com:office:office"
 xmlns:x="urn:schemas-microsoft-com:office:excel"
 xmlns:ss="urn:schemas-microsoft-com:office:spreadsheet"
 xmlns:html="http://www.w3.org/TR/REC-html40">
 <DocumentProperties xmlns="urn:schemas-microsoft-com:office:office">
  <LastAuthor>Microsoft Office User</LastAuthor>
  <Created>1996-10-14T23:33:28Z</Created>
  <LastSaved>2021-03-10T13:02:44Z</LastSaved>
  <Version>16.00</Version>
 </DocumentProperties>
 <OfficeDocumentSettings xmlns="urn:schemas-microsoft-com:office:office">
  <AllowPNG/>
 </OfficeDocumentSettings>
 <ExcelWorkbook xmlns="urn:schemas-microsoft-com:office:excel">
  <WindowHeight>15940</WindowHeight>
  <WindowWidth>28040</WindowWidth>
  <WindowTopX>380</WindowTopX>
  <WindowTopY>500</WindowTopY>
  <ProtectStructure>False</ProtectStructure>
  <ProtectWindows>False</ProtectWindows>
 </ExcelWorkbook>
 <Styles>
  <Style ss:ID="Default" ss:Name="Normal">
   <Alignment ss:Vertical="Bottom"/>
   <Borders/>
   <Font ss:FontName="Calibri" x:Family="Swiss" ss:Size="12" ss:Color="#000000"/>
   <Interior/>
   <NumberFormat/>
   <Protection/>
  </Style>
  <Style ss:ID="s62">
   <Alignment ss:Vertical="Bottom"/>
   <Borders/>
   <Font ss:FontName="Arial"/>
   <Interior/>
   <NumberFormat/>
   <Protection/>
  </Style>
  <Style ss:ID="s63">
   <Alignment ss:Vertical="Bottom"/>
   <Borders/>
   <Font ss:FontName="Trebuchet MS" ss:Color="#000000" ss:Bold="1"/>
   <Interior ss:Color="#D3D3D3" ss:Pattern="Solid"/>
   <NumberFormat/>
   <Protection/>
  </Style>
  <Style ss:ID="s64">
   <Alignment ss:Vertical="Bottom"/>
   <Borders/>
   <Font ss:FontName="Trebuchet MS" ss:Color="#000000"/>
   <Interior/>
   <NumberFormat/>
   <Protection/>
  </Style>
  <Style ss:ID="s65">
   <Alignment ss:Vertical="Bottom"/>
   <Borders/>
   <Font ss:FontName="Trebuchet MS" ss:Color="#993300" ss:Bold="1"/>
   <Interior ss:Color="#FFFF00" ss:Pattern="Solid"/>
   <NumberFormat/>
   <Protection/>
  </Style>
 </Styles>
 <Worksheet ss:Name="SITES_INFO">
  <Table ss:ExpandedColumnCount="36" ss:ExpandedRowCount="2" x:FullColumns="1"
   x:FullRows="1" ss:StyleID="s62" ss:DefaultColumnWidth="65"
   ss:DefaultRowHeight="13">
   <Column ss:StyleID="s62" ss:AutoFitWidth="0" ss:Width="75" ss:Span="4"/>
   <Column ss:Index="6" ss:StyleID="s62" ss:AutoFitWidth="0" ss:Width="175"/>
   <Column ss:StyleID="s62" ss:AutoFitWidth="0" ss:Width="90"/>
   <Column ss:StyleID="s62" ss:AutoFitWidth="0" ss:Width="225"/>
   <Column ss:StyleID="s62" ss:AutoFitWidth="0" ss:Width="200"/>
   <Column ss:StyleID="s62" ss:AutoFitWidth="0" ss:Width="225"/>
   <Column ss:StyleID="s62" ss:AutoFitWidth="0" ss:Width="100" ss:Span="1"/>
   <Column ss:Index="13" ss:StyleID="s62" ss:AutoFitWidth="0" ss:Width="125"/>
   <Column ss:StyleID="s62" ss:AutoFitWidth="0" ss:Width="100"/>
   <Column ss:StyleID="s62" ss:AutoFitWidth="0" ss:Width="175"/>
   <Column ss:StyleID="s62" ss:AutoFitWidth="0" ss:Width="125" ss:Span="2"/>
   <Column ss:Index="19" ss:StyleID="s62" ss:AutoFitWidth="0" ss:Width="175"/>
   <Column ss:StyleID="s62" ss:AutoFitWidth="0" ss:Width="160"/>
   <Column ss:StyleID="s62" ss:AutoFitWidth="0" ss:Width="150"/>
   <Column ss:StyleID="s62" ss:AutoFitWidth="0" ss:Width="160" ss:Span="1"/>
   <Column ss:Index="24" ss:StyleID="s62" ss:AutoFitWidth="0" ss:Width="250"/>
   <Column ss:StyleID="s62" ss:AutoFitWidth="0" ss:Width="200" ss:Span="1"/>
   <Column ss:Index="27" ss:StyleID="s62" ss:AutoFitWidth="0" ss:Width="160"
    ss:Span="1"/>
   <Column ss:Index="29" ss:StyleID="s62" ss:AutoFitWidth="0" ss:Width="175"/>
   <Column ss:StyleID="s62" ss:AutoFitWidth="0" ss:Width="160"/>
   <Column ss:StyleID="s62" ss:AutoFitWidth="0" ss:Width="215"/>
   <Column ss:StyleID="s62" ss:AutoFitWidth="0" ss:Width="175"/>
   <Column ss:StyleID="s62" ss:AutoFitWidth="0" ss:Width="160" ss:Span="1"/>
   <Column ss:Index="35" ss:StyleID="s62" ss:AutoFitWidth="0" ss:Width="200"/>
   <Column ss:StyleID="s62" ss:AutoFitWidth="0" ss:Width="100"/>
   <Row ss:AutoFitHeight="0" ss:Height="20">
    <Cell ss:StyleID="s63"><Data ss:Type="String">Protocol</Data></Cell>
    <Cell ss:StyleID="s63"><Data ss:Type="String">Site Number</Data></Cell>
    <Cell ss:StyleID="s63"><Data ss:Type="String">PI Salutation</Data></Cell>
    <Cell ss:StyleID="s63"><Data ss:Type="String">PI Last Name</Data></Cell>
    <Cell ss:StyleID="s63"><Data ss:Type="String">PI First Name</Data></Cell>
    <Cell ss:StyleID="s63"><Data ss:Type="String">PI Email</Data></Cell>
    <Cell ss:StyleID="s63"><Data ss:Type="String">PI Phone Number</Data></Cell>
    <Cell ss:StyleID="s63"><Data ss:Type="String">Drug Shipment Name</Data></Cell>
    <Cell ss:StyleID="s63"><Data ss:Type="String">Drug Shipment Address 1</Data></Cell>
    <Cell ss:StyleID="s63"><Data ss:Type="String">Drug Shipment Address 2</Data></Cell>
    <Cell ss:StyleID="s63"><Data ss:Type="String">Drug Shipment City</Data></Cell>
    <Cell ss:StyleID="s63"><Data ss:Type="String">Drug Shipment State</Data></Cell>
    <Cell ss:StyleID="s63"><Data ss:Type="String">Drug Shipment Province</Data></Cell>
    <Cell ss:StyleID="s63"><Data ss:Type="String">Drug Shipment Zip</Data></Cell>
    <Cell ss:StyleID="s63"><Data ss:Type="String">Drug Shipment Country ISO Code</Data></Cell>
    <Cell ss:StyleID="s63"><Data ss:Type="String">Drug Shipment Country</Data></Cell>
    <Cell ss:StyleID="s63"><Data ss:Type="String">Drug Shipment Phone</Data></Cell>
    <Cell ss:StyleID="s63"><Data ss:Type="String">Drug Shipment FAX</Data></Cell>
    <Cell ss:StyleID="s63"><Data ss:Type="String">Drug Shipment Contact First Name</Data></Cell>
    <Cell ss:StyleID="s63"><Data ss:Type="String">Drug Shipment Contact Last Name</Data></Cell>
    <Cell ss:StyleID="s63"><Data ss:Type="String">Drug Shipment Contact Email</Data></Cell>
    <Cell ss:StyleID="s63"><Data ss:Type="String">Drug Shipment Contact Phone</Data></Cell>
    <Cell ss:StyleID="s63"><Data ss:Type="String">Drug Shipment Contact Role</Data></Cell>
    <Cell ss:StyleID="s63"><Data ss:Type="String">Patient Treatment Center Name</Data></Cell>
    <Cell ss:StyleID="s63"><Data ss:Type="String">Patient Treatment Center Address 1</Data></Cell>
    <Cell ss:StyleID="s63"><Data ss:Type="String">Patient Treatment Center Address 2</Data></Cell>
    <Cell ss:StyleID="s63"><Data ss:Type="String">Patient Treatment Center City</Data></Cell>
    <Cell ss:StyleID="s63"><Data ss:Type="String">Patient Treatment Center State</Data></Cell>
    <Cell ss:StyleID="s63"><Data ss:Type="String">Patient Treatment Center Province</Data></Cell>
    <Cell ss:StyleID="s63"><Data ss:Type="String">Patient Treatment Center Zip</Data></Cell>
    <Cell ss:StyleID="s63"><Data ss:Type="String">Patient Treatment Center Country ISO Code</Data></Cell>
    <Cell ss:StyleID="s63"><Data ss:Type="String">Patient Treatment Center Country</Data></Cell>
    <Cell ss:StyleID="s63"><Data ss:Type="String">Patient Treatment Center Phone</Data></Cell>
    <Cell ss:StyleID="s63"><Data ss:Type="String">Patient Treatment Center Fax</Data></Cell>
    <Cell ss:StyleID="s63"><Data ss:Type="String">Patient Treatment Center TimeZone</Data></Cell>
    <Cell ss:StyleID="s63"><Data ss:Type="String">Transaction Flag</Data></Cell>
   </Row>
   <Row>
    <Cell ss:StyleID="s64"><Data ss:Type="String">fgregre</Data></Cell>
    <Cell ss:StyleID="s64"><Data ss:Type="String">0382</Data></Cell>
    <Cell ss:StyleID="s64"/>
    <Cell ss:StyleID="s64"><Data ss:Type="String">gregreger</Data></Cell>
    <Cell ss:StyleID="s64"><Data ss:Type="String">gergre</Data></Cell>
    <Cell ss:StyleID="s64"><Data ss:Type="String">vregregerge</Data></Cell>
    <Cell ss:StyleID="s64"><Data ss:Type="String">ergreger</Data></Cell>
    <Cell ss:StyleID="s64"><Data ss:Type="String">regreger</Data></Cell>
    <Cell ss:StyleID="s64"><Data ss:Type="String">8 Haaliya st</Data></Cell>
    <Cell ss:StyleID="s64"><Data ss:Type="String">vervregerge</Data></Cell>
    <Cell ss:StyleID="s64"><Data ss:Type="String">Haifa</Data></Cell>
    <Cell ss:StyleID="s64"/>
    <Cell ss:StyleID="s64"><Data ss:Type="String">N/A</Data></Cell>
    <Cell ss:StyleID="s64"><Data ss:Type="String">gergere</Data></Cell>
    <Cell ss:StyleID="s64"><Data ss:Type="String">ISR</Data></Cell>
    <Cell ss:StyleID="s64"><Data ss:Type="String">ISRAEL</Data></Cell>
    <Cell ss:StyleID="s64"><Data ss:Type="String">gergereg</Data></Cell>
    <Cell ss:StyleID="s64"><Data ss:Type="String">gergreger</Data></Cell>
    <Cell ss:StyleID="s64"><Data ss:Type="String">regerger</Data></Cell>
    <Cell ss:StyleID="s64"><Data ss:Type="String">regerge</Data></Cell>
    <Cell ss:StyleID="s64"><Data ss:Type="String">rgergerg</Data></Cell>
    <Cell ss:StyleID="s64"><Data ss:Type="String">ergerge</Data></Cell>
    <Cell ss:StyleID="s64"><Data ss:Type="String">ergerge</Data></Cell>
    <Cell ss:StyleID="s64"><Data ss:Type="String">Rambam Medical Center</Data></Cell>
    <Cell ss:StyleID="s64"><Data ss:Type="String">regergerge</Data></Cell>
    <Cell ss:StyleID="s64"><Data ss:Type="String">Hematology Department</Data></Cell>
    <Cell ss:StyleID="s64"><Data ss:Type="String">ergergre</Data></Cell>
    <Cell ss:StyleID="s64"/>
    <Cell ss:StyleID="s65"><Data ss:Type="String">N/A</Data></Cell>
    <Cell ss:StyleID="s64"><Data ss:Type="String">ergreegre</Data></Cell>
    <Cell ss:StyleID="s64"><Data ss:Type="String">ISR</Data></Cell>
    <Cell ss:StyleID="s64"><Data ss:Type="String">ergerger</Data></Cell>
    <Cell ss:StyleID="s64"><Data ss:Type="String">regergreger</Data></Cell>
    <Cell ss:StyleID="s64"><Data ss:Type="String">regregerge</Data></Cell>
    <Cell ss:StyleID="s64"><Data ss:Type="String">(GMT+02:00) Jerusalem</Data></Cell>
    <Cell ss:StyleID="s65"><Data ss:Type="String">UPDATE</Data></Cell>
   </Row>
  </Table>
  <WorksheetOptions xmlns="urn:schemas-microsoft-com:office:excel">
   <Selected/>
   <LeftColumnVisible>28</LeftColumnVisible>
   <Panes>
    <Pane>
     <Number>3</Number>
     <ActiveRow>5</ActiveRow>
     <ActiveCol>34</ActiveCol>
    </Pane>
   </Panes>
   <ProtectObjects>False</ProtectObjects>
   <ProtectScenarios>False</ProtectScenarios>
  </WorksheetOptions>
 </Worksheet>
 <Worksheet ss:Name="USER_INFO">
  <Table ss:ExpandedColumnCount="1" ss:ExpandedRowCount="1" x:FullColumns="1"
   x:FullRows="1" ss:StyleID="s62" ss:DefaultColumnWidth="65"
   ss:DefaultRowHeight="13">
  </Table>
  <WorksheetOptions xmlns="urn:schemas-microsoft-com:office:excel">
   <Panes>
    <Pane>
     <Number>3</Number>
     <ActiveRow>22</ActiveRow>
     <ActiveCol>7</ActiveCol>
    </Pane>
   </Panes>
   <ProtectObjects>False</ProtectObjects>
   <ProtectScenarios>False</ProtectScenarios>
  </WorksheetOptions>
 </Worksheet>
</Workbook>

我正在通过 sftp 接收此文件,我正在将其作为字符串读取,获取其内容。我尝试使用 Apache POI,认为我可以将其作为 xls 文件读取并将其映射到 class.

这是我试图阅读它的方式:

   private void processFileData(String stringData, String filename, String extension)
    {
        InputStream inp = IOUtils.toInputStream(stringData, StandardCharsets.UTF_8);
        List<MULSite> siteList = new ArrayList<MULSite>();

        try {
            inp = FileMagic.prepareToCheckMagic(inp);
            Workbook workbook = WorkbookFactory.create(inp);
            Iterator<Row> iterator = workbook.getSheet("SITES_INFO").iterator();
            while (iterator.hasNext())
            {
                MULSite site = new MULSite();
                site.parse(iterator.next(),"xls", filename);
                DtoValidatorResult isValid = site.validate();
                if(isValid.getValid())
                {
                    splittableDtoMap.put(site.getSiteNumber(), site.toString());
                }
                else {
                    throw new RetrieverException("Issue encountered at " + filename);
                }
            }
        } catch (IOException | RetrieverException | JAXBException e) {
            e.printStackTrace();
        }
    }

但是当它到达:Workbook workbook = WorkbookFactory.create(inp); 时它会抛出以下错误:java.lang.IllegalArgumentException: Your InputStream was neither an OLE2 stream, nor an OOXML stream

我对如何处理此类文件有点困惑,我尝试将扩展名从 .xml 更改为 .xls,Excel 可以两种方式完美打开它,但 Apache POI 不会接受吧,我不确定是文件的问题还是我用InputStream读取它的方式。

遗憾的是,这是不可能的,Apache POI 不支持 Office XML,它从未在库中实现,因为它很快被 Microsoft 弃用了。

我已经放弃尝试使用 Apache POI 执行此操作,并将使用 XML 解析器将其解析为 DOM。