如何正确设置 SerDe XML 架构?
How do I set the SerDe XML schema correctly?
我有如下 XML:
<AssetCrossReferences Ordered="false">
<AssetCrossReference AssetID="F7961393-01" Type="Primary Image"/>
<AssetCrossReference AssetID="M0504-01" Type="Vendor Logo"/>
<AssetCrossReference AssetID="F7961393-02" Type="Colour Photograph"/>
</AssetCrossReferences><Specification Ordered="true">
我希望最终结果如下所示:
AssetID:F7961393-01, Type:Primary Image
AssetID:M0504-01, Type:Vendor Logo
AssetID:F7961393-02, Type:Colour Photograph
我该怎么做?
使用结构体(STRUCT)类型来定义列:
-- External Hive table over raw XML files, parsed by the IBM SPSS XML SerDe.
-- The input format cuts the file into fragments delimited by the
-- xmlinput.start / xmlinput.end markers below; each fragment becomes one row.
--
-- The XPath for `asset` matches EVERY <AssetCrossReference> child of the
-- fragment, so the column is declared as an ARRAY of structs rather than a
-- single STRUCT. The struct fields AssetID and Type are expected to be filled
-- from the XML attributes of the same names (see the SerDe's mapping rules).
--
-- To get one output row per asset, explode the array:
--   select a.AssetID, a.Type
--   from test lateral view explode(asset) exploded as a;
create external table test
(
    asset ARRAY<STRUCT<AssetID:STRING,Type:STRING>>
)
ROW FORMAT SERDE 'com.ibm.spss.hive.serde2.xml.XmlSerDe'
with serdeproperties
(
    "column.xpath.asset"="/AssetCrossReferences/AssetCrossReference"
)
stored as inputformat "com.ibm.spss.hive.serde2.xml.XmlInputFormat"
outputformat "org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat"
-- Placeholder path: replace with the directory that holds the XML files.
location "file:///yourfilepath"
tblproperties
(
    "xmlinput.start"="<AssetCrossReferences",
    "xmlinput.end"="</AssetCrossReferences>"
);
然后
-- Read back every row of the XML-backed table to check the SerDe mapping.
SELECT *
FROM test;
我有如下 XML:
<AssetCrossReferences Ordered="false">
<AssetCrossReference AssetID="F7961393-01" Type="Primary Image"/>
<AssetCrossReference AssetID="M0504-01" Type="Vendor Logo"/>
<AssetCrossReference AssetID="F7961393-02" Type="Colour Photograph"/>
</AssetCrossReferences><Specification Ordered="true">
我希望最终结果如下所示:
AssetID:F7961393-01, Type:Primary Image
AssetID:M0504-01, Type:Vendor Logo
AssetID:F7961393-02, Type:Colour Photograph
我该怎么做?
使用结构体(STRUCT)类型来定义列:
-- External Hive table over raw XML files, parsed by the IBM SPSS XML SerDe.
-- The input format cuts the file into fragments delimited by the
-- xmlinput.start / xmlinput.end markers below; each fragment becomes one row.
--
-- The XPath for `asset` matches EVERY <AssetCrossReference> child of the
-- fragment, so the column is declared as an ARRAY of structs rather than a
-- single STRUCT. The struct fields AssetID and Type are expected to be filled
-- from the XML attributes of the same names (see the SerDe's mapping rules).
--
-- To get one output row per asset, explode the array:
--   select a.AssetID, a.Type
--   from test lateral view explode(asset) exploded as a;
create external table test
(
    asset ARRAY<STRUCT<AssetID:STRING,Type:STRING>>
)
ROW FORMAT SERDE 'com.ibm.spss.hive.serde2.xml.XmlSerDe'
with serdeproperties
(
    "column.xpath.asset"="/AssetCrossReferences/AssetCrossReference"
)
stored as inputformat "com.ibm.spss.hive.serde2.xml.XmlInputFormat"
outputformat "org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat"
-- Placeholder path: replace with the directory that holds the XML files.
location "file:///yourfilepath"
tblproperties
(
    "xmlinput.start"="<AssetCrossReferences",
    "xmlinput.end"="</AssetCrossReferences>"
);
然后
-- Read back every row of the XML-backed table to check the SerDe mapping.
SELECT *
FROM test;