XML 到 CSV Python
XML to CSV Python
该州的 XML 数据 (file.xml) 如下所示
<?xml version="1.0" encoding="UTF-8" standalone="true"?>
<Activity_Logs xsi:schemaLocation="http://www.cisco.com/PowerKEYDVB/Auditing
DailyActivityLog.xsd" To="2018-04-01" From="2018-04-01" xmlns:xsi="http://www.w3.org/2001/XMLSchema-instance" xmlns:xs="http://www.w3.org/2001/XMLSchema" xmlns="http://www.cisco.com/PowerKEYDVB/Auditing">
<ActivityRecord>
<time>2015-09-16T04:13:20Z</time>
<oper>Create_Product</oper>
<pkgEid>10</pkgEid>
<pkgName>BBCWRL</pkgName>
</ActivityRecord>
<ActivityRecord>
<time>2015-09-16T04:13:20Z</time>
<oper>Create_Product</oper>
<pkgEid>18</pkgEid>
<pkgName>CNNINT</pkgName>
</ActivityRecord>
以下 Python 代码用于解析上述 XML 文件并将其转换为 CSV。
# Script from the question: parse file.xml with ElementTree and write each
# <ActivityRecord> as one row of output.csv, preceded by a header row.
# NOTE(review): block indentation was lost in this listing; the bodies of the
# `for` loop and the `if` below are shown flush-left.
import csv
# NOTE(review): xml.etree.cElementTree was removed in Python 3.9; on current
# interpreters xml.etree.ElementTree is the module to import.
import xml.etree.cElementTree as ET
tree = ET.parse('file.xml')
root = tree.getroot()
# NOTE(review): opened without newline='' (which the csv module documentation
# recommends) and closed manually at the end instead of via a `with` block.
data_to_csv= open('output.csv','w')
list_head=[]
Csv_writer=csv.writer(data_to_csv)
count=0
# NOTE(review): the sample document declares a default namespace
# (xmlns="http://www.cisco.com/PowerKEYDVB/Auditing"), so the unqualified
# path 'ActivityRecord' matches no elements and the loop body never runs.
# That would explain why no CSV data is produced.
for elements in root.findall('ActivityRecord'):
List_node = []
# First record only: build the header row from the child tag names.
if count == 0 :
time = elements.find('time').tag
list_head.append(time)
oper = elements.find('oper').tag
list_head.append(oper)
pkgEid = elements.find('pkgEid').tag
list_head.append(pkgEid)
pkgName = elements.find('pkgName').tag
list_head.append(pkgName)
Csv_writer.writerow(list_head)
# `count = +1` assigns positive one (it is not `count += 1`); that is still
# enough to skip the header branch for every later record.
count = +1
# Every record: collect the text of the four child elements as one data row.
time = elements.find('time').text
List_node.append(time)
oper = elements.find('oper').text
List_node.append(oper)
pkgEid = elements.find('pkgEid').text
List_node.append(pkgEid)
pkgName = elements.find('pkgName').text
List_node.append(pkgName)
Csv_writer.writerow(List_node)
data_to_csv.close()
我使用的代码没有提供任何 CSV 数据。谁能告诉我到底哪里出错了?
使用 pandas 和 BeautifulSoup,您可以轻松实现预期的输出:
#Code:
import pandas as pd
import itertools
from bs4 import BeautifulSoup as b
# Build one CSV row per <ActivityRecord>. Collecting every tag into its own
# list and zipping the lists loses the record pairing: a record that lacks a
# child element would shift values of later records into the wrong row.
with open("file.xml", "r", encoding="utf-8") as f:  # sample declares UTF-8
    content = f.read()

# The "lxml" parser treats the input as HTML and lower-cases tag names,
# which is why every lookup below uses lower-case names.
soup = b(content, "lxml")

columns = ("time", "oper", "pkgeid", "pkgname")
data = []
for record in soup.find_all("activityrecord"):
    row = []
    for tag in columns:
        node = record.find(tag)
        # A missing child becomes None, which to_csv writes as an empty field.
        row.append(node.text if node is not None else None)
    data.append(row)

df = pd.DataFrame(data=data, columns=columns)
# No index column and no header line, matching the sample output shown below.
df.to_csv("sample.csv", index=False, header=False)
#output in `sample.csv` file will be as follows:
2015-09-16T04:13:20Z,Create_Product,10,BBCWRL
2015-09-16T04:13:20Z,Create_Product,18,CNNINT
2018-04-01T03:30:28Z,Deactivate_Dhct,,
找到最合适的方法:
import os
import pandas as pd
from bs4 import BeautifulSoup as b
# Convert every <ActivityRecord> found in the listed XML files into one row of
# new.csv, leaving a field empty when a record lacks that child element.

# The snippet never defined `files_xlm`; default to the sample file so the
# script runs as written. Extend the list to process more files.
files_xlm = ["file.xml"]

# (lower-case tag looked up in the parsed tree, column title in the CSV)
fields = [
    ("time", "Time"),
    ("oper", "Oper"),
    ("pkgeid", "PkgEid"),
    ("pkgname", "PkgName"),
    ("dhct", "dhct"),
    ("sourceid", "sourceid"),
]

rows = []
for each_file in files_xlm:
    with open(each_file, "r", encoding="utf-8") as f:  # opening xml file
        soup = b(f.read(), "lxml")
    for values in soup.find_all("activityrecord"):
        row = []
        for tag, _title in fields:
            node = values.find(tag)
            row.append("" if node is None else node.text)
        # Keep the values as a list instead of joining them with ',' and
        # splitting again: that round trip breaks on values containing commas.
        rows.append(row)

# Passing the column titles here also yields a valid, header-only CSV when no
# record was found.
df = pd.DataFrame(rows, columns=[title for _tag, title in fields])
df.to_csv("new.csv", index=False)
使用 Pandas,解析所有 XML 字段。
import xml.etree.ElementTree as ET
import pandas as pd
# Parse file.xml and write every child field of every record to file.csv,
# one row per direct child of the root element.
tree = ET.parse("file.xml")
root = tree.getroot()


def _local_name(tag):
    """Return *tag* without a leading '{namespace}' qualifier, if any."""
    # ElementTree reports namespaced tags as '{uri}name'; rpartition leaves
    # an unqualified tag unchanged.
    return tag.rpartition("}")[2]


# One dict per record; the keys become the CSV column headers.
l = [{_local_name(field.tag): field.text for field in record} for record in root]
df = pd.DataFrame(l)
df.to_csv('file.csv')
如果是一次性操作,请使用 pyxmlparser。
免责声明:我是该库的作者,它还相当新。欢迎任何反馈。它是一个命令行实用程序。
2021 年答案:
您可以使用 Pandas 读取 XML 并输出 CSV
https://pandas.pydata.org/pandas-docs/dev/whatsnew/v1.3.0.html#read-and-write-xml-documents
# Usage template for reading XML with pandas and writing it back out as CSV.
# The <...> items are placeholders to be replaced with real paths (or XML
# text), so this snippet is not runnable as written.
# read_xml is covered by the pandas 1.3.0 release notes linked above.
import pandas as pd
df = pd.read_xml(<xml_or_xml_filepath>)
# ...
df.to_csv(<csv_filepath>)
有关使用的更多详细信息,请参阅官方文档:
https://pandas.pydata.org/pandas-docs/dev/reference/api/pandas.read_xml.html
该州的 XML 数据 (file.xml) 如下所示
<?xml version="1.0" encoding="UTF-8" standalone="true"?>
<Activity_Logs xsi:schemaLocation="http://www.cisco.com/PowerKEYDVB/Auditing
DailyActivityLog.xsd" To="2018-04-01" From="2018-04-01" xmlns:xsi="http://www.w3.org/2001/XMLSchema-instance" xmlns:xs="http://www.w3.org/2001/XMLSchema" xmlns="http://www.cisco.com/PowerKEYDVB/Auditing">
<ActivityRecord>
<time>2015-09-16T04:13:20Z</time>
<oper>Create_Product</oper>
<pkgEid>10</pkgEid>
<pkgName>BBCWRL</pkgName>
</ActivityRecord>
<ActivityRecord>
<time>2015-09-16T04:13:20Z</time>
<oper>Create_Product</oper>
<pkgEid>18</pkgEid>
<pkgName>CNNINT</pkgName>
</ActivityRecord>
以下 Python 代码用于解析上述 XML 文件并将其转换为 CSV。
# Script from the question: parse file.xml with ElementTree and write each
# <ActivityRecord> as one row of output.csv, preceded by a header row.
# NOTE(review): block indentation was lost in this listing; the bodies of the
# `for` loop and the `if` below are shown flush-left.
import csv
# NOTE(review): xml.etree.cElementTree was removed in Python 3.9; on current
# interpreters xml.etree.ElementTree is the module to import.
import xml.etree.cElementTree as ET
tree = ET.parse('file.xml')
root = tree.getroot()
# NOTE(review): opened without newline='' (which the csv module documentation
# recommends) and closed manually at the end instead of via a `with` block.
data_to_csv= open('output.csv','w')
list_head=[]
Csv_writer=csv.writer(data_to_csv)
count=0
# NOTE(review): the sample document declares a default namespace
# (xmlns="http://www.cisco.com/PowerKEYDVB/Auditing"), so the unqualified
# path 'ActivityRecord' matches no elements and the loop body never runs.
# That would explain why no CSV data is produced.
for elements in root.findall('ActivityRecord'):
List_node = []
# First record only: build the header row from the child tag names.
if count == 0 :
time = elements.find('time').tag
list_head.append(time)
oper = elements.find('oper').tag
list_head.append(oper)
pkgEid = elements.find('pkgEid').tag
list_head.append(pkgEid)
pkgName = elements.find('pkgName').tag
list_head.append(pkgName)
Csv_writer.writerow(list_head)
# `count = +1` assigns positive one (it is not `count += 1`); that is still
# enough to skip the header branch for every later record.
count = +1
# Every record: collect the text of the four child elements as one data row.
time = elements.find('time').text
List_node.append(time)
oper = elements.find('oper').text
List_node.append(oper)
pkgEid = elements.find('pkgEid').text
List_node.append(pkgEid)
pkgName = elements.find('pkgName').text
List_node.append(pkgName)
Csv_writer.writerow(List_node)
data_to_csv.close()
我使用的代码没有提供任何 CSV 数据。谁能告诉我到底哪里出错了?
使用 pandas 和 BeautifulSoup,您可以轻松实现预期的输出:
#Code:
import pandas as pd
import itertools
from bs4 import BeautifulSoup as b
# Build one CSV row per <ActivityRecord>. Collecting every tag into its own
# list and zipping the lists loses the record pairing: a record that lacks a
# child element would shift values of later records into the wrong row.
with open("file.xml", "r", encoding="utf-8") as f:  # sample declares UTF-8
    content = f.read()

# The "lxml" parser treats the input as HTML and lower-cases tag names,
# which is why every lookup below uses lower-case names.
soup = b(content, "lxml")

columns = ("time", "oper", "pkgeid", "pkgname")
data = []
for record in soup.find_all("activityrecord"):
    row = []
    for tag in columns:
        node = record.find(tag)
        # A missing child becomes None, which to_csv writes as an empty field.
        row.append(node.text if node is not None else None)
    data.append(row)

df = pd.DataFrame(data=data, columns=columns)
# No index column and no header line, matching the sample output shown below.
df.to_csv("sample.csv", index=False, header=False)
#output in `sample.csv` file will be as follows:
2015-09-16T04:13:20Z,Create_Product,10,BBCWRL
2015-09-16T04:13:20Z,Create_Product,18,CNNINT
2018-04-01T03:30:28Z,Deactivate_Dhct,,
找到最合适的方法:
import os
import pandas as pd
from bs4 import BeautifulSoup as b
# Convert every <ActivityRecord> found in the listed XML files into one row of
# new.csv, leaving a field empty when a record lacks that child element.

# The snippet never defined `files_xlm`; default to the sample file so the
# script runs as written. Extend the list to process more files.
files_xlm = ["file.xml"]

# (lower-case tag looked up in the parsed tree, column title in the CSV)
fields = [
    ("time", "Time"),
    ("oper", "Oper"),
    ("pkgeid", "PkgEid"),
    ("pkgname", "PkgName"),
    ("dhct", "dhct"),
    ("sourceid", "sourceid"),
]

rows = []
for each_file in files_xlm:
    with open(each_file, "r", encoding="utf-8") as f:  # opening xml file
        soup = b(f.read(), "lxml")
    for values in soup.find_all("activityrecord"):
        row = []
        for tag, _title in fields:
            node = values.find(tag)
            row.append("" if node is None else node.text)
        # Keep the values as a list instead of joining them with ',' and
        # splitting again: that round trip breaks on values containing commas.
        rows.append(row)

# Passing the column titles here also yields a valid, header-only CSV when no
# record was found.
df = pd.DataFrame(rows, columns=[title for _tag, title in fields])
df.to_csv("new.csv", index=False)
使用 Pandas,解析所有 XML 字段。
import xml.etree.ElementTree as ET
import pandas as pd
# Parse file.xml and write every child field of every record to file.csv,
# one row per direct child of the root element.
tree = ET.parse("file.xml")
root = tree.getroot()


def _local_name(tag):
    """Return *tag* without a leading '{namespace}' qualifier, if any."""
    # ElementTree reports namespaced tags as '{uri}name'; rpartition leaves
    # an unqualified tag unchanged.
    return tag.rpartition("}")[2]


# One dict per record; the keys become the CSV column headers.
l = [{_local_name(field.tag): field.text for field in record} for record in root]
df = pd.DataFrame(l)
df.to_csv('file.csv')
如果是一次性操作,请使用 pyxmlparser。
免责声明:我是该库的作者,它还相当新。欢迎任何反馈。它是一个命令行实用程序。
2021 年答案:
您可以使用 Pandas 读取 XML 并输出 CSV
https://pandas.pydata.org/pandas-docs/dev/whatsnew/v1.3.0.html#read-and-write-xml-documents
# Usage template for reading XML with pandas and writing it back out as CSV.
# The <...> items are placeholders to be replaced with real paths (or XML
# text), so this snippet is not runnable as written.
# read_xml is covered by the pandas 1.3.0 release notes linked above.
import pandas as pd
df = pd.read_xml(<xml_or_xml_filepath>)
# ...
df.to_csv(<csv_filepath>)
有关使用的更多详细信息,请参阅官方文档: https://pandas.pydata.org/pandas-docs/dev/reference/api/pandas.read_xml.html