如何在每个分组内对 xml 节点值进行排序?
How to sort XML node values within a group?
我的输入 csv 文件如下:
col2 col3 col1
a1 b1 c1
a2 b2 c2
a3 b3 c3
转换为 xml 文件的脚本:
import csv
from xml.sax.saxutils import escape

csvFile = r'C:\Users\tkp\Desktop\Holdings_Download\testxml.csv'
xmlFile = r'C:\Users\tkp\Desktop\Holdings_Download\testxml.xml'

# Maps a header name to the (idx, type) attributes of its <Cell> element.
# Columns whose header is not listed here are not written at all.
CELL_ATTRS = {
    'col1': ('1', 'String'),
    'col2': ('2', 'DateTime'),
    'col3': ('3', 'Int32'),
}


def write_tabular_xml(rows, out):
    """Write CSV rows to *out* as a TabularXml document.

    Args:
        rows: Iterable of rows (sequences of strings), e.g. a ``csv.reader``.
            The first row is the header; spaces in header names are
            replaced with underscores before they are matched.
        out: Text stream exposing ``write``.

    Cells are emitted in the order of the CSV header, not in ``idx`` order.
    A row shorter than the header simply yields fewer cells.
    """
    out.write('<?xml version="1.0" encoding="utf-8"?>\n<TabularXml>\n')
    out.write('<Sheet>\n')
    tags = []
    for row_num, row in enumerate(rows):
        if row_num == 0:
            tags = [tag.replace(' ', '_') for tag in row]
            continue
        # srcidx is the 1-based line number in the CSV, header included.
        out.write(' <Row srcidx="%d">\n' % (row_num + 1))
        for tag, value in zip(tags, row):
            attrs = CELL_ATTRS.get(tag)
            if attrs is None:
                continue
            idx, cell_type = attrs
            # Always close the element (also for an empty value) and escape
            # &, < and > so the output stays well-formed XML.
            out.write(' <Cell idx="%s" name="%s" type="%s" >%s</Cell>\n'
                      % (idx, tag, cell_type, escape(value)))
        out.write(' </Row>\n')
    out.write('</Sheet>\n')
    out.write('</TabularXml>\n')


if __name__ == '__main__':
    # The XML declaration says utf-8, so the file is written as utf-8;
    # both files are closed even if the conversion fails.
    with open(csvFile, newline='') as csv_in, \
            open(xmlFile, 'w', encoding='utf-8') as xml_out:
        write_tabular_xml(csv.reader(csv_in, delimiter=';'), xml_out)
输出xml文件:
<?xml version="1.0" encoding="utf-8"?>
<TabularXml>
<Sheet>
<Row srcidx="2">
<Cell idx="2" name="col2" type="DateTime" >a1</Cell>
<Cell idx="3" name="col3" type="Int32" >b1</Cell>
<Cell idx="1" name="col1" type="String" >c1</Cell>
</Row>
<Row srcidx="3">
<Cell idx="2" name="col2" type="DateTime" >a2</Cell>
<Cell idx="3" name="col3" type="Int32" >b2</Cell>
<Cell idx="1" name="col1" type="String" >c2</Cell>
</Row>
<Row srcidx="4">
<Cell idx="2" name="col2" type="DateTime" >a3</Cell>
<Cell idx="3" name="col3" type="Int32" >b3</Cell>
<Cell idx="1" name="col1" type="String" >c3</Cell>
</Row>
</Sheet>
</TabularXml>
如何按 'Cell' 节点的 'idx' 属性值对数据进行排序?
预期结果:
<?xml version="1.0" encoding="utf-8"?>
<TabularXml>
<Sheet>
<Row srcidx="2">
<Cell idx="1" name="col1" type="String" >c1</Cell>
<Cell idx="2" name="col2" type="DateTime" >a1</Cell>
<Cell idx="3" name="col3" type="Int32" >b1</Cell>
</Row>
<Row srcidx="3">
<Cell idx="1" name="col1" type="String" >c2</Cell>
<Cell idx="2" name="col2" type="DateTime" >a2</Cell>
<Cell idx="3" name="col3" type="Int32" >b2</Cell>
</Row>
<Row srcidx="4">
<Cell idx="1" name="col1" type="String" >c3</Cell>
<Cell idx="2" name="col2" type="DateTime" >a3</Cell>
<Cell idx="3" name="col3" type="Int32" >b3</Cell>
</Row>
</Sheet>
</TabularXml>
您的列名称始终相同,因此可以用它们来确定列的顺序(order)
# Look up where each wanted column currently sits in the header.
col1_pos = column_names.index('col1')
col2_pos = column_names.index('col2')
col3_pos = column_names.index('col3')
# order[k] is the current position of the column that should come k-th.
order = [col1_pos, col2_pos, col3_pos]
然后你可以用它来排序数据
# Pull the three source positions out once, then pick values in that order.
first, second, third = order[0], order[1], order[2]
column_names = [column_names[first], column_names[second], column_names[third]]
row = [row[first], row[second], row[third]]
如果列更多,您甚至可以使用 for 循环,但这里我略过了这个做法。
最小可运行示例代码。
我使用 io 只是为了在内存中模拟文件。
我使用 next() 从 csv 中单独取出第一行(即列名所在的行)。
import csv
import io

# Sample CSV kept in memory; the header lists the columns out of order.
text = '''col2;col3;col1
a1;b1;c1
a2;b2;c2
a3;b3;c3'''

csv_reader = csv.reader(io.StringIO(text), delimiter=';')

# The first row holds the column names; the remaining rows are data.
column_names = next(csv_reader)

# order[k] is the current position of the column that should come k-th.
order = [
    column_names.index('col1'),
    column_names.index('col2'),
    column_names.index('col3'),
]

print('order:', order)

print('old column_names:', column_names)

column_names = [
    column_names[order[0]],
    column_names[order[1]],
    column_names[order[2]],
]

print('new column_names:', column_names)

for row in csv_reader:
    print('---')
    print('old row:', row)

    # Apply the same permutation to every data row.
    row = [
        row[order[0]],
        row[order[1]],
        row[order[2]],
    ]

    print('new row:', row)
结果:
order: [2, 0, 1]
old column_names: ['col2', 'col3', 'col1']
new column_names: ['col1', 'col2', 'col3']
---
old row: ['a1', 'b1', 'c1']
new row: ['c1', 'a1', 'b1']
---
old row: ['a2', 'b2', 'c2']
new row: ['c2', 'a2', 'b2']
---
old row: ['a3', 'b3', 'c3']
new row: ['c3', 'a3', 'b3']
我的输入 csv 文件如下:
col2 col3 col1
a1 b1 c1
a2 b2 c2
a3 b3 c3
转换为 xml 文件的脚本:
import csv
from xml.sax.saxutils import escape

csvFile = r'C:\Users\tkp\Desktop\Holdings_Download\testxml.csv'
xmlFile = r'C:\Users\tkp\Desktop\Holdings_Download\testxml.xml'

# Maps a header name to the (idx, type) attributes of its <Cell> element.
# Columns whose header is not listed here are not written at all.
CELL_ATTRS = {
    'col1': ('1', 'String'),
    'col2': ('2', 'DateTime'),
    'col3': ('3', 'Int32'),
}


def write_tabular_xml(rows, out):
    """Write CSV rows to *out* as a TabularXml document.

    Args:
        rows: Iterable of rows (sequences of strings), e.g. a ``csv.reader``.
            The first row is the header; spaces in header names are
            replaced with underscores before they are matched.
        out: Text stream exposing ``write``.

    Cells are emitted in the order of the CSV header, not in ``idx`` order.
    A row shorter than the header simply yields fewer cells.
    """
    out.write('<?xml version="1.0" encoding="utf-8"?>\n<TabularXml>\n')
    out.write('<Sheet>\n')
    tags = []
    for row_num, row in enumerate(rows):
        if row_num == 0:
            tags = [tag.replace(' ', '_') for tag in row]
            continue
        # srcidx is the 1-based line number in the CSV, header included.
        out.write(' <Row srcidx="%d">\n' % (row_num + 1))
        for tag, value in zip(tags, row):
            attrs = CELL_ATTRS.get(tag)
            if attrs is None:
                continue
            idx, cell_type = attrs
            # Always close the element (also for an empty value) and escape
            # &, < and > so the output stays well-formed XML.
            out.write(' <Cell idx="%s" name="%s" type="%s" >%s</Cell>\n'
                      % (idx, tag, cell_type, escape(value)))
        out.write(' </Row>\n')
    out.write('</Sheet>\n')
    out.write('</TabularXml>\n')


if __name__ == '__main__':
    # The XML declaration says utf-8, so the file is written as utf-8;
    # both files are closed even if the conversion fails.
    with open(csvFile, newline='') as csv_in, \
            open(xmlFile, 'w', encoding='utf-8') as xml_out:
        write_tabular_xml(csv.reader(csv_in, delimiter=';'), xml_out)
输出xml文件:
<?xml version="1.0" encoding="utf-8"?>
<TabularXml>
<Sheet>
<Row srcidx="2">
<Cell idx="2" name="col2" type="DateTime" >a1</Cell>
<Cell idx="3" name="col3" type="Int32" >b1</Cell>
<Cell idx="1" name="col1" type="String" >c1</Cell>
</Row>
<Row srcidx="3">
<Cell idx="2" name="col2" type="DateTime" >a2</Cell>
<Cell idx="3" name="col3" type="Int32" >b2</Cell>
<Cell idx="1" name="col1" type="String" >c2</Cell>
</Row>
<Row srcidx="4">
<Cell idx="2" name="col2" type="DateTime" >a3</Cell>
<Cell idx="3" name="col3" type="Int32" >b3</Cell>
<Cell idx="1" name="col1" type="String" >c3</Cell>
</Row>
</Sheet>
</TabularXml>
如何按 'Cell' 节点的 'idx' 属性值对数据进行排序?
预期结果:
<?xml version="1.0" encoding="utf-8"?>
<TabularXml>
<Sheet>
<Row srcidx="2">
<Cell idx="1" name="col1" type="String" >c1</Cell>
<Cell idx="2" name="col2" type="DateTime" >a1</Cell>
<Cell idx="3" name="col3" type="Int32" >b1</Cell>
</Row>
<Row srcidx="3">
<Cell idx="1" name="col1" type="String" >c2</Cell>
<Cell idx="2" name="col2" type="DateTime" >a2</Cell>
<Cell idx="3" name="col3" type="Int32" >b2</Cell>
</Row>
<Row srcidx="4">
<Cell idx="1" name="col1" type="String" >c3</Cell>
<Cell idx="2" name="col2" type="DateTime" >a3</Cell>
<Cell idx="3" name="col3" type="Int32" >b3</Cell>
</Row>
</Sheet>
</TabularXml>
您的列名称始终相同,因此可以用它们来确定列的顺序(order)
# Look up where each wanted column currently sits in the header.
col1_pos = column_names.index('col1')
col2_pos = column_names.index('col2')
col3_pos = column_names.index('col3')
# order[k] is the current position of the column that should come k-th.
order = [col1_pos, col2_pos, col3_pos]
然后你可以用它来排序数据
# Pull the three source positions out once, then pick values in that order.
first, second, third = order[0], order[1], order[2]
column_names = [column_names[first], column_names[second], column_names[third]]
row = [row[first], row[second], row[third]]
如果列更多,您甚至可以使用 for 循环,但这里我略过了这个做法。
最小可运行示例代码。
我使用 io 只是为了在内存中模拟文件。
我使用 next() 从 csv 中单独取出第一行(即列名所在的行)。
import csv
import io

# Sample CSV kept in memory; the header lists the columns out of order.
text = '''col2;col3;col1
a1;b1;c1
a2;b2;c2
a3;b3;c3'''

csv_reader = csv.reader(io.StringIO(text), delimiter=';')

# The first row holds the column names; the remaining rows are data.
column_names = next(csv_reader)

# order[k] is the current position of the column that should come k-th.
order = [
    column_names.index('col1'),
    column_names.index('col2'),
    column_names.index('col3'),
]

print('order:', order)

print('old column_names:', column_names)

column_names = [
    column_names[order[0]],
    column_names[order[1]],
    column_names[order[2]],
]

print('new column_names:', column_names)

for row in csv_reader:
    print('---')
    print('old row:', row)

    # Apply the same permutation to every data row.
    row = [
        row[order[0]],
        row[order[1]],
        row[order[2]],
    ]

    print('new row:', row)
结果:
order: [2, 0, 1]
old column_names: ['col2', 'col3', 'col1']
new column_names: ['col1', 'col2', 'col3']
---
old row: ['a1', 'b1', 'c1']
new row: ['c1', 'a1', 'b1']
---
old row: ['a2', 'b2', 'c2']
new row: ['c2', 'a2', 'b2']
---
old row: ['a3', 'b3', 'c3']
new row: ['c3', 'a3', 'b3']