python lxml - 循环遍历 excel 行并将每一行保存为一个 xml
python lxml - loop/iterate through excel rows and save each row as one xml
问题是第二个 xml 文件还包含来自 excel 行的第一次迭代的数据,第三个 xml 文件也包含来自第一行和第二行的所有数据
为此工作了几个小时,还是想不通
from lxml import etree
import openpyxl
# Create root element with namespace information
# NOTE(review): the root is built exactly once, here, before the row loop below.
xmlns = "http://xml.datev.de/bedi/tps/ledger/v040"
xsi = "http://www.w3.org/2001/XMLSchema-instance"
schemaLocation = "http://xml.datev.de/bedi/tps/ledger/v040 Belegverwaltung_online_ledger_import_v040.xsd"
version = "4.0"
generator_info = "DATEV Musterdaten"
generating_system = "DATEV manuell"
xmlRoot = etree.Element(
"{" + xmlns + "}LedgerImport",
version=version,
attrib={"{" + xsi + "}schemaLocation": schemaLocation},
generator_info=generator_info,
generating_system=generating_system,
nsmap={'xsi': xsi, None: xmlns}
)
#### open the Excel spreadsheet
wb = openpyxl.load_workbook('import_spendesk_datev.xlsx')
sheet = wb['Import']
# build the xml tree
# NOTE(review): indentation is missing from this listing; per the problem
# description, everything through doc.write(...) presumably runs once per
# row -- confirm.
# Each pass appends another <consolidate> to the shared xmlRoot, so every
# file written also carries the rows appended by earlier passes.
for i in range(2,6):
consolidate = etree.SubElement(xmlRoot, 'consolidate', attrib={'consolidatedAmount': str(sheet.cell(row=i,column=16).value),'consolidatedDate': str(sheet.cell(row=i,column=2).value), 'consolidatedInvoiceId': str(sheet.cell(row=i,column=13).value), 'consolidatedCurrencyCode': str(sheet.cell(row=i,column=12).value) })
accountsPayableLedger = etree.SubElement(consolidate, 'accountsPayableLedger')
# NOTE(review): the .text assignments below use the raw cell value without
# str(); presumably lxml rejects non-string values here -- confirm cell types.
account = etree.SubElement(accountsPayableLedger, 'bookingText')
account.text = sheet.cell(row=i,column=21).value
invoice = etree.SubElement(accountsPayableLedger, 'invoiceId')
invoice.text = sheet.cell(row=i,column=13).value
date = etree.SubElement(accountsPayableLedger, 'date')
date.text = sheet.cell(row=i,column=2).value
amount = etree.SubElement(accountsPayableLedger, 'amount')
amount.text = sheet.cell(row=i,column=16).value
account_no = etree.SubElement(accountsPayableLedger, 'accountNo')
account_no.text = sheet.cell(row=i,column=19).value
cost1 = etree.SubElement(accountsPayableLedger, 'costCategoryId')
cost1.text = sheet.cell(row=i,column=15).value
currency_code = etree.SubElement(accountsPayableLedger, 'currencyCode')
currency_code.text = sheet.cell(row=i,column=12).value
# partyId and bpAccountNo are both read from column 20.
party_id = etree.SubElement(accountsPayableLedger, 'partyId')
party_id.text = sheet.cell(row=i,column=20).value
bpaccount = etree.SubElement(accountsPayableLedger, 'bpAccountNo')
bpaccount.text = sheet.cell(row=i,column=20).value
# The output file is named after the invoice id in column 13.
doc = etree.ElementTree(xmlRoot)
doc.write( str(sheet.cell(row=i,column=13).value)+".xml", xml_declaration=True, encoding='utf-8', pretty_print=True)
期望的结果如描述所示:
excel 的每一行都应各自生成一个 .xml 文件,例如:
<?xml version='1.0' encoding='UTF-8'?>
<LedgerImport xmlns:xsi="http://www.w3.org/2001/XMLSchema-instance" xmlns="http://xml.datev.de/bedi/tps/ledger/v040" generating_system="DATEV manuell" generator_info="DATEV Musterdaten" version="4.0" xsi:schemaLocation="http://xml.datev.de/bedi/tps/ledger/v040 Belegverwaltung_online_ledger_import_v040.xsd">
<consolidate consolidatedAmount="1337.01">
<accountsPayableLedger>
<bookingText>amazon</bookingText>
<invoiceId>1</invoiceId>
</accountsPayableLedger>
</consolidate>
</LedgerImport>
同一个 xmlRoot
对象被多次重复使用。您需要为 for
循环中的每次迭代创建一个新的根元素。
创建根元素的代码可以放在函数中。这是一个简化的例子:
from lxml import etree
def makeroot():
    """Return a brand-new, empty ``LedgerImport`` root element."""
    return etree.Element("LedgerImport")


for i in range(2, 6):
    # A fresh root per iteration keeps every output file independent of
    # the ones written before it.
    xmlRoot = makeroot()
    etree.SubElement(xmlRoot, 'consolidate', attrib={'consolidatedAmount': str(i)})
    etree.ElementTree(xmlRoot).write(
        f"{i}.xml",
        xml_declaration=True,
        encoding='utf-8',
        pretty_print=True,
    )
或者,考虑使用 XSLT——一种专门用于转换 XML 文件的声明式语言,lxml
支持它。具体来说,可以从 Python 向样式表传递参数来转换模板 XML(类似于向预编译的 SQL 语句传递参数):
XML 模板 (包括所有顶级命名空间)
<?xml version='1.0' encoding='UTF-8'?>
<LedgerImport xmlns:xsi="http://www.w3.org/2001/XMLSchema-instance"
xmlns="http://xml.datev.de/bedi/tps/ledger/v040"
generating_system="DATEV manuell"
generator_info="DATEV Musterdaten" version="4.0"
xsi:schemaLocation="http://xml.datev.de/bedi/tps/ledger/v040 Belegverwaltung_online_ledger_import_v040.xsd">
<consolidate consolidatedAmount="???">
<accountsPayableLedger>
<bookingText>???</bookingText>
<invoiceId>???</invoiceId>
<date>???</date>
<amount>???</amount>
<accountNo>???</accountNo>
<costCategoryId>???</costCategoryId>
<currencyCode>???</currencyCode>
<partyId>???</partyId>
<bpAccountNo>???</bpAccountNo>
</accountsPayableLedger>
</consolidate>
</LedgerImport>
XSLT(另存为 .xsl 文件;由于 XML 中使用了默认命名空间,代码稍长)
<xsl:stylesheet version="1.0" xmlns:xsl="http://www.w3.org/1999/XSL/Transform"
                xmlns:doc="http://xml.datev.de/bedi/tps/ledger/v040">
  <xsl:output indent="yes"/>
  <xsl:strip-space elements="*"/>

  <!-- PARAMETERS SUPPLIED BY THE CALLER (one set of values per output document) -->
  <xsl:param name="prm_consolidate" />
  <xsl:param name="prm_bookingText" />
  <xsl:param name="prm_invoiceId" />
  <xsl:param name="prm_date" />
  <xsl:param name="prm_amount" />
  <xsl:param name="prm_accountNo" />
  <xsl:param name="prm_costCategoryId" />
  <xsl:param name="prm_currencyCode" />
  <xsl:param name="prm_partyId" />
  <xsl:param name="prm_bpAccountNo" />

  <!-- IDENTITY TRANSFORM: copies every node that no more specific template overrides -->
  <xsl:template match="@*|node()">
    <xsl:copy>
      <xsl:apply-templates select="@*|node()"/>
    </xsl:copy>
  </xsl:template>

  <!-- FILL THE consolidatedAmount ATTRIBUTE OF THE EXISTING consolidate ELEMENT
       (instead of emitting a second, empty consolidate inside accountsPayableLedger) -->
  <xsl:template match="doc:consolidate/@consolidatedAmount">
    <xsl:attribute name="consolidatedAmount"><xsl:value-of select="$prm_consolidate"/></xsl:attribute>
  </xsl:template>

  <!-- REBUILD THE CHILDREN OF accountsPayableLedger FROM THE PARAMETERS;
       xsl:element with an explicit namespace keeps them in the document's default namespace -->
  <xsl:template match="doc:accountsPayableLedger">
    <xsl:copy>
      <xsl:element name="bookingText" namespace="http://xml.datev.de/bedi/tps/ledger/v040"><xsl:value-of select="$prm_bookingText"/></xsl:element>
      <xsl:element name="invoiceId" namespace="http://xml.datev.de/bedi/tps/ledger/v040"><xsl:value-of select="$prm_invoiceId"/></xsl:element>
      <xsl:element name="date" namespace="http://xml.datev.de/bedi/tps/ledger/v040"><xsl:value-of select="$prm_date"/></xsl:element>
      <xsl:element name="amount" namespace="http://xml.datev.de/bedi/tps/ledger/v040"><xsl:value-of select="$prm_amount"/></xsl:element>
      <xsl:element name="accountNo" namespace="http://xml.datev.de/bedi/tps/ledger/v040"><xsl:value-of select="$prm_accountNo"/></xsl:element>
      <xsl:element name="costCategoryId" namespace="http://xml.datev.de/bedi/tps/ledger/v040"><xsl:value-of select="$prm_costCategoryId"/></xsl:element>
      <xsl:element name="currencyCode" namespace="http://xml.datev.de/bedi/tps/ledger/v040"><xsl:value-of select="$prm_currencyCode"/></xsl:element>
      <xsl:element name="partyId" namespace="http://xml.datev.de/bedi/tps/ledger/v040"><xsl:value-of select="$prm_partyId"/></xsl:element>
      <xsl:element name="bpAccountNo" namespace="http://xml.datev.de/bedi/tps/ledger/v040"><xsl:value-of select="$prm_bpAccountNo"/></xsl:element>
    </xsl:copy>
  </xsl:template>
</xsl:stylesheet>
Python(无需构建 DOM 元素)
import lxml.etree as et
import openpyxl

# LOAD XML AND XSL
xml = et.parse('/path/to/Template.xml')
xsl = et.parse('/path/to/XSLTScript.xsl')
# Compile the stylesheet once; the compiled transform is reused for every row.
transform = et.XSLT(xsl)

### OPEN EXCEL SPREADSHEET
wb = openpyxl.load_workbook('import_spendesk_datev.xlsx')
sheet = wb['Import']


def cell_param(row, column):
    """Return the cell at (row, column) as a quoted XSLT string parameter.

    ``XSLT.strparam`` only accepts strings, whereas a spreadsheet cell may
    hold a number, a date or nothing at all, so the value is converted
    first. An empty cell is passed as an empty string.
    """
    value = sheet.cell(row=row, column=column).value
    return et.XSLT.strparam("" if value is None else str(value))


# LOOP THROUGH ROWS
for i in range(2, 6):
    # PASS PARAMETERS TO XSLT
    # Keyword names must match the <xsl:param name="..."> declarations
    # exactly; a parameter passed under a different name is never read by
    # the stylesheet.
    result = transform(
        xml,
        prm_consolidate=cell_param(i, 16),
        prm_bookingText=cell_param(i, 21),
        prm_invoiceId=cell_param(i, 13),
        prm_date=cell_param(i, 2),
        prm_amount=cell_param(i, 16),
        prm_accountNo=cell_param(i, 19),
        prm_costCategoryId=cell_param(i, 15),
        prm_currencyCode=cell_param(i, 12),
        prm_partyId=cell_param(i, 20),
        prm_bpAccountNo=cell_param(i, 20),
    )

    # SAVE XML TO FILE
    with open('/path/to/Output_Row{}.xml'.format(i), 'wb') as f:
        f.write(result)
在@mzjn 指出了你的基本错误之后,这是我为了好玩而制作的东西 - 你可以使用声明性映射创建嵌套 XML,而不是自己费力地调用 etree.SubElement
。
方法如下。假设基本情况是这样的:
from lxml import etree
import openpyxl
# Namespace map for the document: the default namespace (key None) plus
# the 'xsi' prefix.
ns = {
None: 'http://xml.datev.de/bedi/tps/ledger/v040',
'xsi': 'http://www.w3.org/2001/XMLSchema-instance',
}
# Declarative description of the XML document, consumed by build_tree().
# '_tag', 'attrib' and 'nsmap' are forwarded as keyword arguments to
# etree.Element()/etree.SubElement(); '_text' and '_children' are handled
# by build_tree() itself.
mapping = {
'_tag': '{' + ns[None] + '}LedgerImport',
'attrib': {
'version': '4.0',
'{' + ns['xsi'] + '}schemaLocation': 'http://xml.datev.de/bedi/tps/ledger/v040 Belegverwaltung_online_ledger_import_v040.xsd',
'generator_info': 'DATEV Musterdaten',
'generating_system': 'DATEV manuell',
},
'nsmap': ns,
'_children': [{
'_tag': 'consolidate',
# The lambdas defer the cell lookup: they read the module-level names
# `sheet` and `i` when they are called, not when this dict is created.
'attrib': {
'consolidatedAmount': lambda: sheet.cell(i, 16).value,
'consolidatedDate': lambda: sheet.cell(i, 2).value,
'consolidatedInvoiceId': lambda: sheet.cell(i, 13).value,
'consolidatedCurrencyCode': lambda: sheet.cell(i, 12).value,
},
'_children': [{
'_tag': 'accountsPayableLedger',
'_children': [
{'_tag': 'bookingText', '_text': lambda: sheet.cell(i, 21).value},
{'_tag': 'invoiceId', '_text': lambda: sheet.cell(i, 13).value},
{'_tag': 'date', '_text': lambda: sheet.cell(i, 2).value},
{'_tag': 'amount', '_text': lambda: sheet.cell(i, 16).value},
{'_tag': 'accountNo', '_text': lambda: sheet.cell(i, 19).value},
{'_tag': 'costCategoryId', '_text': lambda: sheet.cell(i, 15).value},
{'_tag': 'currencyCode', '_text': lambda: sheet.cell(i, 12).value},
# partyId and bpAccountNo both read column 20.
{'_tag': 'partyId', '_text': lambda: sheet.cell(i, 20).value},
{'_tag': 'bpAccountNo', '_text': lambda: sheet.cell(i, 20).value},
]
}]
}],
}
嵌套的字典类似于您的最终 XML 文档。它的键也类似于 etree.Element()
和 etree.SubElement()
采用的参数,增加了 _text
和 _children
。
现在我们可以定义一个递归辅助函数,它接受这个输入树并将其转换为具有相同配置的嵌套 XML 树。作为奖励,我们可以执行 lambda
函数,它允许我们动态计算属性值和文本:
def build_tree(template, parent=None):
    """Build an lxml element (and its subtree) from a nested template dict.

    Args:
        template: dict describing one element. ``'_text'`` (optional) is
            the element text and ``'_children'`` (optional) is a list of
            child templates; every other key (``'_tag'``, ``'attrib'``,
            ``'nsmap'``, ...) is forwarded as a keyword argument to
            ``etree.Element()`` / ``etree.SubElement()``. Attribute values
            and ``'_text'`` may be zero-argument callables, which are
            called on every invocation.
        parent: element to attach the new node to, or ``None`` to create
            a root element.

    Returns:
        The newly created element.

    The template is never modified, so the same template can be passed
    repeatedly and its callables are re-evaluated each time.
    """
    def resolve(value):
        # Dynamic values are supplied as zero-argument callables.
        return value() if callable(value) else value

    # prepare a dict for calling etree.Element()/etree.SubElement()
    params = {k: v for k, v in template.items() if k not in ('_children', '_text')}

    # Resolve attribute values into a NEW dict. `params` is only a shallow
    # copy, so writing into params['attrib'] in place would overwrite the
    # callables stored in the template and freeze their first result.
    if 'attrib' in params:
        params['attrib'] = {
            name: str(resolve(value))
            for name, value in params['attrib'].items()
        }

    if parent is None:
        node = etree.Element(**params)
    else:
        node = etree.SubElement(parent, **params)

    # calculate (if necessary) and set the node text; None stays None so a
    # missing value yields an empty element instead of the text "None"
    if '_text' in template:
        text = resolve(template['_text'])
        node.text = None if text is None else str(text)

    # recurse into children, if any
    for child in template.get('_children', []):
        build_tree(child, node)

    return node
我们可以循环调用它:
wb = openpyxl.load_workbook('import_spendesk_datev.xlsx')
sheet = wb['Import']

# `sheet` and `i` must keep these names: the lambdas in `mapping` look
# them up at call time.
for i in range(2, 6):
    # One output file per row, named after the invoice id in column 13.
    name = f"{sheet.cell(i, 13).value}.xml"
    etree.ElementTree(build_tree(mapping)).write(
        name,
        xml_declaration=True,
        encoding='utf-8',
        pretty_print=True,
    )
这应该会生成嵌套良好的 XML 文档,并且当您的 XML 结构发生变化或变得更复杂时,它应该会更易于管理。
问题是第二个 xml 文件还包含来自 excel 行的第一次迭代的数据,第三个 xml 文件也包含来自第一行和第二行的所有数据
为此工作了几个小时,还是想不通
from lxml import etree
import openpyxl
# Create root element with namespace information
# NOTE(review): the root is built exactly once, here, before the row loop below.
xmlns = "http://xml.datev.de/bedi/tps/ledger/v040"
xsi = "http://www.w3.org/2001/XMLSchema-instance"
schemaLocation = "http://xml.datev.de/bedi/tps/ledger/v040 Belegverwaltung_online_ledger_import_v040.xsd"
version = "4.0"
generator_info = "DATEV Musterdaten"
generating_system = "DATEV manuell"
xmlRoot = etree.Element(
"{" + xmlns + "}LedgerImport",
version=version,
attrib={"{" + xsi + "}schemaLocation": schemaLocation},
generator_info=generator_info,
generating_system=generating_system,
nsmap={'xsi': xsi, None: xmlns}
)
#### open the Excel spreadsheet
wb = openpyxl.load_workbook('import_spendesk_datev.xlsx')
sheet = wb['Import']
# build the xml tree
# NOTE(review): indentation is missing from this listing; per the problem
# description, everything through doc.write(...) presumably runs once per
# row -- confirm.
# Each pass appends another <consolidate> to the shared xmlRoot, so every
# file written also carries the rows appended by earlier passes.
for i in range(2,6):
consolidate = etree.SubElement(xmlRoot, 'consolidate', attrib={'consolidatedAmount': str(sheet.cell(row=i,column=16).value),'consolidatedDate': str(sheet.cell(row=i,column=2).value), 'consolidatedInvoiceId': str(sheet.cell(row=i,column=13).value), 'consolidatedCurrencyCode': str(sheet.cell(row=i,column=12).value) })
accountsPayableLedger = etree.SubElement(consolidate, 'accountsPayableLedger')
# NOTE(review): the .text assignments below use the raw cell value without
# str(); presumably lxml rejects non-string values here -- confirm cell types.
account = etree.SubElement(accountsPayableLedger, 'bookingText')
account.text = sheet.cell(row=i,column=21).value
invoice = etree.SubElement(accountsPayableLedger, 'invoiceId')
invoice.text = sheet.cell(row=i,column=13).value
date = etree.SubElement(accountsPayableLedger, 'date')
date.text = sheet.cell(row=i,column=2).value
amount = etree.SubElement(accountsPayableLedger, 'amount')
amount.text = sheet.cell(row=i,column=16).value
account_no = etree.SubElement(accountsPayableLedger, 'accountNo')
account_no.text = sheet.cell(row=i,column=19).value
cost1 = etree.SubElement(accountsPayableLedger, 'costCategoryId')
cost1.text = sheet.cell(row=i,column=15).value
currency_code = etree.SubElement(accountsPayableLedger, 'currencyCode')
currency_code.text = sheet.cell(row=i,column=12).value
# partyId and bpAccountNo are both read from column 20.
party_id = etree.SubElement(accountsPayableLedger, 'partyId')
party_id.text = sheet.cell(row=i,column=20).value
bpaccount = etree.SubElement(accountsPayableLedger, 'bpAccountNo')
bpaccount.text = sheet.cell(row=i,column=20).value
# The output file is named after the invoice id in column 13.
doc = etree.ElementTree(xmlRoot)
doc.write( str(sheet.cell(row=i,column=13).value)+".xml", xml_declaration=True, encoding='utf-8', pretty_print=True)
期望的结果如描述所示:excel 的每一行都应各自生成一个 .xml 文件,例如:
<?xml version='1.0' encoding='UTF-8'?>
<LedgerImport xmlns:xsi="http://www.w3.org/2001/XMLSchema-instance" xmlns="http://xml.datev.de/bedi/tps/ledger/v040" generating_system="DATEV manuell" generator_info="DATEV Musterdaten" version="4.0" xsi:schemaLocation="http://xml.datev.de/bedi/tps/ledger/v040 Belegverwaltung_online_ledger_import_v040.xsd">
<consolidate consolidatedAmount="1337.01">
<accountsPayableLedger>
<bookingText>amazon</bookingText>
<invoiceId>1</invoiceId>
</accountsPayableLedger>
</consolidate>
</LedgerImport>
同一个 xmlRoot
对象被多次重复使用。您需要为 for
循环中的每次迭代创建一个新的根元素。
创建根元素的代码可以放在函数中。这是一个简化的例子:
from lxml import etree
def makeroot():
    """Return a brand-new, empty ``LedgerImport`` root element."""
    return etree.Element("LedgerImport")


for i in range(2, 6):
    # A fresh root per iteration keeps every output file independent of
    # the ones written before it.
    xmlRoot = makeroot()
    etree.SubElement(xmlRoot, 'consolidate', attrib={'consolidatedAmount': str(i)})
    etree.ElementTree(xmlRoot).write(
        f"{i}.xml",
        xml_declaration=True,
        encoding='utf-8',
        pretty_print=True,
    )
或者,考虑使用 XSLT——一种专门用于转换 XML 文件的声明式语言,lxml
支持它。具体来说,可以从 Python 向样式表传递参数来转换模板 XML(类似于向预编译的 SQL 语句传递参数):
XML 模板 (包括所有顶级命名空间)
<?xml version='1.0' encoding='UTF-8'?>
<LedgerImport xmlns:xsi="http://www.w3.org/2001/XMLSchema-instance"
xmlns="http://xml.datev.de/bedi/tps/ledger/v040"
generating_system="DATEV manuell"
generator_info="DATEV Musterdaten" version="4.0"
xsi:schemaLocation="http://xml.datev.de/bedi/tps/ledger/v040 Belegverwaltung_online_ledger_import_v040.xsd">
<consolidate consolidatedAmount="???">
<accountsPayableLedger>
<bookingText>???</bookingText>
<invoiceId>???</invoiceId>
<date>???</date>
<amount>???</amount>
<accountNo>???</accountNo>
<costCategoryId>???</costCategoryId>
<currencyCode>???</currencyCode>
<partyId>???</partyId>
<bpAccountNo>???</bpAccountNo>
</accountsPayableLedger>
</consolidate>
</LedgerImport>
XSLT(另存为 .xsl 文件;由于 XML 中使用了默认命名空间,代码稍长)
<xsl:stylesheet version="1.0" xmlns:xsl="http://www.w3.org/1999/XSL/Transform"
                xmlns:doc="http://xml.datev.de/bedi/tps/ledger/v040">
  <xsl:output indent="yes"/>
  <xsl:strip-space elements="*"/>

  <!-- PARAMETERS SUPPLIED BY THE CALLER (one set of values per output document) -->
  <xsl:param name="prm_consolidate" />
  <xsl:param name="prm_bookingText" />
  <xsl:param name="prm_invoiceId" />
  <xsl:param name="prm_date" />
  <xsl:param name="prm_amount" />
  <xsl:param name="prm_accountNo" />
  <xsl:param name="prm_costCategoryId" />
  <xsl:param name="prm_currencyCode" />
  <xsl:param name="prm_partyId" />
  <xsl:param name="prm_bpAccountNo" />

  <!-- IDENTITY TRANSFORM: copies every node that no more specific template overrides -->
  <xsl:template match="@*|node()">
    <xsl:copy>
      <xsl:apply-templates select="@*|node()"/>
    </xsl:copy>
  </xsl:template>

  <!-- FILL THE consolidatedAmount ATTRIBUTE OF THE EXISTING consolidate ELEMENT
       (instead of emitting a second, empty consolidate inside accountsPayableLedger) -->
  <xsl:template match="doc:consolidate/@consolidatedAmount">
    <xsl:attribute name="consolidatedAmount"><xsl:value-of select="$prm_consolidate"/></xsl:attribute>
  </xsl:template>

  <!-- REBUILD THE CHILDREN OF accountsPayableLedger FROM THE PARAMETERS;
       xsl:element with an explicit namespace keeps them in the document's default namespace -->
  <xsl:template match="doc:accountsPayableLedger">
    <xsl:copy>
      <xsl:element name="bookingText" namespace="http://xml.datev.de/bedi/tps/ledger/v040"><xsl:value-of select="$prm_bookingText"/></xsl:element>
      <xsl:element name="invoiceId" namespace="http://xml.datev.de/bedi/tps/ledger/v040"><xsl:value-of select="$prm_invoiceId"/></xsl:element>
      <xsl:element name="date" namespace="http://xml.datev.de/bedi/tps/ledger/v040"><xsl:value-of select="$prm_date"/></xsl:element>
      <xsl:element name="amount" namespace="http://xml.datev.de/bedi/tps/ledger/v040"><xsl:value-of select="$prm_amount"/></xsl:element>
      <xsl:element name="accountNo" namespace="http://xml.datev.de/bedi/tps/ledger/v040"><xsl:value-of select="$prm_accountNo"/></xsl:element>
      <xsl:element name="costCategoryId" namespace="http://xml.datev.de/bedi/tps/ledger/v040"><xsl:value-of select="$prm_costCategoryId"/></xsl:element>
      <xsl:element name="currencyCode" namespace="http://xml.datev.de/bedi/tps/ledger/v040"><xsl:value-of select="$prm_currencyCode"/></xsl:element>
      <xsl:element name="partyId" namespace="http://xml.datev.de/bedi/tps/ledger/v040"><xsl:value-of select="$prm_partyId"/></xsl:element>
      <xsl:element name="bpAccountNo" namespace="http://xml.datev.de/bedi/tps/ledger/v040"><xsl:value-of select="$prm_bpAccountNo"/></xsl:element>
    </xsl:copy>
  </xsl:template>
</xsl:stylesheet>
Python(无需构建 DOM 元素)
import lxml.etree as et
import openpyxl

# LOAD XML AND XSL
xml = et.parse('/path/to/Template.xml')
xsl = et.parse('/path/to/XSLTScript.xsl')
# Compile the stylesheet once; the compiled transform is reused for every row.
transform = et.XSLT(xsl)

### OPEN EXCEL SPREADSHEET
wb = openpyxl.load_workbook('import_spendesk_datev.xlsx')
sheet = wb['Import']


def cell_param(row, column):
    """Return the cell at (row, column) as a quoted XSLT string parameter.

    ``XSLT.strparam`` only accepts strings, whereas a spreadsheet cell may
    hold a number, a date or nothing at all, so the value is converted
    first. An empty cell is passed as an empty string.
    """
    value = sheet.cell(row=row, column=column).value
    return et.XSLT.strparam("" if value is None else str(value))


# LOOP THROUGH ROWS
for i in range(2, 6):
    # PASS PARAMETERS TO XSLT
    # Keyword names must match the <xsl:param name="..."> declarations
    # exactly; a parameter passed under a different name is never read by
    # the stylesheet.
    result = transform(
        xml,
        prm_consolidate=cell_param(i, 16),
        prm_bookingText=cell_param(i, 21),
        prm_invoiceId=cell_param(i, 13),
        prm_date=cell_param(i, 2),
        prm_amount=cell_param(i, 16),
        prm_accountNo=cell_param(i, 19),
        prm_costCategoryId=cell_param(i, 15),
        prm_currencyCode=cell_param(i, 12),
        prm_partyId=cell_param(i, 20),
        prm_bpAccountNo=cell_param(i, 20),
    )

    # SAVE XML TO FILE
    with open('/path/to/Output_Row{}.xml'.format(i), 'wb') as f:
        f.write(result)
在@mzjn 指出了你的基本错误之后,这是我为了好玩而制作的东西 - 你可以使用声明性映射创建嵌套 XML,而不是自己费力地调用 etree.SubElement
。
方法如下。假设基本情况是这样的:
from lxml import etree
import openpyxl
# Namespace map for the document: the default namespace (key None) plus
# the 'xsi' prefix.
ns = {
None: 'http://xml.datev.de/bedi/tps/ledger/v040',
'xsi': 'http://www.w3.org/2001/XMLSchema-instance',
}
# Declarative description of the XML document, consumed by build_tree().
# '_tag', 'attrib' and 'nsmap' are forwarded as keyword arguments to
# etree.Element()/etree.SubElement(); '_text' and '_children' are handled
# by build_tree() itself.
mapping = {
'_tag': '{' + ns[None] + '}LedgerImport',
'attrib': {
'version': '4.0',
'{' + ns['xsi'] + '}schemaLocation': 'http://xml.datev.de/bedi/tps/ledger/v040 Belegverwaltung_online_ledger_import_v040.xsd',
'generator_info': 'DATEV Musterdaten',
'generating_system': 'DATEV manuell',
},
'nsmap': ns,
'_children': [{
'_tag': 'consolidate',
# The lambdas defer the cell lookup: they read the module-level names
# `sheet` and `i` when they are called, not when this dict is created.
'attrib': {
'consolidatedAmount': lambda: sheet.cell(i, 16).value,
'consolidatedDate': lambda: sheet.cell(i, 2).value,
'consolidatedInvoiceId': lambda: sheet.cell(i, 13).value,
'consolidatedCurrencyCode': lambda: sheet.cell(i, 12).value,
},
'_children': [{
'_tag': 'accountsPayableLedger',
'_children': [
{'_tag': 'bookingText', '_text': lambda: sheet.cell(i, 21).value},
{'_tag': 'invoiceId', '_text': lambda: sheet.cell(i, 13).value},
{'_tag': 'date', '_text': lambda: sheet.cell(i, 2).value},
{'_tag': 'amount', '_text': lambda: sheet.cell(i, 16).value},
{'_tag': 'accountNo', '_text': lambda: sheet.cell(i, 19).value},
{'_tag': 'costCategoryId', '_text': lambda: sheet.cell(i, 15).value},
{'_tag': 'currencyCode', '_text': lambda: sheet.cell(i, 12).value},
# partyId and bpAccountNo both read column 20.
{'_tag': 'partyId', '_text': lambda: sheet.cell(i, 20).value},
{'_tag': 'bpAccountNo', '_text': lambda: sheet.cell(i, 20).value},
]
}]
}],
}
嵌套的字典类似于您的最终 XML 文档。它的键也类似于 etree.Element()
和 etree.SubElement()
采用的参数,增加了 _text
和 _children
。
现在我们可以定义一个递归辅助函数,它接受这个输入树并将其转换为具有相同配置的嵌套 XML 树。作为奖励,我们可以执行 lambda
函数,它允许我们动态计算属性值和文本:
def build_tree(template, parent=None):
    """Build an lxml element (and its subtree) from a nested template dict.

    Args:
        template: dict describing one element. ``'_text'`` (optional) is
            the element text and ``'_children'`` (optional) is a list of
            child templates; every other key (``'_tag'``, ``'attrib'``,
            ``'nsmap'``, ...) is forwarded as a keyword argument to
            ``etree.Element()`` / ``etree.SubElement()``. Attribute values
            and ``'_text'`` may be zero-argument callables, which are
            called on every invocation.
        parent: element to attach the new node to, or ``None`` to create
            a root element.

    Returns:
        The newly created element.

    The template is never modified, so the same template can be passed
    repeatedly and its callables are re-evaluated each time.
    """
    def resolve(value):
        # Dynamic values are supplied as zero-argument callables.
        return value() if callable(value) else value

    # prepare a dict for calling etree.Element()/etree.SubElement()
    params = {k: v for k, v in template.items() if k not in ('_children', '_text')}

    # Resolve attribute values into a NEW dict. `params` is only a shallow
    # copy, so writing into params['attrib'] in place would overwrite the
    # callables stored in the template and freeze their first result.
    if 'attrib' in params:
        params['attrib'] = {
            name: str(resolve(value))
            for name, value in params['attrib'].items()
        }

    if parent is None:
        node = etree.Element(**params)
    else:
        node = etree.SubElement(parent, **params)

    # calculate (if necessary) and set the node text; None stays None so a
    # missing value yields an empty element instead of the text "None"
    if '_text' in template:
        text = resolve(template['_text'])
        node.text = None if text is None else str(text)

    # recurse into children, if any
    for child in template.get('_children', []):
        build_tree(child, node)

    return node
我们可以循环调用它:
wb = openpyxl.load_workbook('import_spendesk_datev.xlsx')
sheet = wb['Import']

# `sheet` and `i` must keep these names: the lambdas in `mapping` look
# them up at call time.
for i in range(2, 6):
    # One output file per row, named after the invoice id in column 13.
    name = f"{sheet.cell(i, 13).value}.xml"
    etree.ElementTree(build_tree(mapping)).write(
        name,
        xml_declaration=True,
        encoding='utf-8',
        pretty_print=True,
    )
这应该会生成嵌套良好的 XML 文档,并且当您的 XML 结构发生变化或变得更复杂时,它应该会更易于管理。