在 python 数据框中迭代文件路径
Iterating file paths in python dataframe
我有一个名为 filedataframe 的数据框,其中包含所有文件路径。我的代码可以从单个 xml 文件中提取我想要的内容,但它目前只针对单个文件。我该如何让它遍历数据框 filedataframe 并使用其中的文件路径?我想把 rootId、file_Name、unique_ID 和 employee_badge 与相应的文件路径一并添加。
import re
import pathlib
import os
import pandas as pd
import xml.etree.ElementTree as ET
# Placeholder for the file paths; it is an empty list here and is not used
# anywhere in the lines below.
filesdataframe = []
# Example path would be Defined Contributions.
xmlfile = (r'INVESTING.cdm')
# Parse the single XML file named above.
tree = ET.parse(xmlfile)
# Get the root element of the parsed tree.
root = tree.getroot()
# Print the text of every IntraModelReport element (namespace "object")
# found anywhere below the root.
for elm in root.findall('.//{object}IntraModelReport'):
print(elm.text)
# Visit the same elements again and read three child values from each one.
# NOTE(review): the original indentation is not visible here, so it is
# unclear whether this loop was nested inside the loop above -- confirm.
for Model in root.findall('.//{object}IntraModelReport'):
# NOTE(review): this reads `elm`, the variable of the loop above, rather
# than `Model` -- confirm that this is intended.
rootId = elm.attrib
# Text of the Code, ObjectID and Creator children (namespace "attribute").
file_Name = Model.find("{attribute}Code").text
unique_ID = Model.find("{attribute}ObjectID").text
employee_badge = Model.find("{attribute}Creator").text
print(rootId,file_Name, unique_ID, employee_badge)
试试这个。
import re
import pathlib
import os
import pandas as pd
import xml.etree.ElementTree as ET
from typing import Dict, List
def process_single_xmlfile(xmlfile: str, verbose: bool=False) -> Dict:
    """Extract every IntraModelReport record from one XML file.

    Args:
        xmlfile: Path of the XML file to parse; it is passed straight to
            ``ET.parse``.
        verbose: When true, print the text of every report followed by the
            fields extracted from each one. When false, nothing is printed.

    Returns:
        A dict with two keys: ``'xmlfile'`` (the argument as given) and
        ``'models'``, a list holding one dict per
        ``{object}IntraModelReport`` element in document order. Each of
        those dicts has the keys ``rootId`` (that element's own attributes),
        ``file_Name``, ``unique_ID`` and ``employee_badge``. ``'models'`` is
        empty when the file contains no such element.

    Raises:
        AttributeError: If a report has no ``{attribute}Code``,
            ``{attribute}ObjectID`` or ``{attribute}Creator`` child, because
            ``find`` returns ``None`` for a missing child.
    """
    root = ET.parse(xmlfile).getroot()
    # Search the tree once and reuse the result for printing and extraction.
    reports = root.findall('.//{object}IntraModelReport')

    if verbose:
        # Printing is opt-in: callers that pass verbose=False get no output.
        for report in reports:
            print(report.text)

    package: Dict = {'xmlfile': xmlfile, 'models': []}
    for Model in reports:
        # Take the attributes of the report being processed, not of whatever
        # element an earlier loop happened to finish on. The copy keeps the
        # record independent of the parsed tree.
        rootId = dict(Model.attrib)
        file_Name = Model.find("{attribute}Code").text
        unique_ID = Model.find("{attribute}ObjectID").text
        employee_badge = Model.find("{attribute}Creator").text
        if verbose:
            print(rootId, file_Name, unique_ID, employee_badge)
        package['models'].append(dict(
            rootId=rootId,
            file_Name=file_Name,
            unique_ID=unique_ID,
            employee_badge=employee_badge,
        ))
    return package
#### LOOP OVER
# Run the extractor on every file and keep one result dict per file, in
# input order.
# `xmlfiles` is not defined here: you need to provide an iterable of XML
# file names (the original note suggests substituting your "filedataframe").
# NOTE(review): iterating a pandas DataFrame directly yields its column
# labels, so pass the column that holds the paths -- confirm against your
# frame.
# verbose=False keeps each call quiet; use verbose=True to enable printing.
extracts: List[Dict] = [
    process_single_xmlfile(xmlfile, verbose=False) for xmlfile in xmlfiles
]
我有一个名为 filedataframe 的数据框,其中包含所有文件路径。我的代码可以从单个 xml 文件中提取我想要的内容,但它目前只针对单个文件。我该如何让它遍历数据框 filedataframe 并使用其中的文件路径?我想把 rootId、file_Name、unique_ID 和 employee_badge 与相应的文件路径一并添加。
import re
import pathlib
import os
import pandas as pd
import xml.etree.ElementTree as ET
# Placeholder for the file paths; it is an empty list here and is not used
# anywhere in the lines below.
filesdataframe = []
# Example path would be Defined Contributions.
xmlfile = (r'INVESTING.cdm')
# Parse the single XML file named above.
tree = ET.parse(xmlfile)
# Get the root element of the parsed tree.
root = tree.getroot()
# Print the text of every IntraModelReport element (namespace "object")
# found anywhere below the root.
for elm in root.findall('.//{object}IntraModelReport'):
print(elm.text)
# Visit the same elements again and read three child values from each one.
# NOTE(review): the original indentation is not visible here, so it is
# unclear whether this loop was nested inside the loop above -- confirm.
for Model in root.findall('.//{object}IntraModelReport'):
# NOTE(review): this reads `elm`, the variable of the loop above, rather
# than `Model` -- confirm that this is intended.
rootId = elm.attrib
# Text of the Code, ObjectID and Creator children (namespace "attribute").
file_Name = Model.find("{attribute}Code").text
unique_ID = Model.find("{attribute}ObjectID").text
employee_badge = Model.find("{attribute}Creator").text
print(rootId,file_Name, unique_ID, employee_badge)
试试这个。
import re
import pathlib
import os
import pandas as pd
import xml.etree.ElementTree as ET
from typing import Dict, List
def process_single_xmlfile(xmlfile: str, verbose: bool=False) -> Dict:
    """Extract every IntraModelReport record from one XML file.

    Args:
        xmlfile: Path of the XML file to parse; it is passed straight to
            ``ET.parse``.
        verbose: When true, print the text of every report followed by the
            fields extracted from each one. When false, nothing is printed.

    Returns:
        A dict with two keys: ``'xmlfile'`` (the argument as given) and
        ``'models'``, a list holding one dict per
        ``{object}IntraModelReport`` element in document order. Each of
        those dicts has the keys ``rootId`` (that element's own attributes),
        ``file_Name``, ``unique_ID`` and ``employee_badge``. ``'models'`` is
        empty when the file contains no such element.

    Raises:
        AttributeError: If a report has no ``{attribute}Code``,
            ``{attribute}ObjectID`` or ``{attribute}Creator`` child, because
            ``find`` returns ``None`` for a missing child.
    """
    root = ET.parse(xmlfile).getroot()
    # Search the tree once and reuse the result for printing and extraction.
    reports = root.findall('.//{object}IntraModelReport')

    if verbose:
        # Printing is opt-in: callers that pass verbose=False get no output.
        for report in reports:
            print(report.text)

    package: Dict = {'xmlfile': xmlfile, 'models': []}
    for Model in reports:
        # Take the attributes of the report being processed, not of whatever
        # element an earlier loop happened to finish on. The copy keeps the
        # record independent of the parsed tree.
        rootId = dict(Model.attrib)
        file_Name = Model.find("{attribute}Code").text
        unique_ID = Model.find("{attribute}ObjectID").text
        employee_badge = Model.find("{attribute}Creator").text
        if verbose:
            print(rootId, file_Name, unique_ID, employee_badge)
        package['models'].append(dict(
            rootId=rootId,
            file_Name=file_Name,
            unique_ID=unique_ID,
            employee_badge=employee_badge,
        ))
    return package
#### LOOP OVER
# Run the extractor on every file and keep one result dict per file, in
# input order.
# `xmlfiles` is not defined here: you need to provide an iterable of XML
# file names (the original note suggests substituting your "filedataframe").
# NOTE(review): iterating a pandas DataFrame directly yields its column
# labels, so pass the column that holds the paths -- confirm against your
# frame.
# verbose=False keeps each call quiet; use verbose=True to enable printing.
extracts: List[Dict] = [
    process_single_xmlfile(xmlfile, verbose=False) for xmlfile in xmlfiles
]