在 python 数据框中迭代文件路径

Question

我有一个名为 filedataframe 的数据框，其中包含所有文件路径。我的代码可以从单个 xml 文件中提取我想要的内容，但它目前只针对单个文件。我如何让它遍历数据框 filedataframe 中的每个文件路径？我希望把提取出的 rootId、file_Name、unique_ID 和 employee_badge 与相应的文件路径一起保存。

import re
import pathlib
import os  
import pandas as pd
import xml.etree.ElementTree as ET

# Placeholder for the collection of file paths; nothing below reads it yet.
filesdataframe = []
# example path would be Defined Contributions,

# Parse the single XML file and take its root element.
xmlfile = r'INVESTING.cdm'
tree = ET.parse(xmlfile)
root = tree.getroot()

# Print the text content of every IntraModelReport element.
for elm in root.findall('.//{object}IntraModelReport'):
    print(elm.text)

# Pull the fields of interest out of each IntraModelReport element.
for Model in root.findall('.//{object}IntraModelReport'):
    # Use this element's own attributes, not the leftover `elm` from the loop
    # above, which always refers to the last matched element.
    rootId = Model.attrib
    file_Name = Model.find("{attribute}Code").text
    unique_ID = Model.find("{attribute}ObjectID").text
    employee_badge = Model.find("{attribute}Creator").text
    print(rootId, file_Name, unique_ID, employee_badge)

Answer 1

试试这个。

import re
import pathlib
import os  
import pandas as pd
import xml.etree.ElementTree as ET
from typing import Dict, List

def process_single_xmlfile(xmlfile: str, verbose: bool=False) -> Dict:
    """Extract the IntraModelReport records from one XML file.

    Args:
        xmlfile: Path of the XML file to parse.
        verbose: When true, print each matched element's text followed by
            the fields extracted from each element. Nothing is printed
            when false.

    Returns:
        A dict with two keys: ``'xmlfile'`` (the path that was passed in)
        and ``'models'``, a list holding one dict per ``IntraModelReport``
        element with the keys ``rootId`` (a copy of the element's
        attributes), ``file_Name``, ``unique_ID`` and ``employee_badge``.

    Raises:
        AttributeError: If an element has no ``Code``, ``ObjectID`` or
            ``Creator`` child. Errors raised by ``ET.parse`` (missing or
            malformed file) propagate unchanged.
    """
    root = ET.parse(xmlfile).getroot()
    # Search once and reuse the matches for both printing and extraction.
    reports = root.findall('.//{object}IntraModelReport')

    if verbose:
        for report in reports:
            print(report.text)

    package: Dict = {'xmlfile': xmlfile, 'models': []}
    for report in reports:
        # Take the attributes of the element being processed; copying them
        # detaches the record from the parsed tree.
        rootId = dict(report.attrib)
        file_Name = report.find("{attribute}Code").text
        unique_ID = report.find("{attribute}ObjectID").text
        employee_badge = report.find("{attribute}Creator").text
        if verbose:
            print(rootId, file_Name, unique_ID, employee_badge)
        package['models'].append({
            'rootId': rootId,
            'file_Name': file_Name,
            'unique_ID': unique_ID,
            'employee_badge': employee_badge,
        })
    return package

#### LOOP OVER

# Accumulates one result package per processed file.
extracts: List[Dict] = []
# `xmlfiles` must be supplied by the caller: an iterable of XML file paths.
# NOTE: iterating a pandas DataFrame directly yields its column labels, not
# its rows, so pass the column that holds the paths (for example
# `filedataframe["path"]`; the column name is an assumption, adjust it).
for xmlfile in xmlfiles:
    # Pass verbose=True instead to turn printing on.
    package = process_single_xmlfile(xmlfile, verbose=False)
    extracts.append(package)

在 python 数据框中迭代文件路径

Iterating file paths in python dataframe

python

iteration

loops

dataframe

pandas