从不同目录中的多个文件中读取特定数据 python

Read specific data from multiple files in different directories python

如果我有很多这样的文件:

[

每个文件夹里面还有3个这样的:

[

现在每个文件夹中都有一个 .txt 文件,如下所示:

[

对于每个 .txt 文件,我需要获取文件中第 6 列(用红色圈出)的值,并且我只对开头包含 cope1、cope2、cope3、cope4 和 cope5 的行(以蓝色突出显示)感兴趣。其他内容都可以忽略。

我需要为每个文件夹单独呈现数据

所以会是:

[

我需要从每个文件中读取相关数据并将其存储在合理的数据结构中。

我需要像这样呈现所有数据,以便针对 10 个文件夹中每个文件夹里的 3 个子文件夹,得到 FFA cope 1 - cope 5 的平均值,依此类推。

大致是这样呈现的:

对于冗长的问题表示歉意,我是 Python 新手!非常感谢所有帮助。

import os
import csv
import statistics

def _gather_cope_values(folder, results):
    """Collect cope values from every file in *folder* into *results*.

    Each file is parsed as space-delimited text. For every row whose second
    field contains "cope", the component after the first "/" of that field
    is used as the key, and the seventh field (converted to float) is
    appended to the list stored under that key in *results*.
    """
    for filename in os.listdir(folder):
        path = os.path.join(folder, filename)
        if not os.path.isfile(path):
            continue
        with open(path, "r", newline="") as f:
            for row in csv.reader(f, delimiter=" "):
                # Blank or short rows cannot carry a cope value; skip them
                # rather than failing with IndexError.
                if len(row) < 7 or "cope" not in row[1]:
                    continue
                name = row[1].split("/")[1]
                results.setdefault(name, []).append(float(row[6]))


def main():
    """Print the mean value per cope name for the "ffa" folders.

    Walks fmriroi/roi_data/<subdir>/<folder>/ and gathers the cope values
    found in the "ffa", "lingual_gyrus" and "ppa" folders of every
    <subdir>. Only the "ffa" values are averaged and printed, as a
    ``{cope_name: mean}`` dict; the other two are gathered but not reported.
    """
    ffaResults = {}
    lingualResults = {}
    ppaResults = {}
    results_by_folder = {
        "ffa": ffaResults,
        "lingual_gyrus": lingualResults,
        "ppa": ppaResults,
    }

    root = os.path.join("fmriroi", "roi_data")
    for subdir in os.listdir(root):
        subdirpath = os.path.join(root, subdir)
        # Stray files next to the per-subject folders would make
        # os.listdir() fail, so only descend into directories.
        if not os.path.isdir(subdirpath):
            continue
        for subsubdir in os.listdir(subdirpath):
            if subsubdir in results_by_folder:
                _gather_cope_values(
                    os.path.join(subdirpath, subsubdir),
                    results_by_folder[subsubdir],
                )

    # Average the accumulated ffa values; every list holds at least one
    # value, so statistics.mean() cannot see an empty sequence here.
    res = {name: statistics.mean(vals) for name, vals in ffaResults.items()}
    print(res)

# Run main() only when this file is executed as a script, not when imported.
if __name__ == "__main__":
    main()

输出:需要

我同意评论中@Silveris 的建议。您可以将我的代码用于单个文件:

import re

datas = []

# Scan file.txt for lines containing a token that starts with "cope1", then
# "cope2", and so on in that order, and keep the sixth whitespace-separated
# field of each such line (as a string) under the matched token(s).
data = {}
i = 1  # number of the next cope label to look for
with open('file.txt', 'r', encoding='utf-8') as f:
    for line in f:
        fields = line.split()
        hits = [token for token in fields if re.match(rf'cope{i}', token)]
        if not hits:
            continue
        data[''.join(hits)] = fields[5]
        # Advance only after a hit, so labels are expected in ascending order.
        i += 1

datas.append(data)

这是我添加到您的代码中的一些代码:

import os
import csv
import pprint
import statistics

def _collect_cope_values(folder, results):
    """Collect cope values from every file in *folder* into *results*.

    Each file is parsed as space-delimited text. For every row whose second
    field contains "cope", the component after the first "/" of that field
    is used as the key, and the seventh field (converted to float) is
    appended to the list stored under that key in *results*.
    """
    for filename in os.listdir(folder):
        path = os.path.join(folder, filename)
        if not os.path.isfile(path):
            continue
        with open(path, "r", newline="") as f:
            for row in csv.reader(f, delimiter=" "):
                # Blank or short rows cannot carry a cope value; skip them
                # rather than failing with IndexError.
                if len(row) < 7 or "cope" not in row[1]:
                    continue
                name = row[1].split("/")[1]
                # setdefault() appends to the existing list instead of
                # replacing it, so values from all files are kept.
                results.setdefault(name, []).append(float(row[6]))


def main():
    """Pretty-print the mean value per cope name for each of three folders.

    Walks fmriroi/roi_data/<subdir>/<folder>/ and gathers the cope values
    found in the "ffa", "lingual_gyrus" and "ppa" folders of every
    <subdir>. The printed dict has the keys "ffa", "lingual" and "ppa",
    each mapping cope names to the mean of all values gathered for them.
    """
    # Folder name on disk -> key used in the printed result.
    output_keys = {"ffa": "ffa", "lingual_gyrus": "lingual", "ppa": "ppa"}
    collected = {key: {} for key in output_keys.values()}

    root = os.path.join("fmriroi", "roi_data")
    for subdir in os.listdir(root):
        subdirpath = os.path.join(root, subdir)
        # Stray files next to the per-subject folders would make
        # os.listdir() fail, so only descend into directories.
        if not os.path.isdir(subdirpath):
            continue
        for subsubdir in os.listdir(subdirpath):
            if subsubdir in output_keys:
                _collect_cope_values(
                    os.path.join(subdirpath, subsubdir),
                    collected[output_keys[subsubdir]],
                )

    # Every stored list holds at least one value, so statistics.mean()
    # cannot see an empty sequence here.
    res = {
        key: {name: statistics.mean(vals) for name, vals in per_cope.items()}
        for key, per_cope in collected.items()
    }

    pprint.pprint(res)

# Run main() only when this file is executed as a script, not when imported.
if __name__ == "__main__":
    main()

它给出以下输出:

{'ffa': {'cope1': 0.4376,
         'cope2': 0.3582,
         'cope3': 0.6315,
         'cope4': 0.1722,
         'cope5': 0.3518},
 'lingual': {'cope1': -0.08865060000000001,
             'cope2': -0.150985,
             'cope3': -0.162005,
             'cope4': -0.130845,
             'cope5': -0.126411},
 'ppa': {'cope1': 0.74836,
         'cope2': 0.9444,
         'cope3': 0.300482,
         'cope4': 1.12435,
         'cope5': 0.8332200000000001}}