从不同目录中的多个文件中读取特定数据 python
Read specific data from multiple files in different directories python
如果我有很多这样的文件:
[
每个文件夹里面还有3个这样的:
[
现在每个文件夹中都有一个 .txt 文件,如下所示:
[
对于每个 .txt 文件,我需要获取文件中第 6 列(用红色圈出)的值,并且只关心开头包含 cope1、cope2、cope3、cope4 和 cope5 的行(以蓝色突出显示)。其他内容都可以忽略。
我需要为每个文件夹单独呈现数据
所以会是:
[
我需要从每个文件中读取相关数据并将其存储在合理的数据结构中。
我需要像这样呈现所有数据,以便得到 FFA 的 cope1 到 cope5 的平均值,
并且对 10 个文件夹中每个文件夹里的 3 个子文件夹都这样处理,依此类推。
大致是这样呈现的:
抱歉问题比较冗长,我是 Python 新手!非常感谢所有帮助。
import os
import csv
import statistics
def _collect_cope_values(region_dir, results):
    """Gather cope values from every file directly inside *region_dir*.

    Each file is parsed as space-delimited text. A row is used when its
    second field (index 1) contains "cope"; the label is the second
    "/"-separated segment of that field and the value is the field at
    index 6, converted to float. Values are appended to ``results[label]``.

    Raises:
        IndexError: if a cope row has no "/" in its second field or has
            fewer than seven fields.
        ValueError: if the field at index 6 is not a valid float.
    """
    for filename in os.listdir(region_dir):
        path = os.path.join(region_dir, filename)
        with open(path, "r") as f:
            for row in csv.reader(f, delimiter=" "):
                # Blank or single-field lines cannot carry a cope label.
                if len(row) < 2 or "cope" not in row[1]:
                    continue
                name = row[1].split("/")[1]
                # setdefault keeps earlier values; the original tested
                # membership in an always-empty dict and so overwrote them.
                results.setdefault(name, []).append(float(row[6]))


def main():
    """Print the mean FFA value per cope label across all subject folders.

    Walks ``fmriroi/roi_data/<subject>/<region>/`` for the regions "ffa",
    "lingual_gyrus" and "ppa", collects the cope values of each region, and
    prints a ``{label: mean}`` dict for the FFA region.
    """
    results = {"ffa": {}, "lingual_gyrus": {}, "ppa": {}}
    root = os.path.join("fmriroi", "roi_data")
    for subdir in os.listdir(root):
        subdirpath = os.path.join(root, subdir)
        # Stray files next to the subject folders (e.g. .DS_Store) are skipped.
        if not os.path.isdir(subdirpath):
            continue
        for subsubdir in os.listdir(subdirpath):
            if subsubdir in results:
                _collect_cope_values(
                    os.path.join(subdirpath, subsubdir), results[subsubdir]
                )

    # Only the FFA averages are reported, as in the original print. They are
    # read from the FFA results rather than the never-filled `values` dict.
    res = {k: statistics.mean(v) for k, v in results["ffa"].items()}
    print(res)


if __name__ == "__main__":
    main()
输出:需要
我同意评论中@Silveris 的建议。您可以将我的代码用于单个文件:
import re
# Read file.txt and record, for cope1, cope2, ... in that order, the sixth
# whitespace-separated field (index 5) of the first line that has a token
# starting with the expected cope label.
datas = []
with open('file.txt', 'r', encoding='utf-8') as f:
    data = {}
    i = 1  # number of the cope label expected next
    for line in f.readlines():
        fields = line.split()
        cope = [item for item in fields if re.match(rf'cope{i}', item)]
        if not cope:
            continue
        # The key is all matching tokens on the line joined together.
        data[''.join(cope)] = fields[5]
        i += 1
    datas.append(data)
这是我添加到您的代码中的一些代码:
import os
import csv
import pprint
import statistics
# Region directory name -> key used for that region in the printed result.
_REGION_LABELS = {"ffa": "ffa", "lingual_gyrus": "lingual", "ppa": "ppa"}


def _collect_cope_values(region_dir, results):
    """Gather cope values from every file directly inside *region_dir*.

    Each file is parsed as space-delimited text. A row is used when its
    second field (index 1) contains "cope"; the label is the second
    "/"-separated segment of that field and the value is the field at
    index 6, converted to float. Values are appended to ``results[label]``.

    Raises:
        IndexError: if a cope row has no "/" in its second field or has
            fewer than seven fields.
        ValueError: if the field at index 6 is not a valid float.
    """
    for filename in os.listdir(region_dir):
        path = os.path.join(region_dir, filename)
        with open(path, "r") as f:
            for row in csv.reader(f, delimiter=" "):
                # Blank or single-field lines cannot carry a cope label.
                if len(row) < 2 or "cope" not in row[1]:
                    continue
                name = row[1].split("/")[1]
                # setdefault keeps earlier values; the original FFA branch
                # tested an always-empty dict and so kept only the last one.
                results.setdefault(name, []).append(float(row[6]))


def main():
    """Pretty-print the mean value per cope label for each region.

    Walks ``fmriroi/roi_data/<subject>/<region>/`` for the regions "ffa",
    "lingual_gyrus" and "ppa", collects the cope values of each region over
    all subject folders, and pretty-prints
    ``{"ffa": {...}, "lingual": {...}, "ppa": {...}}`` where each inner dict
    maps a cope label to the mean of its collected values.
    """
    collected = {region: {} for region in _REGION_LABELS}
    root = os.path.join("fmriroi", "roi_data")
    for subdir in os.listdir(root):
        subdirpath = os.path.join(root, subdir)
        # Stray files next to the subject folders (e.g. .DS_Store) are skipped.
        if not os.path.isdir(subdirpath):
            continue
        for subsubdir in os.listdir(subdirpath):
            if subsubdir in collected:
                _collect_cope_values(
                    os.path.join(subdirpath, subsubdir), collected[subsubdir]
                )

    res = {
        label: {k: statistics.mean(v) for k, v in collected[region].items()}
        for region, label in _REGION_LABELS.items()
    }
    pprint.pprint(res)


if __name__ == "__main__":
    main()
它给出以下输出:
{'ffa': {'cope1': 0.4376,
'cope2': 0.3582,
'cope3': 0.6315,
'cope4': 0.1722,
'cope5': 0.3518},
'lingual': {'cope1': -0.08865060000000001,
'cope2': -0.150985,
'cope3': -0.162005,
'cope4': -0.130845,
'cope5': -0.126411},
'ppa': {'cope1': 0.74836,
'cope2': 0.9444,
'cope3': 0.300482,
'cope4': 1.12435,
'cope5': 0.8332200000000001}}
如果我有很多这样的文件:
[
每个文件夹里面还有3个这样的:
[
现在每个文件夹中都有一个 .txt 文件,如下所示:
[
对于每个 .txt 文件,我需要获取文件中第 6 列(用红色圈出)的值,并且只关心开头包含 cope1、cope2、cope3、cope4 和 cope5 的行(以蓝色突出显示)。其他内容都可以忽略。
我需要为每个文件夹单独呈现数据
所以会是:
[
我需要从每个文件中读取相关数据并将其存储在合理的数据结构中。
我需要像这样呈现所有数据,以便得到 FFA 的 cope1 到 cope5 的平均值,并且对 10 个文件夹中每个文件夹里的 3 个子文件夹都这样处理,依此类推。
大致是这样呈现的:
抱歉问题比较冗长,我是 Python 新手!非常感谢所有帮助。
import os
import csv
import statistics
def _collect_cope_values(region_dir, results):
    """Gather cope values from every file directly inside *region_dir*.

    Each file is parsed as space-delimited text. A row is used when its
    second field (index 1) contains "cope"; the label is the second
    "/"-separated segment of that field and the value is the field at
    index 6, converted to float. Values are appended to ``results[label]``.

    Raises:
        IndexError: if a cope row has no "/" in its second field or has
            fewer than seven fields.
        ValueError: if the field at index 6 is not a valid float.
    """
    for filename in os.listdir(region_dir):
        path = os.path.join(region_dir, filename)
        with open(path, "r") as f:
            for row in csv.reader(f, delimiter=" "):
                # Blank or single-field lines cannot carry a cope label.
                if len(row) < 2 or "cope" not in row[1]:
                    continue
                name = row[1].split("/")[1]
                # setdefault keeps earlier values; the original tested
                # membership in an always-empty dict and so overwrote them.
                results.setdefault(name, []).append(float(row[6]))


def main():
    """Print the mean FFA value per cope label across all subject folders.

    Walks ``fmriroi/roi_data/<subject>/<region>/`` for the regions "ffa",
    "lingual_gyrus" and "ppa", collects the cope values of each region, and
    prints a ``{label: mean}`` dict for the FFA region.
    """
    results = {"ffa": {}, "lingual_gyrus": {}, "ppa": {}}
    root = os.path.join("fmriroi", "roi_data")
    for subdir in os.listdir(root):
        subdirpath = os.path.join(root, subdir)
        # Stray files next to the subject folders (e.g. .DS_Store) are skipped.
        if not os.path.isdir(subdirpath):
            continue
        for subsubdir in os.listdir(subdirpath):
            if subsubdir in results:
                _collect_cope_values(
                    os.path.join(subdirpath, subsubdir), results[subsubdir]
                )

    # Only the FFA averages are reported, as in the original print. They are
    # read from the FFA results rather than the never-filled `values` dict.
    res = {k: statistics.mean(v) for k, v in results["ffa"].items()}
    print(res)


if __name__ == "__main__":
    main()
输出:需要
我同意评论中@Silveris 的建议。您可以将我的代码用于单个文件:
import re
# Read file.txt and record, for cope1, cope2, ... in that order, the sixth
# whitespace-separated field (index 5) of the first line that has a token
# starting with the expected cope label.
datas = []
with open('file.txt', 'r', encoding='utf-8') as f:
    data = {}
    i = 1  # number of the cope label expected next
    for line in f.readlines():
        fields = line.split()
        cope = [item for item in fields if re.match(rf'cope{i}', item)]
        if not cope:
            continue
        # The key is all matching tokens on the line joined together.
        data[''.join(cope)] = fields[5]
        i += 1
    datas.append(data)
这是我添加到您的代码中的一些代码:
import os
import csv
import pprint
import statistics
# Region directory name -> key used for that region in the printed result.
_REGION_LABELS = {"ffa": "ffa", "lingual_gyrus": "lingual", "ppa": "ppa"}


def _collect_cope_values(region_dir, results):
    """Gather cope values from every file directly inside *region_dir*.

    Each file is parsed as space-delimited text. A row is used when its
    second field (index 1) contains "cope"; the label is the second
    "/"-separated segment of that field and the value is the field at
    index 6, converted to float. Values are appended to ``results[label]``.

    Raises:
        IndexError: if a cope row has no "/" in its second field or has
            fewer than seven fields.
        ValueError: if the field at index 6 is not a valid float.
    """
    for filename in os.listdir(region_dir):
        path = os.path.join(region_dir, filename)
        with open(path, "r") as f:
            for row in csv.reader(f, delimiter=" "):
                # Blank or single-field lines cannot carry a cope label.
                if len(row) < 2 or "cope" not in row[1]:
                    continue
                name = row[1].split("/")[1]
                # setdefault keeps earlier values; the original FFA branch
                # tested an always-empty dict and so kept only the last one.
                results.setdefault(name, []).append(float(row[6]))


def main():
    """Pretty-print the mean value per cope label for each region.

    Walks ``fmriroi/roi_data/<subject>/<region>/`` for the regions "ffa",
    "lingual_gyrus" and "ppa", collects the cope values of each region over
    all subject folders, and pretty-prints
    ``{"ffa": {...}, "lingual": {...}, "ppa": {...}}`` where each inner dict
    maps a cope label to the mean of its collected values.
    """
    collected = {region: {} for region in _REGION_LABELS}
    root = os.path.join("fmriroi", "roi_data")
    for subdir in os.listdir(root):
        subdirpath = os.path.join(root, subdir)
        # Stray files next to the subject folders (e.g. .DS_Store) are skipped.
        if not os.path.isdir(subdirpath):
            continue
        for subsubdir in os.listdir(subdirpath):
            if subsubdir in collected:
                _collect_cope_values(
                    os.path.join(subdirpath, subsubdir), collected[subsubdir]
                )

    res = {
        label: {k: statistics.mean(v) for k, v in collected[region].items()}
        for region, label in _REGION_LABELS.items()
    }
    pprint.pprint(res)


if __name__ == "__main__":
    main()
它给出以下输出:
{'ffa': {'cope1': 0.4376,
'cope2': 0.3582,
'cope3': 0.6315,
'cope4': 0.1722,
'cope5': 0.3518},
'lingual': {'cope1': -0.08865060000000001,
'cope2': -0.150985,
'cope3': -0.162005,
'cope4': -0.130845,
'cope5': -0.126411},
'ppa': {'cope1': 0.74836,
'cope2': 0.9444,
'cope3': 0.300482,
'cope4': 1.12435,
'cope5': 0.8332200000000001}}