从两个 csv 文件创建嵌套字典
create a nested dictionary from two csv files
我有两个 csv 文件
file1.csv:
ID,map1,map2
a,x1,x2
b,y1,
c,z1,z2
file2.csv:
ID,map1Val1,map1Val2,map2Val1
a,a1,a2,l1
b,b1,b2,
c,c1,c2,n1
我希望输出如下所示:
{'ID': {'map1':['map1Val1','map1Val2'], 'map2':'map2Val1'},'a': {'x1':['a1','a2'], 'x2':'l1'},'b': {'y1':['b1','b2']},'c': {'z1':['c1','c2'], 'z2':'n1'},}
我想不出任何方法来创建它。到目前为止,我只有一个代码可以从一个 csv 文件创建字典:
import csv
# Build {ID: {map values}} from file1.csv; the header row is included too.
new_data_dict = {}
with open("file1.csv", 'r') as map_file:
    # csv.reader yields plain lists, so cells can be addressed by position.
    # csv.DictReader rows are keyed by column name, so row[0] would fail.
    mapping = csv.reader(map_file, delimiter=",")
    for row in mapping:
        if not row:  # skip blank lines
            continue
        # Add one entry per row instead of rebinding the whole dict, and
        # leave out empty cells (e.g. the missing map2 value for "b").
        new_data_dict[row[0]] = {cell for cell in row[1:3] if cell}
print(new_data_dict)
输出:
{"ID":{map1,map2}, "a":{x1,x2}, "b":{y1}, "c":{z1,z2}}
您可以使用 zip
聚合来自两个 csv 文件的行:
>>> list(zip([1,2,3], [4,5,6])) # assume 1, 2, 3 / 4, 5, 6 as row values
[(1, 4), (2, 5), (3, 6)]
import csv
# Join file1.csv and file2.csv row by row into a nested dictionary.
new_data_dict = {}
with open('file1.csv') as f1, open('file2.csv') as f2:
    reader1, reader2 = csv.reader(f1), csv.reader(f2)
    # zip pairs the n-th row of file1 with the n-th row of file2. The header
    # rows are paired as well, which is what produces the 'ID' entry.
    for row1, row2 in zip(reader1, reader2):
        id_, map1, map2 = row1
        # The map1 key receives columns 1 and 2 of the file2 row as a list.
        new_data_dict[id_] = {map1: row2[1:3]}
        map2 = map2.strip()
        if map2:  # put map2 only if map2 key exists
            new_data_dict[id_][map2] = row2[3]
new_data_dict  # in an interactive session this displays the result
变为:
{'ID': {'map1': ['map1Val1', 'map1Val2'], 'map2': 'map2Val1'},
'a': {'x1': ['a1', 'a2'], 'x2': 'l1'},
'b': {'y1': ['b1', 'b2']},
'c': {'z1': ['c1', 'c2'], 'z2': 'n1'}}
这是一个更动态的解决方案,允许您 pre-configure file1
中的哪些列映射到 file2
中的哪些列:
import csv
# Maps each file1 column to the file2 columns that hold its values.
column_map = {
    'map1': ['map1Val1', 'map1Val2'],
    'map2': ['map2Val1'],
}
joined_data = dict()
joined_data['ID'] = column_map
with open("file1.txt") as f1, open("file2.txt") as f2:
    # Read both files fully so the handles can be closed before joining.
    key_list = list(csv.DictReader(f1))
    value_list = list(csv.DictReader(f2))
# Pair the n-th data row of file1 with the n-th data row of file2.
for kl, vl in zip(key_list, value_list):
    inner = {}
    for key, value_columns in column_map.items():
        if kl[key]:  # skip file1 cells that are empty
            inner[kl[key]] = [vl[el] for el in value_columns]
    joined_data[kl['ID']] = inner
csv.DictReader
的使用让我们可以将每一行的数据映射到 dict
,其键(默认情况下)由文件的第一行给出。两个 DictReader
objects 被转换为列表并使用 zip
进行迭代。使用 column_map
作为我们的指南,我们创建了一个新的 inner
字典,将来自 key_list
的键与来自 value_list
的值相关联。
编辑
对于 fully-dynamic 解决方案,您可以通过比较 file1
中的列 headers 与 file2
中的列来即时创建 column_map:
import csv
from collections import defaultdict
# Derive column_map from the two header rows, then join the files row by row.
joined_data = dict()
column_map = defaultdict(list)
with open("file1.txt") as f1, open("file2.txt") as f2:
    # Consume the header line of each file by hand so the headers can be
    # compared before the data rows are parsed.
    kh = next(f1).strip()
    vh = next(f2).strip()
    key_headers = kh.split(',')
    value_headers = vh.split(',')
    # A file2 column belongs to a file1 column when its name starts with the
    # file1 column name (the first column, ID, is skipped on both sides).
    # NOTE: a prefix test also matches longer names, e.g. 'map1' matches
    # 'map10Val1'.
    for k in key_headers[1:]:
        for v in value_headers[1:]:
            if v.startswith(k):
                column_map[k].append(v)
    joined_data['ID'] = dict(column_map)
    # The header lines are already consumed, so pass the field names in.
    key_list = list(csv.DictReader(f1, fieldnames=key_headers))
    value_list = list(csv.DictReader(f2, fieldnames=value_headers))
# Pair the n-th data row of file1 with the n-th data row of file2.
for kl, vl in zip(key_list, value_list):
    inner = {}
    for key, value_columns in column_map.items():
        if kl[key]:  # skip file1 cells that are empty
            inner[kl[key]] = [vl[el] for el in value_columns]
    joined_data[kl['ID']] = inner
我有两个 csv 文件
file1.csv:
ID,map1,map2
a,x1,x2
b,y1,
c,z1,z2
file2.csv:
ID,map1Val1,map1Val2,map2Val1
a,a1,a2,l1
b,b1,b2,
c,c1,c2,n1
我希望输出如下所示:
{'ID': {'map1':['map1Val1','map1Val2'], 'map2':'map2Val1'},'a': {'x1':['a1','a2'], 'x2':'l1'},'b': {'y1':['b1','b2']},'c': {'z1':['c1','c2'], 'z2':'n1'},}
我想不出任何方法来创建它。到目前为止,我只有一个代码可以从一个 csv 文件创建字典:
import csv
# Build {ID: {map values}} from file1.csv; the header row is included too.
new_data_dict = {}
with open("file1.csv", 'r') as map_file:
    # csv.reader yields plain lists, so cells can be addressed by position.
    # csv.DictReader rows are keyed by column name, so row[0] would fail.
    mapping = csv.reader(map_file, delimiter=",")
    for row in mapping:
        if not row:  # skip blank lines
            continue
        # Add one entry per row instead of rebinding the whole dict, and
        # leave out empty cells (e.g. the missing map2 value for "b").
        new_data_dict[row[0]] = {cell for cell in row[1:3] if cell}
print(new_data_dict)
输出:
{"ID":{map1,map2}, "a":{x1,x2}, "b":{y1}, "c":{z1,z2}}
您可以使用 zip
聚合来自两个 csv 文件的行:
>>> list(zip([1,2,3], [4,5,6])) # assume 1, 2, 3 / 4, 5, 6 as row values
[(1, 4), (2, 5), (3, 6)]
import csv
# Join file1.csv and file2.csv row by row into a nested dictionary.
new_data_dict = {}
with open('file1.csv') as f1, open('file2.csv') as f2:
    reader1, reader2 = csv.reader(f1), csv.reader(f2)
    # zip pairs the n-th row of file1 with the n-th row of file2. The header
    # rows are paired as well, which is what produces the 'ID' entry.
    for row1, row2 in zip(reader1, reader2):
        id_, map1, map2 = row1
        # The map1 key receives columns 1 and 2 of the file2 row as a list.
        new_data_dict[id_] = {map1: row2[1:3]}
        map2 = map2.strip()
        if map2:  # put map2 only if map2 key exists
            new_data_dict[id_][map2] = row2[3]
new_data_dict  # in an interactive session this displays the result
变为:
{'ID': {'map1': ['map1Val1', 'map1Val2'], 'map2': 'map2Val1'},
'a': {'x1': ['a1', 'a2'], 'x2': 'l1'},
'b': {'y1': ['b1', 'b2']},
'c': {'z1': ['c1', 'c2'], 'z2': 'n1'}}
这是一个更动态的解决方案,允许您 pre-configure file1
中的哪些列映射到 file2
中的哪些列:
import csv
# Maps each file1 column to the file2 columns that hold its values.
column_map = {
    'map1': ['map1Val1', 'map1Val2'],
    'map2': ['map2Val1'],
}
joined_data = dict()
joined_data['ID'] = column_map
with open("file1.txt") as f1, open("file2.txt") as f2:
    # Read both files fully so the handles can be closed before joining.
    key_list = list(csv.DictReader(f1))
    value_list = list(csv.DictReader(f2))
# Pair the n-th data row of file1 with the n-th data row of file2.
for kl, vl in zip(key_list, value_list):
    inner = {}
    for key, value_columns in column_map.items():
        if kl[key]:  # skip file1 cells that are empty
            inner[kl[key]] = [vl[el] for el in value_columns]
    joined_data[kl['ID']] = inner
csv.DictReader
的使用让我们可以将每一行的数据映射到 dict
,其键(默认情况下)由文件的第一行给出。两个 DictReader
objects 被转换为列表并使用 zip
进行迭代。使用 column_map
作为我们的指南,我们创建了一个新的 inner
字典,将来自 key_list
的键与来自 value_list
的值相关联。
编辑
对于 fully-dynamic 解决方案,您可以通过比较 file1
中的列 headers 与 file2
中的列来即时创建 column_map:
import csv
from collections import defaultdict
# Derive column_map from the two header rows, then join the files row by row.
joined_data = dict()
column_map = defaultdict(list)
with open("file1.txt") as f1, open("file2.txt") as f2:
    # Consume the header line of each file by hand so the headers can be
    # compared before the data rows are parsed.
    kh = next(f1).strip()
    vh = next(f2).strip()
    key_headers = kh.split(',')
    value_headers = vh.split(',')
    # A file2 column belongs to a file1 column when its name starts with the
    # file1 column name (the first column, ID, is skipped on both sides).
    # NOTE: a prefix test also matches longer names, e.g. 'map1' matches
    # 'map10Val1'.
    for k in key_headers[1:]:
        for v in value_headers[1:]:
            if v.startswith(k):
                column_map[k].append(v)
    joined_data['ID'] = dict(column_map)
    # The header lines are already consumed, so pass the field names in.
    key_list = list(csv.DictReader(f1, fieldnames=key_headers))
    value_list = list(csv.DictReader(f2, fieldnames=value_headers))
# Pair the n-th data row of file1 with the n-th data row of file2.
for kl, vl in zip(key_list, value_list):
    inner = {}
    for key, value_columns in column_map.items():
        if kl[key]:  # skip file1 cells that are empty
            inner[kl[key]] = [vl[el] for el in value_columns]
    joined_data[kl['ID']] = inner