Python fold/reduce:组合多个字典
Python fold/reduce composition of multiple dictionaries
我想实现以下目标。它本质上是任意数量的字典的组合或合并,参考 'seed' 或根字典,在最终结果中累积所有未更改和更新的值。
# Root ("seed") payload: the starting state, with no updates or diffs recorded.
seed = {
    'update': False,
    'data': {
        'subdata': {
            'field1': 5,
            'field2': '2018-01-30 00:00:00',
        },
        'field3': 2,
        'field4': None,
    },
    'data_updates': {},
    'subdata_updates': {},
    'diffs': {},
}

# First update payload: data.subdata.field1 goes from 5 to 6.
update_1 = {
    'update': True,
    'data': {
        'subdata': {
            'field1': 6,
            'field2': '2018-01-30 00:00:00',
        },
        'field3': 2,
        'field4': None,
    },
    'data_updates': {},
    'subdata_updates': {'field1': 6},
    'diffs': {
        'field1': {
            'field': 'field1',
            'before': 5,
            'after': 6,
        },
    },
}

# Second update payload: data.field4 goes from None to 1.
update_2 = {
    'update': True,
    'data': {
        'subdata': {
            'field1': 5,
            'field2': '2018-01-30 00:00:00',
        },
        'field3': 2,
        'field4': 1,
    },
    'data_updates': {'field4': 1},
    'subdata_updates': {},
    'diffs': {
        'field4': {
            'field': 'field4',
            'before': None,
            'after': 1,
        },
    },
}
# Any number of update payloads may follow the seed; two are used here.
assert reduce_maps(seed, update_1, update_2) == {
    'update': True,
    'data': {
        'subdata': {
            'field1': 6,
            'field2': '2018-01-30 00:00:00',
        },
        'field3': 2,
        'field4': 1,
    },
    'data_updates': {'field4': 1},
    'subdata_updates': {'field1': 6},
    'diffs': {
        'field1': {
            'field': 'field1',
            'before': 5,
            'after': 6,
        },
        'field4': {
            'field': 'field4',
            'before': None,
            'after': 1,
        },
    },
}
您可以假设数据将始终采用这种形状,您还可以假设每个有效载荷只更新一个字段,并且没有两次更新会更新同一个字段。
我隐约感觉到这里潜藏着某种类似 fold 的操作:以 seed 为起点,
逐步累积构建数据。
给你:
from pprint import pprint
def merge_working(pre, post):
    """Recursively merge ``post`` on top of ``pre`` and return the result.

    When both arguments are dicts, a new dict is built: it starts as a
    shallow copy of ``pre`` and every key of ``post`` is merged into it
    recursively.  In every other case ``post`` is returned as-is, i.e. the
    newer value simply replaces the older one.

    Args:
        pre: The older value (a dict or any leaf value).
        post: The newer value (a dict or any leaf value).

    Returns:
        The merged dict, or ``post`` itself when either side is not a dict.
        ``pre`` is not modified; sub-dicts of ``pre`` that ``post`` does not
        touch are shared with the result (shallow copy).
    """
    if not (isinstance(pre, dict) and isinstance(post, dict)):
        # Nothing to merge key-by-key: the newer value wins outright.
        return post
    merged = pre.copy()  # keys that only exist in ``pre`` are preserved
    for key, post_value in post.items():
        # ``get`` yields None for keys new to ``pre``; the non-dict branch
        # above then hands back ``post_value`` unchanged.
        merged[key] = merge_working(merged.get(key), post_value)
    return merged
def merge_simplest(pre, post):
    """Merge two values that are assumed to have exactly the same dict shape.

    Only the keys of ``pre`` are visited, so keys that exist solely in
    ``post`` are dropped from the result.

    Args:
        pre: The older value (a dict or any leaf value).
        post: The newer value, expected to mirror the shape of ``pre``.

    Returns:
        ``post`` when ``pre`` is not a dict; otherwise a new dict holding the
        recursively merged value for every key of ``pre``.

    Raises:
        KeyError: If ``post`` is a dict lacking one of the keys of ``pre``.
    """
    if not isinstance(pre, dict):
        # Leaf position: the newer value wins.
        return post
    # Deliberately strict: ``post[key]`` is indexed without any fallback.
    return {key: merge_simplest(pre[key], post[key]) for key in pre}
# Merge strategy that reduce_maps applies pairwise; rebind this name to
# merge_simplest to compare how the stricter variant behaves.
merge = merge_working
def reduce_maps(*objects):
    """Fold all given mappings into one, left to right, using ``merge``.

    Args:
        *objects: The seed mapping followed by any number of updates.

    Returns:
        The accumulated result of ``merge(merge(seed, u1), u2) ...``.  With a
        single argument that argument is returned unchanged.

    Raises:
        IndexError: If called without any argument.
    """
    merged = objects[0]
    for post in objects[1:]:
        # ``merge`` is looked up at call time, so rebinding the module-level
        # name switches the strategy.
        merged = merge(merged, post)
    return merged
# Root ("seed") payload: the starting state, with no updates or diffs recorded.
seed = {
    'update': False,
    'data': {
        'subdata': {
            'field1': 5,
            'field2': '2018-01-30 00:00:00',
        },
        'field3': 2,
        'field4': None,
    },
    'data_updates': {},
    'subdata_updates': {},
    'diffs': {},
}

# First update payload: data.subdata.field1 goes from 5 to 6.
update_1 = {
    'update': True,
    'data': {
        'subdata': {
            'field1': 6,
            'field2': '2018-01-30 00:00:00',
        },
        'field3': 2,
        'field4': None,
    },
    'data_updates': {},
    'subdata_updates': {'field1': 6},
    'diffs': {
        'field1': {
            'field': 'field 1',
            'before': 5,
            'after': 6,
        },
    },
}

# Second update payload: data.field4 goes from None to 1.
update_2 = {
    'update': True,
    'data': {
        'subdata': {
            'field1': 5,
            'field2': '2018-01-30 00:00:00',
        },
        'field3': 2,
        'field4': 1,
    },
    'data_updates': {'field4': 1},
    'subdata_updates': {},  # key was originally spelled subdata_update
    'diffs': {
        'field4': {
            'field': 'field 4',
            'before': None,
            'after': 1,
        },
    },
}
# Fold the seed and both updates, then compare with the expected outcome.
result = reduce_maps(seed, update_1, update_2)

golden = {
    'update': True,
    'data': {
        'subdata': {
            'field1': 5,  # was 6
            'field2': '2018-01-30 00:00:00',
        },
        'field3': 2,
        'field4': 1,
    },
    'data_updates': {'field4': 1},
    'subdata_updates': {'field1': 6},  # key was originally spelled subdata_update
    'diffs': {
        'field1': {
            'field': 'field 1',
            'before': 5,
            'after': 6,
        },
        'field4': {
            'field': 'field 4',
            'before': None,
            'after': 1,
        },
    },
}

# Print both structures so they can be compared by eye, then check equality.
pprint(result)
pprint(golden)
assert result == golden
我已经修正了您数据中我认为是笔误的地方(请参阅代码中的注释)。
请注意,merge
可能需要根据确切的合并规则和可能出现的数据进行调整。要明白我的意思,请改用 merge = merge_simplest
并弄清楚它为什么会失败。如果各字典的 "data-agnostic" 形状(即不考虑叶子值的字典树结构)确实完全相同,它就不会失败。
import copy
from functools import partial, reduce
def traverse(seed, update, sentinel):
    """Apply ``update`` onto ``seed`` in place and return ``seed``.

    Dict values are merged recursively.  A leaf value is written when its key
    is new to ``seed``, or when it differs from the current value and that key
    name has not been written by an earlier step.  The second rule stops a
    later payload, which still carries the stale original value, from
    reverting an earlier change.

    Args:
        seed: The dict being accumulated into; it is mutated.
        update: The dict whose values are applied on top of ``seed``.
        sentinel: A set of key names already written.  It is mutated and is
            meant to be shared across successive calls.  Keys are tracked by
            bare name, not by path, so equal names at different nesting
            levels share one entry.

    Returns:
        The same ``seed`` object, after mutation.
    """
    for key, value in update.items():
        if isinstance(value, dict):
            if key in seed:
                traverse(seed[key], value, sentinel)
            else:
                # Store a private copy: inserting ``value`` itself would let a
                # later update be merged into (and so mutate) this payload.
                seed[key] = copy.deepcopy(value)
        elif key not in seed or (value != seed[key] and key not in sentinel):
            seed[key] = value
            sentinel.add(key)
    return seed
def reduce_maps(seed, *updates):
    """Fold ``updates`` into a deep copy of ``seed`` and return the result.

    Args:
        seed: The root dict; it is deep-copied first and never mutated.
        *updates: Any number of update dicts, applied left to right.

    Returns:
        The accumulated dict.  With no updates this is just the deep copy of
        ``seed``.
    """
    accumulator = copy.deepcopy(seed)
    # A fresh sentinel set per call, shared by every step of this fold.
    step = partial(traverse, sentinel=set())
    # Passing the copy as the initializer avoids building a temporary list.
    return reduce(step, updates, accumulator)
我想实现以下目标。它本质上是任意数量的字典的组合或合并,参考 'seed' 或根字典,在最终结果中累积所有未更改和更新的值。
# Root ("seed") payload: the starting state, with no updates or diffs recorded.
seed = {
    'update': False,
    'data': {
        'subdata': {
            'field1': 5,
            'field2': '2018-01-30 00:00:00',
        },
        'field3': 2,
        'field4': None,
    },
    'data_updates': {},
    'subdata_updates': {},
    'diffs': {},
}

# First update payload: data.subdata.field1 goes from 5 to 6.
update_1 = {
    'update': True,
    'data': {
        'subdata': {
            'field1': 6,
            'field2': '2018-01-30 00:00:00',
        },
        'field3': 2,
        'field4': None,
    },
    'data_updates': {},
    'subdata_updates': {'field1': 6},
    'diffs': {
        'field1': {
            'field': 'field1',
            'before': 5,
            'after': 6,
        },
    },
}

# Second update payload: data.field4 goes from None to 1.
update_2 = {
    'update': True,
    'data': {
        'subdata': {
            'field1': 5,
            'field2': '2018-01-30 00:00:00',
        },
        'field3': 2,
        'field4': 1,
    },
    'data_updates': {'field4': 1},
    'subdata_updates': {},
    'diffs': {
        'field4': {
            'field': 'field4',
            'before': None,
            'after': 1,
        },
    },
}
# Any number of update payloads may follow the seed; two are used here.
assert reduce_maps(seed, update_1, update_2) == {
    'update': True,
    'data': {
        'subdata': {
            'field1': 6,
            'field2': '2018-01-30 00:00:00',
        },
        'field3': 2,
        'field4': 1,
    },
    'data_updates': {'field4': 1},
    'subdata_updates': {'field1': 6},
    'diffs': {
        'field1': {
            'field': 'field1',
            'before': 5,
            'after': 6,
        },
        'field4': {
            'field': 'field4',
            'before': None,
            'after': 1,
        },
    },
}
您可以假设数据将始终采用这种形状,您还可以假设每个有效载荷只更新一个字段,并且没有两次更新会更新同一个字段。
我隐约感觉到这里潜藏着某种类似 fold 的操作:以 seed 为起点,
逐步累积构建数据。
给你:
from pprint import pprint
def merge_working(pre, post):
    """Recursively merge ``post`` on top of ``pre`` and return the result.

    When both arguments are dicts, a new dict is built: it starts as a
    shallow copy of ``pre`` and every key of ``post`` is merged into it
    recursively.  In every other case ``post`` is returned as-is, i.e. the
    newer value simply replaces the older one.

    Args:
        pre: The older value (a dict or any leaf value).
        post: The newer value (a dict or any leaf value).

    Returns:
        The merged dict, or ``post`` itself when either side is not a dict.
        ``pre`` is not modified; sub-dicts of ``pre`` that ``post`` does not
        touch are shared with the result (shallow copy).
    """
    if not (isinstance(pre, dict) and isinstance(post, dict)):
        # Nothing to merge key-by-key: the newer value wins outright.
        return post
    merged = pre.copy()  # keys that only exist in ``pre`` are preserved
    for key, post_value in post.items():
        # ``get`` yields None for keys new to ``pre``; the non-dict branch
        # above then hands back ``post_value`` unchanged.
        merged[key] = merge_working(merged.get(key), post_value)
    return merged
def merge_simplest(pre, post):
    """Merge two values that are assumed to have exactly the same dict shape.

    Only the keys of ``pre`` are visited, so keys that exist solely in
    ``post`` are dropped from the result.

    Args:
        pre: The older value (a dict or any leaf value).
        post: The newer value, expected to mirror the shape of ``pre``.

    Returns:
        ``post`` when ``pre`` is not a dict; otherwise a new dict holding the
        recursively merged value for every key of ``pre``.

    Raises:
        KeyError: If ``post`` is a dict lacking one of the keys of ``pre``.
    """
    if not isinstance(pre, dict):
        # Leaf position: the newer value wins.
        return post
    # Deliberately strict: ``post[key]`` is indexed without any fallback.
    return {key: merge_simplest(pre[key], post[key]) for key in pre}
# Merge strategy that reduce_maps applies pairwise; rebind this name to
# merge_simplest to compare how the stricter variant behaves.
merge = merge_working
def reduce_maps(*objects):
    """Fold all given mappings into one, left to right, using ``merge``.

    Args:
        *objects: The seed mapping followed by any number of updates.

    Returns:
        The accumulated result of ``merge(merge(seed, u1), u2) ...``.  With a
        single argument that argument is returned unchanged.

    Raises:
        IndexError: If called without any argument.
    """
    merged = objects[0]
    for post in objects[1:]:
        # ``merge`` is looked up at call time, so rebinding the module-level
        # name switches the strategy.
        merged = merge(merged, post)
    return merged
# Root ("seed") payload: the starting state, with no updates or diffs recorded.
seed = {
    'update': False,
    'data': {
        'subdata': {
            'field1': 5,
            'field2': '2018-01-30 00:00:00',
        },
        'field3': 2,
        'field4': None,
    },
    'data_updates': {},
    'subdata_updates': {},
    'diffs': {},
}

# First update payload: data.subdata.field1 goes from 5 to 6.
update_1 = {
    'update': True,
    'data': {
        'subdata': {
            'field1': 6,
            'field2': '2018-01-30 00:00:00',
        },
        'field3': 2,
        'field4': None,
    },
    'data_updates': {},
    'subdata_updates': {'field1': 6},
    'diffs': {
        'field1': {
            'field': 'field 1',
            'before': 5,
            'after': 6,
        },
    },
}

# Second update payload: data.field4 goes from None to 1.
update_2 = {
    'update': True,
    'data': {
        'subdata': {
            'field1': 5,
            'field2': '2018-01-30 00:00:00',
        },
        'field3': 2,
        'field4': 1,
    },
    'data_updates': {'field4': 1},
    'subdata_updates': {},  # key was originally spelled subdata_update
    'diffs': {
        'field4': {
            'field': 'field 4',
            'before': None,
            'after': 1,
        },
    },
}
# Fold the seed and both updates, then compare with the expected outcome.
result = reduce_maps(seed, update_1, update_2)

golden = {
    'update': True,
    'data': {
        'subdata': {
            'field1': 5,  # was 6
            'field2': '2018-01-30 00:00:00',
        },
        'field3': 2,
        'field4': 1,
    },
    'data_updates': {'field4': 1},
    'subdata_updates': {'field1': 6},  # key was originally spelled subdata_update
    'diffs': {
        'field1': {
            'field': 'field 1',
            'before': 5,
            'after': 6,
        },
        'field4': {
            'field': 'field 4',
            'before': None,
            'after': 1,
        },
    },
}

# Print both structures so they can be compared by eye, then check equality.
pprint(result)
pprint(golden)
assert result == golden
我已经修正了您数据中我认为是笔误的地方(请参阅代码中的注释)。
请注意,merge
可能需要根据确切的合并规则和可能出现的数据进行调整。要明白我的意思,请改用 merge = merge_simplest
并弄清楚它为什么会失败。如果各字典的 "data-agnostic" 形状(即不考虑叶子值的字典树结构)确实完全相同,它就不会失败。
import copy
from functools import partial, reduce
def traverse(seed, update, sentinel):
    """Apply ``update`` onto ``seed`` in place and return ``seed``.

    Dict values are merged recursively.  A leaf value is written when its key
    is new to ``seed``, or when it differs from the current value and that key
    name has not been written by an earlier step.  The second rule stops a
    later payload, which still carries the stale original value, from
    reverting an earlier change.

    Args:
        seed: The dict being accumulated into; it is mutated.
        update: The dict whose values are applied on top of ``seed``.
        sentinel: A set of key names already written.  It is mutated and is
            meant to be shared across successive calls.  Keys are tracked by
            bare name, not by path, so equal names at different nesting
            levels share one entry.

    Returns:
        The same ``seed`` object, after mutation.
    """
    for key, value in update.items():
        if isinstance(value, dict):
            if key in seed:
                traverse(seed[key], value, sentinel)
            else:
                # Store a private copy: inserting ``value`` itself would let a
                # later update be merged into (and so mutate) this payload.
                seed[key] = copy.deepcopy(value)
        elif key not in seed or (value != seed[key] and key not in sentinel):
            seed[key] = value
            sentinel.add(key)
    return seed
def reduce_maps(seed, *updates):
    """Fold ``updates`` into a deep copy of ``seed`` and return the result.

    Args:
        seed: The root dict; it is deep-copied first and never mutated.
        *updates: Any number of update dicts, applied left to right.

    Returns:
        The accumulated dict.  With no updates this is just the deep copy of
        ``seed``.
    """
    accumulator = copy.deepcopy(seed)
    # A fresh sentinel set per call, shared by every step of this fold.
    step = partial(traverse, sentinel=set())
    # Passing the copy as the initializer avoids building a temporary list.
    return reduce(step, updates, accumulator)