在 Python 中查找与某个字符串相关的所有元组

Find all tuples related to a certain string in Python

我正在尝试查找与字符串相关的所有元组,而不仅仅是与其匹配。 这是我做的:

from itertools import chain

data = [('A','B'),('B','C'),('B','D'),('B','F'),('F','W'),('W','H'),('G','Z')]
init = 'A'

filtered_init = [item for item in data if item[0] == init or item[1] == init]
elements = list(dict.fromkeys([ i for i in chain(*filtered_init)]))
elements.remove(init)

dat = []
for i in elements:
    sync = [item for item in data if item[0] == i or item[1] == i]
    dat.append(sync)

print(dat)

结果是:

[('A', 'B'), ('B', 'C'), ('B', 'D'), ('B', 'F')]

但是,它只包含A-B相关的级别。 我要查找的是与 init 字符串相关的所有元组,如下图所示:

换句话说,[('A','B'),('B','C'),('B','D'),('B','F'),('F','W'),('W','H')] 就是找到所有可达init的边。 我怎样才能得到它们?

您的问题是找到 connected component of init in an undirected graph defined by an edge list data structure

这个数据结构对于这个问题用起来不是很方便,所以第一步是将其转化为adjacency list. From there, we can apply any standard graph traversal algorithm, such as depth first search。完成后,我们可以将结果转换回您想要的输出边缘列表格式。

from collections import defaultdict

def find_connected_component(edge_list, start):
    # convert to adjacency list
    edges = defaultdict(list)
    for a, b in edge_list:
        edges[a].append(b)
        edges[b].append(a)

    # depth-first search
    stack = [start]
    seen = set()

    while stack:
        node = stack.pop()
        if node not in seen:
            seen.add(node)
            stack.extend(edges[node])

    # convert back to edge list
    return [ edge for edge in edge_list if edge[0] in seen ]

用法:

>>> find_connected_component(data, init)
[('A', 'B'), ('B', 'C'), ('B', 'D'), ('B', 'F'), ('F', 'W'), ('W', 'H')]

一个非常幼稚的解决方案,对于复杂的树可能效率不高。

data = [('A', 'B'), ('B', 'C'), ('B', 'D'), ('B', 'F'),
        ('F', 'W'), ('W', 'H'), ('G', 'Z')]
init = ['A']
result = []
while init:
    initNEW = init.copy()
    init = []
    new = 0
    for edge in data:
        for vertex in initNEW:
            if edge[0] == vertex:
                result.append(edge)
                init.append(edge[1])
                new += 1
    for i in range(len(result) - new, len(result)):
        data.remove(result[i])
print(result)
# [('A', 'B'), ('B', 'C'), ('B', 'D'), ('B', 'F'), ('F', 'W'), ('W', 'H')]

为了更高效,您可以使用 DSU。此解决方案有效 O(N)

from functools import reduce
import random

parent = dict()
init = 'A'
data = [('A','B'),('B','C'),('B','D'),('B','F'),('F','W'),('W','H'),('G','Z')]

def make_set(v):
    parent[v] = v

def find_set(v):
    if v == parent[v]:
        return v
    parent[v] = find_set(parent[v])
    return parent[v]

def union_sets(a, b):
    a, b = map(find_set, [a, b])
    if a != b:
        if random.randint(0, 1):
            a, b = b, a
        parent[b] = a;

elements = set(reduce(lambda x, y: x+y, data))

for v in elements:
    parent[v] = v

for u, v in data:
    union_sets(u, v)

init_set = find_set(init)
edges_in_answer = [e for e in data if find_set(e[0]) == init_set]
print(edges_in_answer)

输出:[('A', 'B'), ('B', 'C'), ('B', 'D'), ('B', 'F'), ('F', 'W'), ('W', 'H')]