如何获得以下嵌套列表的 Treelib 表示?

How can I obtain a Treelib representation of the below nested list?

我正在尝试将嵌套列表转换为 Treelib 表示形式。

所需的树结构是这样的:列表中不属于 list 类型的所有元素都处于同一层次结构中。嵌套列表中的子列表是紧接其前面的元素(必须是非 list 类型的元素)的子元素。例如,

lst = [1,['a','b','c','d',['s','t',['ab','cd',['a','b'],'ef'],'u'],'f']]

将转换为 Treelib 表示

from treelib import Node, Tree

tree = Tree()
tree.create_node(1, "root") # We can assume that the list will contain a root node

tree.create_node('a', 'a', parent='root')
tree.create_node('b', 'b', parent='root')
tree.create_node('c', 'c', parent='root')
tree.create_node('d', 'd', parent='root')
tree.create_node('s', 's', parent='d')
tree.create_node('t', 't', parent='d')
tree.create_node('ab', 'ab', parent='t')
tree.create_node('cd', 'cd', parent='t')
tree.create_node('a', 'a1', parent='cd')
tree.create_node('b', 'b1', parent='cd')
tree.create_node('ef', 'ef', parent='t')
tree.create_node('u', 'u', parent='d')
tree.create_node('f', 'f', parent='root')

tree.show()

期望的输出:

1
├── a
├── b
├── c
├── d
│   ├── s
│   ├── t
│   │   ├── ab
│   │   ├── cd
│   │   │   ├── a
│   │   │   └── b
│   │   └── ef
│   └── u
└── f

我猜想这需要某种递归逻辑来解析树并识别整个层次结构,但我无法为此提出逻辑。我将如何编写代码来为任意嵌套列表(我已在此处手动编写)生成 Treelib 节点?任何帮助将不胜感激。

编辑:这里的一个复杂问题是树的整个块都可以重复。例如,

lst = [1,['a','b','c','d',['s','t',['ab','cd',['a','b'],'ef'],'u'],'f','t',['ab','cd',['a','b'],'ef']]]

应转换为 Treelib 表示


tree = Tree()
tree.create_node(1, "root") # We can assume that the list will contain a root node

tree.create_node('a', 'a', parent='root')
tree.create_node('b', 'b', parent='root')
tree.create_node('c', 'c', parent='root')
tree.create_node('d', 'd', parent='root')
tree.create_node('s', 's', parent='d')
tree.create_node('t', 't', parent='d')
tree.create_node('ab', 'ab', parent='t')
tree.create_node('cd', 'cd', parent='t')
tree.create_node('a', 'a1', parent='cd')
tree.create_node('b', 'b1', parent='cd')
tree.create_node('ef', 'ef', parent='t')
tree.create_node('u', 'u', parent='d')
tree.create_node('f', 'f', parent='root')
tree.create_node('t', 't1', parent='root')
tree.create_node('ab', 'ab1', parent='t1')
tree.create_node('cd', 'cd1', parent='t1')
tree.create_node('a', 'a2', parent='cd1')
tree.create_node('b', 'b2', parent='cd1')
tree.create_node('ef', 'ef1', parent='t1')


tree.show()

期望的输出:

1
├── a
├── b
├── c
├── d
│   ├── s
│   ├── t
│   │   ├── ab
│   │   ├── cd
│   │   │   ├── a
│   │   │   └── b
│   │   └── ef
│   └── u
├── f
└── t
    ├── ab
    ├── cd
    │   ├── a
    │   └── b
    └── ef

谢谢!

您可以递归遍历您的列表,跟踪父级:

import treelib, itertools as it, collections as ct
lst = [1,['a','b','c','d',['s','t',['ab','cd',['a','b'],'ef'],'u'],'f']] 
tree = treelib.Tree()
c = ct.defaultdict(lambda :it.count(1))
def build_tree(d, t, p = None):
   last_p = None
   for a, b in it.groupby(d, key=lambda x:not isinstance(x, list)):
      if a:
         for i in b:
            t.create_node(i, (last_p:=(i if (n:=next(c[i])) == 1 else f'{i}{n}')), parent=p)
      else:
         for i in b:
            build_tree(i, t, p = last_p)

build_tree(lst, tree)
tree.show()

输出:

1
├── a
├── b
├── c
├── d
│   ├── s
│   ├── t
│   │   ├── ab
│   │   ├── cd
│   │   │   ├── a
│   │   │   └── b
│   │   └── ef
│   └── u
└── f

第二棵树的结果:

lst = [1,['a','b','c','d',['s','t',['ab','cd',['a','b'],'ef'],'u'],'f','t',['ab','cd',['a','b'],'ef']]] 
tree = treelib.Tree()
c = ct.defaultdict(lambda :it.count(1))
build_tree(lst, tree)
tree.show()

输出:

1
├── a
├── b
├── c
├── d
│   ├── s
│   ├── t
│   │   ├── ab
│   │   ├── cd
│   │   │   ├── a
│   │   │   └── b
│   │   └── ef
│   └── u
├── f
└── t
    ├── ab
    ├── cd
    │   ├── a
    │   └── b
    └── ef

build_tree 无赋值表达式:

def build_tree(d, t, p = None):
   last_p = None
   for a, b in it.groupby(d, key=lambda x:not isinstance(x, list)):
      if a:
         for i in b:
            n = next(c[i])
            last_p = i if n == 1 else f'{i}{n}'
            t.create_node(i, last_p, parent=p)
      else:
         for i in b:
            build_tree(i, t, p = last_p)

删除重复的树组件:

from itertools import zip_longest as zl
from contextlib import suppress
def tree_eq(tree, t1, t2):
   if not hasattr(t1, 'tag') or not hasattr(t2, 'tag'):
      return False
   if t1 is None or t2 is None:
      return False
   return t1.tag == t2.tag and t1._identifier < t2._identifier and \
   all(tree_eq(tree, *i) for i in zl(tree.children(t1._identifier), tree.children(t2._identifier)))

def prune_tree(d, tree):
   yield d
   for i in tree.children(getattr(d, 'root', d._identifier)):
       yield from prune_tree(i, tree)

tag_d = ct.defaultdict(list)
for i in prune_tree(tree, tree):
   if hasattr(i, 'tag') and tree.children(getattr(i, 'root', i._identifier)):
      tag_d[i.tag].append(i)

for a, *b in tag_d.values():
   for i in b:
      with suppress(treelib.exceptions.NodeIDAbsentError):
         if tree_eq(tree, a, i):
             tree.remove_node(i._identifier)

tree.show()

输出:

1
├── a
├── b
├── c
├── d
│   ├── s
│   ├── t
│   │   ├── ab
│   │   ├── cd
│   │   │   ├── a
│   │   │   └── b
│   │   └── ef
│   └── u
└── f