在 Node.js 中构建多分支树时出现性能问题

Performance issue when building a multi branch tree in Node.js

我正在根据父节点(parent)的 ID,把一组扁平的对象转换成嵌套的对象层次结构。

问题是当我输入更复杂的结构(更多更深 children)时,这个过程需要很长时间才能完成。

可能与内存使用效率低、递归或冗余计算有关?我不确定。

代码:

// Flat list of nodes. Each entry names its parent through `parentNode`;
// the entry whose `parentNode` is null has no parent.
const people = [
  { id: '738a8f8a', parentNode: null },
  { id: 'd18fd69c', parentNode: '738a8f8a' },
  { id: 'b507c11d', parentNode: '738a8f8a' },
  { id: '171d4709', parentNode: 'b507c11d' },
  { id: '471b1cee', parentNode: 'b507c11d' },
];

/**
 * Finds the first entry that has no parent.
 *
 * @param {Iterable<Object>} base - Entries to scan, in iteration order.
 * @returns {?Object} The first entry whose `parentNode` is strictly null,
 *   or null when no entry qualifies.
 */
function getBase(base) {
  for (const candidate of base) {
    const { parentNode } = candidate;
    if (parentNode !== null) {
      continue;
    }
    return candidate;
  }
  return null;
}

/**
 * Collects the direct children of a node by scanning the whole module-level
 * `people` array.
 *
 * @param {{id: *}} parent - Node whose children are wanted.
 * @returns {Array<Object>} A new array holding every entry of `people` whose
 *   `parentNode` strictly equals `parent.id`, in their original order.
 */
function getChildren(parent) {
  const isChildOfParent = (candidate) => candidate.parentNode === parent.id;
  // Object.values on the filtered array yields another array with the same
  // elements; it is kept so the function behaves exactly as before.
  return Object.values(people.filter(isChildOfParent));
}

/**
 * Recursively nests every node under its parent.
 *
 * Each visited node is modified in place: it receives a `childrenNodes`
 * array holding its (already processed) children.
 *
 * @param {?Object} [base=null] - Node to start from. When null, the start
 *   node is whatever `getBase(people)` returns.
 * @returns {?Object} The start node itself, or null when no start node was
 *   given and `getBase` found none.
 */
function buildHierarchy(base = null) {
  const node = base === null ? getBase(people) : base;
  if (node === null) {
    return null;
  }
  node.childrenNodes = getChildren(node).map((child) => buildHierarchy(child));
  return node;
}

// Build the tree with the default start node and print the result.
const hierarchy = buildHierarchy();
console.log(hierarchy);

下面是上面 console.log 的输出:

  {
    id: '738a8f8a',
    parentNode: null,
    childrenNodes: [
      {
        id: 'd18fd69c',
        parentNode: '738a8f8a',
        childrenNodes: [],
      },
      {
        id: 'b507c11d',
        parentNode: '738a8f8a',
        childrenNodes: [
          {
            id: '171d4709',
            parentNode: 'b507c11d',
            childrenNodes: [],
          },
          {
            id: '471b1cee',
            parentNode: 'b507c11d',
            childrenNodes: [],
          },
        ],
      },
    ],
  };

我认为这里的主要瓶颈在于算法。getChildren 每次调用都会遍历整个 people 数组,而每个节点都要调用它一次,因此 people 中的元素越多,开销增长得越快。我已经很久没有做过算法分析了,但据我有根据的推测,当前实现的时间复杂度是 O(n^2)。

我会使用 Map 来解决这个问题:先遍历整个数组一到两次,构建一个从节点 ID 到其子节点数组(node -> children[])的 Map,这样在递归时就可以进行 O(1) 的快速查找。这有助于降低时间复杂度,但代价是需要更多内存,因为每个元素在内存中都被保存了不止一次。

这是一个例子:

/**
 * Builds a lookup from a parent id to the nodes that name it as their
 * `parentNode`, so a node's children can be fetched with one Map lookup.
 *
 * Nodes whose `parentNode` is null are grouped under the `null` key. The
 * stored entries are the original objects from `people`, not copies.
 *
 * @param {Array<{id: *, parentNode: *}>} people - Flat list of nodes.
 * @returns {Map<*, Array<Object>>} Children grouped by parent id, each group
 *   in the order its nodes appear in `people`.
 */
function buildNodeChildLookup(people) {
  const childrenByParentId = new Map();

  // A single pass is enough: each element already is the node to store, so
  // indexing the nodes by id first and looking them up again is unnecessary
  // (and would replace nodes that share an id with the last one seen).
  people.forEach((person) => {
    const siblings = childrenByParentId.get(person.parentNode);
    if (siblings === undefined) {
      childrenByParentId.set(person.parentNode, [person]);
    } else {
      siblings.push(person);
    }
  });

  return childrenByParentId;
}

// assume only one root!
// Group every node under the id of its parent.
const nodeIdToChildren = buildNodeChildLookup(people);

// Print the lookup; the expected output is shown below.
console.log(nodeIdToChildren);
// Map {
//   null => [ { id: '738a8f8a', parentNode: null } ],
//   '738a8f8a' => [
//     { id: 'd18fd69c', parentNode: '738a8f8a' },
//     { id: 'b507c11d', parentNode: '738a8f8a' }
//   ],
//   'b507c11d' => [
//     { id: '171d4709', parentNode: 'b507c11d' },
//     { id: '471b1cee', parentNode: 'b507c11d' }
//   ]
// }

既然已经可以快速地由“节点 ID”查到“子节点”,我们就可以从根节点(唯一一个父节点为 null 的节点)开始递归:

/**
 * Builds a nested copy of `node` and everything below it.
 *
 * The input node is not modified: a shallow copy is returned with an extra
 * `childNodes` array containing the nested copies of its children.
 *
 * @param {Object} node - Node to expand; its `id` is used as the lookup key.
 * @param {Map<*, Array<Object>>} nodeToChildren - Children grouped by parent id.
 * @returns {Object} Shallow copy of `node` plus `childNodes`.
 */
function buildHierarchy(node, nodeToChildren) {
  // A node missing from the lookup is a leaf.
  const directChildren = nodeToChildren.get(node.id) || [];
  const nested = { ...node };
  nested.childNodes = directChildren.map((child) => buildHierarchy(child, nodeToChildren));
  return nested;
}

如您所见,buildHierarchy 函数现在更轻量,因为它只是快速查找节点的子节点。

综合起来:

// Flat list of nodes. Each entry names its parent through `parentNode`;
// the entry whose `parentNode` is null has no parent.
const people = [
  { id: "738a8f8a", parentNode: null },
  { id: "d18fd69c", parentNode: "738a8f8a" },
  { id: "b507c11d", parentNode: "738a8f8a" },
  { id: "171d4709", parentNode: "b507c11d" },
  { id: "471b1cee", parentNode: "b507c11d" },
];

/**
 * Builds a lookup from a parent id to the nodes that name it as their
 * `parentNode`, so a node's children can be fetched with one Map lookup.
 *
 * Nodes whose `parentNode` is null are grouped under the `null` key. The
 * stored entries are the original objects from `people`, not copies.
 *
 * @param {Array<{id: *, parentNode: *}>} people - Flat list of nodes.
 * @returns {Map<*, Array<Object>>} Children grouped by parent id, each group
 *   in the order its nodes appear in `people`.
 */
function buildNodeChildLookup(people) {
  const childrenByParentId = new Map();

  // A single pass is enough: each element already is the node to store, so
  // indexing the nodes by id first and looking them up again is unnecessary
  // (and would replace nodes that share an id with the last one seen).
  people.forEach((person) => {
    const siblings = childrenByParentId.get(person.parentNode);
    if (siblings === undefined) {
      childrenByParentId.set(person.parentNode, [person]);
    } else {
      siblings.push(person);
    }
  });

  return childrenByParentId;
}

/**
 * Returns a nested copy of `node`: a shallow copy carrying a `childNodes`
 * array with the nested copies of its children. `node` itself is left
 * untouched.
 *
 * @param {Object} node - Node to expand; its `id` is used as the lookup key.
 * @param {Map<*, Array<Object>>} nodeToChildren - Children grouped by parent id.
 * @returns {Object} Shallow copy of `node` plus `childNodes`.
 */
function buildHierarchy(node, nodeToChildren) {
  const toSubtree = (child) => buildHierarchy(child, nodeToChildren);
  // Nodes without an entry in the lookup have no children.
  const childNodes = (nodeToChildren.get(node.id) || []).map(toSubtree);
  return { ...node, childNodes };
}

// assume only one root!
const nodeIdToChildren = buildNodeChildLookup(people);
// Nodes whose parentNode is null are grouped under the `null` key; the first
// of them is used as the root.
const [root] = nodeIdToChildren.get(null);
const hierarchy = buildHierarchy(root, nodeIdToChildren);
console.log(hierarchy);

如果这没有帮助,我建议使用 Chrome 的开发者工具对您的应用程序进行性能分析。它提供了很好的工具,可以找出哪些函数耗时较多,或者哪些函数占用了大量内存。

While writing this answer, I saw @cbr's and thought it used the same logic. It doesn't quite, and there seems to be a noticeable performance difference (in Chrome at least), so I'll still post this one.

我无法用需要很长时间处理的真实数据对此进行测试,但我认为你的瓶颈是在 getChildren 函数中使用 filter。对于每个人,您要遍历整个 people 数组。

我认为在构建层次结构之前只对数据进行一次预处理可以减少时间。为此,我们可以创建一个 Map,其中每个键都是一个人的 ID,值是其子项的数组。

可以这样实现:

// Group every person under the ID of their parent.
const childMap = new Map();
people.forEach((person) => {
  const siblings = childMap.get(person.parentNode);
  if (siblings === undefined) {
    // First child seen for this parent ID: start its list.
    childMap.set(person.parentNode, [person]);
  } else {
    // The parent ID is already known: append to its children Array.
    siblings.push(person);
  }
});

那么,您的 getChildren 函数将如下所示:

/**
 * Looks up the direct children of a node in the module-level `childMap`.
 *
 * @param {{id: *}} parent - Node whose children are wanted.
 * @returns {Array<Object>} The array stored in `childMap` for `parent.id`
 *   (the stored array itself, not a copy), or a new empty array when there
 *   is no entry.
 */
function getChildren(parent) {
  const known = childMap.get(parent.id);
  return known || [];
}

这是完整的例子,连续运行 100,000 次:

// Flat list of nodes. Each entry names its parent through `parentNode`;
// the entry whose `parentNode` is null has no parent.
const people = [
  { id: '738a8f8a', parentNode: null },
  { id: 'd18fd69c', parentNode: '738a8f8a' },
  { id: 'b507c11d', parentNode: '738a8f8a' },
  { id: '171d4709', parentNode: 'b507c11d' },
  { id: '471b1cee', parentNode: 'b507c11d' },
];

// Lookup from a parent ID to the people that name it as their parentNode.
const childMap = new Map();
people.forEach((person) => {
  const parentId = person.parentNode;
  if (!childMap.has(parentId)) {
    childMap.set(parentId, []);
  }
  childMap.get(parentId).push(person);
});

/**
 * Returns the first entry without a parent.
 *
 * @param {Iterable<Object>} base - Entries to scan, in iteration order.
 * @returns {?Object} The first entry whose `parentNode` is strictly null,
 *   or null when there is none.
 */
function getBase(base) {
  let root = null;
  for (const entry of base) {
    if (entry.parentNode === null) {
      root = entry;
      break;
    }
  }
  return root;
}

/**
 * Fetches a node's direct children from the module-level `childMap`.
 *
 * @param {{id: *}} parent - Node whose children are wanted.
 * @returns {Array<Object>} The array stored under `parent.id` (not a copy),
 *   or a new empty array when nothing is stored for that id.
 */
function getChildren(parent) {
  const { id } = parent;
  return childMap.get(id) || [];
}

/**
 * Recursively nests every node under its parent.
 *
 * Each visited node is modified in place: it receives a `childrenNodes`
 * array with its (already processed) children.
 *
 * @param {?Object} [base=null] - Node to start from. When null, the start
 *   node is whatever `getBase(people)` returns.
 * @returns {?Object} The start node itself, or null when no start node was
 *   given and `getBase` found none.
 */
function buildHierarchy(base = null) {
  const start = base === null ? getBase(people) : base;
  if (start === null) {
    return null;
  }
  const nested = getChildren(start).map((child) => buildHierarchy(child));
  start.childrenNodes = nested;
  return start;
}

// Time 100000 consecutive builds of the hierarchy under the label 'x'.
console.time('x');
for (let run = 0; run < 100000; run += 1) {
  buildHierarchy();
}
console.timeEnd('x');

您的代码,连续运行 100,000 次:

// Flat list of nodes. Each entry names its parent through `parentNode`;
// the entry whose `parentNode` is null has no parent.
const people = [
  { id: '738a8f8a', parentNode: null },
  { id: 'd18fd69c', parentNode: '738a8f8a' },
  { id: 'b507c11d', parentNode: '738a8f8a' },
  { id: '171d4709', parentNode: 'b507c11d' },
  { id: '471b1cee', parentNode: 'b507c11d' },
];

/**
 * Scans the entries in order and returns the first one without a parent.
 *
 * @param {Iterable<Object>} base - Entries to scan.
 * @returns {?Object} The first entry whose `parentNode` is strictly null,
 *   or null when no entry matches.
 */
function getBase(base) {
  const hasNoParent = (entry) => entry['parentNode'] === null;
  for (const entry of base) {
    if (hasNoParent(entry)) {
      return entry;
    }
  }
  return null;
}

/**
 * Returns the direct children of a node, found by scanning the entire
 * module-level `people` array on every call.
 *
 * @param {{id: *}} parent - Node whose children are wanted.
 * @returns {Array<Object>} A new array with every entry of `people` whose
 *   `parentNode` strictly equals `parent.id`, in their original order.
 */
function getChildren(parent) {
  return Object.values(
    people.filter((candidate) => candidate.parentNode === parent.id),
  );
}

/**
 * Recursively attaches to each node a `childrenNodes` array containing its
 * children, modifying the nodes in place.
 *
 * @param {?Object} [base=null] - Node to start from. When null, the start
 *   node is whatever `getBase(people)` returns.
 * @returns {?Object} The start node itself, or null when no start node was
 *   given and `getBase` found none.
 */
function buildHierarchy(base = null) {
  let current = base;
  if (current === null) {
    current = getBase(people);
  }
  if (current === null) {
    return null;
  }
  current.childrenNodes = getChildren(current).map((child) => buildHierarchy(child));
  return current;
}

// Time 100000 consecutive builds of the hierarchy under the label 'x'.
console.time('x');
for (let run = 0; run < 100000; run += 1) {
  buildHierarchy();
}
console.timeEnd('x');

@cbr 的代码,连续运行 100,000 次:

// Flat list of nodes. Each entry names its parent through `parentNode`;
// the entry whose `parentNode` is null has no parent.
const people = [
  { id: "738a8f8a", parentNode: null },
  { id: "d18fd69c", parentNode: "738a8f8a" },
  { id: "b507c11d", parentNode: "738a8f8a" },
  { id: "171d4709", parentNode: "b507c11d" },
  { id: "471b1cee", parentNode: "b507c11d" },
];

/**
 * Builds a lookup from a parent id to the nodes that name it as their
 * `parentNode`, so a node's children can be fetched with one Map lookup.
 *
 * Nodes whose `parentNode` is null are grouped under the `null` key. The
 * stored entries are the original objects from `people`, not copies.
 *
 * @param {Array<{id: *, parentNode: *}>} people - Flat list of nodes.
 * @returns {Map<*, Array<Object>>} Children grouped by parent id, each group
 *   in the order its nodes appear in `people`.
 */
function buildNodeChildLookup(people) {
  const childrenByParentId = new Map();

  // A single pass is enough: each element already is the node to store, so
  // indexing the nodes by id first and looking them up again is unnecessary
  // (and would replace nodes that share an id with the last one seen).
  people.forEach((person) => {
    const siblings = childrenByParentId.get(person.parentNode);
    if (siblings === undefined) {
      childrenByParentId.set(person.parentNode, [person]);
    } else {
      siblings.push(person);
    }
  });

  return childrenByParentId;
}

/**
 * Produces a nested copy of `node` without modifying it: the result is a
 * shallow copy with an added `childNodes` array of nested child copies.
 *
 * @param {Object} node - Node to expand; its `id` is used as the lookup key.
 * @param {Map<*, Array<Object>>} nodeToChildren - Children grouped by parent id.
 * @returns {Object} Shallow copy of `node` plus `childNodes`.
 */
function buildHierarchy(node, nodeToChildren) {
  const lookupHit = nodeToChildren.get(node.id);
  // No entry in the lookup means the node has no children.
  const children = lookupHit || [];
  const buildSubtree = (child) => buildHierarchy(child, nodeToChildren);
  return {
    ...node,
    childNodes: children.map(buildSubtree),
  };
}

// assume only one root!
const nodeIdToChildren = buildNodeChildLookup(people);
// Nodes whose parentNode is null are grouped under the `null` key; the first
// of them is used as the root.
const [root] = nodeIdToChildren.get(null);

// The lookup is built once, before the timer starts; only the 100000
// consecutive tree builds are measured under the label 'x'.
console.time('x');
for (let run = 0; run < 100000; run += 1) {
  buildHierarchy(root, nodeIdToChildren);
}
console.timeEnd('x');