在 Node.js 中构建多分支树时出现性能问题
Performance issue when building a multi branch tree in Node.js
我正在将 objects 的平面层次结构转换为基于 parent 节点 ID 的嵌套 objects。
问题是当我输入更复杂的结构(更多更深 children)时,这个过程需要很长时间才能完成。
可能与内存或其他递归或冗余的低效使用有关?我不确定。
代码:
const people = [
{
id: '738a8f8a',
parentNode: null
},
{
id: 'd18fd69c',
parentNode: '738a8f8a'
},
{
id: 'b507c11d',
parentNode: '738a8f8a'
},
{
id: '171d4709',
parentNode: 'b507c11d'
},
{
id: '471b1cee',
parentNode: 'b507c11d'
}
];
function getBase(base) {
for (const person of base) {
if (person['parentNode'] === null) {
return person;
}
}
return null;
}
function getChildren(parent) {
const values = people.filter((person) => {
return person['parentNode'] === parent['id'];
});
return Object.values(values);
}
function buildHierarchy(base = null) {
if (base === null) {
base = getBase(people);
if (base === null) {
return null;
}
}
const children = getChildren(base).map((child) => {
return buildHierarchy(child);
});
base['childrenNodes'] = children;
return base;
}
console.log(buildHierarchy());
和上面console.log的输出:
{
id: '738a8f8a',
parentNode: null
childrenNodes: [
{
id: 'd18fd69c',
parentNode: '738a8f8a',
childrenNodes: [],
},
{
id: 'b507c11d',
parentNode: '738a8f8a',
childrenNodes: [
{
id: '171d4709',
parentNode: 'b507c11d',
childrenNodes: [],
},
{
id: '471b1cee',
parentNode: 'b507c11d',
childrenNodes: [],
},
],
},
],
};
我认为这里的主要瓶颈是算法。 getChildren
遍历整个 people
数组,并为每个节点调用它。随着 people
中元素数量的增加,此成本会增加。自从我上次进行算法分析以来已经有很长时间了,但我会做出有根据的猜测,即当前实现的 time complexity 是 O(n^2).
我会使用 Map
来解决这个问题。我会遍历整个数组一次或两次以构建一个 Map
的 node->children[],这样我们就可以在递归时进行快速 O(1) 查找。这将有助于降低时间复杂度,但作为权衡需要更多内存,因为所有元素都在内存中存储了不止一次。
这是一个例子:
function buildNodeChildLookup(people) {
const nodeIdToNode = people.reduce((map, curr) => {
map.set(curr.id, curr);
return map;
}, new Map());
return people.reduce((map, curr) => {
const children = map.get(curr.parentNode) || [];
const childNode = nodeIdToNode.get(curr.id);
children.push(childNode);
map.set(curr.parentNode, children);
return map;
}, new Map());
}
// assume only one root!
const nodeIdToChildren = buildNodeChildLookup(people);
console.log(nodeIdToChildren)
// Map {
// null => [ { id: '738a8f8a', parentNode: null } ],
// '738a8f8a' => [
// { id: 'd18fd69c', parentNode: '738a8f8a' },
// { id: 'b507c11d', parentNode: '738a8f8a' }
// ],
// 'b507c11d' => [
// { id: '171d4709', parentNode: 'b507c11d' },
// { id: '471b1cee', parentNode: 'b507c11d' }
// ]
// }
现在我们可以快速查找“节点 ID”->“子节点”,我们可以从根(父节点为 null
的单个节点)开始递归:
function buildHierarchy(node, nodeToChildren) {
const children = nodeToChildren.get(node.id) || [];
return {
...node,
childNodes: children.map((child) => buildHierarchy(child, nodeToChildren)),
};
}
如您所见,buildHierarchy
函数现在更轻量,因为它只是快速查找节点的子节点。
综合起来:
const people = [
{
id: "738a8f8a",
parentNode: null,
},
{
id: "d18fd69c",
parentNode: "738a8f8a",
},
{
id: "b507c11d",
parentNode: "738a8f8a",
},
{
id: "171d4709",
parentNode: "b507c11d",
},
{
id: "471b1cee",
parentNode: "b507c11d",
},
];
function buildNodeChildLookup(people) {
const nodeIdToNode = people.reduce((map, curr) => {
map.set(curr.id, curr);
return map;
}, new Map());
return people.reduce((map, curr) => {
const children = map.get(curr.parentNode) || [];
const childNode = nodeIdToNode.get(curr.id);
children.push(childNode);
map.set(curr.parentNode, children);
return map;
}, new Map());
}
function buildHierarchy(node, nodeToChildren) {
const children = nodeToChildren.get(node.id) || [];
return {
...node,
childNodes: children.map((child) => buildHierarchy(child, nodeToChildren)),
};
}
// assume only one root!
const nodeIdToChildren = buildNodeChildLookup(people);
const root = nodeIdToChildren.get(null)[0];
console.log(buildHierarchy(root, nodeIdToChildren));
如果这没有帮助,我建议使用 Chrome 的开发人员工具分析您的应用程序。他们有很好的工具来查找哪些函数需要时间,或者哪些函数耗尽内存。
While writing this answer, I saw @cbr's, and thought it was the same logic. But not entirely, and there seems to be a sensible performance difference (in Chrome at least), so I'll still post this one
我无法用需要很长时间处理的真实数据对此进行测试,但我认为你的瓶颈是在 getChildren
函数中使用 filter
。对于每个人,您要遍历整个 people
数组。
我认为在构建层次结构之前只对数据进行一次预处理可以减少时间。为此,我们可以创建一个 Map,其中每个键都是一个人的 ID,值是其子项的数组。
可以这样实现:
// For each person
const childMap = people.reduce((map, person) => {
// If its parentNode is not already in the map
if (!map.has(person.parentNode)) {
// Add it
map.set(person.parentNode, []);
}
// Then, push the current person into that parent ID's children Array
map.get(person.parentNode).push(person);
return map;
}, new Map());
那么,您的 getChildren
函数将如下所示:
function getChildren(parent) {
return childMap.get(parent.id) || [];
}
这是完整的例子,运行 连续 100.000 次:
const people = [
{
id: '738a8f8a',
parentNode: null
},
{
id: 'd18fd69c',
parentNode: '738a8f8a'
},
{
id: 'b507c11d',
parentNode: '738a8f8a'
},
{
id: '171d4709',
parentNode: 'b507c11d'
},
{
id: '471b1cee',
parentNode: 'b507c11d'
}
];
const childMap = people.reduce((map, person) => {
if (!map.has(person.parentNode)) {
map.set(person.parentNode, []);
}
map.get(person.parentNode).push(person);
return map;
}, new Map());
function getBase(base) {
for (const person of base) {
if (person.parentNode === null) {
return person;
}
}
return null;
}
function getChildren(parent) {
return childMap.get(parent.id) || [];
}
function buildHierarchy(base = null) {
if (base === null) {
base = getBase(people);
if (base === null) {
return null;
}
}
const children = getChildren(base);
base.childrenNodes = children.map(buildHierarchy);
return base;
}
console.time('x');
for (let i = 0; i < 100000; i++) buildHierarchy();
console.timeEnd('x');
您的代码,连续 运行 100.000 次:
const people = [
{
id: '738a8f8a',
parentNode: null
},
{
id: 'd18fd69c',
parentNode: '738a8f8a'
},
{
id: 'b507c11d',
parentNode: '738a8f8a'
},
{
id: '171d4709',
parentNode: 'b507c11d'
},
{
id: '471b1cee',
parentNode: 'b507c11d'
}
];
function getBase(base) {
for (const person of base) {
if (person['parentNode'] === null) {
return person;
}
}
return null;
}
function getChildren(parent) {
const values = people.filter((person) => {
return person['parentNode'] === parent['id'];
});
return Object.values(values);
}
function buildHierarchy(base = null) {
if (base === null) {
base = getBase(people);
if (base === null) {
return null;
}
}
const children = getChildren(base).map((child) => {
return buildHierarchy(child);
});
base['childrenNodes'] = children;
return base;
}
console.time('x');
for (let i = 0; i < 100000; i++) buildHierarchy();
console.timeEnd('x');
@cbr 的代码,运行 连续 100.000 次:
const people = [
{
id: "738a8f8a",
parentNode: null,
},
{
id: "d18fd69c",
parentNode: "738a8f8a",
},
{
id: "b507c11d",
parentNode: "738a8f8a",
},
{
id: "171d4709",
parentNode: "b507c11d",
},
{
id: "471b1cee",
parentNode: "b507c11d",
},
];
function buildNodeChildLookup(people) {
const nodeIdToNode = people.reduce((map, curr) => {
map.set(curr.id, curr);
return map;
}, new Map());
return people.reduce((map, curr) => {
const children = map.get(curr.parentNode) || [];
const childNode = nodeIdToNode.get(curr.id);
children.push(childNode);
map.set(curr.parentNode, children);
return map;
}, new Map());
}
function buildHierarchy(node, nodeToChildren) {
const children = nodeToChildren.get(node.id) || [];
return {
...node,
childNodes: children.map((child) => buildHierarchy(child, nodeToChildren)),
};
}
// assume only one root!
const nodeIdToChildren = buildNodeChildLookup(people);
const root = nodeIdToChildren.get(null)[0];
console.time('x');
for (let i = 0; i < 100000; i++) buildHierarchy(root, nodeIdToChildren);
console.timeEnd('x');
我正在将 objects 的平面层次结构转换为基于 parent 节点 ID 的嵌套 objects。
问题是当我输入更复杂的结构(更多更深 children)时,这个过程需要很长时间才能完成。
可能与内存或其他递归或冗余的低效使用有关?我不确定。
代码:
const people = [
{
id: '738a8f8a',
parentNode: null
},
{
id: 'd18fd69c',
parentNode: '738a8f8a'
},
{
id: 'b507c11d',
parentNode: '738a8f8a'
},
{
id: '171d4709',
parentNode: 'b507c11d'
},
{
id: '471b1cee',
parentNode: 'b507c11d'
}
];
function getBase(base) {
for (const person of base) {
if (person['parentNode'] === null) {
return person;
}
}
return null;
}
function getChildren(parent) {
const values = people.filter((person) => {
return person['parentNode'] === parent['id'];
});
return Object.values(values);
}
function buildHierarchy(base = null) {
if (base === null) {
base = getBase(people);
if (base === null) {
return null;
}
}
const children = getChildren(base).map((child) => {
return buildHierarchy(child);
});
base['childrenNodes'] = children;
return base;
}
console.log(buildHierarchy());
和上面console.log的输出:
{
id: '738a8f8a',
parentNode: null
childrenNodes: [
{
id: 'd18fd69c',
parentNode: '738a8f8a',
childrenNodes: [],
},
{
id: 'b507c11d',
parentNode: '738a8f8a',
childrenNodes: [
{
id: '171d4709',
parentNode: 'b507c11d',
childrenNodes: [],
},
{
id: '471b1cee',
parentNode: 'b507c11d',
childrenNodes: [],
},
],
},
],
};
我认为这里的主要瓶颈是算法。 getChildren
遍历整个 people
数组,并为每个节点调用它。随着 people
中元素数量的增加,此成本会增加。自从我上次进行算法分析以来已经有很长时间了,但我会做出有根据的猜测,即当前实现的 time complexity 是 O(n^2).
我会使用 Map
来解决这个问题。我会遍历整个数组一次或两次以构建一个 Map
的 node->children[],这样我们就可以在递归时进行快速 O(1) 查找。这将有助于降低时间复杂度,但作为权衡需要更多内存,因为所有元素都在内存中存储了不止一次。
这是一个例子:
function buildNodeChildLookup(people) {
const nodeIdToNode = people.reduce((map, curr) => {
map.set(curr.id, curr);
return map;
}, new Map());
return people.reduce((map, curr) => {
const children = map.get(curr.parentNode) || [];
const childNode = nodeIdToNode.get(curr.id);
children.push(childNode);
map.set(curr.parentNode, children);
return map;
}, new Map());
}
// assume only one root!
const nodeIdToChildren = buildNodeChildLookup(people);
console.log(nodeIdToChildren)
// Map {
// null => [ { id: '738a8f8a', parentNode: null } ],
// '738a8f8a' => [
// { id: 'd18fd69c', parentNode: '738a8f8a' },
// { id: 'b507c11d', parentNode: '738a8f8a' }
// ],
// 'b507c11d' => [
// { id: '171d4709', parentNode: 'b507c11d' },
// { id: '471b1cee', parentNode: 'b507c11d' }
// ]
// }
现在我们可以快速查找“节点 ID”->“子节点”,我们可以从根(父节点为 null
的单个节点)开始递归:
function buildHierarchy(node, nodeToChildren) {
const children = nodeToChildren.get(node.id) || [];
return {
...node,
childNodes: children.map((child) => buildHierarchy(child, nodeToChildren)),
};
}
如您所见,buildHierarchy
函数现在更轻量,因为它只是快速查找节点的子节点。
综合起来:
const people = [
{
id: "738a8f8a",
parentNode: null,
},
{
id: "d18fd69c",
parentNode: "738a8f8a",
},
{
id: "b507c11d",
parentNode: "738a8f8a",
},
{
id: "171d4709",
parentNode: "b507c11d",
},
{
id: "471b1cee",
parentNode: "b507c11d",
},
];
function buildNodeChildLookup(people) {
const nodeIdToNode = people.reduce((map, curr) => {
map.set(curr.id, curr);
return map;
}, new Map());
return people.reduce((map, curr) => {
const children = map.get(curr.parentNode) || [];
const childNode = nodeIdToNode.get(curr.id);
children.push(childNode);
map.set(curr.parentNode, children);
return map;
}, new Map());
}
function buildHierarchy(node, nodeToChildren) {
const children = nodeToChildren.get(node.id) || [];
return {
...node,
childNodes: children.map((child) => buildHierarchy(child, nodeToChildren)),
};
}
// assume only one root!
const nodeIdToChildren = buildNodeChildLookup(people);
const root = nodeIdToChildren.get(null)[0];
console.log(buildHierarchy(root, nodeIdToChildren));
如果这没有帮助,我建议使用 Chrome 的开发人员工具分析您的应用程序。他们有很好的工具来查找哪些函数需要时间,或者哪些函数耗尽内存。
While writing this answer, I saw @cbr's, and thought it was the same logic. But not entirely, and there seems to be a sensible performance difference (in Chrome at least), so I'll still post this one
我无法用需要很长时间处理的真实数据对此进行测试,但我认为你的瓶颈是在 getChildren
函数中使用 filter
。对于每个人,您要遍历整个 people
数组。
我认为在构建层次结构之前只对数据进行一次预处理可以减少时间。为此,我们可以创建一个 Map,其中每个键都是一个人的 ID,值是其子项的数组。
可以这样实现:
// For each person
const childMap = people.reduce((map, person) => {
// If its parentNode is not already in the map
if (!map.has(person.parentNode)) {
// Add it
map.set(person.parentNode, []);
}
// Then, push the current person into that parent ID's children Array
map.get(person.parentNode).push(person);
return map;
}, new Map());
那么,您的 getChildren
函数将如下所示:
function getChildren(parent) {
return childMap.get(parent.id) || [];
}
这是完整的例子,运行 连续 100.000 次:
const people = [
{
id: '738a8f8a',
parentNode: null
},
{
id: 'd18fd69c',
parentNode: '738a8f8a'
},
{
id: 'b507c11d',
parentNode: '738a8f8a'
},
{
id: '171d4709',
parentNode: 'b507c11d'
},
{
id: '471b1cee',
parentNode: 'b507c11d'
}
];
const childMap = people.reduce((map, person) => {
if (!map.has(person.parentNode)) {
map.set(person.parentNode, []);
}
map.get(person.parentNode).push(person);
return map;
}, new Map());
function getBase(base) {
for (const person of base) {
if (person.parentNode === null) {
return person;
}
}
return null;
}
function getChildren(parent) {
return childMap.get(parent.id) || [];
}
function buildHierarchy(base = null) {
if (base === null) {
base = getBase(people);
if (base === null) {
return null;
}
}
const children = getChildren(base);
base.childrenNodes = children.map(buildHierarchy);
return base;
}
console.time('x');
for (let i = 0; i < 100000; i++) buildHierarchy();
console.timeEnd('x');
您的代码,连续 运行 100.000 次:
const people = [
{
id: '738a8f8a',
parentNode: null
},
{
id: 'd18fd69c',
parentNode: '738a8f8a'
},
{
id: 'b507c11d',
parentNode: '738a8f8a'
},
{
id: '171d4709',
parentNode: 'b507c11d'
},
{
id: '471b1cee',
parentNode: 'b507c11d'
}
];
function getBase(base) {
for (const person of base) {
if (person['parentNode'] === null) {
return person;
}
}
return null;
}
function getChildren(parent) {
const values = people.filter((person) => {
return person['parentNode'] === parent['id'];
});
return Object.values(values);
}
function buildHierarchy(base = null) {
if (base === null) {
base = getBase(people);
if (base === null) {
return null;
}
}
const children = getChildren(base).map((child) => {
return buildHierarchy(child);
});
base['childrenNodes'] = children;
return base;
}
console.time('x');
for (let i = 0; i < 100000; i++) buildHierarchy();
console.timeEnd('x');
@cbr 的代码,运行 连续 100.000 次:
const people = [
{
id: "738a8f8a",
parentNode: null,
},
{
id: "d18fd69c",
parentNode: "738a8f8a",
},
{
id: "b507c11d",
parentNode: "738a8f8a",
},
{
id: "171d4709",
parentNode: "b507c11d",
},
{
id: "471b1cee",
parentNode: "b507c11d",
},
];
function buildNodeChildLookup(people) {
const nodeIdToNode = people.reduce((map, curr) => {
map.set(curr.id, curr);
return map;
}, new Map());
return people.reduce((map, curr) => {
const children = map.get(curr.parentNode) || [];
const childNode = nodeIdToNode.get(curr.id);
children.push(childNode);
map.set(curr.parentNode, children);
return map;
}, new Map());
}
function buildHierarchy(node, nodeToChildren) {
const children = nodeToChildren.get(node.id) || [];
return {
...node,
childNodes: children.map((child) => buildHierarchy(child, nodeToChildren)),
};
}
// assume only one root!
const nodeIdToChildren = buildNodeChildLookup(people);
const root = nodeIdToChildren.get(null)[0];
console.time('x');
for (let i = 0; i < 100000; i++) buildHierarchy(root, nodeIdToChildren);
console.timeEnd('x');