使用 web worker 的并行排序比串行排序(合并排序)慢
Parallel sort using web workers is slower than serial sort (merge sort)
我正在尝试使用 web-workers 制作合并排序算法的并行版本。
并行版本在小数组上非常慢(我认为是由于 promises 和 web-workers 的开销)但令人惊讶的是它在大数组上也比串行排序慢
平行版
console.log(navigator.hardwareConcurrency + " threads")
var blob = new Blob([
`onmessage = function(e) {
function merge_sort(arr) {
let length = arr.length;
if (length < 2) {
return arr
}
let middle = Math.floor(length/2)
let left = arr.slice(0, middle)
let right = arr.slice(middle)
return merge(merge_sort(left), merge_sort(right))
}
function merge(left, right) {
let result = [];
let i=0;
let j=0;
while(i < left.length && j < right.length) {
if(left[i] < right[j]) {
result.push(left[i]) ;
i++;
} else {
result.push(right[j]);
j++;
}
}
return result.concat(left.slice(i)).concat(right.slice(j))
}
if(e.data.job==='sort'){
postMessage(merge_sort(e.data.arr));
}else{
postMessage(merge(e.data.arr[0],e.data.arr[1]))
}
}`
]);
var blobURL = window.URL.createObjectURL(blob);
var v1 = new Worker(blobURL);
var v2 = new Worker(blobURL);
var v3 = new Worker(blobURL);
var v4 = new Worker(blobURL);
function merge(left, right) {
let result = [];
let i = 0;
let j = 0;
while (i < left.length && j < right.length) {
if (left[i] < right[j]) {
result.push(left[i]);
i++;
} else {
result.push(right[j]);
j++;
}
}
return result.concat(left.slice(i)).concat(right.slice(j))
}
var arr = Array.from({
length: 20000000
}, () => Math.floor(Math.random() * 50000000));
var half1 = []
var half2 = []
var half_1 = []
var half_2 = []
var half_3 = []
var half_4 = []
let middle = Math.floor(arr.length / 2)
half1 = arr.slice(0, middle)
half2 = arr.slice(middle)
let middlehalf1 = Math.floor(half1.length / 2)
half_1 = half1.slice(0, middlehalf1)
half_2 = half1.slice(middlehalf1)
let middlehalf2 = Math.floor(half2.length / 2)
half_3 = half2.slice(0, middlehalf2)
half_4 = half2.slice(middlehalf2)
var t0 = performance.now();
var p1 = new Promise((resolve, reject) => {
v1.postMessage({
job: 'sort',
arr: half_1
});
v1.addEventListener('message', event => resolve(event.data));
v1.addEventListener('error', reject);
})
var p2 = new Promise((resolve, reject) => {
v2.postMessage({
job: 'sort',
arr: half_2
});
v2.addEventListener('message', event => resolve(event.data));
v2.addEventListener('error', reject);
})
var p3 = new Promise((resolve, reject) => {
v3.postMessage({
job: 'sort',
arr: half_3
});
v3.addEventListener('message', event => resolve(event.data));
v3.addEventListener('error', reject);
})
var p4 = new Promise((resolve, reject) => {
v4.postMessage({
job: 'sort',
arr: half_4
});
v4.addEventListener('message', event => resolve(event.data));
v4.addEventListener('error', reject);
})
Promise.all([p1, p2, p3, p4]).then(function(results) {
//console.log( )
var p5 = new Promise((resolve, reject) => {
v1.addEventListener('message', event => resolve(event.data));
v1.addEventListener('error', reject);
})
var p6 = new Promise((resolve, reject) => {
v2.addEventListener('message', event => resolve(event.data));
v2.addEventListener('error', reject);
})
v1.postMessage({
job: 'merge',
arr: [results[0], results[1]]
});
v2.postMessage({
job: 'merge',
arr: [results[2], results[3]]
});
Promise.all([p5, p6]).then(function(arrays) {
merge(arrays[0], arrays[1])
var t1 = performance.now();
console.log(`merge_sort took ${(t1 - t0) / 1000} seconds`)
})
});
连载版
function merge_sort(arr) {
let length = arr.length;
if (length < 2) {
return arr
}
let middle = Math.floor(length/2)
let left = arr.slice(0, middle)
let right = arr.slice(middle)
return merge(merge_sort(left), merge_sort(right))
}
function merge(left, right) {
let result = [];
let i=0;
let j=0;
while(i < left.length && j < right.length) {
if(left[i] < right[j]) {
result.push(left[i]) ;
i++;
} else {
result.push(right[j]);
j++;
}
}
return result.concat(left.slice(i)).concat(right.slice(j))
}
var BigArray =Array.from({ length: 20000000 }, ()=>Math.floor(Math.random() * 50000000));
var t0 = performance.now();
merge_sort(BigArray)
var t1 = performance.now();
console.log(`merge_sort took ${(t1 - t0) / 1000} seconds`)
我也在寻找一个更好的并行排序解决方案(我对 promises 的使用看起来很糟糕)
考虑到您在线程之间复制数据,并且至少复制一次不必要的数据(通过使用concat
).
如果你真的要排序数字,你可以使用数组的type array types和转移所有权之一来回而不是复制他们的内容。那会让你减少复制的数量。如果您的起点是单个数组,则唯一的复制是复制到各个工人的各个类型化数组,然后从这些各个数组复制回主数组。
您甚至可以更进一步,具体取决于您需要使用 shared memory 支持的平台,这样数组只有一个副本,并且每个工作人员(最初)只是在自己的部分工作它的。共享内存在 Chrome 平台上启用,其站点隔离功能可以防止 Spectre 式漏洞。我最后检查它在 Firefox 中被禁用,并且在其他浏览器中被禁用或未实现。
但是,如果您排序的不是真正的数字,那么它们都不会起作用,因为它们与类型化数组相关联。
我正在尝试使用 web-workers 制作合并排序算法的并行版本。
并行版本在小数组上非常慢(我认为是由于 promises 和 web-workers 的开销)但令人惊讶的是它在大数组上也比串行排序慢
平行版
console.log(navigator.hardwareConcurrency + " threads")
var blob = new Blob([
`onmessage = function(e) {
function merge_sort(arr) {
let length = arr.length;
if (length < 2) {
return arr
}
let middle = Math.floor(length/2)
let left = arr.slice(0, middle)
let right = arr.slice(middle)
return merge(merge_sort(left), merge_sort(right))
}
function merge(left, right) {
let result = [];
let i=0;
let j=0;
while(i < left.length && j < right.length) {
if(left[i] < right[j]) {
result.push(left[i]) ;
i++;
} else {
result.push(right[j]);
j++;
}
}
return result.concat(left.slice(i)).concat(right.slice(j))
}
if(e.data.job==='sort'){
postMessage(merge_sort(e.data.arr));
}else{
postMessage(merge(e.data.arr[0],e.data.arr[1]))
}
}`
]);
var blobURL = window.URL.createObjectURL(blob);
var v1 = new Worker(blobURL);
var v2 = new Worker(blobURL);
var v3 = new Worker(blobURL);
var v4 = new Worker(blobURL);
function merge(left, right) {
let result = [];
let i = 0;
let j = 0;
while (i < left.length && j < right.length) {
if (left[i] < right[j]) {
result.push(left[i]);
i++;
} else {
result.push(right[j]);
j++;
}
}
return result.concat(left.slice(i)).concat(right.slice(j))
}
var arr = Array.from({
length: 20000000
}, () => Math.floor(Math.random() * 50000000));
var half1 = []
var half2 = []
var half_1 = []
var half_2 = []
var half_3 = []
var half_4 = []
let middle = Math.floor(arr.length / 2)
half1 = arr.slice(0, middle)
half2 = arr.slice(middle)
let middlehalf1 = Math.floor(half1.length / 2)
half_1 = half1.slice(0, middlehalf1)
half_2 = half1.slice(middlehalf1)
let middlehalf2 = Math.floor(half2.length / 2)
half_3 = half2.slice(0, middlehalf2)
half_4 = half2.slice(middlehalf2)
var t0 = performance.now();
var p1 = new Promise((resolve, reject) => {
v1.postMessage({
job: 'sort',
arr: half_1
});
v1.addEventListener('message', event => resolve(event.data));
v1.addEventListener('error', reject);
})
var p2 = new Promise((resolve, reject) => {
v2.postMessage({
job: 'sort',
arr: half_2
});
v2.addEventListener('message', event => resolve(event.data));
v2.addEventListener('error', reject);
})
var p3 = new Promise((resolve, reject) => {
v3.postMessage({
job: 'sort',
arr: half_3
});
v3.addEventListener('message', event => resolve(event.data));
v3.addEventListener('error', reject);
})
var p4 = new Promise((resolve, reject) => {
v4.postMessage({
job: 'sort',
arr: half_4
});
v4.addEventListener('message', event => resolve(event.data));
v4.addEventListener('error', reject);
})
Promise.all([p1, p2, p3, p4]).then(function(results) {
//console.log( )
var p5 = new Promise((resolve, reject) => {
v1.addEventListener('message', event => resolve(event.data));
v1.addEventListener('error', reject);
})
var p6 = new Promise((resolve, reject) => {
v2.addEventListener('message', event => resolve(event.data));
v2.addEventListener('error', reject);
})
v1.postMessage({
job: 'merge',
arr: [results[0], results[1]]
});
v2.postMessage({
job: 'merge',
arr: [results[2], results[3]]
});
Promise.all([p5, p6]).then(function(arrays) {
merge(arrays[0], arrays[1])
var t1 = performance.now();
console.log(`merge_sort took ${(t1 - t0) / 1000} seconds`)
})
});
连载版
function merge_sort(arr) {
let length = arr.length;
if (length < 2) {
return arr
}
let middle = Math.floor(length/2)
let left = arr.slice(0, middle)
let right = arr.slice(middle)
return merge(merge_sort(left), merge_sort(right))
}
function merge(left, right) {
let result = [];
let i=0;
let j=0;
while(i < left.length && j < right.length) {
if(left[i] < right[j]) {
result.push(left[i]) ;
i++;
} else {
result.push(right[j]);
j++;
}
}
return result.concat(left.slice(i)).concat(right.slice(j))
}
var BigArray =Array.from({ length: 20000000 }, ()=>Math.floor(Math.random() * 50000000));
var t0 = performance.now();
merge_sort(BigArray)
var t1 = performance.now();
console.log(`merge_sort took ${(t1 - t0) / 1000} seconds`)
我也在寻找一个更好的并行排序解决方案(我对 promises 的使用看起来很糟糕)
考虑到您在线程之间复制数据,并且至少复制一次不必要的数据(通过使用concat
).
如果你真的要排序数字,你可以使用数组的type array types和转移所有权之一来回而不是复制他们的内容。那会让你减少复制的数量。如果您的起点是单个数组,则唯一的复制是复制到各个工人的各个类型化数组,然后从这些各个数组复制回主数组。
您甚至可以更进一步,具体取决于您需要使用 shared memory 支持的平台,这样数组只有一个副本,并且每个工作人员(最初)只是在自己的部分工作它的。共享内存在 Chrome 平台上启用,其站点隔离功能可以防止 Spectre 式漏洞。我最后检查它在 Firefox 中被禁用,并且在其他浏览器中被禁用或未实现。
但是,如果您排序的不是真正的数字,那么它们都不会起作用,因为它们与类型化数组相关联。