并行冒泡排序性能
Parallel Bubble-sort performances
我的目标是用Thread实现冒泡排序排序算法,程序好像运行还行。但是性能很糟糕,所以我想知道如何使代码 运行 更快,为什么它 运行 如此糟糕。
代码基于以下算法:
并行冒泡排序(A) - 算法
> 1.For k = 0 to n-2
> 2.If k is even then
> 3. for i = 0 to (n/2)-1 do in parallel
> 4. if A[2i] > A[2i+1] then
> 5. Exchange A[2i] <-> A[2i+1]
> 6.Else
> 7. for i = 0 to (n/2)-2 do in parallel
> 8. if A[2i+1] > A[2i+2] then
> 9. Exchange A[2i+1] <-> A[2i+2]
> 10.Next k
public static void sort(){
int n = input.length; //length of the array to sort
Thread[] thr1 = new Thread[(int)n/2];
Thread[] thr2 = new Thread[(int)n/2];
int count1;
int count2;
for(int i = 0; i<n-1;i++){
if(i % 2 == 0){ // i even
count1 = 0;
for(int j = 0; j<n/2; j++){
final int tmp = j;
count1++;
thr1[tmp] = new Thread(){
public void run(){
if (input[2*tmp]>input[2*tmp+1])
swap(2*tmp,2*tmp+1);
}
};
thr1[tmp].start();
}
//waiting for threads to finish
for(int m = 0; m<count1; m++){
try {
thr1[m].join();
} catch (InterruptedException e) {
e.printStackTrace();
}}
}
else{ // i odd
count2 = 0;
for(int k = 0; k<n/2-1;k++){
final int tmp = k;
count2++;
thr2[tmp] = new Thread(){
public void run(){
if (input[2*tmp+1]>input[2*tmp+2])
swap(2*tmp+1,2*tmp+2);
}
};
thr2[tmp].start();
}
// Waiting for threads to finish
for(int m = 0; m<count2; m++){
try {
thr2[m].join();
} catch (InterruptedException e) {
e.printStackTrace();
}
}}
}
}
编辑:
这是带有ExecutorService的新版本,不幸的是,正如预测的那样,它仍然运行非常糟糕,比顺序版本慢。
public static void sort(){
int n = input.length; //length of the array to sort
ExecutorService executor = Executors.newFixedThreadPool(8);
for(int i = 0; i<n-1;i++){
if(i % 2 == 0){ // i even
for(int j = 0; j<n/2; j++){
final int tmp = j;
executor.submit(new Runnable(){
public void run(){
if (input[2*tmp]>input[2*tmp+1])
swap(2*tmp,2*tmp+1);}
});}
}
else{ // i odd
for(int k = 0; k<n/2-1;k++){
final int tmp = k;
executor.submit(new Runnable(){
public void run(){
if (input[2*tmp+1]>input[2*tmp+2])
swap(2*tmp+1,2*tmp+2);}
});}
}
}
executor.shutdown();
try {
executor.awaitTermination(1, TimeUnit.DAYS);
} catch (InterruptedException e) {
e.printStackTrace();}
}
之所以这么慢,是因为 new Thread()
的调用非常昂贵。与在原始线程中执行所有排序相比,启动另一个线程来执行部分排序需要多数千倍的时钟周期。
此外,即使 new Thread()
不是那么昂贵,您仍然看不到太多(或任何)性能改进,因为排序是内存限制操作,您很可能正在尝试 运行它在单CPU、多核系统上,但是CPU只有一个地址总线和一个数据总线,所以内核大多会等待对方放手数据总线。
我的目标是用Thread实现冒泡排序排序算法,程序好像运行还行。但是性能很糟糕,所以我想知道如何使代码 运行 更快,为什么它 运行 如此糟糕。
代码基于以下算法:
并行冒泡排序(A) - 算法
> 1.For k = 0 to n-2
> 2.If k is even then
> 3. for i = 0 to (n/2)-1 do in parallel
> 4. if A[2i] > A[2i+1] then
> 5. Exchange A[2i] <-> A[2i+1]
> 6.Else
> 7. for i = 0 to (n/2)-2 do in parallel
> 8. if A[2i+1] > A[2i+2] then
> 9. Exchange A[2i+1] <-> A[2i+2]
> 10.Next k
public static void sort(){
int n = input.length; //length of the array to sort
Thread[] thr1 = new Thread[(int)n/2];
Thread[] thr2 = new Thread[(int)n/2];
int count1;
int count2;
for(int i = 0; i<n-1;i++){
if(i % 2 == 0){ // i even
count1 = 0;
for(int j = 0; j<n/2; j++){
final int tmp = j;
count1++;
thr1[tmp] = new Thread(){
public void run(){
if (input[2*tmp]>input[2*tmp+1])
swap(2*tmp,2*tmp+1);
}
};
thr1[tmp].start();
}
//waiting for threads to finish
for(int m = 0; m<count1; m++){
try {
thr1[m].join();
} catch (InterruptedException e) {
e.printStackTrace();
}}
}
else{ // i odd
count2 = 0;
for(int k = 0; k<n/2-1;k++){
final int tmp = k;
count2++;
thr2[tmp] = new Thread(){
public void run(){
if (input[2*tmp+1]>input[2*tmp+2])
swap(2*tmp+1,2*tmp+2);
}
};
thr2[tmp].start();
}
// Waiting for threads to finish
for(int m = 0; m<count2; m++){
try {
thr2[m].join();
} catch (InterruptedException e) {
e.printStackTrace();
}
}}
}
}
编辑:
这是带有ExecutorService的新版本,不幸的是,正如预测的那样,它仍然运行非常糟糕,比顺序版本慢。
public static void sort(){
int n = input.length; //length of the array to sort
ExecutorService executor = Executors.newFixedThreadPool(8);
for(int i = 0; i<n-1;i++){
if(i % 2 == 0){ // i even
for(int j = 0; j<n/2; j++){
final int tmp = j;
executor.submit(new Runnable(){
public void run(){
if (input[2*tmp]>input[2*tmp+1])
swap(2*tmp,2*tmp+1);}
});}
}
else{ // i odd
for(int k = 0; k<n/2-1;k++){
final int tmp = k;
executor.submit(new Runnable(){
public void run(){
if (input[2*tmp+1]>input[2*tmp+2])
swap(2*tmp+1,2*tmp+2);}
});}
}
}
executor.shutdown();
try {
executor.awaitTermination(1, TimeUnit.DAYS);
} catch (InterruptedException e) {
e.printStackTrace();}
}
之所以这么慢,是因为 new Thread()
的调用非常昂贵。与在原始线程中执行所有排序相比,启动另一个线程来执行部分排序需要多数千倍的时钟周期。
此外,即使 new Thread()
不是那么昂贵,您仍然看不到太多(或任何)性能改进,因为排序是内存限制操作,您很可能正在尝试 运行它在单CPU、多核系统上,但是CPU只有一个地址总线和一个数据总线,所以内核大多会等待对方放手数据总线。