MPI死锁梯形函数的线性减少
MPI deadlock on linear reduction of trapezoid function
我正在学习 MPI 并尝试修改 Pacheco's Book 的梯形代码以使用线性缩减,但在 N-1 进程运行后我陷入僵局。我假设这是因为没有办法确保它们以相反的顺序进行。
简而言之,对于N个进程,每个进程计算其积分。进程 N 将其积分发送给 N-1。进程 N-1 到 1 接收、求和,然后发送。进程 0 接收和总计。
这里的大部分代码来自 Pacheco。我的是 if/else 部分。
/* trap.c -- Parallel Trapezoidal Rule, first version
*
* Input: None.
* Output: Estimate of the integral from a to b of f(x)
* using the trapezoidal rule and n trapezoids.
*
* Algorithm:
* 1. Each process calculates "its" interval of
* integration.
* 2. Each process estimates the integral of f(x)
* over its interval using the trapezoidal rule.
* 3a. Each process != 0 sends its integral to 0.
* 3b. Process 0 sums the calculations received from
* the individual processes and prints the result.
*
* Notes:
* 1. f(x), a, b, and n are all hardwired.
* 2. The number of processes (p) should evenly divide
* the number of trapezoids (n = 1024)
*
* See Chap. 4, pp. 56 & ff. in PPMPI.
*/
#include <stdio.h>
/* We'll be using MPI routines, definitions, etc. */
#include "mpi.h"
main(int argc, char** argv) {
int my_rank; /* My process rank */
int p; /* The number of processes */
float a = 0.0; /* Left endpoint */
float b = 1.0; /* Right endpoint */
int n = 1024; /* Number of trapezoids */
float h; /* Trapezoid base length */
float local_a; /* Left endpoint my process */
float local_b; /* Right endpoint my process */
int local_n; /* Number of trapezoids for */
/* my calculation */
float my_integral; /* Integral over my interval */
float received_interm_result;
float interm_result; /* Each process' summed result */
float total; /* Total integral */
int source = my_rank + 1; /* Process sending integral */
int dest; /* All messages go to 0 */
int tag = 0;
MPI_Status status;
float Trap(float local_a, float local_b, int local_n,
float h); /* Calculate local integral */
/* Let the system do what it needs to start up MPI */
MPI_Init(&argc, &argv);
/* Get my process rank */
MPI_Comm_rank(MPI_COMM_WORLD, &my_rank);
/* Find out how many processes are being used */
MPI_Comm_size(MPI_COMM_WORLD, &p);
h = (b-a)/n; /* h is the same for all processes */
local_n = n/p; /* So is the number of trapezoids */
/* Length of each process' interval of
* integration = local_n*h. So my interval
* starts at: */
local_a = a + my_rank*local_n*h;
local_b = local_a + local_n*h;
my_integral = Trap(local_a, local_b, local_n, h);
if (my_rank == p-1) { /* Last process only needs to send */
dest = my_rank - 1;
interm_result = my_integral;
printf("Process %d interm result = %f\n", my_rank, interm_result);
MPI_Send(&interm_result, 1, MPI_FLOAT, dest,
tag, MPI_COMM_WORLD);
} else if (my_rank != 0) { /* Middle processes need to receive and send */
dest = my_rank - 1;
MPI_Recv(&received_interm_result, 1, MPI_FLOAT, source, tag,
MPI_COMM_WORLD, &status);
interm_result = received_interm_result + my_integral;
printf("Process %d interm result = %f\n", my_rank, interm_result);
MPI_Send(&interm_result, 1, MPI_FLOAT, dest,
tag, MPI_COMM_WORLD);
} else { /* Process 0 only needs to receive */
MPI_Recv(&received_interm_result, 1, MPI_FLOAT, source, tag,
MPI_COMM_WORLD, &status);
total = received_interm_result + my_integral;
/* Print the result */
printf("With n = %d trapezoids, our estimate\n",
n);
printf("of the integral from %f to %f = %f\n",
a, b, total);
}
/* Shut down MPI */
MPI_Finalize();
} /* main */
float Trap(
float local_a /* in */,
float local_b /* in */,
int local_n /* in */,
float h /* in */) {
float integral; /* Store result in integral */
float x;
int i;
float f(float x); /* function we're integrating */
integral = (f(local_a) + f(local_b))/2.0;
x = local_a;
for (i = 1; i <= local_n-1; i++) {
x = x + h;
integral = integral + f(x);
}
integral = integral*h;
return integral;
} /* Trap */
float f(float x) {
float return_val;
/* Calculate f(x). */
/* Store calculation in return_val. */
return_val = x*x;
return return_val;
} /* f */
您在使用 MPI_Comm_rank
获得正确排名之前将源设置为 myrank+1
,因此进程可能正在等待错误的进程向它们发送某些内容。
我知道你这样做是为了练习,但对于任何希望将其用于生产的人:你应该考虑使用 MPI_Reduce, or if the partial results are relevant MPI_Scan。
我正在学习 MPI 并尝试修改 Pacheco's Book 的梯形代码以使用线性缩减,但在 N-1 进程运行后我陷入僵局。我假设这是因为没有办法确保它们以相反的顺序进行。
简而言之,对于N个进程,每个进程计算其积分。进程 N 将其积分发送给 N-1。进程 N-1 到 1 接收、求和,然后发送。进程 0 接收和总计。
这里的大部分代码来自 Pacheco。我的是 if/else 部分。
/* trap.c -- Parallel Trapezoidal Rule, first version
*
* Input: None.
* Output: Estimate of the integral from a to b of f(x)
* using the trapezoidal rule and n trapezoids.
*
* Algorithm:
* 1. Each process calculates "its" interval of
* integration.
* 2. Each process estimates the integral of f(x)
* over its interval using the trapezoidal rule.
* 3a. Each process != 0 sends its integral to 0.
* 3b. Process 0 sums the calculations received from
* the individual processes and prints the result.
*
* Notes:
* 1. f(x), a, b, and n are all hardwired.
* 2. The number of processes (p) should evenly divide
* the number of trapezoids (n = 1024)
*
* See Chap. 4, pp. 56 & ff. in PPMPI.
*/
#include <stdio.h>
/* We'll be using MPI routines, definitions, etc. */
#include "mpi.h"
main(int argc, char** argv) {
int my_rank; /* My process rank */
int p; /* The number of processes */
float a = 0.0; /* Left endpoint */
float b = 1.0; /* Right endpoint */
int n = 1024; /* Number of trapezoids */
float h; /* Trapezoid base length */
float local_a; /* Left endpoint my process */
float local_b; /* Right endpoint my process */
int local_n; /* Number of trapezoids for */
/* my calculation */
float my_integral; /* Integral over my interval */
float received_interm_result;
float interm_result; /* Each process' summed result */
float total; /* Total integral */
int source = my_rank + 1; /* Process sending integral */
int dest; /* All messages go to 0 */
int tag = 0;
MPI_Status status;
float Trap(float local_a, float local_b, int local_n,
float h); /* Calculate local integral */
/* Let the system do what it needs to start up MPI */
MPI_Init(&argc, &argv);
/* Get my process rank */
MPI_Comm_rank(MPI_COMM_WORLD, &my_rank);
/* Find out how many processes are being used */
MPI_Comm_size(MPI_COMM_WORLD, &p);
h = (b-a)/n; /* h is the same for all processes */
local_n = n/p; /* So is the number of trapezoids */
/* Length of each process' interval of
* integration = local_n*h. So my interval
* starts at: */
local_a = a + my_rank*local_n*h;
local_b = local_a + local_n*h;
my_integral = Trap(local_a, local_b, local_n, h);
if (my_rank == p-1) { /* Last process only needs to send */
dest = my_rank - 1;
interm_result = my_integral;
printf("Process %d interm result = %f\n", my_rank, interm_result);
MPI_Send(&interm_result, 1, MPI_FLOAT, dest,
tag, MPI_COMM_WORLD);
} else if (my_rank != 0) { /* Middle processes need to receive and send */
dest = my_rank - 1;
MPI_Recv(&received_interm_result, 1, MPI_FLOAT, source, tag,
MPI_COMM_WORLD, &status);
interm_result = received_interm_result + my_integral;
printf("Process %d interm result = %f\n", my_rank, interm_result);
MPI_Send(&interm_result, 1, MPI_FLOAT, dest,
tag, MPI_COMM_WORLD);
} else { /* Process 0 only needs to receive */
MPI_Recv(&received_interm_result, 1, MPI_FLOAT, source, tag,
MPI_COMM_WORLD, &status);
total = received_interm_result + my_integral;
/* Print the result */
printf("With n = %d trapezoids, our estimate\n",
n);
printf("of the integral from %f to %f = %f\n",
a, b, total);
}
/* Shut down MPI */
MPI_Finalize();
} /* main */
float Trap(
float local_a /* in */,
float local_b /* in */,
int local_n /* in */,
float h /* in */) {
float integral; /* Store result in integral */
float x;
int i;
float f(float x); /* function we're integrating */
integral = (f(local_a) + f(local_b))/2.0;
x = local_a;
for (i = 1; i <= local_n-1; i++) {
x = x + h;
integral = integral + f(x);
}
integral = integral*h;
return integral;
} /* Trap */
float f(float x) {
float return_val;
/* Calculate f(x). */
/* Store calculation in return_val. */
return_val = x*x;
return return_val;
} /* f */
您在使用 MPI_Comm_rank
获得正确排名之前将源设置为 myrank+1
,因此进程可能正在等待错误的进程向它们发送某些内容。
我知道你这样做是为了练习,但对于任何希望将其用于生产的人:你应该考虑使用 MPI_Reduce, or if the partial results are relevant MPI_Scan。