MPI 向量乘法
MPI Vector multiplication
#include<stdio.h>
#include<mpi.h>
int main()
{
int a_r = 0, a_c = 0, v_s = 0, i = 0, rank = 0, size = 0;
int local_row = 0, partial_sum = 0, sum = 0, j = 0;
int my_first_ele = 0, my_last_ele = 0;
int a[10][10], v[10], partial_mul[10] = {0}, mul[10] = {0};
MPI_Init(NULL, NULL);
MPI_Comm_rank(MPI_COMM_WORLD, &rank);
MPI_Comm_size(MPI_COMM_WORLD, &size);
if(rank == 0)
{
printf("Enter the row of array A: ");
scanf("%d", &a_r);
printf("Enter the column of array A: ");
scanf("%d", &a_c);
printf("Enter the array A: ");
for(i = 0; i < a_r; i++)
{
for(j = 0; j < a_c; j++)
scanf("%d", &a[i][j]);
}
printf("Enter the size of vector array: ");
scanf("%d", &v_s);
printf("Enter the vector array: ");
for(i = 0; i < v_s; i++)
{
scanf("%d", &v[i]);
}
MPI_Bcast(&a_r, 1, MPI_INT, 0, MPI_COMM_WORLD);
MPI_Bcast(&a_c, 1, MPI_INT, 0, MPI_COMM_WORLD);
MPI_Bcast(&v_s, 1, MPI_INT, 0, MPI_COMM_WORLD);
MPI_Bcast(a, a_r*a_c, MPI_INT, 0, MPI_COMM_WORLD);
MPI_Bcast(v, v_s, MPI_INT, 0, MPI_COMM_WORLD);
local_row = a_r / size;
my_first_ele = rank * local_row;
my_last_ele = (rank+1) * local_row;
if(a_c == v_s)
{
for(i = my_first_ele; i < my_last_ele; i++)
{
for(j = 0; j < a_c; j++)
{
partial_mul[i] = partial_mul[i] + (a[i][j]*v[j]);
}
}
printf("\nPartial multiplication in Rank 0: \n");
for(i = my_first_ele; i < my_last_ele; i++)
printf("%d \n", partial_mul[i]);
MPI_Gather(partial_mul, local_row, MPI_INT, mul, local_row, MPI_INT, 0, MPI_COMM_WORLD);
printf("\n \nGlobal Multiplication: \n");
for(i = 0; i < a_r; i++)
{
printf("%d \n", mul[i]);
}
}
else
printf("\nCan't multiply. \n");
}
else
{
MPI_Bcast(&a_r, 1, MPI_INT, 0, MPI_COMM_WORLD);
MPI_Bcast(&a_c, 1, MPI_INT, 0, MPI_COMM_WORLD);
MPI_Bcast(&v_s, 1, MPI_INT, 0, MPI_COMM_WORLD);
MPI_Bcast(a, a_r*a_c, MPI_INT, 0, MPI_COMM_WORLD);
MPI_Bcast(v, v_s, MPI_INT, 0, MPI_COMM_WORLD);
local_row = a_r / size;
my_first_ele = rank * local_row;
my_last_ele = (rank+1) * local_row;
if(a_c == v_s)
{
for(i = my_first_ele; i < my_last_ele; i++)
{
for(j = 0; j < a_c; j++)
{
partial_mul[i] = partial_mul[i] + (a[i][j]*v[j]);
}
}
printf("\nPartial multiplication in Rank %d: \n", rank);
for(i = my_first_ele; i < my_last_ele; i++)
printf("%d \n", partial_mul[i]);
MPI_Gather(partial_mul, local_row, MPI_INT, mul, local_row, MPI_INT, 0, MPI_COMM_WORLD);
}
else
printf("\nCan't multiply. \n");
}
MPI_FINALIZE();
}
我对上面的代码有疑问。我的偏乘值是正确的。但是在我的整体乘法中,我只能收集排名 0 的元素,其余值打印为 0。任何人都可以解释这是什么问题。
看看你的数据布局,我认为你误解了 MPI 中的数据结构:所有数据在每个等级中都是分开的,没有隐含的共享或分发。您的矢量 partial_sum
在每个等级上都是分开的,每个等级都有完整的 10 个元素。因此假设 size=2
、a_r=10
和零初始化,计算后内容将如下所示:
- 排名 0:
{x0,x1,x2,x3,x4,0,0,0,0,0}
- 排名 1:
{0,0,0,0,0,x5,x6,x7,x8,x9}
其中 x 是正确的计算值。然后 Gather 将从每个等级中收集前 local_row=5
个元素,结果是 {x0,x1,x2,x3,x4,0,0,0,0,0}
.
您可以通过添加正确的偏移量来解决这个问题:
MPI_Gather(&partial_mul[my_first_ele], local_row, MPI_INT, mul, local_row, MPI_INT, 0, MPI_COMM_WORLD);
但是请不要那样做。相反,您应该重新考虑您的数据结构以真正分发数据,为向量/数组的每个部分保留正确的大小。要将部分数据发送到每个等级,请使用 MPI_Scatter
(与 MPI_Gather
相反)。最困难的是正确设置矩阵。 this excellent answer.
对此进行了详细解释
#include<stdio.h>
#include<mpi.h>
int main()
{
int a_r = 0, a_c = 0, v_s = 0, i = 0, rank = 0, size = 0;
int local_row = 0, partial_sum = 0, sum = 0, j = 0;
int my_first_ele = 0, my_last_ele = 0;
int a[10][10], v[10], partial_mul[10] = {0}, mul[10] = {0};
MPI_Init(NULL, NULL);
MPI_Comm_rank(MPI_COMM_WORLD, &rank);
MPI_Comm_size(MPI_COMM_WORLD, &size);
if(rank == 0)
{
printf("Enter the row of array A: ");
scanf("%d", &a_r);
printf("Enter the column of array A: ");
scanf("%d", &a_c);
printf("Enter the array A: ");
for(i = 0; i < a_r; i++)
{
for(j = 0; j < a_c; j++)
scanf("%d", &a[i][j]);
}
printf("Enter the size of vector array: ");
scanf("%d", &v_s);
printf("Enter the vector array: ");
for(i = 0; i < v_s; i++)
{
scanf("%d", &v[i]);
}
MPI_Bcast(&a_r, 1, MPI_INT, 0, MPI_COMM_WORLD);
MPI_Bcast(&a_c, 1, MPI_INT, 0, MPI_COMM_WORLD);
MPI_Bcast(&v_s, 1, MPI_INT, 0, MPI_COMM_WORLD);
MPI_Bcast(a, a_r*a_c, MPI_INT, 0, MPI_COMM_WORLD);
MPI_Bcast(v, v_s, MPI_INT, 0, MPI_COMM_WORLD);
local_row = a_r / size;
my_first_ele = rank * local_row;
my_last_ele = (rank+1) * local_row;
if(a_c == v_s)
{
for(i = my_first_ele; i < my_last_ele; i++)
{
for(j = 0; j < a_c; j++)
{
partial_mul[i] = partial_mul[i] + (a[i][j]*v[j]);
}
}
printf("\nPartial multiplication in Rank 0: \n");
for(i = my_first_ele; i < my_last_ele; i++)
printf("%d \n", partial_mul[i]);
MPI_Gather(partial_mul, local_row, MPI_INT, mul, local_row, MPI_INT, 0, MPI_COMM_WORLD);
printf("\n \nGlobal Multiplication: \n");
for(i = 0; i < a_r; i++)
{
printf("%d \n", mul[i]);
}
}
else
printf("\nCan't multiply. \n");
}
else
{
MPI_Bcast(&a_r, 1, MPI_INT, 0, MPI_COMM_WORLD);
MPI_Bcast(&a_c, 1, MPI_INT, 0, MPI_COMM_WORLD);
MPI_Bcast(&v_s, 1, MPI_INT, 0, MPI_COMM_WORLD);
MPI_Bcast(a, a_r*a_c, MPI_INT, 0, MPI_COMM_WORLD);
MPI_Bcast(v, v_s, MPI_INT, 0, MPI_COMM_WORLD);
local_row = a_r / size;
my_first_ele = rank * local_row;
my_last_ele = (rank+1) * local_row;
if(a_c == v_s)
{
for(i = my_first_ele; i < my_last_ele; i++)
{
for(j = 0; j < a_c; j++)
{
partial_mul[i] = partial_mul[i] + (a[i][j]*v[j]);
}
}
printf("\nPartial multiplication in Rank %d: \n", rank);
for(i = my_first_ele; i < my_last_ele; i++)
printf("%d \n", partial_mul[i]);
MPI_Gather(partial_mul, local_row, MPI_INT, mul, local_row, MPI_INT, 0, MPI_COMM_WORLD);
}
else
printf("\nCan't multiply. \n");
}
MPI_FINALIZE();
}
我对上面的代码有疑问。我的偏乘值是正确的。但是在我的整体乘法中,我只能收集排名 0 的元素,其余值打印为 0。任何人都可以解释这是什么问题。
看看你的数据布局,我认为你误解了 MPI 中的数据结构:所有数据在每个等级中都是分开的,没有隐含的共享或分发。您的矢量 partial_sum
在每个等级上都是分开的,每个等级都有完整的 10 个元素。因此假设 size=2
、a_r=10
和零初始化,计算后内容将如下所示:
- 排名 0:
{x0,x1,x2,x3,x4,0,0,0,0,0}
- 排名 1:
{0,0,0,0,0,x5,x6,x7,x8,x9}
其中 x 是正确的计算值。然后 Gather 将从每个等级中收集前 local_row=5
个元素,结果是 {x0,x1,x2,x3,x4,0,0,0,0,0}
.
您可以通过添加正确的偏移量来解决这个问题:
MPI_Gather(&partial_mul[my_first_ele], local_row, MPI_INT, mul, local_row, MPI_INT, 0, MPI_COMM_WORLD);
但是请不要那样做。相反,您应该重新考虑您的数据结构以真正分发数据,为向量/数组的每个部分保留正确的大小。要将部分数据发送到每个等级,请使用 MPI_Scatter
(与 MPI_Gather
相反)。最困难的是正确设置矩阵。 this excellent answer.