MPI_Get只从共享内存区读取数组的第一个元素
MPI_Get only reads the first element of an array from the shared memory area
我正在尝试使用 MPI_Get 函数访问包含一维数组的共享内存区域。考虑以下程序:
#include <iostream>
#include <ctime>
#include <mpi.h>
// Seed the C PRNG from the wall clock, then fill vec[0..vecSize)
// with pseudo-random integers drawn uniformly from [1, 10].
void initVector(int* vec, int vecSize)
{
    srand(static_cast<unsigned>(time(NULL)));
    for (int idx = 0; idx < vecSize; ++idx)
    {
        vec[idx] = std::rand() % 10 + 1;
    }
}
int main(int argc, char** argv)
{
MPI_Init(&argc, &argv);
int commSize{ 0 };
MPI_Comm_size(MPI_COMM_WORLD, &commSize);
int myRank{ 0 };
MPI_Comm_rank(MPI_COMM_WORLD, &myRank);
MPI_Comm MPI_COMM_SHARED{ 0 };
MPI_Comm_split_type(MPI_COMM_WORLD, MPI_COMM_TYPE_SHARED, myRank, MPI_INFO_NULL, &MPI_COMM_SHARED);
int nodeSize{ 0 };
MPI_Comm_size(MPI_COMM_SHARED, &nodeSize);
int nodeRank{ 0 };
MPI_Comm_rank(MPI_COMM_SHARED, &nodeRank);
int* masterInVector{ nullptr };
MPI_Win vectorWinHandle{ 0 };
MPI_Win_allocate_shared(commSize * sizeof(int), sizeof(int), MPI_INFO_NULL, MPI_COMM_SHARED, &masterInVector, &vectorWinHandle);
int* slaveInVector{ masterInVector };
MPI_Aint winSizeVector{ 0 };
int winDispVector{ 0 };
if (nodeRank != 0)
{
MPI_Win_shared_query(vectorWinHandle, 0, &winSizeVector, &winDispVector, &slaveInVector);
}
if (nodeRank == 0)
{
MPI_Win_lock(MPI_LOCK_EXCLUSIVE, 0, MPI_MODE_NOCHECK, vectorWinHandle);
initVector(slaveInVector, commSize);
MPI_Win_unlock(0, vectorWinHandle);
}
MPI_Barrier(MPI_COMM_WORLD);
int vectorCell{ -1 };
for (int i = 0; i < commSize; i++)
{
MPI_Win_lock(MPI_LOCK_EXCLUSIVE, 0, MPI_MODE_NOCHECK, vectorWinHandle);
MPI_Get(&vectorCell, 1, MPI_INT, 0, winSizeVector + (i * sizeof(int)), 1, MPI_INT, vectorWinHandle);
MPI_Win_unlock(0, vectorWinHandle);
std::cout << "[" << myRank << "]" << vectorCell << std::endl;
}
MPI_Finalize();
return 0;
}
2 个处理器的输出消息:
[0]2
[0]0
[1]-1
[1]-1
2 个处理器的预期输出(请记住输出值是在 initVector
函数中随机生成的):
[0]2
[0]2
[1]7
[1]7
第一个处理器(myRank=0)在 for 循环的第一次迭代期间从共享内存中读取了正确的值,而在第二次迭代期间从数组的第二个元素读取到了 0。第二个处理器(myRank=1)无论在哪次迭代都没有读取到任何内容(vectorCell 始终保持初始值 -1)。知道问题出在哪里吗?
问题与 MPI_Win_allocate_shared(...) 函数的 disp_unit 参数有关。根据 MPI 规范(specification):
Common choices for disp_unit are 1 (no scaling), and (in C syntax) sizeof(type), for a window that consists of an array of elements of type type. [...]
也就是说,MPI_Win_allocate_shared(...) 指定的 disp_unit 与 MPI_Get(...) 传入的 target_disp 不匹配:窗口以 sizeof(int) 为位移单位,target_disp 应当是元素下标而不是字节偏移。修正后的代码如下:
// ...
// disp_unit = sizeof(int): displacements on this window are counted in int elements.
MPI_Win_allocate_shared(commSize * sizeof(int), sizeof(int), MPI_INFO_NULL, MPI_COMM_SHARED, &masterInVector, &vectorWinHandle);
// ...
int vectorCell{ -1 };
for (int i = 0; i < commSize; i++)
{
MPI_Win_lock(MPI_LOCK_EXCLUSIVE, 0, MPI_MODE_NOCHECK, vectorWinHandle);
// target_disp = i (element index); MPI scales it by disp_unit internally.
MPI_Get(&vectorCell, 1, MPI_INT, 0, i, 1, MPI_INT, vectorWinHandle);
MPI_Win_unlock(0, vectorWinHandle);
std::cout << "[" << myRank << "]" << vectorCell << std::endl;
}
// ...
如果我们改为假设 disp_unit = 1(不做缩放),那么等效代码将是:
// ...
// disp_unit = 1: no scaling, so displacements are raw byte offsets.
MPI_Win_allocate_shared(commSize * sizeof(int), 1, MPI_INFO_NULL, MPI_COMM_SHARED, &masterInVector, &vectorWinHandle);
// ...
int vectorCell{ -1 };
for (int i = 0; i < commSize; i++)
{
MPI_Win_lock(MPI_LOCK_EXCLUSIVE, 0, MPI_MODE_NOCHECK, vectorWinHandle);
// target_disp = i * sizeof(int): a byte offset, matching disp_unit = 1.
MPI_Get(&vectorCell, 1, MPI_INT, 0, i * sizeof(int), 1, MPI_INT, vectorWinHandle);
MPI_Win_unlock(0, vectorWinHandle);
std::cout << "[" << myRank << "]" << vectorCell << std::endl;
}
// ...
我正在尝试使用 MPI_Get 函数访问包含一维数组的共享内存区域。考虑以下程序:
#include <iostream>
#include <ctime>
#include <mpi.h>
// Populate vec[0..vecSize) with random values in the range [1, 10],
// seeding the PRNG from the current time first.
void initVector(int* vec, int vecSize)
{
    srand(static_cast<unsigned>(time(NULL)));
    int pos = 0;
    while (pos < vecSize)
    {
        vec[pos] = std::rand() % 10 + 1;
        ++pos;
    }
}
int main(int argc, char** argv)
{
MPI_Init(&argc, &argv);
int commSize{ 0 };
MPI_Comm_size(MPI_COMM_WORLD, &commSize);
int myRank{ 0 };
MPI_Comm_rank(MPI_COMM_WORLD, &myRank);
MPI_Comm MPI_COMM_SHARED{ 0 };
MPI_Comm_split_type(MPI_COMM_WORLD, MPI_COMM_TYPE_SHARED, myRank, MPI_INFO_NULL, &MPI_COMM_SHARED);
int nodeSize{ 0 };
MPI_Comm_size(MPI_COMM_SHARED, &nodeSize);
int nodeRank{ 0 };
MPI_Comm_rank(MPI_COMM_SHARED, &nodeRank);
int* masterInVector{ nullptr };
MPI_Win vectorWinHandle{ 0 };
MPI_Win_allocate_shared(commSize * sizeof(int), sizeof(int), MPI_INFO_NULL, MPI_COMM_SHARED, &masterInVector, &vectorWinHandle);
int* slaveInVector{ masterInVector };
MPI_Aint winSizeVector{ 0 };
int winDispVector{ 0 };
if (nodeRank != 0)
{
MPI_Win_shared_query(vectorWinHandle, 0, &winSizeVector, &winDispVector, &slaveInVector);
}
if (nodeRank == 0)
{
MPI_Win_lock(MPI_LOCK_EXCLUSIVE, 0, MPI_MODE_NOCHECK, vectorWinHandle);
initVector(slaveInVector, commSize);
MPI_Win_unlock(0, vectorWinHandle);
}
MPI_Barrier(MPI_COMM_WORLD);
int vectorCell{ -1 };
for (int i = 0; i < commSize; i++)
{
MPI_Win_lock(MPI_LOCK_EXCLUSIVE, 0, MPI_MODE_NOCHECK, vectorWinHandle);
MPI_Get(&vectorCell, 1, MPI_INT, 0, winSizeVector + (i * sizeof(int)), 1, MPI_INT, vectorWinHandle);
MPI_Win_unlock(0, vectorWinHandle);
std::cout << "[" << myRank << "]" << vectorCell << std::endl;
}
MPI_Finalize();
return 0;
}
2 个处理器的输出消息:
[0]2
[0]0
[1]-1
[1]-1
2 个处理器的预期输出(请记住输出值是在 initVector
函数中随机生成的):
[0]2
[0]2
[1]7
[1]7
第一个处理器(myRank=0)在 for 循环的第一次迭代期间从共享内存中读取了正确的值,而在第二次迭代期间从数组的第二个元素读取到了 0。第二个处理器(myRank=1)无论在哪次迭代都没有读取到任何内容(vectorCell 始终保持初始值 -1)。知道问题出在哪里吗?
问题与 MPI_Win_allocate_shared(...) 函数的 disp_unit 参数有关。根据 MPI 规范(specification):
Common choices for disp_unit are 1 (no scaling), and (in C syntax) sizeof(type), for a window that consists of an array of elements of type type. [...]
也就是说,MPI_Win_allocate_shared(...) 指定的 disp_unit 与 MPI_Get(...) 传入的 target_disp 不匹配:窗口以 sizeof(int) 为位移单位,target_disp 应当是元素下标而不是字节偏移。修正后的代码如下:
// ...
// disp_unit = sizeof(int): displacements on this window are counted in int elements.
MPI_Win_allocate_shared(commSize * sizeof(int), sizeof(int), MPI_INFO_NULL, MPI_COMM_SHARED, &masterInVector, &vectorWinHandle);
// ...
int vectorCell{ -1 };
for (int i = 0; i < commSize; i++)
{
MPI_Win_lock(MPI_LOCK_EXCLUSIVE, 0, MPI_MODE_NOCHECK, vectorWinHandle);
// target_disp = i (element index); MPI scales it by disp_unit internally.
MPI_Get(&vectorCell, 1, MPI_INT, 0, i, 1, MPI_INT, vectorWinHandle);
MPI_Win_unlock(0, vectorWinHandle);
std::cout << "[" << myRank << "]" << vectorCell << std::endl;
}
// ...
如果我们改为假设 disp_unit = 1(不做缩放),那么等效代码将是:
// ...
// disp_unit = 1: no scaling, so displacements are raw byte offsets.
MPI_Win_allocate_shared(commSize * sizeof(int), 1, MPI_INFO_NULL, MPI_COMM_SHARED, &masterInVector, &vectorWinHandle);
// ...
int vectorCell{ -1 };
for (int i = 0; i < commSize; i++)
{
MPI_Win_lock(MPI_LOCK_EXCLUSIVE, 0, MPI_MODE_NOCHECK, vectorWinHandle);
// target_disp = i * sizeof(int): a byte offset, matching disp_unit = 1.
MPI_Get(&vectorCell, 1, MPI_INT, 0, i * sizeof(int), 1, MPI_INT, vectorWinHandle);
MPI_Win_unlock(0, vectorWinHandle);
std::cout << "[" << myRank << "]" << vectorCell << std::endl;
}
// ...