OpenMP `omp declare simd` 的 uniform(this) 子句在 GCC 中不受支持吗?
OpenMP `omp declare simd`: is the uniform(this) clause not supported in GCC?
我有一个简单的矩阵类,我想对其加法运算符进行矢量化。但是,uniform(this) 子句似乎不受 GCC 支持(使用英特尔 C++ 编译器时工作正常)。如果有任何解决方法,我很想知道。(代码和编译命令见下文)
如果您有任何意见,请告诉我。
#include <iostream>
#include <cassert>
#include <omp.h>
// Debug helper: writes `<expression text> = <value>` followed by std::endl to std::cerr.
// The expansion already ends with ';', so call sites may omit their own semicolon.
#define printvar(a) std::cerr << (#a) << " = " << (a) << std::endl;
// Debug helper: writes the current file, line number and enclosing function name to std::cerr.
// Like printvar, the expansion carries its own trailing ';'.
#define printline std::cerr << "FILE " << __FILE__ << " LINE " << __LINE__ << " FUNC: " << __func__ << std::endl;
// Number of elements in each of the arrays declared in main_1.
#define NUMEL 10000
// Returns the sum of two ints.
// The four `declare simd` directives below ask an OpenMP-enabled compiler to
// also emit vector variants of this function for 2, 4, 8 and 16 lanes.
#pragma omp declare simd simdlen(2)
#pragma omp declare simd simdlen(4)
#pragma omp declare simd simdlen(8)
#pragma omp declare simd simdlen(16)
int myadd(int a, int b)
{
    const int sum = a + b;
    return sum;
}
/**
 * Minimal dense matrix of V kept in a single malloc'd buffer.
 *
 * Elements are addressed either by flat zero-based index (csi0) or by
 * zero-based (row, column), laid out column-major (cij0: i + nrows*j).
 * The destructor frees `data` unless `istemp` is set.
 *
 * NOTE: storage comes from malloc and elements are only ever assigned, never
 * constructed, so V is expected to be a trivially constructible type.
 */
template <typename V>
class Matrix {
public:
    V* data = nullptr;          // element buffer with nrows*ncols entries (malloc/free)
    int nrows = 0, ncols = 0;   // dimensions; both zero for a default-constructed matrix
    bool istemp = false;        // when true the destructor does not free `data`
    std::string name;           // free-form label

    /** Creates an empty 0x0 matrix that owns no buffer. */
    Matrix(){
    }

    /**
     * Creates an nrows x ncols matrix with every element set to `val`.
     * @param nrows number of rows (must not be negative)
     * @param ncols number of columns (must not be negative)
     * @param val   initial value of every element
     * @param name  label stored in `name`
     */
    Matrix(int nrows, int ncols, V val, std::string name = "none"){
        assert(nrows >= 0 && ncols >= 0);
        this->nrows = nrows;
        this->ncols = ncols;
        this->name = name;
        data = static_cast<V*>(malloc(sizeof(V)*nrows*ncols));
        // malloc may legitimately return NULL for a zero-sized request only.
        assert(data != NULL || numel() == 0);
        const int count = numel();
        for(int i = 0; i < count; i++){
            csi0(i) = val;
        }
    }

    /**
     * Deep copy: duplicates dimensions, label and elements into a fresh buffer.
     * `istemp` is deliberately not copied; the copy owns its own buffer.
     */
    Matrix(const Matrix& m){
        this->nrows = m.nrows;
        this->ncols = m.ncols;
        this->name = m.name;
        this->data = static_cast<V*>(malloc(sizeof(V)*nrows*ncols));
        assert(data != NULL || numel() == 0);
        const int count = numel();
        for(int i = 0; i < count; i++){
            csi0(i) = m.csi0(i);
        }
    }

    /**
     * Exchanges the complete state of `a` and `b`.
     * `istemp` travels with the buffer so that ownership stays attached to the
     * pointer it describes. Does not touch `*this` beyond the two arguments.
     */
    inline void swap(Matrix<V>& a, Matrix<V>& b){
        V* data = a.data; a.data = b.data; b.data = data;
        int nrows = a.nrows; a.nrows = b.nrows; b.nrows = nrows;
        int ncols = a.ncols; a.ncols = b.ncols; b.ncols = ncols;
        bool istemp = a.istemp; a.istemp = b.istemp; b.istemp = istemp;
        a.name.swap(b.name);
    }

    /**
     * Copy-and-swap assignment: copies `m`, swaps the copy into `*this` and lets
     * the temporary release the previous buffer. Safe for self-assignment.
     * @return reference to `*this`
     */
    Matrix<V>& operator=(const Matrix<V>& m){
        Matrix<V> tmp(m);
        swap(*this,tmp);
        return *this;   // the original fell off the end here, which is undefined behaviour
    }

    /**
     * Returns a reference to the element at flat zero-based index `i`.
     * No bounds checking is performed.
     */
    #pragma omp declare simd
    #pragma omp declare simd simdlen(4) uniform(this)
    #pragma omp declare simd simdlen(8) uniform(this)
    #pragma omp declare simd simdlen(16) uniform(this)
    V& csi0 (int i) const {
        return this->data[i];
    }

    /**
     * Returns a reference to the element at zero-based row `i`, column `j`
     * (column-major layout). No bounds checking is performed.
     */
    V& cij0 (int i, int j) const {
        return data[i+nrows*j];
    }

    /** Returns the total number of elements, nrows*ncols. */
    int numel() const {
        return nrows*ncols;
    }

    /**
     * Element-wise sum of two matrices of identical shape.
     * The shape requirement is enforced with assert. The result is labelled "retmat".
     */
    friend Matrix<V> operator+(const Matrix<V>& a, const Matrix<V>& b){
        assert(a.nrows == b.nrows && a.ncols == b.ncols);
        Matrix<V> retmat(a.nrows, a.ncols, 0, "retmat");
        // Hoisted so the loop bound is a plain loop-invariant value.
        const int count = a.numel();
        #pragma omp parallel for simd
        for(int i = 0; i < count; i++){
            retmat.csi0(i) = a.csi0(i) + b.csi0(i);
        }
        // retmat.istemp = true;
        return retmat;
    }

    /** Releases the buffer unless it is missing or flagged as `istemp`. */
    ~Matrix(){
        if(data != NULL && !istemp){
            free(data);
        }
    }

    /**
     * Streams the matrix as "{{", one line per row with ", " after every
     * element, then "}}". Takes the matrix by const reference so printing does
     * not deep-copy it, and prints element (i, j) rather than repeating flat
     * element i across the whole row.
     */
    friend std::ostream& operator<<(std::ostream& os, const Matrix<V>& m){
        os << "{{" << std::endl;
        for(int i = 0; i < m.nrows; i++){
            for(int j = 0; j < m.ncols; j++){
                os << m.cij0(i, j) << ", ";
            }
            os << std::endl;
        }
        os << "}}" << std::endl;
        return os;
    }
};
// Demo entry point: builds two 4x5 integer matrices filled with 1 and 2,
// adds them, and prints the sum to std::cerr through the printvar macro.
int main(int argc, char const *argv[])
{
    Matrix<int> a(4, 5, 1);
    Matrix<int> b(4, 5, 2);
    Matrix<int> c;          // starts empty; filled by the assignment below
    c = a + b;
    printvar(c)             // the macro expansion supplies its own ';'
    return 0;
}
// Secondary test driver: calls myadd element-wise over two NUMEL-sized arrays
// inside an OpenMP parallel loop and stores the sums in a third array.
// Always returns 0.
int main_1(int argc, char const *argv[])
{
    // The inputs are zero-initialised; the original read them while still
    // indeterminate, which is undefined behaviour.
    int a[NUMEL] = {}, b[NUMEL] = {}, c[NUMEL];
    #pragma omp parallel for
    for(int i = 0; i < NUMEL; i++){
        c[i] = myadd(a[i], b[i]);
    }
    return 0;
}
编译命令
g++.exe -fopt-info-all=all.optrpt -O3 -fopenmp -fopenmp-simd -mthreads -mavx -fmax-errors=5 -c gcc_ompsimd_test.cpp -Fo:gcc_ompsimd_test.o
gcc_ompsimd_test.cpp:65:46: error: expected unqualified-id before 'this'
#pragma omp declare simd simdlen(4) uniform(this)
^~~~
gcc_ompsimd_test.cpp:66:46: error: expected unqualified-id before 'this'
#pragma omp declare simd simdlen(8) uniform(this)
^~~~
gcc_ompsimd_test.cpp:67:47: error: expected unqualified-id before 'this'
#pragma omp declare simd simdlen(16) uniform(this)
在 GCC 中,您似乎不能在 class 声明内部的成员函数定义上使用 uniform(this),而在 clang 和 Intel 编译器中是可以的。
因此,使用 GCC 时应把该成员函数的定义移到 class 声明之外:
// Out-of-class definition of Matrix<V>::csi0 carrying the declare-simd
// directive with uniform(this): returns a reference to the element at flat
// zero-based index `i` of the matrix buffer.
#pragma omp declare simd simdlen(16) uniform(this)
template <typename V>
V& Matrix<V>::csi0 (int i) const
{
    V* const base = this->data;
    return base[i];
}
从 OpenMP 4.5 开始,规范包含以下行 (2.8.2)。
The special this pointer can be used as if it was one of the arguments
to the function in any of the linear, aligned, or uniform clauses.
所以任何符合 OpenMP 4.5 规范(compliant)的编译器都应该支持它。
我有一个简单的矩阵类,我想对其加法运算符进行矢量化。但是,uniform(this) 子句似乎不受 GCC 支持(使用英特尔 C++ 编译器时工作正常)。如果有任何解决方法,我很想知道。(代码和编译命令见下文)
如果您有任何意见,请告诉我。
#include <iostream>
#include <cassert>
#include <omp.h>
// Debug helper: writes `<expression text> = <value>` followed by std::endl to std::cerr.
// The expansion already ends with ';', so call sites may omit their own semicolon.
#define printvar(a) std::cerr << (#a) << " = " << (a) << std::endl;
// Debug helper: writes the current file, line number and enclosing function name to std::cerr.
// Like printvar, the expansion carries its own trailing ';'.
#define printline std::cerr << "FILE " << __FILE__ << " LINE " << __LINE__ << " FUNC: " << __func__ << std::endl;
// Number of elements in each of the arrays declared in main_1.
#define NUMEL 10000
// Returns the sum of two ints.
// The four `declare simd` directives below ask an OpenMP-enabled compiler to
// also emit vector variants of this function for 2, 4, 8 and 16 lanes.
#pragma omp declare simd simdlen(2)
#pragma omp declare simd simdlen(4)
#pragma omp declare simd simdlen(8)
#pragma omp declare simd simdlen(16)
int myadd(int a, int b)
{
    const int sum = a + b;
    return sum;
}
/**
 * Minimal dense matrix of V kept in a single malloc'd buffer.
 *
 * Elements are addressed either by flat zero-based index (csi0) or by
 * zero-based (row, column), laid out column-major (cij0: i + nrows*j).
 * The destructor frees `data` unless `istemp` is set.
 *
 * NOTE: storage comes from malloc and elements are only ever assigned, never
 * constructed, so V is expected to be a trivially constructible type.
 */
template <typename V>
class Matrix {
public:
    V* data = nullptr;          // element buffer with nrows*ncols entries (malloc/free)
    int nrows = 0, ncols = 0;   // dimensions; both zero for a default-constructed matrix
    bool istemp = false;        // when true the destructor does not free `data`
    std::string name;           // free-form label

    /** Creates an empty 0x0 matrix that owns no buffer. */
    Matrix(){
    }

    /**
     * Creates an nrows x ncols matrix with every element set to `val`.
     * @param nrows number of rows (must not be negative)
     * @param ncols number of columns (must not be negative)
     * @param val   initial value of every element
     * @param name  label stored in `name`
     */
    Matrix(int nrows, int ncols, V val, std::string name = "none"){
        assert(nrows >= 0 && ncols >= 0);
        this->nrows = nrows;
        this->ncols = ncols;
        this->name = name;
        data = static_cast<V*>(malloc(sizeof(V)*nrows*ncols));
        // malloc may legitimately return NULL for a zero-sized request only.
        assert(data != NULL || numel() == 0);
        const int count = numel();
        for(int i = 0; i < count; i++){
            csi0(i) = val;
        }
    }

    /**
     * Deep copy: duplicates dimensions, label and elements into a fresh buffer.
     * `istemp` is deliberately not copied; the copy owns its own buffer.
     */
    Matrix(const Matrix& m){
        this->nrows = m.nrows;
        this->ncols = m.ncols;
        this->name = m.name;
        this->data = static_cast<V*>(malloc(sizeof(V)*nrows*ncols));
        assert(data != NULL || numel() == 0);
        const int count = numel();
        for(int i = 0; i < count; i++){
            csi0(i) = m.csi0(i);
        }
    }

    /**
     * Exchanges the complete state of `a` and `b`.
     * `istemp` travels with the buffer so that ownership stays attached to the
     * pointer it describes. Does not touch `*this` beyond the two arguments.
     */
    inline void swap(Matrix<V>& a, Matrix<V>& b){
        V* data = a.data; a.data = b.data; b.data = data;
        int nrows = a.nrows; a.nrows = b.nrows; b.nrows = nrows;
        int ncols = a.ncols; a.ncols = b.ncols; b.ncols = ncols;
        bool istemp = a.istemp; a.istemp = b.istemp; b.istemp = istemp;
        a.name.swap(b.name);
    }

    /**
     * Copy-and-swap assignment: copies `m`, swaps the copy into `*this` and lets
     * the temporary release the previous buffer. Safe for self-assignment.
     * @return reference to `*this`
     */
    Matrix<V>& operator=(const Matrix<V>& m){
        Matrix<V> tmp(m);
        swap(*this,tmp);
        return *this;   // the original fell off the end here, which is undefined behaviour
    }

    /**
     * Returns a reference to the element at flat zero-based index `i`.
     * No bounds checking is performed.
     */
    #pragma omp declare simd
    #pragma omp declare simd simdlen(4) uniform(this)
    #pragma omp declare simd simdlen(8) uniform(this)
    #pragma omp declare simd simdlen(16) uniform(this)
    V& csi0 (int i) const {
        return this->data[i];
    }

    /**
     * Returns a reference to the element at zero-based row `i`, column `j`
     * (column-major layout). No bounds checking is performed.
     */
    V& cij0 (int i, int j) const {
        return data[i+nrows*j];
    }

    /** Returns the total number of elements, nrows*ncols. */
    int numel() const {
        return nrows*ncols;
    }

    /**
     * Element-wise sum of two matrices of identical shape.
     * The shape requirement is enforced with assert. The result is labelled "retmat".
     */
    friend Matrix<V> operator+(const Matrix<V>& a, const Matrix<V>& b){
        assert(a.nrows == b.nrows && a.ncols == b.ncols);
        Matrix<V> retmat(a.nrows, a.ncols, 0, "retmat");
        // Hoisted so the loop bound is a plain loop-invariant value.
        const int count = a.numel();
        #pragma omp parallel for simd
        for(int i = 0; i < count; i++){
            retmat.csi0(i) = a.csi0(i) + b.csi0(i);
        }
        // retmat.istemp = true;
        return retmat;
    }

    /** Releases the buffer unless it is missing or flagged as `istemp`. */
    ~Matrix(){
        if(data != NULL && !istemp){
            free(data);
        }
    }

    /**
     * Streams the matrix as "{{", one line per row with ", " after every
     * element, then "}}". Takes the matrix by const reference so printing does
     * not deep-copy it, and prints element (i, j) rather than repeating flat
     * element i across the whole row.
     */
    friend std::ostream& operator<<(std::ostream& os, const Matrix<V>& m){
        os << "{{" << std::endl;
        for(int i = 0; i < m.nrows; i++){
            for(int j = 0; j < m.ncols; j++){
                os << m.cij0(i, j) << ", ";
            }
            os << std::endl;
        }
        os << "}}" << std::endl;
        return os;
    }
};
// Demo entry point: builds two 4x5 integer matrices filled with 1 and 2,
// adds them, and prints the sum to std::cerr through the printvar macro.
int main(int argc, char const *argv[])
{
    Matrix<int> a(4, 5, 1);
    Matrix<int> b(4, 5, 2);
    Matrix<int> c;          // starts empty; filled by the assignment below
    c = a + b;
    printvar(c)             // the macro expansion supplies its own ';'
    return 0;
}
// Secondary test driver: calls myadd element-wise over two NUMEL-sized arrays
// inside an OpenMP parallel loop and stores the sums in a third array.
// Always returns 0.
int main_1(int argc, char const *argv[])
{
    // The inputs are zero-initialised; the original read them while still
    // indeterminate, which is undefined behaviour.
    int a[NUMEL] = {}, b[NUMEL] = {}, c[NUMEL];
    #pragma omp parallel for
    for(int i = 0; i < NUMEL; i++){
        c[i] = myadd(a[i], b[i]);
    }
    return 0;
}
编译命令
g++.exe -fopt-info-all=all.optrpt -O3 -fopenmp -fopenmp-simd -mthreads -mavx -fmax-errors=5 -c gcc_ompsimd_test.cpp -Fo:gcc_ompsimd_test.o
gcc_ompsimd_test.cpp:65:46: error: expected unqualified-id before 'this'
#pragma omp declare simd simdlen(4) uniform(this)
^~~~
gcc_ompsimd_test.cpp:66:46: error: expected unqualified-id before 'this'
#pragma omp declare simd simdlen(8) uniform(this)
^~~~
gcc_ompsimd_test.cpp:67:47: error: expected unqualified-id before 'this'
#pragma omp declare simd simdlen(16) uniform(this)
在 GCC 中,您似乎不能在 class 声明内部的成员函数定义上使用 uniform(this),而在 clang 和 Intel 编译器中是可以的。
因此,使用 GCC 时应把该成员函数的定义移到 class 声明之外:
// Out-of-class definition of Matrix<V>::csi0 carrying the declare-simd
// directive with uniform(this): returns a reference to the element at flat
// zero-based index `i` of the matrix buffer.
#pragma omp declare simd simdlen(16) uniform(this)
template <typename V>
V& Matrix<V>::csi0 (int i) const
{
    V* const base = this->data;
    return base[i];
}
从 OpenMP 4.5 开始,规范包含以下行 (2.8.2)。
The special this pointer can be used as if it was one of the arguments to the function in any of the linear, aligned, or uniform clauses.
所以任何符合 OpenMP 4.5 规范(compliant)的编译器都应该支持它。