如何使用 OpenMP 并行化这个循环?

How can I parallelize this loop with OpenMP?

我不知道如何并行化这些循环,因为其中有很多相互依赖的变量,我很困惑。你能帮助并指导我吗? 第一个循环是:

// For each a in [0, sigmaLen): recurse to position pos + 1 only when all three
// checks succeed. The checks run in this order and stop at the first failure,
// so updateRemainingDistances is never called for a rejected frequency and
// prunePassed is never called after a failed update.
for (int a = 0; a < sigmaLen; ++a) {
    const int f = freq[a];

    // Check 1: the frequency of a must reach the lower bound.
    if (f < sumFreqLB)
        continue;

    // Check 2 then check 3 (short-circuit keeps the original call order).
    if (!updateRemainingDistances(s, a, pos) || !prunePassed(pos + 1))
        continue;

    // Record the choice, then descend with the bound reduced by f.
    lmer[pos] = a;
    enumerateStrings(pos + 1, sumFreqLB - f);
}

第二个是:

// Fills, for every k from L down to 1, the entries
//   dist[k - 1][pairOffset + j]   (0 <= j < i)
// from the matching entries of dist[k], adding 1 when the values stored at
// indices i and j of colS[k - 1] differ (compared after conversion to char).
// Here i = stackSz - 1 and pairOffset = i * (i - 1) / 2.
// NOTE(review): pairOffset looks like a triangular-number offset for the
// pairs (j, i) with j < i — confirm against the layout of dist.
void preprocessLowerBounds() {
    int i = stackSz - 1;
    int pairOffset = (i * (i - 1)) >> 1;
    // k runs downward because row k - 1 is derived from row k.
    for (int k = L; k; --k) {
        int *dsn = dist[k] + pairOffset;      // source row (read)
        int *ds = dist[k - 1] + pairOffset;   // destination row (written)
        int *s = colS[k - 1];
        char ci = s[i];
        for (int j = 0; j < i; ++j) {
            char cj = s[j];
            *ds++ = (*dsn++) + (ci != cj);
        }
    }
}

还有一个是:

    // One backtracking step over the items of row `rowNumber`.
    //
    // Reads nItems = rowSize[rowNumber][stackSz]. When
    // shouldGenerateNeighborhood(rowNumber, nItems) is true the work is handed
    // to bruteForceIt. Otherwise every index stored in rowItem[rowNumber] is
    // tried in order: the string at lmers + ind is pushed with addString, the
    // lower bounds are refreshed, and the function recurses to rowNumber + 1
    // only if both hasSolution and getValid accept. The push is always undone
    // with removeLastString before the next index is tried.
    //
    // NOTE(review): the explicit template arguments <hasPreprocessedPairs, useQ>
    // in the calls below imply a template header that is not part of this
    // snippet — confirm against the full source.
    // NOTE(review): addString / removeLastString modify shared state (stackSz,
    // colFreq, colMaxFreq), so iterations of the j loop are not independent.
    void enumerateSubStrings(int rowNumber, int remainQTolerance) {
        int nItems = rowSize[rowNumber][stackSz];
        if (shouldGenerateNeighborhood(rowNumber, nItems)) {
            bruteForceIt(rowNumber, nItems);
        } else {
            indexType *row = rowItem[rowNumber];
            for (int j = 0; j < nItems; ++j) {
                indexType ind = row[j];
                // Push the candidate; everything up to removeLastString()
                // sees the stack with this string on top.
                addString(lmers + ind);
                preprocessLowerBounds();
                uint threshold = maxLB[stackSz] - addMaxFreq();
                if (hasSolution(0, threshold)) {
                    if (getValid<hasPreprocessedPairs, useQ>(rowNumber + 1,
                            (stackSz <= 2 ? n : smallN), threshold + LminusD,
                            ind, remainQTolerance)) {
                        enumerateSubStrings<hasPreprocessedPairs, useQ>(
                                rowNumber + 1, remainQTolerance);
                    }
                }
                // Undo the push so the next candidate starts from the same state.
                removeLastString();
            }
        }
    }

void addString(const char *t) {
    int *mf = colMf[stackSz + 1];
    for (int j = 0; j < L; ++j) {
        int c = t[j];
        colS[j][stackSz] = c;
        mf[j] = colMaxFreq[j] + (colMaxFreq[j] == colFreq[j][c]++);
    }
    colMaxFreq = mf;
    ++stackSz;
}


// For the entry at index stackSz - 1 (called `last` below), rebuilds the slice
// dist[k - 1][pairOffset .. pairOffset + last) for k = L, L - 1, ..., 1.
// Each element is the element at the same position in dist[k], plus one when
// colS[k - 1][last] and colS[k - 1][j] differ after conversion to char.
void preprocessLowerBounds() {
    const int last = stackSz - 1;
    const int pairOffset = (last * (last - 1)) >> 1;

    // Rows are processed from high k to low k: row k - 1 depends on row k.
    for (int k = L; k != 0; --k) {
        const int *above = dist[k] + pairOffset;
        int *below = dist[k - 1] + pairOffset;
        const int *column = colS[k - 1];
        const char lastSymbol = column[last];

        for (int j = 0; j < last; ++j) {
            const char otherSymbol = column[j];
            below[j] = above[j] + (lastSymbol != otherSymbol);
        }
    }
}

void removeLastString() {
    --stackSz;
    for (int j = 0; j < L; ++j)
        --colFreq[j][colS[j][stackSz]];
    colMaxFreq = colMf[stackSz];
}

好的,要用 OpenMP 并行化循环,基本上要遵循两条规则:第一,永远不要从不同线程写入同一内存位置;第二,永远不要读取可能被另一个线程修改的内存区域。在第一个循环中,只有 lmer 变量被修改,其余操作都是只读的(我假设这些变量不会同时被代码的其他部分更改),因此第一个循环可以写成:

// `parallel for` creates the thread team and splits the iterations; a bare
// `omp for` outside a parallel region would run on a single thread.
// The loop variable a is private automatically and must not be listed.
// s and pos use firstprivate so every thread starts with their current values
// (a plain `private` copy is uninitialized). sumFreqLB and freq are only read
// here, so they stay shared.
// NOTE(review): if s or pos are class members rather than locals/parameters,
// check that your OpenMP version accepts them in a data-sharing clause.
// NOTE(review): this is only race-free if updateRemainingDistances,
// prunePassed and enumerateStrings do not modify state shared between threads.
#pragma omp parallel for firstprivate(s, pos)
for (int a = 0; a < sigmaLen; ++a) {
    int f = freq[a];
    if (f >= sumFreqLB)
        if (updateRemainingDistances(s, a, pos))
            if (prunePassed(pos + 1)) {
                // The critical section only stops two threads from storing
                // into lmer[pos] at the same instant. It does NOT keep the
                // value stable afterwards: another thread may overwrite
                // lmer[pos] while this thread is still inside
                // enumerateStrings. For correct results every thread needs
                // its own copy of lmer.
                #pragma omp critical
                {
                    lmer[pos] = a;
                }
                enumerateStrings(pos + 1, sumFreqLB - f);
            }
}

在第二个循环中,我不太理解你 for 循环的写法,但它不会有问题,因为你只进行读取,并且只修改线程局部变量。

您必须确保函数 updateRemainingDistances、prunePassed 和 enumerateStrings 中没有使用静态或全局变量。

在下面的函数中,您使用的大多是只读操作,这些操作可以由多个线程同时完成(前提是没有线程修改这些变量),写入的也都是局部内存位置,因此只需改写 for 循环的形式,让 OpenMP 能够识别它即可。

// OpenMP version of preprocessLowerBounds.
//
// For k = L, L - 1, ..., 1 it computes
//   dist[k - 1][pairOffset + j] = dist[k][pairOffset + j]
//                                 + (colS[k - 1][i] != colS[k - 1][j])
// for every j < i, where i = stackSz - 1 and the two symbols are compared
// after conversion to char.
//
// Row k - 1 is built from row k, so the k iterations depend on each other and
// cannot be distributed across threads. The j iterations of one row are
// independent, so they are the ones that get shared out.
void preprocessLowerBounds() {
    const int i = stackSz - 1;
    const int pairOffset = (i * (i - 1)) >> 1;

    // One thread team for the whole function. Every thread walks the k loop
    // itself; the variables declared inside the region are private per thread.
    #pragma omp parallel
    for (int k = L; k; --k) {
        const int *dsn = dist[k] + pairOffset;   // source row (read only)
        int *ds = dist[k - 1] + pairOffset;      // destination row
        const int *s = colS[k - 1];
        const char ci = s[i];

        // Splits the j range among the team. The implicit barrier at the end
        // of this loop guarantees row k - 1 is complete before any thread
        // starts reading it in the next k iteration.
        #pragma omp for
        for (int j = 0; j < i; ++j) {
            const char cj = s[j];
            ds[j] = dsn[j] + (ci != cj);
        }
    }
}

在最后一个函数中,你调用了很多函数,我不知道它们的源代码,因此无法判断它们是否可以并行化。请参考下面的示例,先看几个错误的写法:

// Container shared by every caller of notParalelizable_1.
std::vector<int> myVector;

// Appends i to the global myVector.
// Deliberate counter-example: push_back modifies the shared vector, so calling
// this from several threads without synchronization is a data race.
void notParalelizable_1(int i) {
    myVector.push_back(i);
}

// Adds i to a function-local static accumulator.
// Deliberate counter-example: the static A is a single object shared by all
// callers, so unsynchronized calls from several threads race on it.
void notParalelizable_2(int i) {
    static int A = 0;
    A += i;
}

// Global accumulator updated by notParalelizable_3.
int varGlobal = 0;

// Adds i to varGlobal.
// Deliberate counter-example: the read-modify-write on the global is not
// synchronized, so concurrent calls race on varGlobal.
void notParalelizable_3(int i) {
    varGlobal += i;
}

// Copies i into a local variable and does nothing else.
// It touches only its own parameter and local, so concurrent calls do not
// interfere with each other.
void oneFunctionParalelizable(int i) {
    const int B = i;
    static_cast<void>(B);  // B exists only to illustrate a thread-local write
}

// Walks through wrong and correct uses of an OpenMP parallel loop.
// Every loop uses the combined `parallel for` construct: a bare `omp for`
// outside a parallel region runs on one thread and would hide the races the
// wrong examples are meant to show. Build with OpenMP enabled (for example
// -fopenmp), otherwise the pragmas are ignored.
int main()
{
    // WRONG: myVector is modified by several threads at once. This is a data
    // race; the elements may end up out of order, the vector may be
    // corrupted, or the program may crash.
    #pragma omp parallel for
    for (int i = 0; i < 10; i++)
    {
        notParalelizable_1(i);
    }

    // WRONG: the static variable A inside the function is modified by several
    // threads at once.
    #pragma omp parallel for
    for (int i = 0; i < 10; i++)
    {
        notParalelizable_2(i);
    }

    // WRONG: varGlobal is modified by several threads at once.
    #pragma omp parallel for
    for (int i = 0; i < 10; i++)
    {
        notParalelizable_3(i);
    }

    // OK: the function only touches its own parameter and local variable.
    #pragma omp parallel for
    for (int i = 0; i < 10; i++)
    {
        oneFunctionParalelizable(i);
    }

    // OK: each iteration writes to a different element, so no two threads
    // ever write to the same memory location.
    int values[10] = {};
    #pragma omp parallel for
    for (int i = 0; i < 10; i++)
    {
        values[i] = i;
    }

    // WRONG: k is shared and updated by every thread without synchronization,
    // so its final value is unreliable. Adding reduction(+:k) to the pragma
    // is the standard way to make this sum correct.
    int k = 2;
    #pragma omp parallel for
    for (int i = 0; i < 10; i++)
    {
        k = k + i;
    }

    return 0;
}