在c中优化和向量化随机函数

Optimize and vectorize random function in c

我编写了一个函数来生成伪随机数,但我的编译器没有对此进行矢量化 not vectorized: complicated access pattern。 我怎样才能使编译器对其进行矢量化? 我使用命令 gcc -O3 -ffast-math -funroll-all-loops -ftree-vectorize -fopt-info-vec-missed -lm -g -o .O3

    unsigned int seed;
    unsigned int temp;
    #define val13 13
    unsigned int var1 = 214013;
    unsigned int var2 = 2531011;
    inline int myRandom() {
      seed = (var1*seed + var2);
      return (seed>>val13);
    }

如果我将代码更改为此我设法进行矢量化,但我没有得到预期的结果。

inline int myRandom() {
          return ((var1*seed + var2)>>val13);
        }

我怎样才能让编译器向量化这个函数并得到预期的结果?

种子上的生成器函数f是一个线性同余生成器,很容易与自身组合实现f2, f3, f4,等等。然后可以实现矢量化生成器,它从一个种子准备初始矢量块,然后使用 fB 来计算 B 车道的结果,每个车道彼此独立。这是一个概念证明。各种修饰都是可能的,例如保留种子向量而不是单个种子并修改 FillRandomVector 以处理任意 N.

#include <stdio.h>
#include <stdlib.h>


#define Block   4   //  Number of lanes (elements) in a vector block.


static const unsigned int var1 = 214013;
static const unsigned int var2 = 2531011;


//  Original generator function, modified to take a pointer to the seed.
static inline int MyRandom(unsigned int *seed)
{
    *seed = var1 * *seed + var2;
    return *seed >> 13;
}


//  New parameters for a vectorized generator.
static unsigned int var1Vector;
static unsigned int var2Vector;


/*  Initialize parameters for vectorized generator by computing them from the
    scalar parameters.
*/
static void Initialize(void)
{
    var1Vector = 1;
    var2Vector = 0;
    for (int i = 0; i < Block; ++i)
    {
        var1Vector *= var1;
        var2Vector = var2Vector * var1 + var2;
    }
}


//  Fill array with generated numbers using scalar method.
static void FillRandomScalar(unsigned int *seed, size_t N, int *Destination)
{
    for (size_t n = 0; n < N; ++n)
        Destination[n] = MyRandom(seed);
}


/*  Fill array with generated numbers using vectorizable method.

    For proof of concept only, so handles only certain N:

        N must be a positive multiple of Block.
*/
static void FillRandomVector(unsigned int *seed, size_t N, int *Destination)
{
    //  Prepare a vector of seeds and generate the first block of results.
    unsigned seedVector[Block];
    unsigned int S = *seed;
    for (size_t n = 0; n < Block; ++n)
    {
        S = S * var1 + var2;
        seedVector[n] = S;
        Destination[n] = S >> 13;
    }

    //  Generate the remaining results using independent lanes.
    for (size_t n = Block; n < N; n += Block)
        for (size_t lane = 0; lane < Block; ++lane)
        {
            seedVector[lane] = seedVector[lane] * var1Vector + var2Vector;
            Destination[n + lane] = seedVector[lane] >> 13;
        }

    *seed = seedVector[Block-1];
}


#define N   100


int main(void)
{
    Initialize();

    int expected0[N], observed0[N];
    int expected1[N], observed1[N];

    unsigned int seed;

    seed = 17;
    FillRandomScalar(&seed, N, expected0);
    FillRandomScalar(&seed, N, expected1);

    seed = 17;
    FillRandomVector(&seed, N, observed0);
    FillRandomVector(&seed, N, observed1);

    for (size_t n = 0; n < N; ++n)
    {
        if (observed0[n] != expected0[n])
        {
            printf("observed0[%zu] = %d, but expected0 %d.\n",
                n, observed0[n], expected0[n]);
            exit(EXIT_FAILURE);
        }
    }
    for (size_t n = 0; n < N; ++n)
    {
        if (observed1[n] != expected1[n])
        {
            printf("observed1[%zu] = %d, but expected1 %d.\n",
                n, observed1[n], expected1[n]);
            exit(EXIT_FAILURE);
        }
    }
}