将主机函数作为函数指针传递给 CUDA 中的 __global__ 或 __device__ 函数

Question

我目前正在开发一个 CPU 函数的 GPU 版本（例如函数 Calc(int a, int b, double* c, double* d, CalcInvFunction GetInv)），其中有一个主机函数是作为函数指针传入的（在上面的示例中，GetInv 就是 CalcInvFunction 类型的主机函数）。我的问题是：如果我必须把 Calc() 函数完全放到 GPU 上执行，就需要把 GetInv 函数作为函数指针参数传递给设备函数或内核函数，这可能吗？

Answer 1

是的，对于 Calc 的 GPU 实现，您应该将 GetInv 作为 __device__ 函数指针传递。

这是可能的，这里有一些有效的例子：

Ex. 1

Ex. 3

上面的大部分示例都演示了将设备函数指针一路带回主机代码。对于您的特定情况，这可能不是必需的。但是从上面可以看出如何获取 __device__ 函数指针（在设备代码中）并在内核中使用它。

Answer 2

最后，我已经能够在 cuda 内核函数（__global__ 函数）中将主机函数作为函数指针传递。感谢 Robert Crovella 和 njuffa 的回答。我已经能够将 class 成员函数（cpu 函数）作为函数指针传递给 cuda 内核。但是，主要问题是，我只能传递静态 class 成员函数。我无法传递未声明为静态的函数。例如：

/**/ __host__ __device__ static int CellfunPtr( void*ptr, int a ); /**/

上面的函数之所以可以工作，是因为这个成员函数被声明为静态成员函数。如果我不把它声明为静态成员函数，例如： /**/ __host__ __device__ int CellfunPtr( void*ptr, int a ); /**/

那就不行了：非静态成员函数的指针类型是 int (CalcVars::*)(void*, int)，它无法转换为普通函数指针类型 pFunc_t。

完整代码有四个文件

第一个文件

/*start of fundef.h file*/

/* Callback type: pointer to a function that receives an opaque pointer and an
   int, and returns an int. */
typedef int (*pFunc_t)(void* context, int value);

/*end of fundef.h file*/

第二个文件

/*start of solver.h file*/

    /*
     * Stores four problem-size counters and two heap-allocated double arrays,
     * and declares the routine (CalcAdv) that drives the GPU demo plus the
     * static callback (CellfunPtr) handed to the kernel as a function pointer.
     */
    class CalcVars {

        /* Size counters; private because they precede any access specifier. */
        int eqnCount;
        int numCell;
        int numTri;
        int numTet;

    public:
        /* Work arrays; allocated in the constructor and released in the
           destructor (both defined in solver.cu). */
        double* cellVel;
        double* cellPre;

        /** Constructor: receives the four size counters. */
        CalcVars(const int eqnCount_,
                 const int numCell_,
                 const int numTri_,
                 const int numTet_);

        /** Destructor */
        ~CalcVars();

        /* Runs the host-side sequence that launches the kernel. */
        void CalcAdv();

        /*
         * Callback compatible with pFunc_t. It is static, so its address is an
         * ordinary function pointer rather than a pointer-to-member.
         */
        __host__ __device__ static int CellfunPtr(void* ptr, int a);
    };

/*end of solver.h file*/

第三个文件

/*start of solver.cu file*/

     #include <cstdio>
     #include <cstdlib>

     #include "solver.h"
     /* Device-side variable initialised with the address of the static member
        CalcVars::CellfunPtr. The type pFunc_t is the typedef from fundef.h. */
     __device__ pFunc_t pF1_d = &CalcVars::CellfunPtr;

     /* Host-side variable of the same pointer type. CalcAdv fills it from pF1_d
        with cudaMemcpyFromSymbol and then passes it to the kernel. */
     pFunc_t pF1_h;


    /*
     * Each thread replaces a[threadIdx.x] with func(thisPtr_, a[threadIdx.x]).
     *
     * a        : int buffer read and written by the kernel.
     * func     : function pointer invoked from device code.
     * thisPtr_ : forwarded unchanged as the first argument of func; the kernel
     *            itself never dereferences it.
     *
     * Only threadIdx.x is used as the index and there is no bounds check, so the
     * launch must not use more threads per block than `a` has elements.
     */
    __global__ void kernel(int* a, pFunc_t func, void* thisPtr_)
    {
        const int idx = threadIdx.x;
        const int current = a[idx];
        a[idx] = func(thisPtr_, current);
    }

    /* Constructor */

    /*
     * Stores the four size counters and allocates the two zero-filled work
     * arrays, each with eqnCount_ doubles.
     *
     * Fix: numTet is now initialised from numTet_; previously the argument was
     * ignored and the member was left indeterminate.
     *
     * NOTE(review): the calloc results are not checked, so cellVel/cellPre may
     * be null if allocation fails; a negative eqnCount_ is converted to a very
     * large size_t.
     */
    CalcVars::CalcVars(
        const int eqnCount_,
        const int numCell_,
        const int numTri_,
        const int numTet_
    )
        : eqnCount(eqnCount_),
          numCell(numCell_),
          numTri(numTri_),
          numTet(numTet_),
          cellVel((double*) calloc((size_t) eqnCount_, sizeof(double))),
          cellPre((double*) calloc((size_t) eqnCount_, sizeof(double)))
    {
    }

    /* Destructor */

    /* Releases the two work arrays with free(). */
    CalcVars::~CalcVars()
    {
        free(cellPre);
        free(cellVel);
    }


    /* Prints a diagnostic on stderr when `status` is not cudaSuccess;
       returns true when the call succeeded. */
    static bool
    cudaCallOk(cudaError_t status, const char* what)
    {
        if (status == cudaSuccess) {
            return true;
        }
        fprintf(stderr, "CalcAdv: %s failed: %s\n", what, cudaGetErrorString(status));
        return false;
    }

    /*
     * Demo driver: fills a 50-element int buffer with 0..49, copies it to the
     * device, fetches the function pointer stored in the device symbol pF1_d
     * into pF1_h, launches `kernel` with one block of 42 threads, and copies
     * the buffer back to the host.
     *
     * Fixes over the previous version:
     *   - the host buffer and the device buffer are now released on every path
     *     (both were leaked before);
     *   - every CUDA runtime call and the kernel launch are checked; after the
     *     first failure the remaining steps are skipped and a message is
     *     printed to stderr.
     *
     * `this` is passed to the kernel as an opaque value. It is a host address,
     * so the callback must not dereference it in device code.
     */
    void
    CalcVars::CalcAdv(
    ){
        const int Num = 50;          /* elements in the buffer */
        const int numThreads = 42;   /* threads launched; elements 42..49 stay untouched */
        const size_t numBytes = Num * sizeof(int);

        int* a1 = (int*) malloc(numBytes);
        if (a1 == NULL) {
            fprintf(stderr, "CalcAdv: host allocation of %d ints failed\n", Num);
            return;
        }
        for (int i = 0; i < Num; i++) {
            a1[i] = i;
        }

        int* a1_dev = NULL;
        bool ok = cudaCallOk(cudaMalloc((void**)&a1_dev, numBytes), "cudaMalloc");

        ok = ok && cudaCallOk(
            cudaMemcpy(a1_dev, a1, numBytes, cudaMemcpyHostToDevice),
            "cudaMemcpy (host to device)");

        /* Copy the address of the device function to the host. */
        ok = ok && cudaCallOk(
            cudaMemcpyFromSymbol(&pF1_h, pF1_d, sizeof(pFunc_t)),
            "cudaMemcpyFromSymbol");

        if (ok) {
            kernel<<<1, numThreads>>>(a1_dev, pF1_h, this);
            /* A launch does not return a status; query it explicitly. */
            ok = cudaCallOk(cudaGetLastError(), "kernel launch");
        }

        /* Errors raised while the kernel runs surface at this synchronisation. */
        ok = ok && cudaCallOk(cudaDeviceSynchronize(), "cudaDeviceSynchronize");

        ok = ok && cudaCallOk(
            cudaMemcpy(a1, a1_dev, numBytes, cudaMemcpyDeviceToHost),
            "cudaMemcpy (device to host)");

        /* cudaFree accepts a null pointer, so this is safe even if cudaMalloc failed. */
        (void) cudaFree(a1_dev);
        free(a1);
    }


    /*
     * Demo callback matching pFunc_t: prints a fixed message and returns a - 1.
     * `ptr` is accepted for signature compatibility but is never read here.
     */
    int
    CalcVars::CellfunPtr(
        void* ptr, int a
    ){
        (void) ptr;
        printf("Printing from CPU function\n");
        const int decremented = a - 1;
        return decremented;
    }

/*end of solver.cu file*/

第四个文件

/*start of main.cpp file*/

    #include "solver.h"


    /*
     * Program entry point: builds a CalcVars with fixed sizes, runs CalcAdv
     * once, then waits via system("pause").
     *
     * Fix: the CalcVars object used to be created with `new` and never deleted,
     * so its destructor never ran and its arrays were leaked. It is now an
     * automatic object that is destroyed when main returns.
     */
    int main(){

        const int n_Eqn = 100;
        const int n_cell = 200;
        const int n_tri = 300;
        const int n_tetra = 400;

        CalcVars calcvars(n_Eqn, n_cell, n_tri, n_tetra);

        calcvars.CalcAdv();

        /* `pause` is a Windows shell command; it keeps the console open. */
        system("pause");

        return 0;
    }

/*end of main.cpp file*/

将主机函数作为 global 中的函数指针或 CUDA 中的 device 函数传递

Passing Host Function as a function pointer in global OR device function in CUDA

cuda

gpu

gpgpu

将主机函数作为 __global__ 中的函数指针或 CUDA 中的 __device__ 函数传递

Passing Host Function as a function pointer in __global__ OR __device__ function in CUDA

cuda

gpu

gpgpu

将主机函数作为 global 中的函数指针或 CUDA 中的 device 函数传递

Passing Host Function as a function pointer in global OR device function in CUDA