无法在 CUDA 内核中打印设备变量值

Unable to print device variable value within kernel in CUDA

我想了解指针在 CUDA 中的功能。

下面给出了一个简单的程序,它为设备中分配的变量赋值。 (我在 NVIDIA Quadro K2000 显卡上使用 CUDA 工具包 8.0)

当我使用 printf() 在内核中打印值时,它显示了错误的值。

但是,当我从主机函数执行 cudaMemcopy 然后从主机函数打印时,它显示正确的值...

//CODE...
#include <stdio.h>
#include <stdlib.h>
#include<cuda.h>
#include<cuda_runtime.h>
#define N 3

__global__ void tempker(int *jk,int value)
{
    (*jk) = value*2;
    printf("displayed from inside the kernel :\nvalue of jk = %d\nvalue of *jk = %d\n",jk,*jk);

}
int tempfunc(int *kp)
{
    int *jk = NULL,*lm=NULL;
    lm = (int *)(malloc(sizeof(int)));
    *lm = 150;
    cudaError_t err = cudaSuccess;
    cudaMalloc((void**)&jk, sizeof(int));
    printf("jk pointer after cudaMalloc: displayed from host = %d\n",jk);
    tempker<<<1,1>>>(jk,150);
    err = cudaGetLastError();//brief Returns the last error from a runtime call
    cudaDeviceSynchronize();

    err = cudaMemcpy(lm, jk, (sizeof(int)), cudaMemcpyDeviceToHost);
    if (err != cudaSuccess)
    {
        fprintf(stderr, "Failed to copy jk from device to host (error code %s)!\n", cudaGetErrorString(err));
        exit(EXIT_FAILURE);
    }
    printf("Displayed in host function after memcopy: value of *lm = *jk = %d\n",*lm);
    cudaFree(jk);
    err = cudaMalloc((void**)&kp, sizeof(int));
    if (err != cudaSuccess)
    {
        fprintf(stderr, "Failed to allocate device kp (error code %s)!\n", cudaGetErrorString(err));
        exit(EXIT_FAILURE);
    }
    tempker<<<1,1>>>(kp,(N*N*N));
    err = cudaGetLastError();//brief Returns the last error from a runtime call
    cudaDeviceSynchronize();

    err = cudaMemcpy(lm, kp, (sizeof(int)), cudaMemcpyDeviceToHost);
    if (err != cudaSuccess)
    {
        fprintf(stderr, "Failed to copy kp from device to host (error code %s)!\n", cudaGetErrorString(err));
        exit(EXIT_FAILURE);
    }
    printf("Displayed in host function after memcopy: value of *lm = *kp = %d\n",*lm);
    cudaFree(kp);
    free(lm);
    return 100;
}

int main(){

    int *kp = NULL;
    printf("tempfunc(): return value = %d\n",tempfunc(kp));
    return 0;
}

输出:

jk pointer after cudaMalloc: displayed from host = 13238272
displayed from inside the kernel :
value of jk = 13238272
value of *jk = 9
Displayed in host function after memcopy: value of *lm = *jk = 300
displayed from inside the kernel :
value of jk = 13238272
value of *jk = 9
Displayed in host function after memcopy: value of *lm = *kp = 54
tempfunc(): return value = 100

问题是: 是否可以在内核中打印设备中分配的变量值?

使用 "%d" 显示整数值被认为是正确的。 使用 "%d" 显示地址可能会导致不可预测的行为,具体取决于您的编译器。

只需使用 "%p"。因为它需要一个空指针,所以明确地转换你的指针。

所以在你的内核中:

printf("value = %d, address = %p\n",*jk,(void *)jk);

如果您进一步查看 printf,可以获得更多信息。