对结构体中指向数组的指针调用 cudaFree 时出现 CUDA 段错误
CUDA Segmentation fault on cudaFree pointer to array in struct
我在 CUDA 设备上有一个结构体,它包含一个指向数组的指针。计算、访问元素等一切正常,但是当我想做个好孩子(规范地释放内存)并调用
cudaFree(my_struct->pointer_to_array)
时,我遇到了段错误(Segmentation fault)。而调用 cudaFree(my_struct)
却一切正常。我是不是遗漏了什么?
请看下面的最小示例:
#include <stdio.h>
#include <cuda.h>
#include <cuda_runtime.h>
#include <cassert>
// Small aggregate that is copied to the device by value; the kernel reaches
// the array through the embedded pointer.
typedef struct DummyStruct {
    int    n;    // integer field; the example sets it to 0 and never reads it
    float *arr;  // pointer to the float array (holds a device address once uploaded)
} DummyStruct;
// Debug kernel: prints the first element of the array referenced by d->arr.
// Both d and d->arr are dereferenced in device code, so both must be
// addresses the device can access. There is no thread guard, so every
// launched thread prints.
__global__ void check(DummyStruct *d) {
    const float *values = d->arr;
    const float first = values[0];
    printf("EL %f", first);
}
// Minimal example: builds a DummyStruct whose `arr` member points to a device
// array, uploads the struct to the device, reads it from a kernel, and then
// releases both device allocations.
//
// Returns 0 on success. Every host allocation and CUDA call is checked with
// assert (checks vanish if compiled with NDEBUG).
int main() {
    cudaError_t status;

    // create host pointer to dummy struct
    DummyStruct *dummy = (DummyStruct *)malloc(sizeof(DummyStruct));
    assert( dummy != NULL );
    const int arr_size = 32;
    dummy->n = 0;

    // host staging buffer, filled with 0, 1, ..., arr_size - 1
    float *arr = (float *)malloc(sizeof(float) * arr_size);
    assert( arr != NULL );
    for (int i = 0; i < arr_size; i++) {
        arr[i] = (float)i;
    }

    // allocate device array
    float *d_arr = NULL;
    status = cudaMalloc(&d_arr, arr_size * sizeof(float));
    assert( status == cudaSuccess );
    status = cudaMemcpy(d_arr, arr, arr_size * sizeof(float), cudaMemcpyHostToDevice);
    assert( status == cudaSuccess );
    free(arr);

    // for some reason this should happen here and not d_sp->coeff = d_coeff ...
    // (the reason: the embedded pointer has to be written into the HOST copy
    // of the struct before the upload; assigning through the device pointer
    // from host code would dereference device memory on the host)
    dummy->arr = d_arr;

    // allocate and ship struct to device
    DummyStruct *d_dummy = NULL;
    status = cudaMalloc(&d_dummy, sizeof(DummyStruct));
    assert( status == cudaSuccess );
    status = cudaMemcpy(d_dummy, dummy, sizeof(DummyStruct), cudaMemcpyHostToDevice);
    assert( status == cudaSuccess );

    // free host struct (d_arr still holds the device array address)
    free(dummy);

    // check whether array access works
    check<<<1, 1>>>(d_dummy);
    status = cudaGetLastError();        // reports launch errors
    assert( status == cudaSuccess );
    status = cudaDeviceSynchronize();   // waits for the kernel; reports execution
                                        // errors and flushes device printf output
    assert( status == cudaSuccess );

    // cudaFree(d_dummy->arr) causes Segmentation fault (core dumped):
    // d_dummy is a device pointer, and evaluating d_dummy->arr dereferences it
    // in host code. The host already knows the same address as d_arr, so the
    // embedded array is released through that pointer instead.
    status = cudaFree(d_arr);
    assert( status == cudaSuccess );
    status = cudaFree(d_dummy);
    assert( status == cudaSuccess );

    return 0;
}
这条语句:
status = cudaFree(d_dummy->arr);
需要在主机(host)代码中解引用设备(device)指针 d_dummy
(它是用设备分配器 cudaMalloc 分配的)。
这在 CUDA 中是非法的。
由于您已经知道 (d_dummy->arr) == d_arr,
释放该嵌入指针的一种可行方法是:
status = cudaFree(d_arr);
同样的道理(不能在主机代码中解引用设备指针)也正是代码中下面这条注释背后的原因:
// for some reason this should happen here and not d_sp->coeff = d_coeff ...
我在 CUDA 设备上有一个结构体,它包含一个指向数组的指针。计算、访问元素等一切正常,但是当我想做个好孩子(规范地释放内存)并调用
cudaFree(my_struct->pointer_to_array)
时,我遇到了段错误(Segmentation fault)。而调用 cudaFree(my_struct)
却一切正常。我是不是遗漏了什么?
请看下面的最小示例:
#include <stdio.h>
#include <cuda.h>
#include <cuda_runtime.h>
#include <cassert>
// Small aggregate that is copied to the device by value; the kernel reaches
// the array through the embedded pointer.
typedef struct DummyStruct {
    int    n;    // integer field; the example sets it to 0 and never reads it
    float *arr;  // pointer to the float array (holds a device address once uploaded)
} DummyStruct;
// Debug kernel: prints the first element of the array referenced by d->arr.
// Both d and d->arr are dereferenced in device code, so both must be
// addresses the device can access. There is no thread guard, so every
// launched thread prints.
__global__ void check(DummyStruct *d) {
    const float *values = d->arr;
    const float first = values[0];
    printf("EL %f", first);
}
// Minimal example: builds a DummyStruct whose `arr` member points to a device
// array, uploads the struct to the device, reads it from a kernel, and then
// releases both device allocations.
//
// Returns 0 on success. Every host allocation and CUDA call is checked with
// assert (checks vanish if compiled with NDEBUG).
int main() {
    cudaError_t status;

    // create host pointer to dummy struct
    DummyStruct *dummy = (DummyStruct *)malloc(sizeof(DummyStruct));
    assert( dummy != NULL );
    const int arr_size = 32;
    dummy->n = 0;

    // host staging buffer, filled with 0, 1, ..., arr_size - 1
    float *arr = (float *)malloc(sizeof(float) * arr_size);
    assert( arr != NULL );
    for (int i = 0; i < arr_size; i++) {
        arr[i] = (float)i;
    }

    // allocate device array
    float *d_arr = NULL;
    status = cudaMalloc(&d_arr, arr_size * sizeof(float));
    assert( status == cudaSuccess );
    status = cudaMemcpy(d_arr, arr, arr_size * sizeof(float), cudaMemcpyHostToDevice);
    assert( status == cudaSuccess );
    free(arr);

    // for some reason this should happen here and not d_sp->coeff = d_coeff ...
    // (the reason: the embedded pointer has to be written into the HOST copy
    // of the struct before the upload; assigning through the device pointer
    // from host code would dereference device memory on the host)
    dummy->arr = d_arr;

    // allocate and ship struct to device
    DummyStruct *d_dummy = NULL;
    status = cudaMalloc(&d_dummy, sizeof(DummyStruct));
    assert( status == cudaSuccess );
    status = cudaMemcpy(d_dummy, dummy, sizeof(DummyStruct), cudaMemcpyHostToDevice);
    assert( status == cudaSuccess );

    // free host struct (d_arr still holds the device array address)
    free(dummy);

    // check whether array access works
    check<<<1, 1>>>(d_dummy);
    status = cudaGetLastError();        // reports launch errors
    assert( status == cudaSuccess );
    status = cudaDeviceSynchronize();   // waits for the kernel; reports execution
                                        // errors and flushes device printf output
    assert( status == cudaSuccess );

    // cudaFree(d_dummy->arr) causes Segmentation fault (core dumped):
    // d_dummy is a device pointer, and evaluating d_dummy->arr dereferences it
    // in host code. The host already knows the same address as d_arr, so the
    // embedded array is released through that pointer instead.
    status = cudaFree(d_arr);
    assert( status == cudaSuccess );
    status = cudaFree(d_dummy);
    assert( status == cudaSuccess );

    return 0;
}
这条语句:
status = cudaFree(d_dummy->arr);
需要在主机(host)代码中解引用设备(device)指针 d_dummy
(它是用设备分配器 cudaMalloc 分配的)。
这在 CUDA 中是非法的。
由于您已经知道 (d_dummy->arr) == d_arr,
释放该嵌入指针的一种可行方法是:
status = cudaFree(d_arr);
同样的道理(不能在主机代码中解引用设备指针)也正是代码中下面这条注释背后的原因:
// for some reason this should happen here and not d_sp->coeff = d_coeff ...