仅测量部分数据在 OpenCL 中的传输时间

Measure transfer time in OpenCL only for a part of the data

我正在尝试测量将部分数据(其中一个缓冲区的内容)传输到 GPU 所花费的时间。

我试过用这个:

cl_command_queue queueGPU = clCreateCommandQueue(GPUcontext, GPUdeviceIds[0], CL_QUEUE_PROFILING_ENABLE, &error);

cl_event transfer1;
clEnqueueWriteBuffer(queueGPU, data, CL_TRUE, 0, dataSize, loadedData, 0, nullptr, &transfer1);
clWaitForEvents(1, &transfer1);

cl_event transfer2;
clEnqueueWriteBuffer (queueGPU, indices, CL_TRUE, 0, sizeof(int) * queryCount, inputData, 0, nullptr, &transfer2);
clWaitForEvents(1, &transfer2);
unsigned long start = 0;
unsigned long end = 0;
clGetEventProfilingInfo(transfer2, CL_PROFILING_COMMAND_START, sizeof (cl_ulong), &start, NULL);
clGetEventProfilingInfo(transfer2, CL_PROFILING_COMMAND_END, sizeof (cl_ulong), &end, NULL);
unsigned long transferTime = end - start;

但它返回的时间与传输全部数据所用的时间完全相同。我做错了什么?

时间之所以相同,似乎是因为每个 clEnqueueWriteBuffer 调用都需要它自己的事件,而我用同一个事件测量了所有数据的传输时间。为每次调用使用各自的事件,就能得到仅其中一个缓冲区的传输时间。

这应该可以正常工作:

// Transfer time for both buffers

cl_event transfer1;
clEnqueueWriteBuffer(queueGPU, data, CL_TRUE, 0, dataSize, loadedData, 0, nullptr, &transfer1);
clWaitForEvents(1, &transfer1);
unsigned long start = 0;
unsigned long end = 0;
clGetEventProfilingInfo(transfer1, CL_PROFILING_COMMAND_START, sizeof (cl_ulong), &start, NULL);
clGetEventProfilingInfo(transfer1, CL_PROFILING_COMMAND_END, sizeof (cl_ulong), &end, NULL);
unsigned long transferTime1 = end - start;

cl_event transfer2;
clEnqueueWriteBuffer(queueGPU, indices, CL_TRUE, 0, sizeof(int) * queryCount, inputData, 0, nullptr, &transfer2);
clWaitForEvents(1, &transfer2);
clGetEventProfilingInfo(transfer2, CL_PROFILING_COMMAND_START, sizeof (cl_ulong), &start, NULL);
clGetEventProfilingInfo(transfer2, CL_PROFILING_COMMAND_END, sizeof (cl_ulong), &end, NULL);
unsigned long transferTime = end - start + transferTime1;