从模板中提取原始数据以用于 CUDA

Question

以下代码是 PCL（点云）库中的一个片段。它计算图像的积分和。

template <class DataType, unsigned Dimension> class IntegralImage2D
{
    public:
        // Number of components per element, taken from the Dimension template argument.
        static const unsigned dim_fst = Dimension;
        // Element type of the first-order table: a cv::Vec holding dim_fst values
        // of TypeTraits<DataType>::IntegralType.
        typedef cv::Vec<typename TypeTraits<DataType>::IntegralType, dim_fst> FirstType;
        // Flat storage read by getFirstOrderSum(), which indexes it with a row
        // stride of (wdt + 1) elements.
        std::vector<FirstType>  img_fst;

        //.... lots of methods missing here that actually calculate the integral sum

        /** \brief Sum of the first-order integral image over an axis-aligned rectangle.
          *
          * Reads the four corner entries of img_fst (consecutive rows are
          * (wdt + 1) elements apart) and combines them as
          * lower_right + upper_left - upper_right - lower_left.
          * The computed indices are not bounds-checked.
          *
          * \param[in] start_x x position of rectangle
          * \param[in] start_y y position of rectangle
          * \param[in] width width of rectangle
          * \param[in] height height of rectangle
          * \return the per-component sum as a FirstType
          */
        inline FirstType getFirstOrderSum(unsigned start_x, unsigned start_y, unsigned width, unsigned height) const
        {
            // Offsets of the first element of the rectangle's top and bottom rows.
            const unsigned top_row    = start_y * (wdt + 1);
            const unsigned bottom_row = (start_y + height) * (wdt + 1);

            const FirstType& top_left     = img_fst[top_row + start_x];
            const FirstType& top_right    = img_fst[top_row + start_x + width];
            const FirstType& bottom_left  = img_fst[bottom_row + start_x];
            const FirstType& bottom_right = img_fst[bottom_row + start_x + width];

            // Same evaluation order as the classic four-corner lookup.
            return bottom_right + top_left - top_right - bottom_left;
        }

目前使用以下代码获取结果：

// Integral image instantiated for float input with 3 components per element.
IntegralImage2D<float,3> iim_xyz;

// Holders for the first- and second-order rectangle sums.
IntegralImage2D<float, 3>::FirstType  fo_elements;
IntegralImage2D<float, 3>::SecondType so_elements;
// Both queries use the same rectangle: origin (pos_x - rec_wdt_2, pos_y - rec_hgt_2),
// size rec_wdt x rec_hgt. Presumably rec_wdt_2 / rec_hgt_2 are half extents so the
// rectangle is centred on (pos_x, pos_y) -- confirm against the caller.
fo_elements = iim_xyz.getFirstOrderSum(pos_x - rec_wdt_2, pos_y - rec_hgt_2, rec_wdt, rec_hgt);
so_elements = iim_xyz.getSecondOrderSum(pos_x - rec_wdt_2, pos_y - rec_hgt_2, rec_wdt, rec_hgt);

但是我正在尝试并行化这段代码（将 getFirstOrderSum 改写为 CUDA 设备函数）。由于 CUDA 无法识别 FirstType 和 SecondType 这些对象（以及任何 OpenCV 对象），而我又是 C++ 新手，因此在从模板中提取原始数据时遇到了困难。

如果可能的话，我想将 img_fst 对象转换为我可以在 cuda 内核上分配的某种向量或数组。

看起来 img_fst 的类型是 std::vector<cv::Matx<double,3,1>>。

Answer 1

事实证明，您可以像使用普通的 std::vector 一样传递原始数据。

/**
 * Uploads the first-order integral image table of iim_xyz to the GPU.
 *
 * Allocates a device buffer sized for iim_xyz.img_fst and copies the host
 * data into it with a blocking cudaMemcpy. When the table is empty nothing
 * is allocated or copied and the device pointer stays null.
 *
 * NOTE(review): iim_xyz is taken by value, so the whole table is copied on
 * the host for every call; a const reference would avoid that.
 *
 * \param iim_xyz integral image whose img_fst vector is uploaded
 */
void computation(ps::IntegralImage2D<float, 3> iim_xyz){
    cv::Vec<double, 3>* d_img_fst = 0;

    // Compute the transfer size once so the allocation and the copy cannot disagree.
    const size_t num_elements = iim_xyz.img_fst.size();
    const size_t num_bytes    = sizeof(cv::Vec<double, 3>) * num_elements;

    // Taking &img_fst[0] on an empty vector is undefined behaviour, so skip
    // the upload when there is nothing to copy.
    if (num_elements > 0) {
        cudaErrorCheck(cudaMalloc((void**)&d_img_fst, num_bytes));
        cudaErrorCheck(cudaMemcpy(d_img_fst, &iim_xyz.img_fst[0], num_bytes, cudaMemcpyHostToDevice));
    }

    // NOTE(review): the elided code below is expected to release d_img_fst
    // with cudaFree once the kernels using it have finished -- not visible here.
//..
}

/**
 * Device-side first-order rectangle sum over a 3-component integral image.
 *
 * Looks up the four corner entries of img_fst (consecutive rows are
 * (wdt + 1) elements apart) and, for each of the 3 components, evaluates
 * lower_right + upper_left - upper_right - lower_left.
 * The computed indices are not bounds-checked.
 *
 * \param start_x x position of the rectangle
 * \param start_y y position of the rectangle
 * \param width   width of the rectangle
 * \param height  height of the rectangle
 * \param wdt     image width used to derive the row stride (wdt + 1)
 * \param img_fst integral image table; it is dereferenced here, so it must
 *                point to memory readable from device code
 * \param out     optional caller-provided buffer with room for 3 doubles.
 *                When non-null the result is written there and no heap
 *                allocation takes place, so there is nothing to free.
 * \return out when it was supplied. Otherwise a new double[3] array that the
 *         caller must release with delete[], or 0 if that allocation failed.
 */
__device__ double* getFirstOrderSum(unsigned start_x, unsigned start_y, unsigned width, unsigned height, int wdt, cv::Vec<double, 3>* img_fst, double* out = 0)
{
    const unsigned upper_left_idx = start_y * (wdt + 1) + start_x;
    const unsigned upper_right_idx = upper_left_idx + width;
    const unsigned lower_left_idx = (start_y + height) * (wdt + 1) + start_x;
    const unsigned lower_right_idx = lower_left_idx + width;

    // Prefer the caller's buffer (typically a local array); fall back to the
    // original heap-allocating behaviour for existing callers.
    double* result = out ? out : new double[3];

    // Device-side new draws from the limited device heap; do not write
    // through the pointer if the allocation did not succeed.
    if (!result)
        return 0;

    #pragma unroll
    for (int c = 0; c < 3; ++c)
    {
        result[c] = img_fst[lower_right_idx].val[c] + img_fst[upper_left_idx].val[c]
                  - img_fst[upper_right_idx].val[c] - img_fst[lower_left_idx].val[c];
    }

    return result;
}

从模板中提取原始数据以用于 CUDA

Extracting raw data from template for use in CUDA

c++

templates

opencv

cuda