使用 CUDA 仅将 1/3 的 RGB 图像转换为灰度

Converting only 1/3 of RGB image to greyscale using CUDA

我正在尝试使用 CUDA 将 RGB 图像转换为灰度图像。我想用 stbi_load 读取图像,将其传递给我调用内核的 convertToGreyscale() 并将图像保存在 unsigned char* 中——从那里我可以使用它和应用可变阈值、索贝尔、多阈值等。问题是只有 1/3 的图像正在处理并且实际上受到内核(灰度内核)的影响。 这是我的内核:

__global__ void greyscale(unsigned char* originalImg, unsigned char* d_greyImg, int width, int height, int channels) {

    int x = threadIdx.x + blockIdx.x * blockDim.x;
    int y = threadIdx.y + blockIdx.y * blockDim.y;
    unsigned int id = x + y * width;
    if (x < width && y < height) {
        unsigned char r = originalImg[id];
        unsigned char g = originalImg[id + 1];
        unsigned char b = originalImg[id + 2];
        int offset = (r+g+b) / channels;
        for (int i = 0; i < channels; i++) {
            d_greyImg[id + i] = offset;
        }
    }
}

这是代码的另一部分:

#define STB_IMAGE_IMPLEMENTATION
#include "stb_image.h"
#define STB_IMAGE_WRITE_IMPLEMENTATION
#include "stb_image_write.h"

#include "cuda_runtime.h"
#include "cuda_runtime_api.h"

#include "device_launch_parameters.h"

#include <stdio.h>
#define THREADS 8

void convertToGreyscale(unsigned char* originalImg, unsigned char* greyImg, int width, int height, int channels)
{
    unsigned char* d_originalImg = NULL;
    unsigned char* d_greyImg = NULL;
    int size = width * height * channels * sizeof(unsigned char);

    cudaMalloc(&d_originalImg, size);
    cudaMalloc(&d_greyImg, size);

    cudaMemcpy(d_originalImg, originalImg, size, cudaMemcpyHostToDevice);
    cudaMemcpy(d_greyImg, greyImg, size, cudaMemcpyHostToDevice);

    dim3 dimBlock(THREADS, THREADS);
    dim3 dimGrid(width / dimBlock.x, height / dimBlock.y);
    greyscale << <dimGrid, dimBlock >> > (d_originalImg, d_greyImg, width, height, channels);

    cudaMemcpy(greyImg, d_greyImg, size, cudaMemcpyDeviceToHost);

    cudaFree(d_originalImg);
}

void sobelFilter(unsigned char* originalImg, unsigned char* sobelImg, int width, int height, int channels)
{
    dim3 dimBlock(THREADS, THREADS, 1);
    dim3 dimGrid(width / dimBlock.x, height / dimBlock.y);
    unsigned char* d_originalImg = NULL;
    int size = width * height;

    cudaMalloc(&d_originalImg, size * channels * sizeof(unsigned char));

    cudaMemcpy(d_originalImg, originalImg, size * channels * sizeof(unsigned char), cudaMemcpyHostToDevice);

    sobel << <dimGrid, dimBlock >> > (d_originalImg, width, height);

    cudaMemcpy(sobelImg, d_originalImg, size * channels , cudaMemcpyDeviceToHost);

    cudaFree(d_originalImg);
}

int main()
{
    // read the image
    int width, height, channels;
    unsigned char* originalImg = stbi_load("lenna.png", &width, &height, &channels, 0);

    size_t img_size = width * height * channels;
    unsigned char* greyImg = (unsigned char*) malloc(img_size);
    unsigned char* sobelImg = (unsigned char*) malloc(img_size);

    convertToGreyscale(originalImg, greyImg, width, height, channels);
    stbi_write_jpg("greyscale.png", width, height, channels, greyImg, 100);

    /*sobelFilter(originalImg, sobelImg, width, height, channels);
    stbi_write_jpg("sobel.png", width, height, channels, sobelImg, 100);*/

    return 0;

}

您可能想要 dimGrid((width+dimBlock.x-1) / dimBlock.x, (height+dimBlock.y-1) / dimBlock.y); 以确保您不会错过角落。

我认为主要问题是在不考虑通道的情况下读取您的输入图像。 您的线程将具有值 x,但在某些时候也具有值 x+1(和相同的 y)。但是,如果您了解如何定义 id,您会注意到您不是 3 乘 3 地读取,而是 1 乘 1 地读取,因为这两个线程将具有 idid+1作为价值。您需要确保 id 的计算考虑了 channels,但请注意,它不适用于 grryscale,您需要一个不同的(或除以 3)。