使用 CUDA 仅将 1/3 的 RGB 图像转换为灰度
Converting only 1/3 of RGB image to greyscale using CUDA
我正在尝试使用 CUDA 将 RGB 图像转换为灰度图像。我想用 stbi_load 读取图像,将其传递给我调用内核的 convertToGreyscale() 并将图像保存在 unsigned char* 中——从那里我可以使用它和应用可变阈值、索贝尔、多阈值等。问题是只有 1/3 的图像正在处理并且实际上受到内核(灰度内核)的影响。
这是我的内核:
__global__ void greyscale(unsigned char* originalImg, unsigned char* d_greyImg, int width, int height, int channels) {
int x = threadIdx.x + blockIdx.x * blockDim.x;
int y = threadIdx.y + blockIdx.y * blockDim.y;
unsigned int id = x + y * width;
if (x < width && y < height) {
unsigned char r = originalImg[id];
unsigned char g = originalImg[id + 1];
unsigned char b = originalImg[id + 2];
int offset = (r+g+b) / channels;
for (int i = 0; i < channels; i++) {
d_greyImg[id + i] = offset;
}
}
}
这是代码的另一部分:
#define STB_IMAGE_IMPLEMENTATION
#include "stb_image.h"
#define STB_IMAGE_WRITE_IMPLEMENTATION
#include "stb_image_write.h"
#include "cuda_runtime.h"
#include "cuda_runtime_api.h"
#include "device_launch_parameters.h"
#include <stdio.h>
#define THREADS 8
void convertToGreyscale(unsigned char* originalImg, unsigned char* greyImg, int width, int height, int channels)
{
unsigned char* d_originalImg = NULL;
unsigned char* d_greyImg = NULL;
int size = width * height * channels * sizeof(unsigned char);
cudaMalloc(&d_originalImg, size);
cudaMalloc(&d_greyImg, size);
cudaMemcpy(d_originalImg, originalImg, size, cudaMemcpyHostToDevice);
cudaMemcpy(d_greyImg, greyImg, size, cudaMemcpyHostToDevice);
dim3 dimBlock(THREADS, THREADS);
dim3 dimGrid(width / dimBlock.x, height / dimBlock.y);
greyscale << <dimGrid, dimBlock >> > (d_originalImg, d_greyImg, width, height, channels);
cudaMemcpy(greyImg, d_greyImg, size, cudaMemcpyDeviceToHost);
cudaFree(d_originalImg);
}
void sobelFilter(unsigned char* originalImg, unsigned char* sobelImg, int width, int height, int channels)
{
dim3 dimBlock(THREADS, THREADS, 1);
dim3 dimGrid(width / dimBlock.x, height / dimBlock.y);
unsigned char* d_originalImg = NULL;
int size = width * height;
cudaMalloc(&d_originalImg, size * channels * sizeof(unsigned char));
cudaMemcpy(d_originalImg, originalImg, size * channels * sizeof(unsigned char), cudaMemcpyHostToDevice);
sobel << <dimGrid, dimBlock >> > (d_originalImg, width, height);
cudaMemcpy(sobelImg, d_originalImg, size * channels , cudaMemcpyDeviceToHost);
cudaFree(d_originalImg);
}
int main()
{
// read the image
int width, height, channels;
unsigned char* originalImg = stbi_load("lenna.png", &width, &height, &channels, 0);
size_t img_size = width * height * channels;
unsigned char* greyImg = (unsigned char*) malloc(img_size);
unsigned char* sobelImg = (unsigned char*) malloc(img_size);
convertToGreyscale(originalImg, greyImg, width, height, channels);
stbi_write_jpg("greyscale.png", width, height, channels, greyImg, 100);
/*sobelFilter(originalImg, sobelImg, width, height, channels);
stbi_write_jpg("sobel.png", width, height, channels, sobelImg, 100);*/
return 0;
}
您可能想要 dimGrid((width+dimBlock.x-1) / dimBlock.x, (height+dimBlock.y-1) / dimBlock.y);
以确保您不会错过角落。
我认为主要问题是在不考虑通道的情况下读取您的输入图像。
您的线程将具有值 x
,但在某些时候也具有值 x+1
(和相同的 y
)。但是,如果您了解如何定义 id
,您会注意到您不是 3 乘 3 地读取,而是 1 乘 1 地读取,因为这两个线程将具有 id
和 id+1
作为价值。您需要确保 id
的计算考虑了 channels
,但请注意,它不适用于 grryscale,您需要一个不同的(或除以 3)。
我正在尝试使用 CUDA 将 RGB 图像转换为灰度图像。我想用 stbi_load 读取图像,将其传递给我调用内核的 convertToGreyscale() 并将图像保存在 unsigned char* 中——从那里我可以使用它和应用可变阈值、索贝尔、多阈值等。问题是只有 1/3 的图像正在处理并且实际上受到内核(灰度内核)的影响。 这是我的内核:
__global__ void greyscale(unsigned char* originalImg, unsigned char* d_greyImg, int width, int height, int channels) {
int x = threadIdx.x + blockIdx.x * blockDim.x;
int y = threadIdx.y + blockIdx.y * blockDim.y;
unsigned int id = x + y * width;
if (x < width && y < height) {
unsigned char r = originalImg[id];
unsigned char g = originalImg[id + 1];
unsigned char b = originalImg[id + 2];
int offset = (r+g+b) / channels;
for (int i = 0; i < channels; i++) {
d_greyImg[id + i] = offset;
}
}
}
这是代码的另一部分:
#define STB_IMAGE_IMPLEMENTATION
#include "stb_image.h"
#define STB_IMAGE_WRITE_IMPLEMENTATION
#include "stb_image_write.h"
#include "cuda_runtime.h"
#include "cuda_runtime_api.h"
#include "device_launch_parameters.h"
#include <stdio.h>
#define THREADS 8
void convertToGreyscale(unsigned char* originalImg, unsigned char* greyImg, int width, int height, int channels)
{
unsigned char* d_originalImg = NULL;
unsigned char* d_greyImg = NULL;
int size = width * height * channels * sizeof(unsigned char);
cudaMalloc(&d_originalImg, size);
cudaMalloc(&d_greyImg, size);
cudaMemcpy(d_originalImg, originalImg, size, cudaMemcpyHostToDevice);
cudaMemcpy(d_greyImg, greyImg, size, cudaMemcpyHostToDevice);
dim3 dimBlock(THREADS, THREADS);
dim3 dimGrid(width / dimBlock.x, height / dimBlock.y);
greyscale << <dimGrid, dimBlock >> > (d_originalImg, d_greyImg, width, height, channels);
cudaMemcpy(greyImg, d_greyImg, size, cudaMemcpyDeviceToHost);
cudaFree(d_originalImg);
}
void sobelFilter(unsigned char* originalImg, unsigned char* sobelImg, int width, int height, int channels)
{
dim3 dimBlock(THREADS, THREADS, 1);
dim3 dimGrid(width / dimBlock.x, height / dimBlock.y);
unsigned char* d_originalImg = NULL;
int size = width * height;
cudaMalloc(&d_originalImg, size * channels * sizeof(unsigned char));
cudaMemcpy(d_originalImg, originalImg, size * channels * sizeof(unsigned char), cudaMemcpyHostToDevice);
sobel << <dimGrid, dimBlock >> > (d_originalImg, width, height);
cudaMemcpy(sobelImg, d_originalImg, size * channels , cudaMemcpyDeviceToHost);
cudaFree(d_originalImg);
}
int main()
{
// read the image
int width, height, channels;
unsigned char* originalImg = stbi_load("lenna.png", &width, &height, &channels, 0);
size_t img_size = width * height * channels;
unsigned char* greyImg = (unsigned char*) malloc(img_size);
unsigned char* sobelImg = (unsigned char*) malloc(img_size);
convertToGreyscale(originalImg, greyImg, width, height, channels);
stbi_write_jpg("greyscale.png", width, height, channels, greyImg, 100);
/*sobelFilter(originalImg, sobelImg, width, height, channels);
stbi_write_jpg("sobel.png", width, height, channels, sobelImg, 100);*/
return 0;
}
您可能想要 dimGrid((width+dimBlock.x-1) / dimBlock.x, (height+dimBlock.y-1) / dimBlock.y);
以确保您不会错过角落。
我认为主要问题是在不考虑通道的情况下读取您的输入图像。
您的线程将具有值 x
,但在某些时候也具有值 x+1
(和相同的 y
)。但是,如果您了解如何定义 id
,您会注意到您不是 3 乘 3 地读取,而是 1 乘 1 地读取,因为这两个线程将具有 id
和 id+1
作为价值。您需要确保 id
的计算考虑了 channels
,但请注意,它不适用于 grryscale,您需要一个不同的(或除以 3)。