【发布时间】:2021-05-01 22:12:53
【问题描述】:
我正在尝试使用 stbi_image 作为库来使用 cuda 模糊图像以加载和保存图像。编译代码时没有错误,但是当我尝试查看结果时,它只是一个空白图像。那是代码。
#include "lodepng.h"
#define STB_IMAGE_IMPLEMENTATION
#define STB_IMAGE_WRITE_IMPLEMENTATION
#include "stb_image.h"
#include "stb_image_write.h"
#include <iostream>
#define BLUR_SIZE 7
#define R 0
#define G 1
#define B 2
__global__ void blurKernel(unsigned char* in, unsigned char* out, int width, int height, int num_channel, int channel) {
int col = blockIdx.x * blockDim.x + threadIdx.x;
int row = blockIdx.y * blockDim.y + threadIdx.y;
if(col < width && row < height) {
int pixVal = 0;
int pixels = 0;
for(int blurRow = -BLUR_SIZE; blurRow < BLUR_SIZE + 1; ++blurRow) {
for(int blurCol = -BLUR_SIZE; blurCol < BLUR_SIZE + 1; ++blurCol) {
int curRow = row + blurRow;
int curCol = col + blurCol;
if(curRow > -1 && curRow < height && curCol > -1 && curCol < width) {
pixVal += in[curRow * width * num_channel + curCol * num_channel + channel];
pixels++;
}
}
}
out[row * width * num_channel + col * num_channel + channel] = (unsigned char)(pixVal/pixels);
}
}
int main() {
int width, height,n;
unsigned char *image = stbi_load("image4.png",&width,&height,&n,0);
unsigned char *output = (unsigned char*)malloc(width * height * n *sizeof(unsigned char));
unsigned char* Dev_Input_Image = NULL;
unsigned char* Dev_Output_Image = NULL;
cudaMalloc((void**)&Dev_Input_Image, sizeof(unsigned char)* height * width * n);
cudaMalloc((void**)&Dev_Output_Image, sizeof(unsigned char)* height * width * n);
cudaMemcpy(Dev_Input_Image, image, sizeof(unsigned char) * height * width * n, cudaMemcpyHostToDevice);
//kernel call
dim3 blockSize(16, 16, 1);
dim3 gridSize(width/blockSize.x, height/blockSize.y,1);
blurKernel <<<gridSize, blockSize>>>(Dev_Input_Image, Dev_Output_Image, width, height,n,R);
blurKernel <<<gridSize, blockSize>>>(Dev_Input_Image, Dev_Output_Image, width, height,n,G);
blurKernel <<<gridSize, blockSize>>>(Dev_Input_Image, Dev_Output_Image, width, height,n,B);
cudaDeviceSynchronize();
cudaMemcpy(image, Dev_Output_Image, sizeof(unsigned char) * height * width * n, cudaMemcpyDeviceToHost);
stbi_write_png("output_stbimage.png", width, height, n, image, width * n);
cudaFree(Dev_Input_Image);
cudaFree(Dev_Output_Image);
return 0;
}
我尝试了所有可能的方法,但我无法找到我卡在的地方。我尝试以串行方式执行此操作,它完美地使用相同的逻辑(我的意思是处理每个通道上的模糊)。希望有人可以帮助我
【问题讨论】:
-
你的内核有运行时错误吗?
-
如果您的原始图像有 4 个通道,您可能需要复制您的 Alpha 通道。
-
我没有运行时错误,我还尝试使用 alpha 通道将其设置为最大值,以便为图像提供非透明背景
-
看起来不错,如果出现小错误,它至少应该产生一些东西。尝试将刚刚加载的图像保存到新文件并检查它。然后只在 CUDA 中复制输入输出...