【问题标题】:How to bind a float* array to a 1D texture in CUDA? 如何将 float* 数组绑定到 CUDA 中的一维纹理?
【发布时间】:2016-09-02 00:37:46
【问题描述】:

我试图通过将纹理内存绑定到线性设备数组(不是 cudaArray)来了解如何使用纹理内存。我的代码很简单(如下)。我有一个包含 8 个数字的 float* 数组,我试图将其绑定到 1D 纹理,然后在我的内核函数中,我尝试读取纹理并将值放入输出数组。但是当我运行这个测试时,我的输出数组中的所有值都是零:

输入 = 0.000000 1.000000 2.000000 3.000000 4.000000 5.000000 6.000000 7.000000
输出 = 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000

我在这里错过了什么?

// File-scope 1D texture reference over float elements, read in element-type mode.
// main() configures it and binds it to a device buffer before launching the kernel.
texture<float, 1, cudaReadModeElementType> texInput;

// Copies n values read through texInput into output, one element per iteration.
//   output: destination array; indices 0..n-1 are written.
//   n:      number of elements to read and write.
// Every launched thread runs the whole loop (no threadIdx/blockIdx indexing).
// NOTE(review): tex1D() takes a float coordinate and is documented for textures
// backed by a CUDA array, whereas main() binds texInput to linear memory with
// cudaBindTexture -- confirm this is the intended fetch function.
__global__ void copyKernel(float*output, int n) {
for (int i = 0; i < n; i++) {
    output[i] = tex1D(texInput, (float)i);
}
}

// Test driver: fills a host array with 0..WIDTH-1, copies it to the device,
// binds the device buffer to texInput, runs copyKernel, and prints the input
// and output arrays. Always returns 0.
int main(int argc, char*argv[]) {

// Number of float elements in the test arrays.
const int WIDTH = 8;

// Host-side input and output buffers (never freed in this program).
float* hInput = (float*)malloc(sizeof(float) * WIDTH);
float*hOutput = (float*)malloc(sizeof(float) * WIDTH);

// Input pattern: hInput[i] == i.
for (int i = 0; i < WIDTH; i++) {
    hInput[i] = (float)i;
}

// Device-side buffers, allocated below.
float* dInput = NULL, *dOutput = NULL;

// Receives the byte offset reported by cudaBindTexture; not read afterwards.
size_t offset = 0;

// Texture reference settings: border addressing, point filtering,
// non-normalized coordinates.
texInput.addressMode[0] = cudaAddressModeBorder;
texInput.addressMode[1] = cudaAddressModeBorder;
texInput.filterMode = cudaFilterModePoint;
texInput.normalized = false;

checkCudaErrors(cudaMalloc((void**)&dInput, sizeof(float)*WIDTH));
checkCudaErrors(cudaMalloc((void**)&dOutput, sizeof(float)*WIDTH));

// Upload the input; the return code is not checked.
cudaMemcpy(dInput, hInput, sizeof(float)*WIDTH, cudaMemcpyHostToDevice);

// Bind the linear device buffer dInput to texInput; the return code is not checked.
cudaBindTexture(&offset, texInput, dInput, sizeof(float)*WIDTH);


// One block of one thread; the kernel loops over all WIDTH elements itself.
copyKernel<<<1,1>>>(dOutput, WIDTH);

// Download the result; the return code is not checked.
cudaMemcpy(hOutput, dOutput, sizeof(float)*WIDTH, cudaMemcpyDeviceToHost);
printf("\nInput = ");

for (int i = 0; i < WIDTH; i++) {
        printf("%f\t",hInput[i]);
    }
printf("\nOutput = ");
for (int i = 0; i < WIDTH; i++) {
    printf("%f\t",hOutput[i]);
}

return 0;
}

【问题讨论】:

    标签: cuda


    【解决方案1】:

    根据官方文档(the documentation),tex1D() 用于底层存储为 CUDA 数组的纹理。对于绑定到线性内存的纹理,正确的纹理函数是 tex1Dfetch()。

    只需做这一处修改(将 tex1D() 换成 tex1Dfetch()),代码在我这里就能正常工作:

    $ cat t1139.cu
    #include <stdio.h>
    #include <helper_cuda.h>
    
    // File-scope 1D texture reference over float elements, read in element-type mode.
    // main() binds it to a linear device buffer; the kernel reads it with tex1Dfetch().
    // NOTE: texture references are a legacy API (deprecated in CUDA 11, removed in
    // CUDA 12); newer toolkits require texture objects (cudaTextureObject_t) instead.
    texture<float, 1, cudaReadModeElementType> texInput;
    
    /*
     * Copies the first n elements readable through texInput into output.
     *
     *   output: device array with room for at least n floats.
     *   n:      number of elements to copy; n <= 0 copies nothing.
     *
     * texInput must be bound to linear device memory, which is why the integer
     * indexed tex1Dfetch() is used rather than tex1D().
     *
     * Launch layout: any 1D grid/block configuration. Each thread starts at its
     * global index and advances by the total number of launched threads, so a
     * <<<1,1>>> launch visits 0..n-1 in order exactly as a plain serial loop
     * would, while larger launches split the work instead of repeating it.
     */
    __global__ void copyKernel(float*output, int n) {
        const int first  = blockIdx.x * blockDim.x + threadIdx.x;
        const int stride = gridDim.x * blockDim.x;
        for (int i = first; i < n; i += stride) {
            output[i] = tex1Dfetch(texInput, i);
        }
    }
    
    /*
     * Test driver: fills a host array with 0..WIDTH-1, uploads it, binds the
     * device buffer to texInput as linear memory, runs copyKernel, and prints
     * the input and output arrays.
     *
     * Every CUDA runtime call is wrapped in checkCudaErrors, so a failure is
     * reported where it happens instead of showing up as wrong output later.
     * Returns 0 on success and 1 if a host allocation fails or the texture
     * binding reports a non-zero offset.
     */
    int main(int argc, char*argv[]) {
        (void)argc;
        (void)argv;

        const int WIDTH = 8;
        const size_t bytes = sizeof(float) * WIDTH;

        float* hInput = (float*)malloc(bytes);
        float* hOutput = (float*)malloc(bytes);
        if (hInput == NULL || hOutput == NULL) {
            fprintf(stderr, "host allocation failed\n");
            free(hInput);
            free(hOutput);
            return 1;
        }

        for (int i = 0; i < WIDTH; i++) {
            hInput[i] = (float)i;
        }

        float* dInput = NULL, *dOutput = NULL;

        size_t offset = 0;

        // Texture reference settings: border addressing, point filtering,
        // non-normalized coordinates.
        texInput.addressMode[0] = cudaAddressModeBorder;
        texInput.addressMode[1] = cudaAddressModeBorder;
        texInput.filterMode = cudaFilterModePoint;
        texInput.normalized = false;

        checkCudaErrors(cudaMalloc((void**)&dInput, bytes));
        checkCudaErrors(cudaMalloc((void**)&dOutput, bytes));

        checkCudaErrors(cudaMemcpy(dInput, hInput, bytes, cudaMemcpyHostToDevice));

        checkCudaErrors(cudaBindTexture(&offset, texInput, dInput, bytes));
        // The kernel indexes the texture from 0, so the binding must not have
        // introduced an offset. Pointers returned by cudaMalloc are expected to
        // satisfy the alignment requirement, but verify rather than assume.
        if (offset != 0) {
            fprintf(stderr, "unexpected texture offset: %lu bytes\n",
                    (unsigned long)offset);
            checkCudaErrors(cudaUnbindTexture(texInput));
            checkCudaErrors(cudaFree(dInput));
            checkCudaErrors(cudaFree(dOutput));
            free(hInput);
            free(hOutput);
            return 1;
        }

        copyKernel<<<1,1>>>(dOutput, WIDTH);
        // A kernel launch returns no status; fetch launch-configuration errors here.
        checkCudaErrors(cudaGetLastError());

        // Blocking copy: also surfaces any error raised while the kernel ran.
        checkCudaErrors(cudaMemcpy(hOutput, dOutput, bytes, cudaMemcpyDeviceToHost));
        printf("\nInput = ");

        for (int i = 0; i < WIDTH; i++) {
            printf("%f\t",hInput[i]);
        }
        printf("\nOutput = ");
        for (int i = 0; i < WIDTH; i++) {
            printf("%f\t",hOutput[i]);
        }

        // Release the binding and all buffers.
        checkCudaErrors(cudaUnbindTexture(texInput));
        checkCudaErrors(cudaFree(dInput));
        checkCudaErrors(cudaFree(dOutput));
        free(hInput);
        free(hOutput);

        return 0;
    }
    $ nvcc -I/usr/local/cuda/samples/common/inc t1139.cu -o t1139
    $ cuda-memcheck ./t1139
    ========= CUDA-MEMCHECK
    
    Input = 0.000000        1.000000        2.000000        3.000000        4.0000005.000000        6.000000        7.000000
    Output = 0.000000       1.000000        2.000000        3.000000        4.0000005.000000        6.000000        7.000000        ========= ERROR SUMMARY: 0 errors
    $
    

    【讨论】:

      猜你喜欢
      • 2016-07-14
      • 1970-01-01
      • 1970-01-01
      • 2015-12-23
      • 2012-02-18
      • 2011-02-08
      • 1970-01-01
      • 2020-04-29
      • 1970-01-01
      相关资源
      最近更新 更多