CUDA NVCC 编译器错误 [重复]答案

【问题标题】：CUDA NVCC compiler errors [duplicate]（CUDA NVCC 编译器错误 [重复]）
【发布时间】：2014-02-07 17:29:51
【问题描述】：

为什么当我尝试编译 cuda 程序时会抛出这些错误：clang: error: unsupported option '-dumpspecs' 和 clang: error: no input files？

我在终端上输入：nvcc -o hello matrix_product.cu 就像互联网教程向我展示的那样，但似乎不起作用...我是否必须添加一些变量或类似的东西？

而file.cu的代码是：

using namespace std;

// Forward declaration of the error-reporting helper defined at the end of the file.
// The label is taken by non-const reference, so callers must pass a named
// std::string object; string literals and other temporaries will not bind to it.
void CheckCudaError(string &e);


/*
 * Each thread stages one element of matrix_a and one element of matrix_b into
 * BLOCK_SIZE x BLOCK_SIZE shared-memory tiles, then accumulates a
 * BLOCK_SIZE-term product over those tiles and stores the result in matrix_c.
 *
 * Launch requirements:
 *   - 2D thread blocks of exactly BLOCK_SIZE x BLOCK_SIZE threads (the tiles
 *     are indexed directly by threadIdx.x / threadIdx.y).
 *   - Static shared memory: 2 * BLOCK_SIZE * BLOCK_SIZE ints per block.
 *   - All three pointers must be device pointers large enough for every
 *     offset produced by the launch grid; no bounds check is performed.
 *
 * Parameters:
 *   matrix_a, matrix_b  input buffers (read only by this kernel)
 *   matrix_c            output buffer
 */
__global__ void productMatrix(int *matrix_a, int *matrix_b, int *matrix_c)
{
    const int threadx = threadIdx.x;
    const int thready = threadIdx.y;

    // NOTE(review): this offset adds the x and y contributions without any
    // row stride, so different threads (e.g. (1,0) and (0,1)) resolve to the
    // same element. A row-major 2D layout would need the matrix width, which
    // this signature does not carry — confirm the intended layout.
    const int offset = blockIdx.x * BLOCK_SIZE + threadx
                     + blockIdx.y * BLOCK_SIZE + thready;

    __shared__ int Asub[BLOCK_SIZE][BLOCK_SIZE];
    __shared__ int Bsub[BLOCK_SIZE][BLOCK_SIZE];

    Asub[threadx][thready] = matrix_a[offset];
    Bsub[threadx][thready] = matrix_b[offset];

    // Every thread's tile element must be written before any thread reads
    // the elements stored by its neighbours.
    __syncthreads();

    // The accumulator must start at zero; it was previously read uninitialized.
    int suma = 0;

    for (int i = 0; i < BLOCK_SIZE; ++i)
    {
        suma += Asub[i][thready] * Bsub[threadx][i];
    }

    // No shared memory is accessed past this point, so no further barrier is
    // needed before the global store.
    matrix_c[offset] = suma;
}


// Prints `what` plus the CUDA error description and terminates the program
// when a CUDA runtime call did not return cudaSuccess.
static void abortOnCudaFailure(cudaError_t status, const char *what)
{
    if (status != cudaSuccess) {
        cerr << what << ": " << cudaGetErrorString(status) << endl;
        exit(EXIT_FAILURE);
    }
}

/*
 * Host driver: fills two input buffers, copies them to the device, launches
 * productMatrix, copies the result back and prints it.
 *
 * Returns 0 on success; exits with EXIT_FAILURE as soon as a host allocation
 * or any CUDA runtime call fails.
 */
int main(){

    // Host buffers (h_a is reused later to receive the result) and device buffers.
    int *h_a, *h_b;
    int *d_a, *d_b, *d_c;

    int NumBlocks = 100 * 100 / BLOCK_SIZE;
    int num_elements = NumBlocks * BLOCK_SIZE;
    const size_t bytes = (size_t)num_elements * sizeof(int);


    // Allocate the host buffers. malloc returns void*, which C++ does not
    // convert implicitly, hence the explicit casts.
    h_a = (int *)malloc(bytes);
    h_b = (int *)malloc(bytes);
    if (h_a == NULL || h_b == NULL) {
        cerr << "malloc_host_error" << endl;
        free(h_a);
        free(h_b);
        return EXIT_FAILURE;
    }


    // Fill the host inputs.
    for (int i = 0; i < num_elements; ++i)
    {
        h_a[i] = i;
        h_b[i] = num_elements - 1 - i;
    }


    // Allocate the device buffers, checking every return code.
    abortOnCudaFailure(cudaMalloc(&d_a, bytes), "malloc_device_error");
    abortOnCudaFailure(cudaMalloc(&d_b, bytes), "malloc_device_error");
    abortOnCudaFailure(cudaMalloc(&d_c, bytes), "malloc_device_error");


    // Copy the filled host inputs to the device.
    abortOnCudaFailure(cudaMemcpy(d_a, h_a, bytes, cudaMemcpyHostToDevice), "memcpy_error");
    abortOnCudaFailure(cudaMemcpy(d_b, h_b, bytes, cudaMemcpyHostToDevice), "memcpy_error");


    // h_b is no longer needed once its contents are on the device.
    free(h_b);


    // Launch configuration.
    // NOTE(review): the grid is NumBlocks x NumBlocks blocks of
    // BLOCK_SIZE x BLOCK_SIZE threads, i.e. far more threads than
    // num_elements; the kernel does no bounds check, so confirm that this
    // geometry matches the kernel's indexing and the buffer sizes.
    dim3 grid(NumBlocks, NumBlocks);
    dim3 block(BLOCK_SIZE, BLOCK_SIZE);
    productMatrix <<< grid, block >>> (d_a, d_b, d_c);
    // A kernel launch returns nothing; configuration errors are reported
    // through cudaGetLastError().
    abortOnCudaFailure(cudaGetLastError(), "Calling_device_function_error");


    // Wait for the kernel to finish before copying back; errors raised while
    // the kernel was running are reported by this call.
    abortOnCudaFailure(cudaDeviceSynchronize(), "Syncronize_threads_error");


    // Copy the result back into the (reused) host buffer.
    abortOnCudaFailure(cudaMemcpy(h_a, d_c, bytes, cudaMemcpyDeviceToHost), "mempcy_host_error");


    // Print the result, separating the values so they stay readable.
    for (int i = 0; i < num_elements; ++i) cout << h_a[i] << ' ';
    cout << endl;


    // Release host memory.
    free(h_a);

    // Release device memory.
    abortOnCudaFailure(cudaFree(d_a), "free_device_error");
    abortOnCudaFailure(cudaFree(d_b), "free_device_error");
    abortOnCudaFailure(cudaFree(d_c), "free_device_error");

    return 0;
}


/*
 * Reports the most recent CUDA runtime error, if there is one.
 *
 * e: caller-supplied label printed in front of the CUDA error description.
 *
 * cudaGetLastError() returns the last recorded error and resets it, so each
 * pending error is reported at most once. Nothing is printed when the last
 * status is cudaSuccess.
 */
void CheckCudaError(string &e)
{
    const cudaError_t err = cudaGetLastError();
    if (err != cudaSuccess) {
        // Include the runtime's own description so the cause is visible,
        // not just the caller's label.
        cerr << e << ": " << cudaGetErrorString(err) << endl;
    }
}

【问题讨论】：

似乎 nvcc 向系统编译器 (gcc) 询问有关系统 (-dumpspecs) 的信息，但是您有一些叫做 clang 的东西假装是 gcc 并且失败了。我确定 nvidia 有给你的说明，但你可以尝试在 PATH 前面添加一个包含真正 gcc 的目录。
该问题与 stackoverflow.com/q/19649541/1918193 重复，而后者本身也已被标记为另一个问题的重复。

标签： c++ c cuda nvidia nvcc

【解决方案1】：

请尝试明确地将 NVCC 指向 clang 编译器。

NVCC := nvcc -ccbin /usr/bin/clang

或许

NVCC := nvcc -ccbin /usr/local/cuda/bin/clang

另外不要忘记添加所有必要的包括：

nvcc -I/usr/local/cuda-5.0/include -I. -I.. -I../../common/inc -o MonteCarlo_kernel.o -c MonteCarlo_kernel.cu

【讨论】：

这与问题完全无关。指定包含路径怎么可能改变 clang: error: unsupported option '-dumpspecs' 这个错误？（注意编译器是clang，不是gcc，你用的是clang吗？）
好的，现在您把导致问题的确切原因编辑成了解决方案，这与您的第一个答案同样错误。这里的关键是：nvcc 会尝试判断主机编译器是 gcc 还是 clang，但要做到这一点，必须让它直接指向 clang 本身，而不是指向一个实际链接到 clang 的 gcc 符号链接——后者正是问题的根本原因，也正是您的答案会造成的情况。
@talonmies 已更正，请查看