【问题标题】:How to remove CL_INVALID_PLATFORM error in opencl code?如何删除 opencl 代码中的 CL_INVALID_PLATFORM 错误?
【发布时间】:2013-10-02 15:42:26
【问题描述】:

使用 OpenCL 进行简单的矩阵乘法:

// Multiply two matrices A * B = C

#include <stdlib.h>
#include <stdio.h>
#include <math.h>
#include <oclUtils.h>

#define WA 3
#define HA 3
#define WB 3
#define HB 3
#define WC 3
#define HC 3

// Allocates a matrix with random float entries.
void randomInit(float* data, int size)
{
   for (int i = 0; i < size; ++i)
   data[i] = rand() / (float)RAND_MAX;
}

/////////////////////////////////////////////////////////
// Program main
/////////////////////////////////////////////////////////

int
main(int argc, char** argv)
{

   // set seed for rand()
   srand(2006);

   // 1. allocate host memory for matrices A and B
   unsigned int size_A = WA * HA;
   unsigned int mem_size_A = sizeof(float) * size_A;
   float* h_A = (float*) malloc(mem_size_A);

   unsigned int size_B = WB * HB;
   unsigned int mem_size_B = sizeof(float) * size_B;
   float* h_B = (float*) malloc(mem_size_B);

   // 2. initialize host memory
   randomInit(h_A, size_A);
   randomInit(h_B, size_B);

   // 3. print out A and B
   printf("\n\nMatrix A\n");
   for(int i = 0; i < size_A; i++)
   {
      printf("%f ", h_A[i]);
      if(((i + 1) % WA) == 0)
      printf("\n");
   }

   printf("\n\nMatrix B\n");
   for(int i = 0; i < size_B; i++)
   {
      printf("%f ", h_B[i]);
      if(((i + 1) % WB) == 0)
      printf("\n");
   }

   // 4. allocate host memory for the result C
   unsigned int size_C = WC * HC;
   unsigned int mem_size_C = sizeof(float) * size_C;
   float* h_C = (float*) malloc(mem_size_C);

   // 5. Initialize OpenCL
   // OpenCL specific variables
   cl_context clGPUContext;
   cl_command_queue clCommandQue;
   cl_program clProgram;
   cl_kernel clKernel;

   size_t dataBytes;
   size_t kernelLength;
   cl_int errcode;

   // OpenCL device memory for matrices
   cl_mem d_A;
   cl_mem d_B;
   cl_mem d_C;

   /*****************************************/
   /* Initialize OpenCL */
   /*****************************************/

   clGPUContext = clCreateContextFromType(0, 
                   CL_DEVICE_TYPE_GPU, 
                   NULL, NULL, &errcode);
   shrCheckError(errcode, CL_SUCCESS);

   // get the list of GPU devices associated 
   // with context
   errcode = clGetContextInfo(clGPUContext, 
              CL_CONTEXT_DEVICES, 0, NULL, 
              &dataBytes);
   cl_device_id *clDevices = (cl_device_id *)
              malloc(dataBytes);
   errcode |= clGetContextInfo(clGPUContext, 
              CL_CONTEXT_DEVICES, dataBytes, 
              clDevices, NULL);
   //shrCheckError(errcode, CL_SUCCESS);

   //Create a command-queue
   clCommandQue = clCreateCommandQueue(clGPUContext, 
                  clDevices[0], 0, &errcode);
   //shrCheckError(errcode, CL_SUCCESS);

   // Setup device memory
   d_C = clCreateBuffer(clGPUContext, 
          CL_MEM_READ_WRITE, 
          mem_size_A, NULL, &errcode);
   d_A = clCreateBuffer(clGPUContext, 
          CL_MEM_READ_WRITE | CL_MEM_COPY_HOST_PTR, 
          mem_size_A, h_A, &errcode);
   d_B = clCreateBuffer(clGPUContext, 
          CL_MEM_READ_WRITE | CL_MEM_COPY_HOST_PTR, 
          mem_size_B, h_B, &errcode);


   // 6. Load and build OpenCL kernel
   char *clMatrixMul = oclLoadProgSource("kernel.cl",
                        "// My comment\n", 
                        &kernelLength);
   //shrCheckError(clMatrixMul != NULL, shrTRUE);

   clProgram = clCreateProgramWithSource(clGPUContext, 
                1, (const char **)&clMatrixMul, 
                &kernelLength, &errcode);
   //shrCheckError(errcode, CL_SUCCESS);

   errcode = clBuildProgram(clProgram, 0, 
              NULL, NULL, NULL, NULL);
   //shrCheckError(errcode, CL_SUCCESS);

   clKernel = clCreateKernel(clProgram, 
               "matrixMul", &errcode);
   //shrCheckError(errcode, CL_SUCCESS);


   // 7. Launch OpenCL kernel
   size_t localWorkSize[2], globalWorkSize[2];

   int wA = WA;
   int wC = WC;
   errcode = clSetKernelArg(clKernel, 0, 
              sizeof(cl_mem), (void *)&d_C);
   errcode |= clSetKernelArg(clKernel, 1, 
              sizeof(cl_mem), (void *)&d_A);
   errcode |= clSetKernelArg(clKernel, 2, 
              sizeof(cl_mem), (void *)&d_B);
   errcode |= clSetKernelArg(clKernel, 3, 
              sizeof(int), (void *)&wA);
   errcode |= clSetKernelArg(clKernel, 4, 
              sizeof(int), (void *)&wC);
   //shrCheckError(errcode, CL_SUCCESS);

   localWorkSize[0] = 3;
   localWorkSize[1] = 3;
   globalWorkSize[0] = 3;
   globalWorkSize[1] = 3;

   errcode = clEnqueueNDRangeKernel(clCommandQue, 
              clKernel, 2, NULL, globalWorkSize, 
              localWorkSize, 0, NULL, NULL);
   //shrCheckError(errcode, CL_SUCCESS);

   // 8. Retrieve result from device
   errcode = clEnqueueReadBuffer(clCommandQue, 
              d_C, CL_TRUE, 0, mem_size_C, 
              h_C, 0, NULL, NULL);
   //shrCheckError(errcode, CL_SUCCESS);

   // 9. print out the results
   printf("\n\nMatrix C (Results)\n");
   for(int i = 0; i < size_C; i++)
   {
      printf("%f ", h_C[i]);
      if(((i + 1) % WC) == 0)
      printf("\n");
   }
   printf("\n");

   // 10. clean up memory
   free(h_A);
   free(h_B);
   free(h_C);

   clReleaseMemObject(d_A);
   clReleaseMemObject(d_C);
   clReleaseMemObject(d_B);

   free(clDevices);
   free(clMatrixMul);
   clReleaseContext(clGPUContext);
   clReleaseKernel(clKernel);
   clReleaseProgram(clProgram);
   clReleaseCommandQueue(clCommandQue);

}

在上面的代码中,我不断收到错误:

/************ **********//初始化OpenCL / /**********************/ clGPUContext = clCreateContextFromType(0, CL_DEVICE_TYPE_GPU, NULL, NULL, &errcode); shrCheckError(errcode, CL_SUCCESS);

返回的错误码是-32,表示:CL_INVALID_PLATFORM"

如何消除此错误?

操作系统:Windows 7、32 位、NVIDIA GPU GeForce 610

【问题讨论】:

    标签: opencl nvidia


    【解决方案1】:

    Nvidia 驱动程序希望您提供一个非 NULL 属性指针作为 clCreateContextFromType 调用的第一个参数。

    clCreateContextFromType 的 Khronos 规范指出,如果为属性参数传递 NULL,则选择的平台取决于实现。在 Nvidia 的情况下,选择似乎是如果传递了 NULL 指针,则根本不选择任何平台。请参阅clCreateContextFromType 了解更多信息。

    另一方面,此行为与 cl_khr_icd extension 中的问题 #3 一致,如果您通过 ICD 使用 OpenCL,则该问题将适用,并指出:

    3: How will the ICD handle a NULL cl_platform_id?
    
    RESOLVED: The NULL platform is not supported by the ICD.
    

    要将属性传递给clCreateContextFromType,首先使用clGetPlatformIDs 查询平台。然后用所需的平台 ID 构造一个属性数组并将其传递给clCreateContextFromType。遵循 C99 的编译器应该可以使用以下内容:

       // query the number of platforms
       cl_uint numPlatforms;
       errcode = clGetPlatformIDs(0, NULL, &numPlatforms);
       shrCheckError(errcode, CL_SUCCESS); 
    
       // now get all the platform IDs
       cl_platform_id platforms[numPlatforms];
       errcode = clGetPlatformIDs(numPlatforms, platforms, NULL);
       shrCheckError(errcode, CL_SUCCESS);
    
       // set platform property - we just pick the first one
       cl_context_properties properties[] = {CL_CONTEXT_PLATFORM, (int) platforms[0], 0};
       clGPUContext = clCreateContextFromType(properties, CL_DEVICE_TYPE_GPU, NULL, NULL, &errcode);
       shrCheckError(errcode, CL_SUCCESS);
    

    【讨论】:

      猜你喜欢
      • 2017-12-09
      • 2020-12-24
      • 1970-01-01
      • 1970-01-01
      • 1970-01-01
      • 2018-11-29
      • 1970-01-01
      • 2012-07-11
      • 1970-01-01
      相关资源
      最近更新 更多