【问题标题】:not able to fill the array allocated on the gpu(无法填充在 GPU 上分配的数组)
【发布时间】:2018-05-03 20:41:00
【问题描述】:

请帮帮我。我有以下代码

...
#include <accelmath.h>
#include <openacc.h>
// Capacity constants: G, GL and K all hold 100000 (K is initialized from G).
const long int G=100000;
const unsigned int GL=100000;
const long int K=G;
// NOTE(review): an int initialized from the float literal 1.0f.
// function_device() compares Particle::x against it.
const int LE=1.0f;
// Plain record with two float fields; function_device() stores 5.0 into x
// and the current MAX_ELEMENT into rs.
struct Particle
{
  float x;
  float rs;
};
// Fixed-size global arrays with GL elements each.
Particle particles[GL];
// Per-particle label (1 or 0) written by function_device().
int sort[GL];
// Receives particle indices in function_device().
int ind01[GL];
// Value copied into Particle::rs and then incremented — presumably a running
// id; confirm with the author.
long int MAX_ELEMENT=1;
int POSITION1;
int POSITION0;
// Used both as the insertion index into particles and as the loop bound in
// function_device(); main() loops while it is positive.
int LIFE=0;
// Forces the first pass through the loop in main().
bool start=true;
int mini;
int count0;
int count1;
int GL1;
int js;

// OpenACC declare directives naming the globals that get device copies.
// NOTE(review): MAX_ELEMENT, count0, count1 and K are listed as
// device_resident here — confirm this matches how host code reads them.
#pragma acc declare device_resident(ind01,POSITION0,POSITION1,mini,GL1,js,MAX_ELEMENT,count0,count1,K)
#pragma acc declare create(LIFE,particles,sort)

 // One simulation step: appends one particle while LIFE<K, labels every
 // particle 1/0 in `sort` by comparing x with LE, then writes particle
 // indices into ind01 from both ends. Part of the body is elided in the
 // original post (the `...` line near the end).
 void function_device()
 {
   // Bring LIFE and particles to the host so LIFE can be printed.
   // NOTE(review): the update carries `async` and no wait is visible before
   // the host read on the next line.
   #pragma acc update host(LIFE,particles) async
    std::cout<<"LIFE before="<<LIFE<<std::endl;
   // Copy the host LIFE and particles to the device.
   #pragma acc update device(LIFE,particles) async
   // Single-gang, vector_length(1) region: reset both counters and append
   // one particle at index LIFE.
   #pragma acc parallel num_gangs(1) vector_length(1) present(particles) async
   {
     count0=0;
     count1=0;
     if(LIFE<K)
     {
       particles[LIFE].x=5.0;
       particles[LIFE].rs=MAX_ELEMENT;
       ++MAX_ELEMENT;
       ++LIFE;
      }
    }
     // NOTE(review): the parallel region closed on the previous line, so this
     // `loop` directive is not enclosed in any compute construct shown here.
     // Labels each particle: sort[i]=1 when x>=LE, otherwise 0, and tallies
     // both cases.
     #pragma acc loop vector reduction(+:count0,count1)
     for(int i=0; i<LIFE; ++i)
     {
       if(particles[i].x>=LE)
       {
         sort[i]=1;
         count1=count1+1;
       }
       else
       {
         sort[i]=0;
         count0=count0+1;
       }
     }   
   // Re-seed the counters as cursors: count0 starts at the last index
   // (LIFE-1) and count1 at 0. This overwrites the tallies from the loop above.
   #pragma acc parallel num_gangs(1) vector_length(1)
     {
       GL1=LIFE-1;
       count0=GL1;
       count1=0;
     }
     // NOTE(review): again placed after the closing brace of the region above.
     // Indices with sort==1 are written from the front of ind01 (count1
     // grows); the others are written from the back (count0 shrinks).
     #pragma acc loop seq
     for(int i=0; i<LIFE; ++i)
     {
       if(sort[i]==1)
       {
         ind01[count1++]=i;
       }
       else
       {
         ind01[count0--]=i;
       }
     }
    // mini = min(GL1-count0, count1), i.e. the smaller of the two group
    // sizes after the fill above; js is reset to 0.
    #pragma acc parallel num_gangs(1) vector_length(1)
    {
      mini=GL1-count0;
      if(count1<mini) mini=count1;
      js=0;
    }
    ...
    // Copy the device value of LIFE back; main() tests it in its loop condition.
    #pragma acc update host(LIFE) async
  }
  // Entry point: initializes the OpenACC runtime for acc_device_nvidia, then
  // calls function_device() repeatedly while `start` is set or LIFE>0.
  // argc and argv are not used.
  // NOTE(review): the loop condition has no upper bound on LIFE.
  int main(int argc, char **argv)
  {
    acc_init(acc_device_nvidia);
    int step=1;
while(start==true || LIFE>0)
{
      std::cout<<" LIFE="<<LIFE<<std::endl;
      // Cleared after the first pass; from then on only LIFE>0 keeps the loop going.
      start=false;
      function_device();   
      std::cout<<"MAIN LOOP # "<<step<<std::endl;
      ++step;
    }
  }

填完分配在gpu上的数组后:

     particles[LIFE].x=5.0;

但打印的输出:

     #pragma acc update host(LIFE,particles) async
     std::cout<<"LIFE after injector="<<LIFE<<std::endl;
     for(int i=0; i<LIFE; ++i) std::cout<<" particles: "<<particles[i]<<std::endl;

和:

    #pragma acc update host(LIFE,sort) async
     std::cout<<"LIFE after 1="<<LIFE<<" c0="<<count0<<" c1="<<count1<<std::endl;
     for(int i=0; i<LIFE; ++i) std::cout<<"sort: "<<sort[i]<<std::endl;

结果是 LIFE=1,而数组里只有 0,就好像我根本没有用 5.0f 填充 particles 数组。应该如何填充 particles 数组?另外,我像下面这样在 GPU 上启动一段串行代码,做法是否正确:

    #pragma acc parallel num_gangs(1) vector_length(1)
    {
      mini=GL1-count0;
      if(count1<mini) mini=count1;
      js=0;
    }

如果 openacc 指令的使用有错误,请指出。为什么

     #pragma acc update host(LIFE) async

不加 async 子句就不能工作?

【问题讨论】:

    标签: openacc


    【解决方案1】:

    我更新了您的代码以使其正常工作。您的代码里有一些孤立(orphaned)的循环,需要把它们放进 “parallel” 区域;而且由于其中几个变量是全局变量,因此您需要使用 atomic。我在 atomic 操作中使用通过 “declare create” 声明的变量时确实遇到了编译器问题,因此需要把一些变量移到 data 区域中。我会将此情况报告给我们的工程师。

    注意,我在 LIFE 到达 G 时也设置了终止条件,否则它似乎进入了无限循环。

    #include <iostream>
    #include <accelmath.h>
    #include <openacc.h>
    // Capacity constants: G, GL and K all hold 100000 (K is initialized from G).
    const long int G=100000;
    const unsigned int GL=100000;
    const long int K=G;
    // NOTE(review): an int initialized from the float literal 1.0f.
    // function_device() compares Particle::x against it.
    const int LE=1.0f;
    // Plain record with two float fields; function_device() stores 5.0 into x
    // and the current MAX_ELEMENT into rs.
    struct Particle
    {
      float x;
      float rs;
    };
    // Fixed-size global arrays with GL elements each.
    Particle particles[GL];
    // Per-particle label (1 or 0) written by function_device().
    int sort[GL];
    // Receives particle indices in function_device().
    int ind01[GL];
    // No initializer here; main() assigns 1 before entering its data region.
    int MAX_ELEMENT;
    int POSITION1;
    int POSITION0;
    // No initializer here; main() assigns 0 before entering its data region.
    int LIFE;
    // Forces the first pass through the loop in main().
    bool start=true;
    int mini;
    int GL1;
    int js;
    // The two counters are not named in the declare directives below; main()
    // lists them in its `data copyin` clause instead.
    int count0;
    int count1;
    
    // OpenACC declare directives naming the globals that get device copies.
    #pragma acc declare device_resident(ind01,POSITION0,POSITION1,mini,GL1,js)
    #pragma acc declare create(particles,sort)
    
     // One simulation step: appends one particle while LIFE<K, labels every
     // particle 1/0 in `sort` by comparing x with LE, then writes particle
     // indices into ind01 from both ends and derives `mini`.
     // Reads and writes the globals LIFE, MAX_ELEMENT, count0 and count1,
     // which main() places on the device with a `data copyin` clause.
     void function_device()
     {
       // Single-gang, vector_length(1) region: reset both counters and
       // append one particle at index LIFE. Stores and increments on the
       // globals are marked atomic.
       #pragma acc parallel num_gangs(1) vector_length(1) present(particles)
       {
    #pragma acc atomic write
          count0=0;
    #pragma acc atomic write
          count1=0;
         if(LIFE<K)
         {
           particles[LIFE].x=5.0;
           particles[LIFE].rs=MAX_ELEMENT;
    #pragma acc atomic update
           ++MAX_ELEMENT;
    #pragma acc atomic update
           ++LIFE;
          }
        }
         // Parallel loop over the first LIFE particles: sort[i]=1 when
         // x>=LE, otherwise 0. Each tally increment is an atomic update
         // because iterations share the global counters.
         #pragma acc parallel loop
         for(int i=0; i<LIFE; ++i)
         {
           if(particles[i].x>=LE)
           {
             sort[i]=1;
      #pragma acc atomic update
             count1=count1+1;
           }
           else
           {
             sort[i]=0;
      #pragma acc atomic update
             count0=count0+1;
           }
         }
       // Re-seed the counters as cursors: count0 starts at the last index
       // (LIFE-1) and count1 at 0. This overwrites the tallies from the loop above.
       // NOTE(review): unlike the first region, these stores carry no
       // `atomic write`; the region runs with one gang and vector_length(1).
       #pragma acc parallel num_gangs(1) vector_length(1)
         {
           GL1=LIFE-1;
           count0=GL1;
           count1=0;
         }
         // Parallel fill of ind01: each iteration atomically captures the
         // current cursor into the local `cnt` and then stores its index
         // there. Indices with sort==1 take slots from the front (count1++),
         // the others from the back (count0--).
         // NOTE(review): the order of indices within each group presumably
         // depends on iteration scheduling — confirm that callers do not
         // rely on it.
         #pragma acc parallel loop
         for(int i=0; i<LIFE; ++i)
         {
           int cnt;
           if(sort[i]==1)
           {
            #pragma acc atomic capture
            {
              cnt = count1++;
            }
            ind01[cnt]=i;
           }
           else
           {
            #pragma acc atomic capture
            {
              cnt = count0--;
            }
            ind01[cnt]=i;
           }
         }
        // mini = min(GL1-count0, count1), i.e. the smaller of the two group
        // sizes after the fill above; js is reset to 0.
        #pragma acc parallel num_gangs(1) vector_length(1)
        {
          mini=GL1-count0;
          if(count1<mini) mini=count1;
          js=0;
        }
      }
      // Entry point: initializes the OpenACC runtime for acc_device_nvidia,
      // seeds LIFE and MAX_ELEMENT on the host, then runs function_device()
      // inside a data region until LIFE reaches G. argc and argv are not used.
      int main(int argc, char **argv)
      {
        acc_init(acc_device_nvidia);
        int step=1;
        // Host-side initial values; the copyin clause below transfers them
        // to the device.
        LIFE=0;
        MAX_ELEMENT=1;
       // Data region spanning the whole loop: the four scalars are copied to
       // the device on entry.
       #pragma acc data copyin(LIFE,MAX_ELEMENT,count0,count1)
    {
    // Runs at least once via `start`, then continues while 0 < LIFE < G.
    while(start==true || (LIFE>0 && LIFE < G ))
    {
          std::cout<<" LIFE="<<LIFE<<std::endl;
          start=false;
          function_device();
       // Refresh the host copy of LIFE so the loop condition and the next
       // print see the value written on the device.
       #pragma acc update self(LIFE)
          std::cout<<"MAIN LOOP # "<<step<<std::endl;
          ++step;
        }
    }
      }
    

    【讨论】:

    • 感谢您的回答。你看,当我在你的代码中的循环 #pragma acc parallel loop for(int i=0; i
    • 你在初始化全局变量的地方写了 #pragma acc atomic write count0=0;,但为什么在 #pragma acc parallel num_gangs(1) vector_length(1) { GL1=LIFE-1; count0=GL1; count1=0; } 这个区域里不使用 #pragma acc atomic write 指令?
    • 非常感谢您回信!
    猜你喜欢
    • 1970-01-01
    • 1970-01-01
    • 1970-01-01
    • 1970-01-01
    • 2019-03-03
    • 2018-09-07
    • 1970-01-01
    • 1970-01-01
    • 1970-01-01
    相关资源
    最近更新 更多