【发布时间】:2020-12-09 15:11:54
【问题描述】:
我有下面这个用 OpenMP 并行化的 for 循环:
// Cell update loop: every iteration of the outer loop works on one element
// of mesh->cell. k and cell are private to each thread; sim and mesh are shared.
#pragma omp parallel for default(none) private(k,cell) shared(sim,mesh)
for(i=0;i<mesh->ncells;i++){
cell=&(mesh->cell[i]);
for(k=0;k<sim->nvar;k++){
// Keep a copy of the current value in U_aux before U is overwritten.
cell->U_aux[k]=cell->U[k];
// Subtract dt times the sum of three flux differences (walls w2/w4, w3/w1, w6/w5),
// each divided by dx, dy and dz respectively.
cell->U[k]-=sim->dt*((cell->w2->fL_star[k]-cell->w4->fR_star[k])/cell->dx + (cell->w3->fL_star[k]-cell->w1->fR_star[k])/cell->dy + (cell->w6->fL_star[k]-cell->w5->fR_star[k])/cell->dz);
}
}
// NOTE(review): this brace has no matching opener inside the snippet;
// presumably it closes the enclosing function, whose header is not shown - confirm.
}
其中 cell 是指向结构 mesh 中单元(cell)数组元素的指针。我想改用 OpenACC 对它进行并行化。
结构 sim 只包含标量,而结构 mesh 包含 cell 和 wall 两个一维数组:
/* Mesh container, reduced to the members the update loop touches. */
struct t_mesh_ {
    int     ncells;  /* upper bound of the loop over mesh->cell */
    t_cell *cell;    /* one-dimensional array of cells */
    t_wall *wall;    /* one-dimensional array of walls */
    /* ...other members, not used in this function, are left out... */
};
结构cell定义为:
/* Cell record, reduced to the members the update loop touches. */
struct t_cell_ {
    /* divisors applied to the three flux differences */
    double  dx;
    double  dy;
    double  dz;
    /* small per-cell arrays indexed by variable number */
    double *U;
    double *U_aux;
    /* the six walls of the cell; each points to an element of mesh->wall */
    t_wall *w1;
    t_wall *w2;
    t_wall *w3;
    t_wall *w4;
    t_wall *w5;
    t_wall *w6;
    /* ...other members, not used in this function, are left out... */
};
请注意,U 和 U_aux 是小型一维数组,而 w1, w2, w3, w4, w5, w6 是指向数组 wall 中元素的指针;wall 的元素定义为:
/* Wall record, reduced to the members the update loop touches. */
struct t_wall_ {
    double *fR_star;  /* small array, read as fR_star[k] in the update loop */
    double *fL_star;  /* small array, read as fL_star[k] in the update loop */
    /* ...other members, not used in this function, are left out... */
};
其中fR_star 和fL_star 是小型一维数组。
我应该如何定义 OpenACC pragma 并管理内存?
提前致谢。
[已编辑]
下面附上的代码展示了各个结构的创建与内存分配,以及相关循环中用到的指针。大部分代码已被省略,只保留了与问题相关的部分:
//macros omitted
////////////////////////////////////////////////////
////////////// S T R U C T U R E S ///////////////
////////////////////////////////////////////////////
/* Forward declarations: the structures below refer to each other through
 * pointers, so the short type names must exist before the definitions. */
typedef struct t_node_  t_node;
typedef struct t_cell_  t_cell;
typedef struct t_wall_  t_wall;
typedef struct t_mesh_  t_mesh;
typedef struct t_sim_   t_sim;
/* Mesh node: an identifier plus a pair of coordinates. */
struct t_node_ {
    int    id;
    double x;
    double y;
};
/* Mesh cell. */
struct t_cell_ {
    int     id;     /* index of the cell inside mesh->cell */
    int     l;      /* column index used in k = xcells*m + l */
    int     m;      /* row index used in k = xcells*m + l */
    double *U;      /* array of cell-valued variables */
    double  dx;     /* divisor of the w2/w4 flux difference in update_cellK1() */
    double  dy;     /* divisor of the w3/w1 flux difference in update_cellK1() */
    double  xc;     /* presumably the x coordinate of the cell centre - TODO confirm */
    double  yc;     /* presumably the y coordinate of the cell centre - TODO confirm */
    int     n1;     /* n1..n4: presumably node indices - TODO confirm */
    int     n2;
    int     n3;
    int     n4;
    int     w1_id;  /* w1_id..w4_id: indices into mesh->wall */
    int     w2_id;
    int     w3_id;
    int     w4_id;
    t_wall *w1;     /* w1..w4: pointers to mesh->wall */
    t_wall *w2;
    t_wall *w3;
    t_wall *w4;
};
/* Mesh wall. */
struct t_wall_ {
    int     id;        /* index of the wall inside mesh->wall */
    double *fR_star;   /* array of right fluxes at the wall (same dimension as U) */
    double *fL_star;   /* array of left fluxes at the wall (same dimension as U) */
    int     cellR_id;  /* id of the cell on the right of the wall */
    int     cellL_id;  /* id of the cell on the left of the wall */
    t_cell *cellR;     /* pointer to the right cell (element of mesh->cell) */
    t_cell *cellL;     /* pointer to the left cell (element of mesh->cell) */
    double  nx;        /* presumably the x component of the wall normal - TODO confirm */
    double  ny;        /* presumably the y component of the wall normal - TODO confirm */
};
/* Whole mesh: sizes plus the cell, wall and node arrays. */
struct t_mesh_ {
    int     xcells;  /* cells per row; create_mesh() computes ncells = xcells*ycells */
    int     ycells;  /* number of rows of cells */
    double  dx;      /* presumably the cell size along x - TODO confirm */
    double  dy;      /* presumably the cell size along y - TODO confirm */
    int     ncells;  /* number of cells */
    int     nwalls;  /* number of walls */
    int     nnodes;  /* presumably the number of nodes - TODO confirm */
    t_cell *cell;    /* array of cell structures */
    t_wall *wall;    /* array of wall structures */
    t_node *node;    /* array of node structures */
    t_sim  *sim;     /* simulation parameters; set by create_mesh() */
};
/* Scalar simulation parameters. */
struct t_sim_ {
    double dt;        /* time step used by update_cellK1() */
    double t;         /* current simulation time, updated in main() */
    double CFL;       /* presumably the CFL number - TODO confirm */
    double tf;        /* time at which the loop in main() stops */
    double tVolc;     /* meaning not visible in this file - TODO confirm */
    int    rk_steps;  /* presumably the number of Runge-Kutta stages - TODO confirm */
    int    order;     /* presumably the order of the scheme - TODO confirm */
    int    nvar;      /* number of variables (dimension of U, fR_star, fL_star...) */
};
////////////////////////////////////////////////////
////// F U N C T I O N D E F I N I T I O N //////
////////////////////////////////////////////////////
/* Allocates the cell and wall arrays of `mesh` and links them; returns 1. */
int  create_mesh(t_mesh *mesh, t_sim *sim);

/* Updates U of every cell from the wall fluxes and sim->dt. */
void update_cellK1(t_mesh *mesh, t_sim *sim);
////////////////////////////////////////////////////
////// P R E - P R O C. F U N C T I O N S ///////
////////////////////////////////////////////////////
/*
 * Allocates the cell and wall arrays of the mesh and links cells and walls
 * to each other through pointers.
 *
 * mesh : mesh->xcells and mesh->ycells are read here, so they must be set
 *        before the call. mesh->sim, mesh->ncells, mesh->cell, mesh->nwalls
 *        and mesh->wall are written.
 * sim  : sim->nvar gives the length of every fR_star, fL_star and U array.
 *
 * Returns 1 on every path visible in this listing.
 *
 * Parts of the body are elided ("// ..."), so this description covers the
 * visible statements only.
 * NOTE(review): none of the malloc() results are checked before use.
 */
int create_mesh(t_mesh *mesh, t_sim *sim){
int i,l,m,k,aux,p;
int xcells,ycells;
t_cell *cell;
t_wall *wall;
t_node *node;
int semiSt;
mesh->sim=sim;
//Cells: one t_cell per (l,m) pair, stored row by row
xcells=mesh->xcells;
ycells=mesh->ycells;
mesh->ncells=xcells*ycells;
mesh->cell=(t_cell*)malloc(mesh->ncells*sizeof(t_cell));
cell=mesh->cell;
//Walls: two per cell plus xcells+ycells extra ones
mesh->nwalls=2*mesh->ncells+xcells+mesh->ycells;
mesh->wall=(t_wall*)malloc(mesh->nwalls*sizeof(t_wall));
wall=mesh->wall;
//Each wall gets its own pair of flux arrays of length sim->nvar
for(k=0;k<mesh->nwalls;k++){
wall[k].id=k;
wall[k].fR_star=(double*)malloc(sim->nvar*sizeof(double));
wall[k].fL_star=(double*)malloc(sim->nvar*sizeof(double));
}
//Walls and nodes of the cells
//NOTE(review): these bounds stop at ycells-1 / xcells-1, so in the visible code
//the cells of the last row and last column get no id and no wall pointers, while
//the next loop nest dereferences w1..w4 of every cell. Presumably the elided
//code covers those cells - confirm.
for(m=0;m<ycells-1;m++){
for(l=0;l<xcells-1;l++){
k=xcells*m+l;
cell[k].id=k;
cell[k].l=l;
cell[k].m=m;
cell[k].w1_id=2*(k)+m;
// ... (elided; presumably w2_id, w3_id and w4_id are assigned here - confirm)
cell[k].w1=&(mesh->wall[cell[k].w1_id]); // <------- cells' walls used in function "update_cellK1()" are pointers to mesh->wall
cell[k].w2=&(mesh->wall[cell[k].w2_id]); // <------- cells' walls used in function "update_cellK1()" are pointers to mesh->wall
cell[k].w3=&(mesh->wall[cell[k].w3_id]); // <------- cells' walls used in function "update_cellK1()" are pointers to mesh->wall
cell[k].w4=&(mesh->wall[cell[k].w4_id]); // <------- cells' walls used in function "update_cellK1()" are pointers to mesh->wall
// ...
// ...
}
}
//Assignment of wall's neighbour cells: the cell becomes the right neighbour
//of its walls w1 and w4 and the left neighbour of its walls w2 and w3
for(m=0;m<ycells;m++){
for(l=0;l<xcells;l++){
k=xcells*m+l;
cell[k].w1->cellR_id=cell[k].id;
// ...
cell[k].w1->cellR=&(cell[k]);
cell[k].w4->cellR=&(cell[k]);
cell[k].w2->cellL=&(cell[k]);
cell[k].w3->cellL=&(cell[k]);
//...
//other special cases omitted
//...
}
}
//Allocation of the array of variables "U" of every cell (sim->nvar entries each)
for(k=0;k<mesh->ncells;k++){
mesh->cell[k].U =(double*)malloc(sim->nvar*sizeof(double));
}
return 1;
}
/*
 * Advances U of every cell by one step of size sim->dt using the fluxes
 * stored on the four walls of the cell.
 *
 * mesh : mesh->ncells cells are processed; the U array of each is modified in place.
 * sim  : sim->nvar is the number of entries updated per cell, sim->dt the step.
 *
 * Outer-loop iterations write only to the U array of their own cell. The
 * loop-scoped variables are private to each thread; mesh and sim are shared.
 */
void update_cellK1(t_mesh *mesh, t_sim *sim){
#pragma omp parallel for default(none) shared(sim,mesh)
    for(int c=0;c<mesh->ncells;c++){
        t_cell *cur=&(mesh->cell[c]);
        for(int v=0;v<sim->nvar;v++){
            // flux difference across walls w2/w4, divided by dx
            const double diff_x=(cur->w2->fL_star[v]-cur->w4->fR_star[v])/cur->dx;
            // flux difference across walls w3/w1, divided by dy
            const double diff_y=(cur->w3->fL_star[v]-cur->w1->fR_star[v])/cur->dy;
            cur->U[v]-=sim->dt*(diff_x + diff_y);
        }
    }
}
////////////////////////////////////////////////////
//////////////////// M A I N ///////////////////////
////////////////////////////////////////////////////
/*
 * Program entry point: allocates the mesh and simulation structures, builds
 * the mesh, sets the initial state and runs the time loop until sim->tf.
 *
 * Returns 0 when the time loop finishes and 1 when the initial allocation
 * fails. The original returned 1 on success, which the calling environment
 * reads as a failure status.
 */
int main(int argc, char * argv[]){
    int i, j, k, p;
    t_mesh *mesh;
    t_sim *sim;
    double tf,t;
    int nIt;
    double timeac;

    omp_set_num_threads(NTHREADS);

    //Mesh and sim allocation
    mesh=(t_mesh*)malloc(sizeof(t_mesh));
    sim =(t_sim*)malloc(sizeof(t_sim));
    if(!mesh || !sim){
        //create_mesh() dereferences both pointers, so stop here instead of
        //crashing later; the process is about to exit, so nothing is released
        return 1;
    }

    ////////////////////////////////////////////////////
    ////////////// P R E - P R O C E S S ///////////////
    ////////////////////////////////////////////////////
    //...
    //variable initialization and file reading omitted
    //cell->dx= ...
    //cell->dy= ...
    //...
    create_mesh(mesh,sim);
    update_initial(mesh); //this function (omitted) assigns the initial values of cell[k].U[0] ... cell[k].U[4]

    ////////////////////////////////////////////////////
    ////////////// C A L C U L A T I O N ///////////////
    ////////////////////////////////////////////////////
    tf=sim->tf;
    sim->t=0.0;
    t=0.0;
    while(t<tf){
        compute_fluxes(mesh,sim); //this function (omitted) computes fR_star and fL_star of the walls (struct t_wall_), which are then used in "update_cellK1()"
        update_dt(mesh,sim); //this function (omitted) computes sim->dt
        update_cellK1(mesh,sim);
        t+=sim->dt; //Time updated
        sim->t=t;
    }
    return 0;
}
【问题讨论】:
标签: arrays c pointers parallel-processing openacc