【发布时间】:2021-08-31 01:05:51
【问题描述】:
我编写了简单的代码来使用 OpenC 求解平流方程,并将结果写入 netcdf 文件。该代码在编译期间不会产生任何错误消息,并且它运行时没有任何错误。但似乎内核没有做任何事情。内核将数值方案循环了大约 3000 次,如果它正常工作,我应该会看到一些非常不同的东西。有没有办法查明内核是否正常工作,比如打印?
下面是内核
void pbndry(int in_x_siz, int in_y_siz, int in_z_siz, global float *in_arr)
{
int i,j,k;
// Periodic boundary
// x-direction
for(k=1;k<in_z_siz+1;k++)
for(j=1;j<in_y_siz+1;j++)
{
in_arr[k * (in_y_siz+2) * (in_x_siz+2) + j * (in_x_siz+2) + 0] =
in_arr[k * (in_y_siz+2) * (in_x_siz+2) + j * (in_x_siz+2) + in_x_siz];
in_arr[k * (in_y_siz+2) * (in_x_siz+2) + j * (in_x_siz+2) + (in_x_siz+1)] =
in_arr[k * (in_y_siz+2) * (in_x_siz+2) + j * (in_x_siz+2) + 1];
}
// y-direction
for(k=1;k<in_z_siz+1;k++)
for(i=1;i<in_x_siz+1;i++)
{
in_arr[k * (in_y_siz+2) * (in_x_siz+2) + 0 * (in_x_siz+2) + i] =
in_arr[k * (in_y_siz+2) * (in_x_siz+2) + in_y_siz * (in_x_siz+2) + i];
in_arr[k * (in_y_siz+2) * (in_x_siz+2) + (in_y_siz+1) * (in_x_siz+2) + i] =
in_arr[k * (in_y_siz+2) * (in_x_siz+2) + 1 * (in_x_siz+2) + i];
}
// z-direction
for(j=1;j<in_y_siz+1;j++)
for(i=1;i<in_x_siz+1;i++)
{
in_arr[0 * (in_y_siz+2) * (in_x_siz+2) + j * (in_x_siz+2) + i] =
in_arr[in_z_siz * (in_y_siz+2) * (in_x_siz+2) + j * (in_x_siz+2) + i];
in_arr[(in_z_siz+1) * (in_y_siz+2) * (in_x_siz+2) + j * (in_x_siz+2) + i] =
in_arr[1 * (in_y_siz+2) * (in_x_siz+2) + j * (in_x_siz+2) + i];
}
}
kernel void leapfrog3d(
const int x_siz,
const int y_siz,
const int z_siz,
const int t_siz,
global float *in_p_tf,
global float *in_p_tn,
global float *in_p_tp,
const float u_vel,
const float v_vel,
const float w_vel,
const float c,
global float *in_p_rs
)
{
int nx = x_siz;
int ny = y_siz;
int nz = z_siz;
int nt = t_siz;
float u = u_vel;
float v = v_vel;
float w = w_vel;
float C = c ;
int i = get_global_id(0);
int j = get_global_id(1);
int k = get_global_id(2);
int idx0, idx_i0, idx_i1, idx_j0, idx_j1, idx_k0, idx_k1;
for(int t=1;t<t_siz;t++)
{
idx0 = i + j * (nx+2) + k * (nx+2) * (ny+2);
idx_i0 = (i+1) + j * (nx+2) + k * (nx+2) * (ny+2);
idx_j0 = i + (j+1) * (nx+2) + k * (nx+2) * (ny+2);
idx_k0 = i + j * (nx+2) + (k+1) * (nx+2) * (ny+2);
idx_i1 = (i-1) + j * (nx+2) + k * (nx+2) * (ny+2);
idx_j1 = i + (j-1) * (nx+2) + k * (nx+2) * (ny+2);
idx_k1 = i + j * (nx+2) + (k-1) * (nx+2) * (ny+2);
in_p_tf[idx0] = in_p_tp[idx0]
- u_vel * C * (in_p_tn[idx_i0] - in_p_tn[idx_i1])
- v_vel * C * (in_p_tn[idx_j0] - in_p_tn[idx_j1])
- w_vel * C * (in_p_tn[idx_k0] - in_p_tn[idx_k1]);
pbndry(nx,ny,nz,in_p_tf);
in_p_tp = in_p_tn;
in_p_tn = in_p_tf;
}
in_p_rs = in_p_tf;
}
【问题讨论】:
标签: opencl