【发布时间】:2020-10-09 05:41:27
【问题描述】:
我正在努力使一些 C 代码适应 Python。总的来说,我对 C 语言非常缺乏经验,而且代码是用 C90 编写的,在本地机器上编译时不会提供准确的结果。
正因为如此,我想让我正在使用的程序“面向未来”。虽然我已经很好地理解了该程序的功能,但与等效的 C 程序相比,我的 Python 版本运行速度非常慢。我想看看能否在不引入 Cython 的情况下优化这段代码。
先说明一下这段代码在做什么。它从一个起点开始,起点周围是一个 5x5x5 的小立方体。在立方体内,计算到每个像元中心点的行程时间(表示为 time0[index])。从那里开始,立方体在每个维度上每次扩展 1 个单元格,直到计算完预定义区域(在我的例子中为 400x400x53)的行程时间。
这样做的问题在于,计算行程时间依赖于相邻像元,这些像元是在 for 循环的前面步骤中计算的(每次像元扩展最多可以进行 160000 次迭代)。立方体的每一面墙都由一个 for 循环展开,每个循环包含 49 个 if 语句。对于所有 6 个循环,总共有 294 个 if 语句,它们针对立方体维度的每次扩展运行。
长话短说,这里执行了很多 if 语句。下面给出了立方体顶面扩展部分的代码示例。我非常希望能在这里得到一些建议,因为这段代码超出了我之前从事过的任何项目。
# Expand the top face of the timed box by one layer: every cell of plane z1
# inside (x1, x2) x (y1, y2) gets the smallest travel time that any of the
# finite-difference stencils can produce from plane z1 + 1 and from cells of
# plane z1 that were already updated earlier in this sweep.
if dz1 > 0:
    zp = z1 + 1  # neighbouring plane the stencils read from

    # Neighbour offsets (dx, dy).  The order of each tuple is the evaluation
    # order and must not change: fhead depends on the running value of guess.
    corner_steps_3d = ((1, 1), (-1, 1), (1, -1), (-1, -1))
    corner_steps_head = ((1, 1), (1, -1), (-1, 1), (-1, -1))
    axis_steps = ((1, 0), (-1, 0), (0, 1), (0, -1))

    # Gather the face cells in j-major / i-minor order without a Python loop.
    xs = np.arange(x1 + 1, x2)
    ys = np.arange(y1 + 1, y2)
    ii = xs.size * ys.size
    sort.time[:ii] = T0[x1 + 1:x2, y1 + 1:y2, zp].T.ravel()
    sort.i1[:ii] = np.tile(xs, ys.size)
    sort.i2[:ii] = np.repeat(ys, xs.size)

    # Visit the cells in order of increasing time on plane z1 + 1.
    sort_index = np.argsort(sort.time[:ii])
    sort.time[:ii] = sort.time[:ii][sort_index]
    sort.i1[:ii] = sort.i1[:ii][sort_index]
    sort.i2[:ii] = sort.i2[:ii][sort_index]

    # Plain Python ints index faster than NumPy scalars inside the loop.
    cells = zip(sort.i1[:ii].astype(int).tolist(),
                sort.i2[:ii].astype(int).tolist())
    for X1, X2 in cells:
        index = int(z1 * nxy + X2 * nx + X1)  # flat position in time0/slow0
        fhead = 0
        guess = time0[index]

        # Values shared by every stencil of this cell; none of them is
        # written before the end of the iteration.
        t_prev = T0[X1, X2, zp]
        s_here = S0[X1, X2, z1]
        s_prev = S0[X1, X2, zp]
        inside_x = x1 + 1 < X1 < x2 - 1
        inside_y = y1 + 1 < X2 < y2 - 1

        # NOTE: in every guard below the grid-bounds test comes first, so
        # time0 is never read through a wrapped (negative) flat index.
        # A time0 value >= 1.e9 presumably marks a cell that has no travel
        # time yet -- confirm against the initialisation code.

        # 3-D stencils: one per diagonal quadrant.
        for dx, dy in corner_steps_3d:
            xn, yn = X1 + dx, X2 + dy
            if not (0 <= xn < nx and 0 <= yn < ny):
                continue
            if (time0[index + dx] < 1.e9
                    and time0[index + dy * nx + dx] < 1.e9
                    and time0[index + dy * nx] < 1.e9):
                attempt = fdh3d(
                    t_prev, T0[xn, X2, zp], T0[xn, yn, zp], T0[X1, yn, zp],
                    T0[xn, X2, z1], T0[xn, yn, z1], T0[X1, yn, z1],
                    s_here, s_prev,
                    S0[xn, X2, zp], S0[xn, yn, zp], S0[X1, yn, zp],
                    S0[xn, X2, z1], S0[xn, yn, z1], S0[X1, yn, z1])
                if attempt < guess:
                    guess = attempt

        # fdhne stencils, tried only while no finite time has been found.
        # The test is made once for the whole group, not per stencil.
        if guess > 1.0e9:
            for dx, dy in axis_steps:
                xn, yn = X1 + dx, X2 + dy
                # The stencil also reads the two cells beside the neighbour
                # on plane z1 + 1, so the cell must not lie on the face rim
                # in the perpendicular direction.
                lateral_ok = inside_y if dx else inside_x
                if not (lateral_ok and 0 <= xn < nx and 0 <= yn < ny):
                    continue
                if time0[index + dy * nx + dx] < 1.e9:
                    px, py = abs(dy), abs(dx)  # unit step across (dx, dy)
                    attempt = fdhne(
                        t_prev, T0[xn, yn, zp], T0[xn, yn, z1],
                        T0[xn - px, yn - py, zp], T0[xn + px, yn + py, zp],
                        s_here, s_prev, S0[xn, yn, zp], S0[xn, yn, z1])
                    if attempt < guess:
                        guess = attempt

        # fdh2d stencils in the vertical planes through each axis neighbour.
        for dx, dy in axis_steps:
            xn, yn = X1 + dx, X2 + dy
            if (0 <= xn < nx and 0 <= yn < ny
                    and time0[index + dy * nx + dx] < 1.e9):
                attempt = fdh2d(
                    t_prev, T0[xn, yn, zp], T0[xn, yn, z1],
                    s_here, s_prev, S0[xn, yn, zp], S0[xn, yn, z1])
                if attempt < guess:
                    guess = attempt

        # fdh2d stencils that use only plane z1; an improvement here also
        # records fhead.
        for dx, dy in corner_steps_head:
            xn, yn = X1 + dx, X2 + dy
            if not (0 <= xn < nx and 0 <= yn < ny):
                continue
            if (time0[index + dx] < 1.e9
                    and time0[index + dy * nx + dx] < 1.e9
                    and time0[index + dy * nx] < 1.e9):
                attempt = fdh2d(
                    T0[xn, X2, z1], T0[xn, yn, z1], T0[X1, yn, z1],
                    s_here, S0[xn, X2, z1], S0[xn, yn, z1], S0[X1, yn, z1])
                if attempt < guess:
                    fhead = (guess - attempt) / slow0[index]
                    guess = attempt

        # fdhnf stencil, tried only while no finite time has been found.
        if guess > 1.0e9 and inside_x and inside_y:
            attempt = fdhnf(
                t_prev, T0[X1 + 1, X2, zp], T0[X1, X2 + 1, zp],
                T0[X1 - 1, X2, zp], T0[X1, X2 - 1, zp],
                s_here, s_prev)
            if attempt < guess:
                guess = attempt

        # 1-D step straight from plane z1 + 1.
        attempt = t_prev + .5 * (s_here + s_prev)
        if attempt < guess:
            guess = attempt

        # 1-D steps from the four axis neighbours on plane z1; an
        # improvement here also records fhead.
        for dx, dy in axis_steps:
            xn, yn = X1 + dx, X2 + dy
            if (0 <= xn < nx and 0 <= yn < ny
                    and time0[index + dy * nx + dx] < 1.e9):
                attempt = T0[xn, yn, z1] + .5 * (s_here + S0[xn, yn, z1])
                if attempt < guess:
                    fhead = (guess - attempt) / slow0[index]
                    guess = attempt

        # Commit only a genuine improvement; the flat and the 3-D array are
        # both written so they stay in step.
        if guess < time0[index]:
            time0[index] = guess
            T0[X1, X2, z1] = guess
            if fhead > headtest:
                headw[5] += 1

    # Stop growing this face once plane 0 has been processed.
    if z1 == 0:
        dz1 = 0
    z1 -= 1
我也尝试通过减小 for 循环的大小来加速代码,但这实际上减慢了代码的速度!这是我所做的一个示例:
# Build the visiting order for the top face with NumPy only.  Building the
# index arrays from itertools.product in a list comprehension creates every
# element as a Python object, twice, which costs more than the loop it was
# meant to replace.
if dz1 > 0:
    xs = np.arange(x1 + 1, x2)
    ys = np.arange(y1 + 1, y2)
    # flatten() walks the (x, y) slice in C order, i.e. x-major / y-minor;
    # repeat/tile generate the matching index pairs in that same order.
    sort.time = T0[x1 + 1:x2, y1 + 1:y2, z1 + 1].flatten()
    sort.i1 = np.repeat(xs, ys.size)
    sort.i2 = np.tile(ys, xs.size)
    ii = sort.time.size
    sort_index = np.argsort(sort.time)
    sort.time = sort.time[sort_index]
    sort.i1 = sort.i1[sort_index]
    sort.i2 = sort.i2[sort_index]
    # tolist() yields plain Python ints, which are cheaper to do arithmetic
    # and indexing with than NumPy scalars inside a Python-level loop.
    for X1, X2 in zip(sort.i1.tolist(), sort.i2.tolist()):
        index = z1*nxy + X2*nx + X1
【问题讨论】:
-
我发现将 Numba 模块用于这段代码中调用的一些较小的函数确实有助于加快速度。
标签: python-3.x if-statement optimization nested-loops