【发布时间】:2016-03-04 19:12:05
【问题描述】:
我有以下代码:
function [Ps,Pd,Pv] = Arii2010_Modified_1Pixel(C11,C12_imag,C12_real,C13_imag,C13_real,C22,C23_imag,C23_real,C33)
MeanOrientationAngleStep = 1/100;
StandardDeviationStep = 1/100;
NumberOfAngles = floor(2*pi/MeanOrientationAngleStep);
NumberOfStandandardDeviations = 1/StandardDeviationStep;
for i = 0:NumberOfAngles
for j = 0:NumberOfStandandardDeviations
theta = -pi+i*MeanOrientationAngleStep;
sigma = j*StandardDeviationStep;
Fv = FindFv(sigma,theta,C11,C12_imag,C12_real,C13_imag,C13_real,C22,C23_imag,C23_real,C33);
end
end
end
function Fv = FindFv(sigma,theta,C11,C12_imag,C12_real,C13_imag,C13_real,C22,C23_imag,C23_real,C33)
coef1 = 1/2-(((1-2*sigma^2)*(1-sigma^2)*cos(2*theta))/(2*(1+sigma^2)));
coef2 = ((1-4*sigma^2)*(1-3*sigma^2)*(1-2*sigma^2)*(1-sigma^2)*cos(4*theta))/(8*(1+sigma^2)*(1+2*sigma^2)*(1+3*sigma^2));
coef3 = ((1-2*sigma^2)*(1-sigma^2)*sin(2*theta))/(2*sqrt(2)*(1+sigma^2));
coef4 = ((1-4*sigma^2)*(1-3*sigma^2)*(1-2*sigma^2)*(1-sigma^2)*sin(4*theta))/(4*sqrt(2)*(1+sigma^2)*(1+2*sigma^2)*(1+3*sigma^2));
Fv1 = C11/(coef1+coef2);
aQuadratic1 = -2*coef2*(coef1+coef2);
aQuadratic2 = (coef3-coef4)^2;
aQuadratic = aQuadratic1 - aQuadratic2;
bQuadratic1 = 2*C11*coef2-C22*(coef1+coef2);
bQuadratic2 = -2*C12_real*(coef3-coef4);
bQuadratic = bQuadratic1 - bQuadratic2;
cQuadratic1 = C11*C22;
cQuadratic2 = C12_real^2+C12_imag^2;
cQuadratic = cQuadratic1 - cQuadratic2;
rQuadratic = roots([aQuadratic bQuadratic cQuadratic]);
sel1 = rQuadratic == real(rQuadratic);
rQuadratic = rQuadratic(sel1);
sel2 = rQuadratic == abs(rQuadratic);
rQuadratic = rQuadratic(sel2);
Fv2 = max(rQuadratic);
aCubic1 = 2*coef2*(1-coef1+coef2)*(coef1+coef2);
aCubic2 = 2*coef2*(coef3-coef4)*(coef3+coef4);
aCubic3 = -(coef1+coef2)*(coef3+coef4)^2;
aCubic4 = 2*coef2^3;
aCubic5 = -(1-coef1+coef2)*(coef3-coef4)^2;
aCubic = aCubic1+aCubic2-aCubic3-aCubic4-aCubic5;
bCubic1 = -2*coef2*(C11*(1-coef1+coef2)+C33*(coef1+coef2))+C22*(1-coef1+coef2)*(coef1+coef2);
bCubic2 = -2*coef2*(C23_real*(coef3-coef4)+C12_real*(coef3+coef4))+2*C13_real*(coef3-coef4)*(coef3+coef4);
bCubic3 = 2*C23_real*(coef1+coef2)*(coef3+coef4)+C11*(coef3+coef4)^2;
bCubic4 = (4*C13_real+C22)*coef2^2;
bCubic5 = 2*C12_real*(1-coef1+coef2)*(coef3-coef4)+C33*(coef3-coef4)^2;
bCubic = bCubic1+bCubic2-bCubic3-bCubic4-bCubic5;
cCubic1 = 2*C11*C33*coef2-C11*C22*(1-coef1+coef2)-C22*C33*(coef1+coef2);
cCubic2 = 2*(C12_real*C23_real-C12_imag*C23_imag)*coef2-2*(C13_real*C23_real+C13_imag*C23_imag)*...
(coef3-coef4)-2*(C12_real*C13_real+C12_imag*C13_imag)*(coef3+coef4);
cCubic3 = -(C23_real^2+C23_imag^2)*(coef1+coef2)-2*C11*C23_real*(coef3+coef4);
cCubic4 = 2*(C13_real^2+C13_imag^2+C13_real*C22)*coef2;
cCubic5 = -(C12_real^2+C12_imag^2)*(1-coef1+coef2)-2*C33*C12_real*(coef3-coef4);
cCubic = cCubic1+cCubic2-cCubic3-cCubic4-cCubic5;
dCubic1 = C11*C22*C33;
dCubic2 = 2*(C12_real*(C13_real*C23_real+C13_imag*C23_imag)+C12_imag*(C13_imag*C23_real-C13_real*C23_imag));
dCubic3 = C11*(C23_real^2+C23_imag^2);
dCubic4 = C22*(C13_real^2+C13_imag^2);
dCubic5 = C33*(C12_real^2+C12_imag^2);
dCubic = dCubic1+dCubic2-dCubic3-dCubic4-dCubic5;
rCubic = roots([aCubic bCubic cCubic dCubic]);
sel3 = rCubic == real(rCubic);
rCubic = rCubic(sel3);
sel4 = rCubic == abs(rCubic);
rCubic = rCubic(sel4);
Fv3 = max(rCubic);
Fv = min([Fv1 Fv2 Fv3]);
end
我想知道是否有像bsxfun、blockproc 或arrayfun 这样的函数来矢量化这部分代码?
for i = 0:NumberOfAngles
for j = 0:NumberOfStandandardDeviations
theta = -pi+i*MeanOrientationAngleStep;
sigma = j*StandardDeviationStep;
Fv = FindFv(sigma,theta,C11,C12_imag,C12_real,C13_imag,C13_real,C22,C23_imag,C23_real,C33);
end
end
【问题讨论】:
-
由于
FindFv似乎包含roots,它一次接受一个多项式,我认为您不能进行太多矢量化:您可能需要调用FindFvNumberOfAngles*NumberOfStandandardDeviations次。如果不是为了这个,您可以对FindFv中的每个操作进行矢量化处理,并使用max/min和感兴趣的维度上的可选输入参数。它可以帮助向量化roots之前的所有内容,并使用双循环来覆盖每个多项式。它会更快,但我不知道多少。 -
@AndrasDeak 事实上我不想向量化
FindFv函数中的代码。我想对调用FindFv函数的部分for i = 0:NumberOfAngles for j = 0:NumberOfStandandardDeviations theta = -pi+i*MeanOrientationAngleStep; sigma = j*StandardDeviationStep; Fv = FindFv(sigma,theta,C11,C12_imag,C12_real,C13_imag,C13_real,C22,C23_imag,C23_real,C33); end end进行矢量化 -
不幸的是,矢量化不是魔术棒和精灵尘的秘诀:) 如果您调用一个函数 1000 次,而该函数一次只能工作一个,那么您无能为力.有一件事:在 MEX 中重写函数并预编译它。
-
@AndrasDeak 能不能多解释一下最后一句话,介绍好的资源入手?
-
MEX 是一种编写 C 代码的方法,它不太可能与 matlab 接口。 Some docs and examples are here。由于 C 和很容易遇到的微妙问题,使用 MEX 是一件很痛苦的事情。但它使用编译后的代码,因此比高级 MATLAB 更快。无论如何,我对 MEX 和正确的矢量化都不太熟悉。我会尝试首先对您的函数进行矢量化并将循环放在
roots位周围,这可能更简单/更快。我真的不知道你的胜算。
标签: performance matlab vectorization nested-loops mex