I have the following code:
function [Ps,Pd,Pv] = Arii2010_Modified_1Pixel(C11,C12_imag,C12_real,C13_imag,C13_real,C22,C23_imag,C23_real,C33)
MeanOrientationAngleStep = 1/100;
StandardDeviationStep = 1/100;
NumberOfAngles = floor(2*pi/MeanOrientationAngleStep);
NumberOfStandandardDeviations = 1/StandardDeviationStep;
for i = 0:NumberOfAngles
for j = 0:NumberOfStandandardDeviations
theta = -pi+i*MeanOrientationAngleStep;
sigma = j*StandardDeviationStep;
Fv = FindFv(sigma,theta,C11,C12_imag,C12_real,C13_imag,C13_real,C22,C23_imag,C23_real,C33);
end
end
end
function Fv = FindFv(sigma,theta,C11,C12_imag,C12_real,C13_imag,C13_real,C22,C23_imag,C23_real,C33)
coef1 = 1/2-(((1-2*sigma^2)*(1-sigma^2)*cos(2*theta))/(2*(1+sigma^2)));
coef2 = ((1-4*sigma^2)*(1-3*sigma^2)*(1-2*sigma^2)*(1-sigma^2)*cos(4*theta))/(8*(1+sigma^2)*(1+2*sigma^2)*(1+3*sigma^2));
coef3 = ((1-2*sigma^2)*(1-sigma^2)*sin(2*theta))/(2*sqrt(2)*(1+sigma^2));
coef4 = ((1-4*sigma^2)*(1-3*sigma^2)*(1-2*sigma^2)*(1-sigma^2)*sin(4*theta))/(4*sqrt(2)*(1+sigma^2)*(1+2*sigma^2)*(1+3*sigma^2));
Fv1 = C11/(coef1+coef2);
aQuadratic1 = -2*coef2*(coef1+coef2);
aQuadratic2 = (coef3-coef4)^2;
aQuadratic = aQuadratic1 - aQuadratic2;
bQuadratic1 = 2*C11*coef2-C22*(coef1+coef2);
bQuadratic2 = -2*C12_real*(coef3-coef4);
bQuadratic = bQuadratic1 - bQuadratic2;
cQuadratic1 = C11*C22;
cQuadratic2 = C12_real^2+C12_imag^2;
cQuadratic = cQuadratic1 - cQuadratic2;
rQuadratic = roots([aQuadratic bQuadratic cQuadratic]);
sel1 = rQuadratic == real(rQuadratic);
rQuadratic = rQuadratic(sel1);
sel2 = rQuadratic == abs(rQuadratic);
rQuadratic = rQuadratic(sel2);
Fv2 = max(rQuadratic);
aCubic1 = 2*coef2*(1-coef1+coef2)*(coef1+coef2);
aCubic2 = 2*coef2*(coef3-coef4)*(coef3+coef4);
aCubic3 = -(coef1+coef2)*(coef3+coef4)^2;
aCubic4 = 2*coef2^3;
aCubic5 = -(1-coef1+coef2)*(coef3-coef4)^2;
aCubic = aCubic1+aCubic2-aCubic3-aCubic4-aCubic5;
bCubic1 = -2*coef2*(C11*(1-coef1+coef2)+C33*(coef1+coef2))+C22*(1-coef1+coef2)*(coef1+coef2);
bCubic2 = -2*coef2*(C23_real*(coef3-coef4)+C12_real*(coef3+coef4))+2*C13_real*(coef3-coef4)*(coef3+coef4);
bCubic3 = 2*C23_real*(coef1+coef2)*(coef3+coef4)+C11*(coef3+coef4)^2;
bCubic4 = (4*C13_real+C22)*coef2^2;
bCubic5 = 2*C12_real*(1-coef1+coef2)*(coef3-coef4)+C33*(coef3-coef4)^2;
bCubic = bCubic1+bCubic2-bCubic3-bCubic4-bCubic5;
cCubic1 = 2*C11*C33*coef2-C11*C22*(1-coef1+coef2)-C22*C33*(coef1+coef2);
cCubic2 = 2*(C12_real*C23_real-C12_imag*C23_imag)*coef2-2*(C13_real*C23_real+C13_imag*C23_imag)*...
(coef3-coef4)-2*(C12_real*C13_real+C12_imag*C13_imag)*(coef3+coef4);
cCubic3 = -(C23_real^2+C23_imag^2)*(coef1+coef2)-2*C11*C23_real*(coef3+coef4);
cCubic4 = 2*(C13_real^2+C13_imag^2+C13_real*C22)*coef2;
cCubic5 = -(C12_real^2+C12_imag^2)*(1-coef1+coef2)-2*C33*C12_real*(coef3-coef4);
cCubic = cCubic1+cCubic2-cCubic3-cCubic4-cCubic5;
dCubic1 = C11*C22*C33;
dCubic2 = 2*(C12_real*(C13_real*C23_real+C13_imag*C23_imag)+C12_imag*(C13_imag*C23_real-C13_real*C23_imag));
dCubic3 = C11*(C23_real^2+C23_imag^2);
dCubic4 = C22*(C13_real^2+C13_imag^2);
dCubic5 = C33*(C12_real^2+C12_imag^2);
dCubic = dCubic1+dCubic2-dCubic3-dCubic4-dCubic5;
rCubic = roots([aCubic bCubic cCubic dCubic]);
sel3 = rCubic == real(rCubic);
rCubic = rCubic(sel3);
sel4 = rCubic == abs(rCubic);
rCubic = rCubic(sel4);
Fv3 = max(rCubic);
Fv = min([Fv1 Fv2 Fv3]);
end
I'm wondering if there's any function like bsxfun
, blockproc
or arrayfun
to vectorize this part of the code?
for i = 0:NumberOfAngles
for j = 0:NumberOfStandandardDeviations
theta = -pi+i*MeanOrientationAngleStep;
sigma = j*StandardDeviationStep;
Fv = FindFv(sigma,theta,C11,C12_imag,C12_real,C13_imag,C13_real,C22,C23_imag,C23_real,C33);
end
end
I've commented that you should probably either put the loops inside your vectorized worker function, or/and use compiled MEX code. Here's my take on the first approach:
function [Ps,Pd,Pv] = Arii2010_Modified_1Pixel(C11,C12_imag,C12_real,C13_imag,C13_real,C22,C23_imag,C23_real,C33)
MeanOrientationAngleStep = 1/100;
StandardDeviationStep = 1/100;
NumberOfAngles = floor(2*pi/MeanOrientationAngleStep);
NumberOfStandandardDeviations = 1/StandardDeviationStep;
%// set up vectorized input
thetav = -pi + (0:NumberOfAngles)*MeanOrientationAngleStep;
sigmav = (0:NumberOfStandandardDeviations)*StandardDeviationStep;
%// grid for parameter pairs
[theta, sigma] = meshgrid(thetav,sigmav);
%// run vectorized function
Fv_vec = FindFv_vec(sigma,theta,C11,C12_imag,C12_real,C13_imag,C13_real,C22,C23_imag,C23_real,C33);
end
function Fv_vec = FindFv_vec(sigma,theta,C11,C12_imag,C12_real,C13_imag,C13_real,C22,C23_imag,C23_real,C33)
coef1 = 1./2-(((1-2.*sigma.^2).*(1-sigma.^2).*cos(2.*theta))./(2.*(1+sigma.^2)));
coef2 = ((1-4.*sigma.^2).*(1-3.*sigma.^2).*(1-2.*sigma.^2).*(1-sigma.^2).*cos(4.*theta))./(8.*(1+sigma.^2).*(1+2.*sigma.^2).*(1+3.*sigma.^2));
coef3 = ((1-2.*sigma.^2).*(1-sigma.^2).*sin(2.*theta))./(2.*sqrt(2).*(1+sigma.^2));
coef4 = ((1-4.*sigma.^2).*(1-3.*sigma.^2).*(1-2.*sigma.^2).*(1-sigma.^2).*sin(4.*theta))./(4.*sqrt(2).*(1+sigma.^2).*(1+2.*sigma.^2).*(1+3.*sigma.^2));
%// this is OK:
Fv1 = C11./(coef1+coef2);
aQuadratic1 = -2.*coef2.*(coef1+coef2);
aQuadratic2 = (coef3-coef4).^2;
aQuadratic = aQuadratic1 - aQuadratic2;
bQuadratic1 = 2.*C11.*coef2-C22.*(coef1+coef2);
bQuadratic2 = -2.*C12_real.*(coef3-coef4);
bQuadratic = bQuadratic1 - bQuadratic2;
cQuadratic1 = C11.*C22;
cQuadratic2 = C12_real.^2+C12_imag.^2;
cQuadratic = cQuadratic1 - cQuadratic2;
%// move this down to one block for looping:
%//rQuadratic = roots([aQuadratic bQuadratic cQuadratic]);
%//sel1 = rQuadratic == real(rQuadratic);
%//rQuadratic = rQuadratic(sel1);
%//sel2 = rQuadratic == abs(rQuadratic);
%//rQuadratic = rQuadratic(sel2);
%//Fv2 = max(rQuadratic);
aCubic1 = 2.*coef2.*(1-coef1+coef2).*(coef1+coef2);
aCubic2 = 2.*coef2.*(coef3-coef4).*(coef3+coef4);
aCubic3 = -(coef1+coef2).*(coef3+coef4).^2;
aCubic4 = 2.*coef2.^3;
aCubic5 = -(1-coef1+coef2).*(coef3-coef4).^2;
aCubic = aCubic1+aCubic2-aCubic3-aCubic4-aCubic5;
bCubic1 = -2.*coef2.*(C11.*(1-coef1+coef2)+C33.*(coef1+coef2))+C22.*(1-coef1+coef2).*(coef1+coef2);
bCubic2 = -2.*coef2.*(C23_real.*(coef3-coef4)+C12_real.*(coef3+coef4))+2.*C13_real.*(coef3-coef4).*(coef3+coef4);
bCubic3 = 2.*C23_real.*(coef1+coef2).*(coef3+coef4)+C11.*(coef3+coef4).^2;
bCubic4 = (4.*C13_real+C22).*coef2.^2;
bCubic5 = 2.*C12_real.*(1-coef1+coef2).*(coef3-coef4)+C33.*(coef3-coef4).^2;
bCubic = bCubic1+bCubic2-bCubic3-bCubic4-bCubic5;
cCubic1 = 2.*C11.*C33.*coef2-C11.*C22.*(1-coef1+coef2)-C22.*C33.*(coef1+coef2);
cCubic2 = 2.*(C12_real.*C23_real-C12_imag.*C23_imag).*coef2-2.*(C13_real.*C23_real+C13_imag.*C23_imag).*...
(coef3-coef4)-2.*(C12_real.*C13_real+C12_imag.*C13_imag).*(coef3+coef4);
cCubic3 = -(C23_real.^2+C23_imag.^2).*(coef1+coef2)-2.*C11.*C23_real.*(coef3+coef4);
cCubic4 = 2.*(C13_real.^2+C13_imag.^2+C13_real.*C22).*coef2;
cCubic5 = -(C12_real.^2+C12_imag.^2).*(1-coef1+coef2)-2.*C33.*C12_real.*(coef3-coef4);
cCubic = cCubic1+cCubic2-cCubic3-cCubic4-cCubic5;
dCubic1 = C11.*C22.*C33;
dCubic2 = 2.*(C12_real.*(C13_real.*C23_real+C13_imag.*C23_imag)+C12_imag.*(C13_imag.*C23_real-C13_real.*C23_imag));
dCubic3 = C11.*(C23_real.^2+C23_imag.^2);
dCubic4 = C22.*(C13_real.^2+C13_imag.^2);
dCubic5 = C33.*(C12_real.^2+C12_imag.^2);
dCubic = dCubic1+dCubic2-dCubic3-dCubic4-dCubic5;
%// move this down to one block for looping:
%//rCubic = roots([aCubic bCubic cCubic dCubic]);
%//sel3 = rCubic == real(rCubic);
%//rCubic = rCubic(sel3);
%//sel4 = rCubic == abs(rCubic);
%//rCubic = rCubic(sel4);
%//Fv3 = max(rCubic);
Fv_vec = zeros(size(theta));
for k=1:numel(theta)
rQuadratic = roots([aQuadratic(k) bQuadratic(k) cQuadratic(k)]);
%//sel1 = rQuadratic == real(rQuadratic);
%//rQuadratic = rQuadratic(sel1);
%//sel2 = rQuadratic == abs(rQuadratic);
%//rQuadratic = rQuadratic(sel2);
%// suggestion instead: (with optional tol=1e-10 or something else earlier)
rQuadratic = rQuadratic(abs(rQuadratic-abs(rQuadratic))<1e-10); % allow some tolerance due to floating-point
Fv2 = max(rQuadratic);
rCubic = roots([aCubic(k) bCubic(k) cCubic(k) dCubic(k)]);
%//sel3 = rCubic == real(rCubic);
%//rCubic = rCubic(sel3);
%//sel4 = rCubic == abs(rCubic);
%//rCubic = rCubic(sel4);
rCubic = rCubic(abs(rCubic-abs(rCubic))<1e-10);
Fv3 = max(rCubic);
Fv_vec(k) = min([Fv1(k); Fv2; Fv3]); %// Fv1 is a matrix!
end
end
Since you didn't give any sample inputs, I haven't try to come up with one, so I haven't tested it.
What I've done is change every arithmetic operation to their vectorized pairs (.*
,./
,.^
), and restructure the code to use a single loop where the roots
are being computed. The input theta
and sigma
can be matrices now, corresponding to pairs of the input vectors (constructed with meshgrid
). My solution assumes that all the input parameters are scalars. I've also changed the part for finding the largest absolute value, allowing for some small errors due to floating-point arithmetic.