Search code examples
performancematlabvectorizationnested-loopsmex

Is there any way to vectorize this matlab code?


I have the following code:

function [Ps,Pd,Pv] = Arii2010_Modified_1Pixel(C11,C12_imag,C12_real,C13_imag,C13_real,C22,C23_imag,C23_real,C33)
MeanOrientationAngleStep = 1/100;
StandardDeviationStep = 1/100;
NumberOfAngles = floor(2*pi/MeanOrientationAngleStep);
NumberOfStandandardDeviations = 1/StandardDeviationStep;
for i = 0:NumberOfAngles
    for j = 0:NumberOfStandandardDeviations
        theta = -pi+i*MeanOrientationAngleStep;
        sigma = j*StandardDeviationStep;
        Fv = FindFv(sigma,theta,C11,C12_imag,C12_real,C13_imag,C13_real,C22,C23_imag,C23_real,C33);
    end
end
end

function Fv = FindFv(sigma,theta,C11,C12_imag,C12_real,C13_imag,C13_real,C22,C23_imag,C23_real,C33)
coef1 = 1/2-(((1-2*sigma^2)*(1-sigma^2)*cos(2*theta))/(2*(1+sigma^2)));
coef2 = ((1-4*sigma^2)*(1-3*sigma^2)*(1-2*sigma^2)*(1-sigma^2)*cos(4*theta))/(8*(1+sigma^2)*(1+2*sigma^2)*(1+3*sigma^2));
coef3 = ((1-2*sigma^2)*(1-sigma^2)*sin(2*theta))/(2*sqrt(2)*(1+sigma^2));
coef4 = ((1-4*sigma^2)*(1-3*sigma^2)*(1-2*sigma^2)*(1-sigma^2)*sin(4*theta))/(4*sqrt(2)*(1+sigma^2)*(1+2*sigma^2)*(1+3*sigma^2));
Fv1 = C11/(coef1+coef2);
aQuadratic1 = -2*coef2*(coef1+coef2);
aQuadratic2 = (coef3-coef4)^2;
aQuadratic = aQuadratic1 - aQuadratic2;
bQuadratic1 = 2*C11*coef2-C22*(coef1+coef2);
bQuadratic2 = -2*C12_real*(coef3-coef4);
bQuadratic = bQuadratic1 - bQuadratic2;
cQuadratic1 = C11*C22;
cQuadratic2 = C12_real^2+C12_imag^2;
cQuadratic = cQuadratic1 - cQuadratic2;
rQuadratic = roots([aQuadratic bQuadratic cQuadratic]);
sel1 = rQuadratic == real(rQuadratic);
rQuadratic = rQuadratic(sel1);
sel2 = rQuadratic == abs(rQuadratic);
rQuadratic = rQuadratic(sel2);
Fv2 = max(rQuadratic);
aCubic1 = 2*coef2*(1-coef1+coef2)*(coef1+coef2);
aCubic2 = 2*coef2*(coef3-coef4)*(coef3+coef4);
aCubic3 = -(coef1+coef2)*(coef3+coef4)^2;
aCubic4 = 2*coef2^3;
aCubic5 = -(1-coef1+coef2)*(coef3-coef4)^2;
aCubic = aCubic1+aCubic2-aCubic3-aCubic4-aCubic5;
bCubic1 = -2*coef2*(C11*(1-coef1+coef2)+C33*(coef1+coef2))+C22*(1-coef1+coef2)*(coef1+coef2);
bCubic2 = -2*coef2*(C23_real*(coef3-coef4)+C12_real*(coef3+coef4))+2*C13_real*(coef3-coef4)*(coef3+coef4);
bCubic3 = 2*C23_real*(coef1+coef2)*(coef3+coef4)+C11*(coef3+coef4)^2;
bCubic4 = (4*C13_real+C22)*coef2^2;
bCubic5 = 2*C12_real*(1-coef1+coef2)*(coef3-coef4)+C33*(coef3-coef4)^2;
bCubic = bCubic1+bCubic2-bCubic3-bCubic4-bCubic5;
cCubic1 = 2*C11*C33*coef2-C11*C22*(1-coef1+coef2)-C22*C33*(coef1+coef2);
cCubic2 = 2*(C12_real*C23_real-C12_imag*C23_imag)*coef2-2*(C13_real*C23_real+C13_imag*C23_imag)*...
    (coef3-coef4)-2*(C12_real*C13_real+C12_imag*C13_imag)*(coef3+coef4);
cCubic3 = -(C23_real^2+C23_imag^2)*(coef1+coef2)-2*C11*C23_real*(coef3+coef4);
cCubic4 = 2*(C13_real^2+C13_imag^2+C13_real*C22)*coef2;
cCubic5 = -(C12_real^2+C12_imag^2)*(1-coef1+coef2)-2*C33*C12_real*(coef3-coef4);
cCubic = cCubic1+cCubic2-cCubic3-cCubic4-cCubic5;
dCubic1 = C11*C22*C33;
dCubic2 = 2*(C12_real*(C13_real*C23_real+C13_imag*C23_imag)+C12_imag*(C13_imag*C23_real-C13_real*C23_imag));
dCubic3 = C11*(C23_real^2+C23_imag^2);
dCubic4 = C22*(C13_real^2+C13_imag^2);
dCubic5 = C33*(C12_real^2+C12_imag^2);
dCubic = dCubic1+dCubic2-dCubic3-dCubic4-dCubic5;
rCubic = roots([aCubic bCubic cCubic dCubic]);
sel3 = rCubic == real(rCubic);
rCubic = rCubic(sel3);
sel4 = rCubic == abs(rCubic);
rCubic = rCubic(sel4);
Fv3 = max(rCubic);
Fv = min([Fv1 Fv2 Fv3]);
end  

I'm wondering if there's any function like bsxfun, blockproc or arrayfun to vectorize this part of the code?

for i = 0:NumberOfAngles
    for j = 0:NumberOfStandandardDeviations
        theta = -pi+i*MeanOrientationAngleStep;
        sigma = j*StandardDeviationStep;
        Fv = FindFv(sigma,theta,C11,C12_imag,C12_real,C13_imag,C13_real,C22,C23_imag,C23_real,C33);
    end
end

Solution

  • I've commented that you should probably either put the loops inside your vectorized worker function, or/and use compiled MEX code. Here's my take on the first approach:

    function [Ps,Pd,Pv] = Arii2010_Modified_1Pixel(C11,C12_imag,C12_real,C13_imag,C13_real,C22,C23_imag,C23_real,C33)
    MeanOrientationAngleStep = 1/100;
    StandardDeviationStep = 1/100;
    NumberOfAngles = floor(2*pi/MeanOrientationAngleStep);
    NumberOfStandandardDeviations = 1/StandardDeviationStep;
    
    %// set up vectorized input
    thetav = -pi + (0:NumberOfAngles)*MeanOrientationAngleStep;
    sigmav = (0:NumberOfStandandardDeviations)*StandardDeviationStep;
    
    %// grid for parameter pairs
    [theta, sigma] = meshgrid(thetav,sigmav);
    
    %// run vectorized function
    Fv_vec = FindFv_vec(sigma,theta,C11,C12_imag,C12_real,C13_imag,C13_real,C22,C23_imag,C23_real,C33);
    end
    
    
    function Fv_vec = FindFv_vec(sigma,theta,C11,C12_imag,C12_real,C13_imag,C13_real,C22,C23_imag,C23_real,C33)
    coef1 = 1./2-(((1-2.*sigma.^2).*(1-sigma.^2).*cos(2.*theta))./(2.*(1+sigma.^2)));
    coef2 = ((1-4.*sigma.^2).*(1-3.*sigma.^2).*(1-2.*sigma.^2).*(1-sigma.^2).*cos(4.*theta))./(8.*(1+sigma.^2).*(1+2.*sigma.^2).*(1+3.*sigma.^2));
    coef3 = ((1-2.*sigma.^2).*(1-sigma.^2).*sin(2.*theta))./(2.*sqrt(2).*(1+sigma.^2));
    coef4 = ((1-4.*sigma.^2).*(1-3.*sigma.^2).*(1-2.*sigma.^2).*(1-sigma.^2).*sin(4.*theta))./(4.*sqrt(2).*(1+sigma.^2).*(1+2.*sigma.^2).*(1+3.*sigma.^2));
    %// this is OK:
    Fv1 = C11./(coef1+coef2);
    
    aQuadratic1 = -2.*coef2.*(coef1+coef2);
    aQuadratic2 = (coef3-coef4).^2;
    aQuadratic = aQuadratic1 - aQuadratic2;
    bQuadratic1 = 2.*C11.*coef2-C22.*(coef1+coef2);
    bQuadratic2 = -2.*C12_real.*(coef3-coef4);
    bQuadratic = bQuadratic1 - bQuadratic2;
    cQuadratic1 = C11.*C22;
    cQuadratic2 = C12_real.^2+C12_imag.^2;
    cQuadratic = cQuadratic1 - cQuadratic2;
    %// move this down to one block for looping:
    %//rQuadratic = roots([aQuadratic bQuadratic cQuadratic]);
    %//sel1 = rQuadratic == real(rQuadratic);
    %//rQuadratic = rQuadratic(sel1);
    %//sel2 = rQuadratic == abs(rQuadratic);
    %//rQuadratic = rQuadratic(sel2);
    %//Fv2 = max(rQuadratic);
    
    aCubic1 = 2.*coef2.*(1-coef1+coef2).*(coef1+coef2);
    aCubic2 = 2.*coef2.*(coef3-coef4).*(coef3+coef4);
    aCubic3 = -(coef1+coef2).*(coef3+coef4).^2;
    aCubic4 = 2.*coef2.^3;
    aCubic5 = -(1-coef1+coef2).*(coef3-coef4).^2;
    aCubic = aCubic1+aCubic2-aCubic3-aCubic4-aCubic5;
    bCubic1 = -2.*coef2.*(C11.*(1-coef1+coef2)+C33.*(coef1+coef2))+C22.*(1-coef1+coef2).*(coef1+coef2);
    bCubic2 = -2.*coef2.*(C23_real.*(coef3-coef4)+C12_real.*(coef3+coef4))+2.*C13_real.*(coef3-coef4).*(coef3+coef4);
    bCubic3 = 2.*C23_real.*(coef1+coef2).*(coef3+coef4)+C11.*(coef3+coef4).^2;
    bCubic4 = (4.*C13_real+C22).*coef2.^2;
    bCubic5 = 2.*C12_real.*(1-coef1+coef2).*(coef3-coef4)+C33.*(coef3-coef4).^2;
    bCubic = bCubic1+bCubic2-bCubic3-bCubic4-bCubic5;
    cCubic1 = 2.*C11.*C33.*coef2-C11.*C22.*(1-coef1+coef2)-C22.*C33.*(coef1+coef2);
    cCubic2 = 2.*(C12_real.*C23_real-C12_imag.*C23_imag).*coef2-2.*(C13_real.*C23_real+C13_imag.*C23_imag).*...
        (coef3-coef4)-2.*(C12_real.*C13_real+C12_imag.*C13_imag).*(coef3+coef4);
    cCubic3 = -(C23_real.^2+C23_imag.^2).*(coef1+coef2)-2.*C11.*C23_real.*(coef3+coef4);
    cCubic4 = 2.*(C13_real.^2+C13_imag.^2+C13_real.*C22).*coef2;
    cCubic5 = -(C12_real.^2+C12_imag.^2).*(1-coef1+coef2)-2.*C33.*C12_real.*(coef3-coef4);
    cCubic = cCubic1+cCubic2-cCubic3-cCubic4-cCubic5;
    dCubic1 = C11.*C22.*C33;
    dCubic2 = 2.*(C12_real.*(C13_real.*C23_real+C13_imag.*C23_imag)+C12_imag.*(C13_imag.*C23_real-C13_real.*C23_imag));
    dCubic3 = C11.*(C23_real.^2+C23_imag.^2);
    dCubic4 = C22.*(C13_real.^2+C13_imag.^2);
    dCubic5 = C33.*(C12_real.^2+C12_imag.^2);
    dCubic = dCubic1+dCubic2-dCubic3-dCubic4-dCubic5;
    %// move this down to one block for looping:
    %//rCubic = roots([aCubic bCubic cCubic dCubic]);
    %//sel3 = rCubic == real(rCubic);
    %//rCubic = rCubic(sel3);
    %//sel4 = rCubic == abs(rCubic);
    %//rCubic = rCubic(sel4);
    %//Fv3 = max(rCubic);
    
    Fv_vec = zeros(size(theta));
    for k=1:numel(theta)
       rQuadratic = roots([aQuadratic(k) bQuadratic(k) cQuadratic(k)]);
       %//sel1 = rQuadratic == real(rQuadratic);
       %//rQuadratic = rQuadratic(sel1);
       %//sel2 = rQuadratic == abs(rQuadratic);
       %//rQuadratic = rQuadratic(sel2);
    
       %// suggestion instead: (with optional tol=1e-10 or something else earlier)
       rQuadratic = rQuadratic(abs(rQuadratic-abs(rQuadratic))<1e-10);  % allow some tolerance due to floating-point
       Fv2 = max(rQuadratic);
    
       rCubic = roots([aCubic(k) bCubic(k) cCubic(k) dCubic(k)]);
       %//sel3 = rCubic == real(rCubic);
       %//rCubic = rCubic(sel3);
       %//sel4 = rCubic == abs(rCubic);
       %//rCubic = rCubic(sel4);
       rCubic = rCubic(abs(rCubic-abs(rCubic))<1e-10);
       Fv3 = max(rCubic);
       Fv_vec(k) = min([Fv1(k); Fv2; Fv3]);  %// Fv1 is a matrix!
    end 
    
    end
    

    Since you didn't give any sample inputs, I haven't try to come up with one, so I haven't tested it.

    What I've done is change every arithmetic operation to their vectorized pairs (.*,./,.^), and restructure the code to use a single loop where the roots are being computed. The input theta and sigma can be matrices now, corresponding to pairs of the input vectors (constructed with meshgrid). My solution assumes that all the input parameters are scalars. I've also changed the part for finding the largest absolute value, allowing for some small errors due to floating-point arithmetic.