I wrote the well-known spectral-norm algorithm in Fortran after I initially wrote (and optimized) it in MATLAB. The speedup after naive conversion to Fortran is at least 18X, but the problem is that the output of the Fortran program is not accurate. The correct output should be 1.274224153
but my Fortran program outputs 1.273722712
, what am I doing wrong in Fortran?
program perf_spectralnorm
implicit none
integer, parameter :: n = 5500, dp = kind(0.d0)
real(dp) :: u(n) = 1, v(n), w(n), vBv, vv, res
integer :: i, j, nvec(n)
nvec = [(i, i=1,n)]
do i = 1,10
call Au(w, u) ! change w
call Atu(v, w) ! change v
call Au(w, v) ! change w
call Atu(u, w) ! change u
end do
vBv = dot_product(u, v)
vv = dot_product(v, v)
res = sqrt(vBv/vv)
print '(f12.9)', res
contains
elemental real(dp) function A(i, j)
integer, intent(in) :: i, j
A = 1.0_dp / ((i+j) * (i+j+1.0_dP)/2 + i + 1)
end
subroutine Au(w, u)
real(dp) :: w(:), u(:)
do i = 1,n
w(i) = dot_product(A(i-1,nvec-1) , u)
end do
end
subroutine Atu(v, w)
real(dp) :: v(:), w(:)
do i = 1,n
v(i) = dot_product(A(nvec-1,i-1) , w)
end do
end
end program perf_spectralnorm
My original implementation in MATLAB with correct output is as follows:
n = 5500;
fprintf("%.9f\n", perf_spectralnorm(n))
function res = A(i,j)
res = 1 ./ ((i+j) .* (i+j+1)/2 + i + 1);
end
function w = Au(u,w)
n = length(u);
j = 1:n;
for i = 1:n
w(i) = dot( A(i-1,j-1), u );
end
end
function v = Atu(w,v)
n = length(w);
j = 1:n;
for i = 1:n
v(i) = dot( A(j-1,i-1), w );
end
end
function res = perf_spectralnorm(n)
u = ones(n,1);
v = zeros(n,1);
w = zeros(n,1);
for i = 1:10
w = Au(u,w);
v = Atu(w,v);
w = Au(v,w);
u = Atu(w,u);
end
vBv = dot(u,v);
vv = dot(v,v);
res = sqrt(vBv/vv);
end
Subroutines Au
and Atu
use a variable i
for the do-loop through host association. This modified the do-loop variable i
in main program, which is invalid. To solve the issue, you need to declare i
as a local variable in Au
and Atu
. For example,
subroutine Au(w, u)
real(dp), intent(out) :: w(:)
real(dp), intent(in) :: u(:)
integer i
do i = 1, n
w(i) = dot_product(A(nvec-1,i-1), u)
end do
end
Note, I've taken the liberty to also include the INTENT
of the dummy arguments.