Search code examples
c++avxavx2mandelbrot

AVX calculation precision


I wrote a program to display the mandelbrot set. To speed it up, I used AVX (really AVX2) instructions through the <immintrin.h> header.
The problem is: The result of the AVX computation (with double precision) has artifacts, and it differs to the result when computed using "normal" doubles.
In detail, there is a function getIterationCount which calculates the number of iterations until the mandelbrot sequence exceeds 4, or assumes the point is included in the set if the sequences does not exceed 4 during the first N steps.
The code looks like this:

#include "stdafx.h"
#include <iostream>
#include <complex>
#include <immintrin.h>

class MandelbrotSet {
public:
    int getIterationCount(const std::complex<double>, const int) const noexcept;
    __m256i getIterationCount(__m256d cReal, __m256d cIm, unsigned maxIterations) const noexcept;
};

inline int MandelbrotSet::getIterationCount(const std::complex<double> c, const int maxIterations) const noexcept
{
    double currentReal = 0;
    double currentIm = 0;
    double realSquare;
    double imSquare;
    for (int i = 0; i < maxIterations; ++i) {
        realSquare = currentReal * currentReal;
        imSquare = currentIm * currentIm;
        currentIm = 2 * currentReal * currentIm + c.imag();
        currentReal = realSquare - imSquare + c.real();
        if (realSquare + imSquare >= 4) {
            return i;
        }
    }
    return -1;
}

const __m256i negone = _mm256_set_epi64x(-1, -1, -1, -1);
const __m256i one = _mm256_set_epi64x(1, 1, 1, 1);
const __m256d two = _mm256_set_pd(2, 2, 2, 2);
const __m256d four = _mm256_set_pd(4, 4, 4, 4);

//calculates for i = 0,1,2,3
//output[i] = if ctrl[i] == 0b11...1 then onTrue[i] else onFalse[i]
inline __m256i _mm256_select_si256(__m256i onTrue, __m256i onFalse, __m256i ctrl) {
    return _mm256_or_si256(_mm256_and_si256(onTrue, ctrl), _mm256_and_si256(onFalse, _mm256_xor_si256(negone, ctrl)));
}

inline __m256i MandelbrotSet::getIterationCount(__m256d cReal, __m256d cIm, unsigned maxIterations) const noexcept {
    __m256i result = _mm256_set_epi64x(0, 0, 0, 0);
    __m256d currentReal = _mm256_set_pd(0, 0, 0, 0);
    __m256d currentIm = _mm256_set_pd(0, 0, 0, 0);
    __m256d realSquare;
    __m256d imSquare;
    for (unsigned i = 0; i <= maxIterations; ++i)
    {
        realSquare = _mm256_mul_pd(currentReal, currentReal);
        imSquare = _mm256_mul_pd(currentIm, currentIm);

        currentIm = _mm256_mul_pd(currentIm, two);
        currentIm = _mm256_fmadd_pd(currentIm, currentReal, cIm);

        currentReal = _mm256_sub_pd(realSquare, imSquare);
        currentReal = _mm256_add_pd(currentReal, cReal);

        __m256i isSmaller = _mm256_castpd_si256(_mm256_cmp_pd(_mm256_add_pd(realSquare, imSquare), four, _CMP_LE_OS));
        result = _mm256_select_si256(_mm256_add_epi64(one, result), result, isSmaller);

        //if (i % 10 == 0 && !isSmaller.m256i_i64[0] && !isSmaller.m256i_i64[1] && !isSmaller.m256i_i64[2] && !isSmaller.m256i_i64[3]) return result;
    }
    return result;
}

using namespace std;

int main() {
    MandelbrotSet m;
    std::complex<double> point(-0.14203954214360026, 1);

    __m256i result_avx = m.getIterationCount(_mm256_set_pd(-0.14203954214360026, -0.13995837669094691, -0.13787721123829355, -0.13579604578563975),
        _mm256_set_pd(1, 1, 1, 1), 2681);

    int result_normal = m.getIterationCount(point, 2681);
    cout << "Normal: " << result_normal << ", AVX: " << result_avx.m256i_i64[0] << ", at point " << point << endl;
    return 0;
}

When I run this code, I get the following result: (The point -0.14203954214360026 + i is chosen intentionally, because both methods return the same/almost the same value in most points)

Normal: 13, AVX: 20, at point (-0.14204,1)

A difference of 1 might be acceptable, but a difference of 7 seems quite big, since both methods use double precision.
Have AVX instructions a lower precision than "normal" instruction? If not, why do both results differ so much?
I use MS Visual Studio 2017, MS Visual C++ 2017 15.6 v14.13 141 and my computer has a i7-7700K Processor. The Project is compiled for x64. The result is the same if it is compiler with no or full optimization.
The rendered results look like this:
AVX:
AVX Normal Normal

The values of realSquare and imSquare during the loop are as follows:

0, 0, 0
1, 0.0201752, 1
2, 1.25858, 0.512543
3, 0.364813, 0.367639
4, 0.0209861, 0.0715851
5, 0.0371096, 0.850972
6, 0.913748, 0.415495
7, 0.126888, 0.0539759
8, 0.00477863, 0.696364
9, 0.69493, 0.782567
10, 0.0527514, 0.225526
11, 0.0991077, 1.48388
12, 2.33115, 0.0542994
13, 4.5574, 0.0831971

In the AVX loop the values are:

0, 0, 0
1, 0.0184406, 1
2, 1.24848, 0.530578
3, 0.338851, 0.394109
4, 0.0365017, 0.0724287
5, 0.0294888, 0.804905
6, 0.830307, 0.478687
7, 0.04658, 0.0680608
8, 0.024736, 0.78746
9, 0.807339, 0.519651
10, 0.0230712, 0.0872787
11, 0.0400014, 0.828561
12, 0.854433, 0.404359
13, 0.0987707, 0.0308286
14, 0.00460416, 0.791455
15, 0.851277, 0.773114
16, 0.00332154, 0.387519
17, 0.270393, 1.14866
18, 1.02832, 0.0131355
19, 0.773319, 1.51892
20, 0.776852, 10.0336

Solution

  • Reversing the order of the arguments passed to _mm256_set_pd solves the problem.

    If you inspect the value of cReal in the debugger you'll see that the first element is set to -0.13579604578563975 not -0.14203954214360026.