Tags: c, swift, fft, accelerate-framework, vdsp

vDSP FFT2d Swift wrong imaginary part on the result


I'm using vDSP from the Accelerate framework to perform an FFT2D operation on a 2D array that comes from a mesh grid.

The problem is that I get all zeros in the imaginary part of the result, which doesn't match the output of the same operation in Python using pylab.fft2.

If I increase the array size, the results are no longer zero, but they still don't match, so I must be doing something wrong.

Can someone give me a hand? This is my first Stack Overflow question, but I've been stuck on this for two weeks now.

This is the mesh grid (4×8 for this example):

[
    [1.80485138784544e-35, 2.61027906966774e-23, 1.26641655490943e-14, 2.06115362243857e-09, 1.1253517471926e-07, 2.06115362243857e-09, 1.26641655490943e-14, 2.61027906966774e-23],
    [2.93748211171084e-30, 4.24835425529162e-18, 2.06115362243857e-09, 0.000335462627902512, 0.0183156388887342, 0.000335462627902512, 2.06115362243857e-09, 4.24835425529162e-18],
    [1.60381089054866e-28, 2.31952283024359e-16, 1.1253517471926e-07, 0.0183156388887342, 1.0, 0.0183156388887342, 1.1253517471926e-07, 2.31952283024359e-16],
    [2.93748211171084e-30, 4.24835425529162e-18, 2.06115362243857e-09, 0.000335462627902512, 0.0183156388887342, 0.000335462627902512, 2.06115362243857e-09, 4.24835425529162e-18]
]

Here is the fft2 function:

func fft2(arr: [[Complex<Double>]]) -> [[Complex<Double>]] {
    let nRows = arr.count
    let nCols = arr[0].count
    let N = nRows * nCols

    let radix = FFTRadix(FFT_RADIX2)
    let pass = vDSP_Length(Int(log2(Double(N))))

    // Create FFTSetup
    let setup = vDSP_create_fftsetupD(pass, radix)

    // Direction
    let dir = FFTDirection(FFT_FORWARD)

    // Get real and imag doubles from the [Complex]
    // (all imag parts are 0.0 on this example)
    let (real, imag) = complex2DArrayToDouble(arr)

    // Pack 2d arrays as 1d (functions below)
    var realArray = pack2dArray(real, rows: nRows, cols: nCols)
    var imagArray = pack2dArray(imag, rows: nRows, cols: nCols)

    // Create the split complex with the packed arrays
    var splitComplex = DSPDoubleSplitComplex(
        realp: &realArray,
        imagp: &imagArray)

    let log2n0c = vDSP_Length(Int(log2(Double(nCols))))
    let log2n1r = vDSP_Length(Int(log2(Double(nRows))))

    // vDSP_fft2d_zipD takes: stride between elements within a row (IC0),
    // stride between rows (IC1), log2(number of columns), log2(number of rows).
    // With the column-major packing produced by pack2dArray, elements of the
    // same row are nRows apart and consecutive rows are adjacent.
    let rowStride = vDSP_Stride(nRows)
    let colStride = vDSP_Stride(1)

    // Perform the fft2d
    vDSP_fft2d_zipD(setup, &splitComplex, rowStride, colStride, log2n0c, log2n1r, dir)

    // Destroy setup
    vDSP_destroy_fftsetupD(setup)

    // Pack the 1d arrays on 2d arrays again
    let resultReal = unpack2dArray(realArray, rows: nRows, cols: nCols)
    let resultImag = unpack2dArray(imagArray, rows: nRows, cols: nCols)

    // Rebuild the complex 2d array from the transformed real/imag parts
    return complexFrom2DArray(resultReal, imag: resultImag)
}

And finally, here are the functions that I use to pack and unpack the arrays between 2D and 1D:

func pack2dArray(arr: [[Double]], rows: Int, cols: Int) -> [Double] {
    var resultArray = zeros(rows * cols)
    for Iy in 0...cols-1 {
        for Ix in 0...rows-1 {
            let index = Iy * rows + Ix
            resultArray[index] = arr[Ix][Iy]
        }
    }
    return resultArray
}

func unpack2dArray(arr: [Double], rows: Int, cols: Int) -> [[Double]] {
    var resultArray = [[Double]](count: rows, repeatedValue: zeros(cols))
    for Iy in 0...cols-1 {
        for Ix in 0...rows-1 {
            let index = Iy * rows + Ix
            resultArray[Ix][Iy] = arr[index]
        }
    }
    return resultArray
}
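
To make the packing order concrete, here is a small check (a sketch that assumes the zeros(_:) helper used above returns a [Double] filled with 0.0). It shows that columns are stored contiguously, which is why nRows is passed as the stride between elements of a row in the vDSP_fft2d_zipD call:

// Hypothetical sanity check of the column-major packing used above.
// Assumes zeros(_:) returns a [Double] filled with 0.0, as in pack2dArray.
let sample: [[Double]] = [[1, 2, 3],
                          [4, 5, 6]]          // 2 rows x 3 cols
let packed = pack2dArray(sample, rows: 2, cols: 3)
// Each column is stored contiguously, so elements of the same row are
// `rows` positions apart: packed == [1.0, 4.0, 2.0, 5.0, 3.0, 6.0]
let restored = unpack2dArray(packed, rows: 2, cols: 3)
// restored == sample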

I would appreciate any info about this, and I can change this to C or Objective-C if that makes it easier to get it working like in Python.

Swift results:

[
    [(1.07460475603902+0.0.i), (-1.06348244974363+0.0.i), (1.03663115699765+0.0.i), (-1.00978033088166+0.0.i), (0.998658491216246+0.0.i), (-1.00978033088166+0.0.i), (1.03663115699765+0.0.i), (-1.06348244974363+0.0.i)],
    [(-1.03663138619031+0.0.i), (1.02590210946989+0.0.i), (-0.999999662394501+0.0.i), (0.974097665459761+0.0.i), (-0.963368838879988+0.0.i), (0.974097665459761+0.0.i), (-0.999999662394501+0.0.i), (1.02590210946989+0.0.i)],
    [(0.998658482971633+0.0.i), (-0.988322230996495+0.0.i), (0.963368617931946+0.0.i), (-0.938415438518917+0.0.i), (0.928079620195301+0.0.i), (-0.938415438518917+0.0.i), (0.963368617931946+0.0.i), (-0.988322230996495+0.0.i)],
    [(-1.03663138619031+0.0.i), (1.02590210946989+0.0.i), (-0.999999662394501+0.0.i), (0.974097665459761+0.0.i), (-0.963368838879988+0.0.i), (0.974097665459761+0.0.i), (-0.999999662394501+0.0.i), (1.02590210946989+0.0.i)]
]

Python results:

[
    [ 1.07460476 +0.00000000e+00j, -1.06348245 +1.98409020e-17j, 1.03663116 +0.00000000e+00j, -1.00978033 -1.97866921e-17j, 0.99865849 +0.00000000e+00j, -1.00978033 -1.98409020e-17j, 1.03663116 +0.00000000e+00j, -1.06348245 +1.97866921e-17j]
    [-1.03663139 +0.00000000e+00j, 1.02590211 -1.90819560e-17j, -0.99999966 +0.00000000e+00j, 0.97409767 +1.90819558e-17j, -0.96336884 +0.00000000e+00j, 0.97409767 +1.90819560e-17j, -0.99999966 +0.00000000e+00j, 1.02590211 -1.90819558e-17j]
    [ 0.99865848 +0.00000000e+00j, 0.98832223 +1.83230190e-17j, 0.96336862 +0.00000000e+00j, 0.93841544 -1.83772293e-17j, 0.92807962 +0.00000000e+00j, 0.93841544 -1.83230190e-17j, 0.96336862 +0.00000000e+00j, 0.98832223 +1.83772293e-17j]
    [-1.03663139 +0.00000000e+00j, 1.02590211 -1.90819560e-17j, -0.99999966 +0.00000000e+00j, 0.97409767 +1.90819558e-17j, -0.96336884 +0.00000000e+00j, 0.97409767 +1.90819560e-17j, -0.99999966 +0.00000000e+00j, 1.02590211 -1.90819558e-17j]
]

Regards and thanks so much in advance!!


Edit 1

Here is a C version of the same code: http://pastebin.com/C9RPgu68

And here is the python code: http://pastebin.com/rr4e6rku


Solution

  • The different output such as

    Swift:  (-1.06348244974363+0.0.i)
    Python: -1.06348245 +1.98409020e-17j
    

    does not indicate a wrong result. First, the Swift code apparently prints the numbers with a fixed number of decimal places, so that 1.98409020 ⋅ 10⁻¹⁷ is rounded to 0.0 in the output. Second, even if you expect the result to be exactly zero, a small non-zero value is to be expected due to the limited precision of binary floating-point numbers (about 16 significant decimal digits for a 64-bit Double).
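
    In other words, values on the order of 10⁻¹⁷ should be treated as zero. One way to confirm that the two outputs agree is to compare them with a small tolerance instead of expecting exact equality. Here is a minimal sketch (assuming the real and imaginary parts are available as flat [Double] arrays of equal length, e.g. as produced by pack2dArray above):

    // Minimal sketch: compare two FFT results element-wise within a tolerance.
    // realA/imagA and realB/imagB are assumed to be flat [Double] arrays
    // of the same length.
    func resultsMatch(realA: [Double], imagA: [Double],
                      realB: [Double], imagB: [Double],
                      tolerance: Double = 1e-12) -> Bool {
        for i in 0..<realA.count {
            if abs(realA[i] - realB[i]) > tolerance { return false }
            if abs(imagA[i] - imagB[i]) > tolerance { return false }
        }
        return true
    }

    // An imaginary part of 1.98409020e-17 is far below the tolerance,
    // so it compares as equal to the 0.0 printed by the Swift code.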