Search code examples
c#masm

Error exit in C# when use MASM lib


I with my friend have to write program in C# which count Newton's iterations and use to it special ASM lib written by my friend. Library works because we run this asm code in visual in c main function and we got all points to draw Newton's fractal. But when I want run function in C# I got results in table and when I want write this results to file program exit with code -1073740791 (0xc0000409) and in file I have for example 10000 points or 20000 points (from 170476), when I want write number from 170076 to 170476 it works.. I do not know why it happens. Some problems with memory or stack?

Dll import:

 [DllImport(@"Dll_ASM.dll", CallingConvention = CallingConvention.Cdecl)]
        static extern void countPointsInAsm(double[] PolynomialCoefficient, double[] fromIntervals, double[] toIntervals, double[] TableOfPixels);

In any CallingConvention is the same thing.

Here is the algorithm:
in C:

#define PIXELSWIDTH 436
#define PIXELSHEIGHT 391
#define TABLELENGTH 170476

typedef struct
{
    double real;
    double imaginary;
} Complex;

Complex Add(Complex a, Complex b)
{
    Complex result_complex;
    result_complex.real = a.real + b.real;
    result_complex.imaginary = a.imaginary + b.imaginary;
    return result_complex;
}

Complex AddDouble(double a, Complex b)
{
    Complex result_complex;
    result_complex.real = a + b.real;
    result_complex.imaginary = b.imaginary;
    return result_complex;
}

Complex Sub(Complex a, Complex b)
{
    Complex result_complex;
    result_complex.real = a.real - b.real;
    result_complex.imaginary = a.imaginary - b.imaginary;
    return result_complex;
}

Complex Mul(Complex a, Complex b)
{
    Complex result_complex;
    result_complex.real = a.real*b.real - a.imaginary*b.imaginary;
    result_complex.imaginary = a.real*b.imaginary + a.imaginary*b.real;
    return result_complex;
}

Complex Div(Complex a, Complex b)
{
    Complex result_complex;
    result_complex.real = (a.real*b.real + a.imaginary*b.imaginary) / (b.real*b.real + b.imaginary*b.imaginary);
    result_complex.imaginary = (a.imaginary*b.real - a.real*b.imaginary) / (b.real*b.real + b.imaginary*b.imaginary);
    return result_complex;
}

double Abs(Complex z)
{
    double result = sqrt((z.real*z.real) + (z.imaginary*z.imaginary));
    return result;
}

int* CountPointsInC(double PolynomialCoefficients[10], double Intervals[2][2])
{
    int counter = 0;
    int iterations = 0;
    int max_iterations = 1000;
    double DerivativeCoefficients[9];
    Complex z;
    Complex zp;
    Complex polynomial_value;
    Complex derivative_value;
    int* result_table = (int*)malloc(sizeof(int)*TABLELENGTH);
    for (int j = 0; j < PIXELSHEIGHT; j++)
    {
        for (int k = 0; k < PIXELSWIDTH; k++)
        {
            iterations = 0;
            z.real = (Intervals[0][0] + k*((Intervals[1][0] - Intervals[0][0]) / 436));
            z.imaginary = (Intervals[0][1] + j*((Intervals[1][1] - Intervals[0][1]) / 391));
            for (int i = 0; i < 9; i++)
            {
                DerivativeCoefficients[i] = PolynomialCoefficients[i + 1] * (i + 1);
            };

            do
            {
                polynomial_value.real = PolynomialCoefficients[9];
                polynomial_value.imaginary = 0;
                derivative_value.real = DerivativeCoefficients[8];
                derivative_value.imaginary = 0;
                for (int i = 8; i >= 0; i--)
                {
                    polynomial_value = AddDouble(PolynomialCoefficients[i], Mul(polynomial_value, z));
                }
                for (int i = 7; i >= 0; i--)
                {
                    derivative_value = AddDouble(DerivativeCoefficients[i], Mul(derivative_value, z));
                }
                iterations += 1;
                zp = z;
                z = Sub(z, (Div(polynomial_value, derivative_value)));
            } while ((Abs(Sub(z, zp)) >= 0.01) && (iterations < max_iterations));
            result_table[counter] = iterations;
            counter++;
        }
    }
    return result_table;
}

A similar in assembly language:

.data
    aReal REAL8 ?
    bReal REAL8 ?
    realResult REAL8 ?
    aImaginary REAL8 ?
    bImaginary REAL8 ?
    imaginaryResult REAL8 ?
    doubleNumber REAL8 ?
    sqrtResult REAL8 ?
    zReal REAL8 ?
    zImaginary REAL8 ?
    zpReal REAL8 ?
    zpImaginary REAL8 ?
    polynomialValueReal REAL8 ?
    polynomialValueImaginary REAL8 ?
    derivativeValueReal REAL8 ?
    derivativeValueImaginary REAL8 ?
    counter REAL4 0.0
    iterations REAL4 1.0
    maxIterations REAL4 1000.0
    k dq 0
    j dq 0
    whileCondition REAL8 0.01
    derivativeCoefficients REAL8 9 dup(?)
.code

addComplex PROC
    ; adding real part
    fld aReal ; load aReal into st0
    fadd bReal ; aReal + bReal into st0
    fstp realResult ; store into realResult
    ; adding imaginary part
    fld aImaginary ; load aImaginary into st0
    fadd bImaginary ; aImaginary + bImaginary into st0
    fstp imaginaryResult ; store into imaginaryResult
addComplex ENDP

addDouble PROC
    ; adding real part
    fld aReal ; load aReal into st0
    fadd doubleNumber ; aReal + doubleNumber into st0
    fstp realResult ; store into realResult
    ; adding imaginary part
    fld aImaginary ; load aImaginary into st0
    fstp imaginaryResult ; store into imaginaryResult
    ret
addDouble ENDP

subComplex PROC
    ; subtracting real part
    fld aReal ; load aReal into st0
    fsub bReal ; aReal - bReal into st0
    fstp realResult ; store into realResult
    ; subtracting imaginary part
    fld aImaginary ; load aImaginary into st0
    fsub bImaginary ; aImaginary - bImaginary into st0
    fstp imaginaryResult ; store into imaginaryResult
    ret
subComplex ENDP

mulComplex PROC
    ; multing real part
    fld aReal ; load aReal into st0
    fmul bReal ; aReal * bReal into st0
    fld aImaginary ; load aImaginary into st0 and aReal * bReal into st1
    fmul bImaginary ; aImaginary * bImaginary into st0
    fsub st(1), st(0) ; aReal * bReal - aImaginary * bImaginary into st 0
    fxch st(1) ; swap st1 with st0
    fstp realResult ; store into realResult
    ; multing imaginary part
    fld aReal ; load aReal into st0
    fmul bImaginary ; aReal * bImaginary into st0
    fld aImaginary ; load aImaginary into st0 and aReal * bImaginary into st1
    fmul bReal ; aImaginary * bReal into st0
    fadd st(0), st(1) ; aReal * bImaginary + aImaginary * bReal into st 0
    fstp imaginaryResult ; store into imaginaryResult
    ret
mulComplex ENDP

divComplex PROC
    ; diving real part

    fld aReal ; load aReal into st0
    fmul bReal ; aReal * bReal into st0
    fld aImaginary ; load aImaginary into st0 and aReal * bReal into st1
    fmul bImaginary ; aImaginary * bImaginary into st0
    fadd st(0), st(1) ; aReal * bReal + aImaginary * bImaginary into st0

    fld bReal ; load bReal into st0
    fmul bReal ; bReal * bReal into st0
    fld bImaginary ; load bImaginary into st0
    fmul bImaginary ; bImaginary * bImaginary into st0
    fadd st(0), st(1) ; bReal * bReal + bImaginary * bImaginary into st0

    fdiv st(2), st(0) ; aReal * bReal + aImaginary * bImaginary / bReal * bReal + bImaginary * bImaginary into st0
    fxch st(2) ; swap st2 with st0
    fstp realResult ; store into realResult

    ; diving imaginary part
    fld aImaginary ; load aImaginary into st0
    fmul bReal ; aImaginary * bReal into st0
    fld aReal ; load aImaginary into st0 and aImaginary * bReal into st1
    fmul bImaginary ; aReal * bImaginary into st5
    fsub st(1), st(0) ; aImaginary * bReal - aReal * bImaginary into st1
    fxch st(1) ; swap st1 with st0

    fld bReal ; load bReal into st0
    fmul bReal ; bReal * bReal into st0
    fld bImaginary ; load bImaginary into st0
    fmul bImaginary ; bImaginary * bImaginary into st0
    fadd st(0), st(1) ; bReal * bReal + bImaginary * bImaginary into st0

    fdiv st(2), st(0) ; aImaginary * bReal - aReal * bImaginary / bReal * bReal + bImaginary * bImaginary into st2
    fxch st(2) ; swap st2 with st0
    fstp imaginaryResult ; store into imaginaryResult
    ret
divComplex ENDP

absComplex PROC
    fld aReal ; load aReal into st0
    fmul aReal ; aReal * aReal into st0
    fld aImaginary ; load aImaginary into st0 and aImaginary * bImaginary in3to st1
    fmul aImaginary ; aImaginary * aImaginary into st0
    fadd st(0), st(1) ; aReal * aReal + aImaginary * aImaginary into st0
    fsqrt ; compute square root and store to st0
    fstp sqrtResult ; store into sqrtResult
    ret
absComplex ENDP

countPointsInAsm PROC
    mov R13, RCX ; save pointer to PolynomialCoefficients table into R13
    mov R14, RDX ; save pointer to "fromTable" into R14
    mov R15, R8 ; save pointer to "toTable" into R15

    mov RBX, R13 ; pointer to PolynomialCoefficients to RBX
    mov RCX, 9 ; set loop counter
    lea RAX, derivativeCoefficients ; pointer to derivativeCoefficients to RAX
    xor RDI, RDI ; zero into RDI
    derivativeCoefficientsLoop:
        fld REAL8 ptr[RBX+8] ; load PolynomialCoefficients[i + 1]
        fmul iterations ; PolynomialCoefficients[i + 1] * (i + 1);
        fstp REAL8 ptr[RAX+RDI]
        mov R12, [RAX+RDI]
        fld iterations ; iterations to st(0)
        fld1 ; 1 to st(0) and iterations to st(1)
        faddp ; iterations + 1
        fstp iterations
        add RDI, 8
        add RBX, 8
    loop derivativeCoefficientsLoop 
    xor RDI, RDI ; zero into RDI

    mov RCX, 391 ; set outer loop counter
    mainOuterLoop:
        push RCX
        mov RCX, 436 ; set inner loop counter
        mainInnerLoop:

            fld REAL8 ptr[R15] ; load Interval[1][0]
            fsub REAL8 ptr[R14] ; Interval[1][0] - Interval[0][0]
            fmul k ; k*(Interval[1][0] - Interval[0][0])
            push k
            mov k, 436
            fdiv k ; k*(Interval[1][0] - Interval[0][0])/436
            pop k
            fadd REAL8 ptr[R14] ; Interval[0][0] + k*(Interval[1][0] - Interval[0][0])/436
            fstp zReal

            fld REAL8 ptr[R14+8] ; load Interval[1][1]
            fsub REAL8 ptr[R15+8] ; Interval[1][1] - Interval[0][1]
            fmul j ; j*(Interval[1][1] - Interval[0][1])
            push j
            mov j, 391
            fdiv j ; j*(Interval[1][1] - Interval[0][1])/391
            pop j
            fadd REAL8 ptr[R15+8] ; Interval[1][1] + j*(Interval[0][1] - Interval[1][1])/391
            fstp zImaginary

            mov iterations, 0 ; zero into iterations 

            doWhileLoop:
                fld REAL8 ptr[R13+72] ; load PolynomialCoefficients[9]
                fstp polynomialValueReal ; store into polynomialValueReal
                mov polynomialValueImaginary, 0 ; zero into polynomialValueImaginary

                lea RAX, derivativeCoefficients ; pointer to derivativeCoefficients to RAX
                fld REAL8 ptr[RAX+64] ; load derivativeCoefficients[8]
                fstp derivativeValueReal ; store into derivativeValueReal
                mov derivativeValueImaginary, 0 ; zero into derivativeValueImaginary

                push RCX
                mov RCX, 9 ; set polynomialValueLoop counter
                polynomialValueLoop:
                    fld polynomialValueReal ; load polynomialValueReal
                    fstp aReal ; store into aReal
                    fld polynomialValueImaginary ; load polynomialValueImaginary
                    fstp aImaginary ; store into aImaginary
                    fld zReal ; load zReal
                    fstp bReal ; store into bReal
                    fld zImaginary ; load zImaginary
                    fstp bImaginary ; store into bImaginary
                    call mulComplex ; Mul(polynomial_value, z)
                    fld realResult ; load realResult
                    fstp aReal ; store into aReal
                    fld imaginaryResult ; load imaginaryResult
                    fstp aImaginary ; store into aImaginary

                    mov RBX, RCX ; loop counter to RBX
                    dec RBX
                    imul RBX, 8 ; memory locations of PolynomialCoefficients[i-1]
                    fld REAL8 ptr[R13+RBX] ; load PolynomialCoefficients[i-1]
                    fstp doubleNumber ; store into aReal
                    call addDouble ; AddDouble(PolynomialCoefficients[i-1], Mul(polynomial_value, z))
                    fld realResult ; load realResult
                    fstp polynomialValueReal ; store into polynomialValueReal
                    fld imaginaryResult ; load imaginaryResult
                    fstp polynomialValueImaginary ; store into polynomialValueImaginary
                    finit
                DEC RCX
                CMP RCX, 0
                JNE polynomialValueLoop

                mov RCX, 8 ; set derivativeValueLoop counter
                derivativeValueLoop:
                    fld derivativeValueReal ; load derivativeValueReal
                    fstp aReal ; store into aReal
                    fld derivativeValueImaginary ; load derivativeValueImaginary
                    fstp aImaginary ; store into aImaginary
                    fld zReal ; load zReal
                    fstp bReal ; store into bReal
                    fld zImaginary ; load zImaginary
                    fstp bImaginary ; store into bImaginary
                    call mulComplex ; Mul(derivative_value, z)
                    fld realResult ; load realResult
                    fstp aReal ; store into aReal
                    fld imaginaryResult ; load imaginaryResult
                    fstp aImaginary ; store into aImaginary

                    mov RBX, RCX ; loop counter to RBX
                    dec RBX
                    imul RBX, 8 ; memory locations of DerivativeCoefficients[i-1]
                    fld REAL8 ptr[RAX+RBX] ; load DerivativeCoefficients[i-1]
                    fstp doubleNumber ; store into aReal
                    call addDouble ; AddDouble(DerivativeCoefficients[i-1], Mul(derivative_value, z))
                    fld realResult ; load realResult
                    fstp derivativeValueReal ; store into polynomialValueReal
                    fld imaginaryResult ; load imaginaryResult
                    fstp derivativeValueImaginary ; store into polynomialValueImaginary
                    finit
                DEC RCX
                CMP RCX, 0
                JNE derivativeValueLoop
                pop RCX

                fld1 ; load 1
                fadd iterations ; iterations + 1
                fstp iterations ; store into iterations

                fld zReal ; load zReal
                fstp zpReal ; store into zpReal
                fld zImaginary ; load zImaginary
                fstp zpImaginary ; store into zpImaginary

                fld polynomialValueReal ; load polynomialValueReal
                fstp aReal ; store into aReal
                fld polynomialValueImaginary ; load polynomialValueImaginary
                fstp aImaginary ; store into aImaginary
                fld derivativeValueReal ; load derivativeValueReal
                fstp bReal ; store into bReal
                fld derivativeValueImaginary ; load derivativeValueImaginary
                fstp bImaginary ; store into bImaginary
                call divComplex ; Div(polynomial_value, derivative_value)
                fld realResult ; load realResult
                fstp bReal ; store into bReal
                fld imaginaryResult ; load imaginaryResult
                fstp bImaginary ; store into bImaginary
                fld zReal ; load zReal
                fstp aReal ; store into aReal
                fld zImaginary ; load zImaginary
                fstp aImaginary ; store into aImaginary
                call subComplex ; Sub(z, (Div(polynomial_value, derivative_value)))
                fld realResult ; load realResult
                fstp zReal ; store into zReal
                fld imaginaryResult ; load imaginaryResult
                fstp zImaginary ; store into zImaginary

                fld zReal ; load zReal
                fstp aReal ; store into aReal
                fld zImaginary ; load zImaginary
                fstp aImaginary ; store into aImaginary
                fld zpReal ; load zpReal
                fstp bReal ; store into bReal
                fld zpImaginary ; load zpImaginary
                fstp bImaginary ; store into bImaginary
                call subComplex ; Sub(z, zp)
                fld realResult ; load realResult
                fstp aReal ; store into aReal
                fld imaginaryResult ; load imaginaryResult
                fstp aImaginary ; store into aImaginary
                call absComplex ; (Abs(Sub(z, zp))
                finit

                fld sqrtResult ; load sqrtResult
                fcomp whileCondition ; compare sqrtResult with 0.01
                fstsw AX
                sahf
                jb toEnd; if sqrtResult < 0.01 end doWhileLoop

                fld iterations ; load iterations as int
                fcomp maxiterations ; compare iterations with maxiterations
                fstsw AX
                sahf
                jb doWhileLoop ; if iterations >= maxiterations end doWhileLoop
            toEnd:
            fld iterations ; load iterations
            mov RAX, R9 ; pointer to resultTable to RAX
            fstp REAL8 ptr[RAX+RDI] ; add iterations to resultTable: result_table[counter] = iterations
            add RDI, 8

            fld1 ; load 1
            fadd counter ; counter + 1
            fstp counter ; store into counter

            inc k
        dec RCX
        cmp RCX, 0
        JNE mainInnerLoop

        inc j
        mov k, 0
        pop RCX
    dec RCX
    cmp RCX, 0
    JNE mainOuterLoop
    ret
countPointsInAsm ENDP
end

I think I cannot reduce the code size. We had problem with FPU stack earlier because registers was overflow but using clear: 'finit' helped.


Solution

  • Each calling convention requires that you must push-pop CPU registers you are going to change in your function. Do you do it? I can't see push instructions at the start of your asm code and pop instructions at the end of it. You can safely use eax/rax and a few others, but not all of them. Refer to documentation of the C calling convention to see the complete list.

    Update: I found this MSDN article about the registers in 64bit code. You have to manually push-pop all registers marked as callee saved. https://msdn.microsoft.com/en-us/library/6t169e9c.aspx