Search code examples
calgorithmx86divisioninteger-arithmetic

implementing school-like division on 32bit chunks on x86


Say i got two big numbers (defined below), and i want to implement division on them by falling back to x86 avaliable arithmetic

0008768376 - 1653656387 - 0437673667 - 0123767614 - 1039873878 - 2231712290 / 0038768167 - 3276287672 - 1665265628

C=A/B

Those numbers are stored as vectors of 32 bit unsigned ints. First one, A, is 6-unsigned-int vector, B is 3-unsigned-int long vector [each of this field i name generalised 'digit' or 'field' on my own]

The resulting C will be some 3-unsigned-int vector but to count it i need to fall back to some avaliable x86 (32 bit mode, though i could also hear about x64 too, but this is secondary) arithmetic

Tell me how to count at least first, most significant field of C resulting vector..

How to do that?


Solution

  • Here is unsigned long division pseudocode for any size operand from wikipedia:

    if D == 0 then
        error(DivisionByZeroException) end
    Q := 0                 -- initialize quotient and remainder to zero
    R := 0                     
    for i = n-1...0 do     -- where n is number of bits in N
      R := R << 1          -- left-shift R by 1 bit
      R(0) := N(i)         -- set the least-significant bit of R equal to bit i of the numerator
      if R >= D then
        R := R - D
        Q(i) := 1
      end
    end
    

    This can be extended to include signed division as well (rounding the result toward zero, not negative infinity):

    if D == 0 then
        error(DivisionByZeroException) end
    Q := 0                 -- initialize quotient and remainder to zero
    R := 0  
    SaveN := N             -- save numerator
    SaveD := D             -- save denominator
    if N < 0 then N = -N   -- invert numerator if negative
    if D < 0 then D = -D   -- invert denominator if negative
    for i = n-1...0 do     -- where n is number of bits in N
      R := R << 1          -- left-shift R by 1 bit
      R(0) := N(i)         -- set the least-significant bit of R equal to bit i of the numerator
      if R >= D then
        R := R - D
        Q(i) := 1
      end
    end
    if SaveN < 0 then
        R = -R             -- numerator was negative, negative remainder
    if (SaveN < 0 and SaveD >= 0) or (SaveN >= 0 and SaveD < 0) then
        Q = -Q             -- differing signs of inputs, result is negative
    

    Here is a relatively simple, unoptimized, untested implementation in x86 ASM (NASM syntax) that should be easy to understand:

            ; function div_192_96
            ; parameters:
            ;                 24 bytes: numerator, high words are stored after low words
            ;                 24 bytes: denominator, high words are stored after low words (only low 12 bytes are used)
            ;                  4 bytes: address to store 12 byte remainder in (must not be NULL)
            ;                  4 bytes: address to store 12 byte quotient in (must not be NULL)
            ; return value:   none
            ; error checking: none
    
            GLOBAL  div_192_96
    div_192_96:
            pushl   ebp             ; set up stack frame
            movl    ebp, esp
            pushl   0               ; high word of remainder
            pushl   0               ; middle word of remainder
            pushl   0               ; low word of remainder
            pushl   0               ; high word of quotient
            pushl   0               ; middle word of quotient
            pushl   0               ; low word of quotient
            movl    ecx, 96
    .div_loop:
            jecxz   .div_loop_done
            decl    ecx
            ; remainder = remainder << 1
            movl    eax, [ebp-8]    ; load middle word of remainder
            shld    [ebp-4], eax, 1 ; shift high word of remainder left by 1
            movl    eax, [ebp-12]   ; load low word of remainder
            shld    [ebp-8], eax, 1 ; shift middle word of remainder left by 1
            shll    [ebp-12], 1     ; shift low word of remainder left by 1
            ; quotient = quotient << 1
            movl    eax, [ebp-20]   ; load middle word of remainder
            shld    [ebp-16], eax, 1; shift high word of remainder left by 1
            movl    eax, [ebp-24]   ; load low word of remainder
            shld    [ebp-20], eax, 1; shift middle word of remainder left by 1
            shll    [ebp-24], 1     ; shift low word of remainder left by 1
            ; remainder(0) = numerator(127)
            movl    eax, [ebp+28]   ; load high word of numerator
            shrl    eax, 31         ; get top bit in bit 0
            orl     [ebp-12], eax   ; OR into low word of remainder
            ; numerator = numerator << 1
            movl    eax, [ebp+24]   ; load 5th word of numerator
            shld    [ebp+28], eax, 1; shift 6th word of numerator left by 1
            movl    eax, [ebp+20]   ; load 4th word of numerator
            shld    [ebp+24], eax, 1; shift 5th word of numerator left by 1
            movl    eax, [ebp+16]   ; load 3rd word of numerator
            shld    [ebp+20], eax, 1; shift 4th word of numerator left by 1
            movl    eax, [ebp+12]   ; load 2nd word of numerator
            shld    [ebp+16], eax, 1; shift 3rd word of numerator left by 1
            movl    eax, [ebp+8]    ; load 1st word of numerator
            shld    [ebp+12], eax, 1; shift 2nd word of numerator left by 1
            shll    [ebp+8], 1      ; shift 1st word of numerator left by 1
            ; if (remainder >= denominator)
            movl    eax, [ebp+40]   ; compare high word of denominator
            cmpl    eax, [ebp-4]    ; with high word of remainder
            jb      .div_loop
            ja      .div_subtract
            movl    eax, [ebp+36]   ; compare middle word of denominator
            cmpl    eax, [ebp-8]    ; with middle word of remainder
            jb      .div_loop
            ja      .div_subtract
            movl    eax, [ebp+32]   ; compare low word of denominator
            cmpl    eax, [ebp-12]   ; with low word of remainder
            jb      .div_loop
    .div_subtract:
            ; remainder = remainder - denominator
            movl    eax, [ebp+32]   ; load low word of denominator
            subl    [ebp-12], eax   ; and subtract from low word of remainder
            movl    eax, [ebp+36]   ; load middle word of denominator
            sbbl    [ebp-8], eax    ; and subtract from middle word of remainder (with borrow)
            movl    eax, [ebp+40]   ; load high word of denominator
            sbbl    [ebp-4], eax    ; and subtract from high word of remainder (with borrow)
            ; quotient(0) = 1
            orl     [ebp-24], 1     ; OR 1 into low word of quotient
            jmp     .div_loop
    .div_loop_done:
            movl    eax, [ebp+56]   ; load remainder storage pointer
            movl    edx, [ebp-12]   ; load low word of remainder
            movl    [eax+0], edx    ; store low word of remainder
            movl    edx, [ebp-8]    ; load middle word of remainder
            movl    [eax+4], edx    ; store middle word of remainder
            movl    edx, [ebp-4]    ; load high word of remainder
            movl    [eax+8], edx    ; store high word of remainder
            movl    eax, [ebp+60]   ; load quotient storage pointer
            movl    edx, [ebp-24]   ; load low word of quotient
            movl    [eax+0], edx    ; store low word of quotient
            movl    edx, [ebp-20]   ; load middle word of quotient
            movl    [eax+4], edx    ; store middle word of quotient
            movl    edx, [ebp-16]   ; load high word of quotient
            movl    [eax+8], edx    ; store high word of quotient
            addl    esp, 24
            popl    ebp
            ret
    

    Please note that this is just to give you a general idea, and has not been tested. BTW, it's probably easier to calculate the quotient of two numbers of equal size in assembly than to try to work around overflow issues (which are completely unhandled in above code).