assembly reverse-engineering x86-16 checksum crc16

Debug checksum algorithm written in x86 16-bit assembly

I'm currently reverse engineering a piece of software which computes a 2-byte checksum for a given buffer of data. The code comes from a 16-bit DLL (NE format) that was compiled with Borland C++. I suspect the checksum is a CRC-16 with a polynomial of 0x8408, but I have had no luck computing an identical CRC myself, so I wonder whether the implementation is a "standard" CRC-16 or not.

Here's the assembly implementation:

; ----------------------------------------------------------------------
; crc_cal (far procedure, 16-bit x86, disassembler listing)
;
; Computes a 16-bit checksum over a byte buffer and stores it in the two
; bytes that follow the processed data, low byte first.
;
; Algorithm as coded:
;   crc = 0FFFFh
;   for each byte: low byte of crc ^= byte, then 8 times:
;       shift crc right by 1; if the bit shifted out was 1, crc ^= 8408h
;   crc = NOT crc
;
; In:    [bp+arg_0] = byte count (word)
;        [bp+arg_2] = far pointer (offset:segment) to the data
; Out:   two bytes written at the offset where processing stopped
;        (equal to the count whenever the count is >= 1)
; Regs:  cx = byte offset, dx = running crc, di = bit counter,
;        si/ax = scratch, es:bx = address of the current byte
; Saved: bp, ds, si, di are restored; ax, bx, cx, dx, es are modified
;        (on exit ax = dx = high byte of the inverted crc, cx = 8408h;
;         whether a caller relies on ax is not visible here)
; Note:  the count is tested only after a byte has been processed, so
;        one byte is consumed even when the count is 0.
; ----------------------------------------------------------------------
crc_cal proc    far

var_4= word ptr -4                      ; local: polynomial constant
arg_0= word ptr  6                      ; first stack argument: byte count
arg_2= dword ptr  8                     ; second stack argument: far pointer to data

mov ax, seg dseg37                      ; segment value destined for ds
inc bp                                  ; make bp odd before saving it (far-frame marker, undone at exit)
push    bp                              ; save the (incremented) caller bp
mov bp, sp                              ; frame base for arg_*/var_* offsets
push    ds                              ; save caller ds at [bp-2]
mov ds, ax                              ; ds = dseg37; no ds-relative access follows in this proc
sub sp, 2                               ; reserve var_4 at [bp-4]
push    si                              ; save si and di, both used as scratch below
push    di
xor cx, cx                              ; cx = 0, offset of the next data byte
mov dx, 0FFFFh                          ; dx = initial crc value
mov [bp+var_4], 8408h                   ; keep the polynomial in the local

loc_42646:                              ; ---- per-byte loop (test is at the bottom) ----
les bx, [bp+arg_2]                      ; es:bx = start of data (reloaded every pass)
add bx, cx                              ; es:bx = address of data[cx]
mov al, es:[bx]                         ; al = data[cx]
xor al, dl                              ; fold the byte into the low half of the crc
mov dl, al                              ; dl ^= data[cx]; dh is left untouched
inc cx                                  ; advance to the next byte
xor di, di                              ; di = 0, bit counter for this byte
jmp short loc_42672                     ; enter the bit loop at its test

loc_42657:                              ; ---- per-bit step ----
mov si, dx                              ; si = (crc - 1) >> 1, computed via ax
dec si                                  ;   clearing a set lsb leaves the upper bits alone,
mov ax, si                              ;   a clear lsb forces a borrow into them
shr ax, 1
mov si, ax
mov ax, dx                              ; dx = crc >> 1
shr ax, 1
mov dx, ax
cmp si, dx                              ; equal only when the old lsb was 1
jnz short loc_42671                     ; old lsb was 0: no polynomial xor
mov ax, dx                              ; old lsb was 1: crc ^= 8408h
xor ax, [bp+var_4]
mov dx, ax

loc_42671:
inc di                                  ; one more bit done

loc_42672:
cmp di, 8                               ; 8 bit steps per byte
jb  short loc_42657
cmp cx, [bp+arg_0]                      ; unsigned compare: more bytes left?
jb  short loc_42646
mov ax, dx                              ; final step: invert the crc
not ax
mov dx, ax
les bx, [bp+arg_2]                      ; es:bx = data + cx, first byte after the data
add bx, cx
mov es:[bx], dl                         ; store the low byte of the inverted crc
inc cx
mov ax, dx                              ; dx = crc >> 8 (shift-by-immediate form, 80186 or later)
shr ax, 8
mov dx, ax
les bx, [bp+arg_2]                      ; es:bx = data + cx, the following byte
add bx, cx
mov es:[bx], dl                         ; store the high byte of the inverted crc
inc cx                                  ; result unused: cx is overwritten by the pop below
pop di                                  ; restore di, si
pop si
pop cx                                  ; discard var_4 (releases the 2 bytes reserved above)
pop ds                                  ; restore caller ds
pop bp                                  ; restore the incremented bp
dec bp                                  ; undo the inc bp from the prologue
retf                                    ; far return; arguments are left on the stack
crc_cal endp

And some data with the associated CRC (last two bytes), as computed by the software:

|                           DATA                           |Inc|CRC|
|----------------------------------------------------------|---|---|
00 00 00 00 00 00 01 ef f7 fe ef ff fd ef fb fa fd a2 aa 21 01 f4 e0
00 00 00 00 00 00 01 ef f7 fd ef ff fd fe fb fa fd a2 aa 21 02 f4 d1
00 00 00 00 00 00 01 f7 fe fd fd ff fd df ff fb fd a2 aa 21 03 f4 cd
00 00 00 00 00 00 01 f7 fe fe fd ff f7 ef ff fa fd a2 aa 21 04 f4 c2
00 00 00 00 00 00 01 ef f7 fe ef ff fe ef fb fa fd a2 aa 21 05 f4 db
00 00 00 00 00 00 01 ef f7 fe ef ff fd ef fb fa fd a2 aa 21 06 f4 db

Solution

The data shown doesn't correspond to a crc, as noted in this prior answer:

Find used CRC-16 algorithm

The code is an overly complex implementation of a right shifting CRC (in dx), poly = 0x8408, initial value = 0xffff, xor out = 0xffff. Check the next 2 bytes after each line to see if that is where the CRC is appended.

Here is the question's code with comments added. Thanks to Ross Ridge for explaining that the "inc bp" marks the stack frame as belonging to a far call, in case the stack needs to be walked back (the "dec bp" at the end undoes the "inc bp" at the start).

; ----------------------------------------------------------------------
; crc_cal (far procedure, 16-bit x86)
;
; Right-shifting CRC held in dx: poly = 8408h, initial value = 0FFFFh,
; result inverted before it is stored. The two result bytes are written
; directly after the processed data, least significant byte first.
;
; In:    [bp+arg_0] = number of bytes of data (word)
;        [bp+arg_2] = far pointer to data
; Out:   two bytes stored at data + cx, where cx is the number of bytes
;        processed (equal to arg_0 whenever arg_0 >= 1)
; Saved: bp, ds, si, di are restored; ax, bx, cx, dx, es are modified
;        (on exit ax = dx = high byte of the inverted crc, cx = 8408h)
; Note:  the byte loop is tested at the bottom, so one byte is processed
;        even when arg_0 is 0.
; ----------------------------------------------------------------------
crc_cal proc    far

var_4   =       word ptr -4     ; local used to store poly
arg_0   =       word ptr  6     ; number of bytes of data
arg_2   =       dword ptr 8     ; far pointer to data

        mov     ax, seg dseg37  ; value for ds; ds is never used below
        inc     bp              ; bp += 1, (bp&1 == far call indicator)
        push    bp              ; save bp+1
        mov     bp, sp          ; bp = sp, base for the equated offsets
        push    ds              ; save ds (lands at bp-2)
        mov     ds, ax          ; ds = dseg37  (never used)
        sub     sp, 2           ; allocate space for poly (var_4 at bp-4)
        push    si              ; save si, di (both are scratch below)
        push    di
        xor     cx, cx          ; cx = offset to data, starts at 0
        mov     dx, 0FFFFh      ; dx = initial crc
        mov     [bp+var_4], 8408h ; store poly in the local

; ---- byte loop: do { ... } while (cx < arg_0) ----
loc_42646:
        les     bx, [bp+arg_2]  ; es:bx = data (reloaded on every pass)
        add     bx, cx          ; es:bx = &data[cx]
        mov     al, es:[bx]     ; al = next byte of data
        xor     al, dl          ; crclo ^= data
        mov     dl, al          ;  (dh, the high byte, is unchanged)
        inc     cx              ; increment offset to data
        xor     di, di          ; di = bit counter (0 to 7)
        jmp     short loc_42672 ; enter the bit loop at its test

; ---- bit step: shift right, xor poly if the bit shifted out was 1 ----
loc_42657:
        mov     si, dx          ; si = (crc-1)>>1
        dec     si              ;  lsb 1: crc-1 only clears the lsb,
        mov     ax, si          ;         so si == dx after the shifts
        shr     ax, 1           ;  lsb 0: the borrow changes upper bits,
        mov     si, ax          ;         so si != dx after the shifts
        mov     ax, dx          ; dx = (crc)>>1
        shr     ax, 1
        mov     dx, ax
        cmp     si, dx          ; br if prior lsb of crc was 0
        jnz     short loc_42671
        mov     ax, dx          ; prior lsb was 1: crc ^= 0x8408
        xor     ax, [bp+var_4]
        mov     dx, ax

loc_42671:
        inc     di              ; increment bit counter

loc_42672:
        cmp     di, 8           ; loop till byte done (8 bit steps)
        jb      short loc_42657
        cmp     cx, [bp+arg_0]  ; loop till all bytes done (unsigned)
        jb      short loc_42646
        mov     ax, dx          ; dx = ~ crc
        not     ax
        mov     dx, ax
        les     bx, [bp+arg_2]  ; append crc to data, lsbyte first
        add     bx, cx          ; es:bx = first byte after the data
        mov     es:[bx], dl     ; store low byte of ~crc
        inc     cx
        mov     ax, dx          ; dx = ~crc >> 8 (msbyte now in dl)
        shr     ax, 8           ;  immediate shift count: 80186 or later
        mov     dx, ax
        les     bx, [bp+arg_2]
        add     bx, cx          ; es:bx = second byte after the data
        mov     es:[bx], dl     ; store high byte of ~crc
        inc     cx              ; useless, cx gets overwritten below
        pop     di              ; restore di, si
        pop     si
        pop     cx              ; discard var_4 (cx = poly), frees the local
        pop     ds              ; restore ds, bp
        pop     bp
        dec     bp              ; bp -= 1 (undo inc bp from above)
        retf                    ; far return; args stay on the stack
crc_cal endp