The x264 lib is crashing on Windows as always. Now, I'm trying to fix it, and I don't understand one thing. In the code we can see only the function signature:
int x264_coeff_last64_sse2( dctcoef *dct );
But there is no implementation in any *.h, *.c or *.asm source file. How is that possible?
(The function is called and it crashed, so it has to be linked in)
They are being a little tricky :) It's actually defined as a macro in quant.c (line 356 in my git checkout).
Here is the definition:
/* Generates a static function x264_coeff_last<num>( dctcoef *l ).
 * The generated function walks l[num-1] down to l[0] and returns the index
 * of the first nonzero entry it meets, i.e. the highest nonzero index.
 * It returns -1 when all `num` entries are zero. */
#define last(num)\
static int x264_coeff_last##num( dctcoef *l )\
{\
int i_last = num-1;\
while( i_last >= 0 && l[i_last] == 0 )\
i_last--;\
return i_last;\
}
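EDIT: I think the assembly code you were looking for is at lines 1317-1363 of x264\common\x86\quant-a.asm. The full symbol name never appears literally there: `cglobal coeff_last64`, expanded under `INIT_XMM sse2`, adds the `x264_` prefix and the `_sse2` suffix, which is why a text search for `x264_coeff_last64_sse2` finds nothing: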
%ifndef ARCH_X86_64
; coeff_last64 -- variant assembled when ARCH_X86_64 is not defined.
; r0 is the base address for every coefficient load (presumably the dct
; pointer argument -- confirm against cglobal); the result is left in eax.
; NOTE(review): LAST_MASK and BSR are macros defined outside this excerpt.
; The comments below assume LAST_MASK writes a 16-bit mask with one bit set
; per ZERO coefficient of the 16 coefficients at the given address, and that
; BSR yields the index of the highest set bit -- confirm both.
cglobal coeff_last64, 1, 5-mmsize/16
; m2 = 0
pxor m2, m2
; Examine the upper half first: coefficients 32..47 -> r2d, 48..63 -> r3d.
LAST_MASK 16, r2d, r0+SIZEOF_DCTCOEF* 32, r4d
LAST_MASK 16, r3d, r0+SIZEOF_DCTCOEF* 48, r4d
; Merge both 16-bit masks into one 32-bit mask covering coefficients 32..63.
shl r3d, 16
or r2d, r3d
; Invert the mask. xor (unlike not) updates the flags, which the jne needs.
xor r2d, -1
; Inverted mask != 0: jump and compute the result from the upper half.
jne .secondhalf
; Otherwise repeat the procedure for coefficients 0..15 and 16..31.
LAST_MASK 16, r1d, r0+SIZEOF_DCTCOEF* 0, r4d
LAST_MASK 16, r3d, r0+SIZEOF_DCTCOEF*16, r4d
shl r3d, 16
or r1d, r3d
; No branch depends on this inversion, so a plain not is enough.
not r1d
BSR eax, r1d, 0x1f
RET
.secondhalf:
; r2d holds the inverted mask for coefficients 32..63; bias the bit index
; by 32 to turn it into a coefficient index.
BSR eax, r2d, 0x1f
add eax, 32
RET
%else
; coeff_last64 -- variant assembled when ARCH_X86_64 is defined.
; r0 is the base address for every coefficient load (presumably the dct
; pointer argument -- confirm against cglobal); the result is left in rax.
; NOTE(review): LAST_MASK and BSR are macros defined outside this excerpt.
; The comments below assume LAST_MASK writes a 16-bit mask with one bit set
; per ZERO coefficient of the 16 coefficients at the given address, and that
; BSR yields the index of the highest set bit -- confirm both.
cglobal coeff_last64, 1,4
; m2 = 0
pxor m2, m2
; One mask per group of 16 coefficients: 0..15, 16..31, 32..47, 48..63.
LAST_MASK 16, r1d, r0+SIZEOF_DCTCOEF* 0
LAST_MASK 16, r2d, r0+SIZEOF_DCTCOEF*16
LAST_MASK 16, r3d, r0+SIZEOF_DCTCOEF*32
; r0 is overwritten here; it is not used as an address after this line.
LAST_MASK 16, r0d, r0+SIZEOF_DCTCOEF*48
; Move the masks for groups 16..31 and 48..63 into bits 16..31.
shl r2d, 16
shl r0d, 16
; r1d = mask for coefficients 0..31, r3d = mask for coefficients 32..63.
or r1d, r2d
or r3d, r0d
; Build a single 64-bit mask in r1 with the 32..63 mask in the high half.
shl r3, 32
or r1, r3
; Invert the combined mask before the bit scan.
not r1
BSR rax, r1, 0x3f
RET
%endif
%endmacro
; Expand the COEFF_LAST macro once per instruction-set configuration.
; NOTE(review): INIT_MMX / INIT_XMM are defined outside this excerpt;
; presumably they select the register set and the CPU-name suffix used by
; the expansion that follows -- confirm.
; The mmx2 expansion is emitted only when ARCH_X86_64 is not defined.
%ifndef ARCH_X86_64
INIT_MMX mmx2
COEFF_LAST
%endif
; sse2 expansion.
INIT_XMM sse2
COEFF_LAST
; sse2 expansion with the additional lzcnt flag.
INIT_XMM sse2, lzcnt
COEFF_LAST
Hope that helps!