Could you please explain the low performance of ffi.cast in the following snippet?
-- Micro-benchmark comparing a plain counting loop with the same loop that
-- also performs an ffi.cast on every iteration.
-- Apart from `ffi`, every variable below is a global (no `local`).
prof = require("profile")
local ffi = require("ffi")

ffi.cdef[[ struct message { int field_a; }; ]]

--- Baseline: times a loop that only accumulates the loop index.
-- Allocates the same buffer as cast_test2 but never touches it.
-- Prints "test1" and the difference between two prof.rdtsc() readings.
function cast_test1()
  bytes = ffi.new("char[100000000]")
  sum = 0

  t1 = prof.rdtsc()
  for i = 1, 1000000 do
    sum = sum + i
  end
  t2 = prof.rdtsc()

  print("test1", tonumber(t2 - t1))
end

--- Same loop as cast_test1, plus one ffi.cast per iteration.
-- Each iteration casts the address `bytes + i * 16` to `struct message *`
-- and stores the resulting pointer in the global `msg`.
-- Prints "test2" and the difference between two prof.rdtsc() readings.
function cast_test2()
  bytes = ffi.new("char[100000000]")
  sum = 0

  t1 = prof.rdtsc()
  for i = 1, 1000000 do
    sum = sum + i
    -- NOTE(review): `msg` is a global, so this is a global-table store on
    -- every iteration; that likely contributes to the measured cost --
    -- confirm by comparing against a `local msg`.
    msg = ffi.cast("struct message *", bytes + i * 16)
    -- msg.field_a = i
  end
  t2 = prof.rdtsc()

  print("test2", tonumber(t2 - t1))
end

cast_test1()
cast_test2()
Looks like the loop with the cast runs about 30 times slower. Any ideas how to overcome this?
% luajit -v cast_tests.lua LuaJIT 2.0.3 -- Copyright (C) 2005-2014 Mike Pall. http://luajit.org/ test1 3227528 test2 94474000
Looks like the global msg variable was the main culprit. Replacing it with a local gives a 20x speedup :)
This applies to both luajit-2.0.3 and luajit-2.1.
--- Benchmark ffi.cast with all loop state held in local variables.
-- Pass 1 casts `buf + i * 4` to `struct message *` and writes `i` into
-- field_a while summing the indices; pass 2 repeats the cast and sums the
-- values read back from field_a.
-- Prints both sums, then "test3" followed by the prof.rdtsc() deltas of
-- the write pass and the read pass.
-- Relies on `ffi` and `prof` already being in scope.
function cast_test3()
  local buf = ffi.new("char[100000000]")
  local total = 0

  -- Pass 1: cast and store.
  local write_begin = prof.rdtsc()
  for i = 1, 1000000 do
    total = total + i
    local rec = ffi.cast("struct message *", buf + i * 4)
    rec.field_a = i
  end
  local write_end = prof.rdtsc()

  -- Pass 2: cast and load; timing starts where pass 1 stopped.
  local readback = 0
  for i = 1, 1000000 do
    local rec = ffi.cast("struct message *", buf + i * 4)
    readback = readback + rec.field_a
  end
  local read_end = prof.rdtsc()

  -- The two sums are printed side by side so they can be compared.
  print(total, readback)
  print("test3", tonumber(write_end - write_begin), tonumber(read_end - write_end))
end

cast_test3()
Results:
% /usr/bin/luajit -v cast_tests.lua ~/Projects/lua_tests/lua_rdtsc LuaJIT 2.0.3 -- Copyright (C) 2005-2014 Mike Pall. http://luajit.org/ 500000500000 500000500000 test3 4502508 4850884