I'm trying to learn assembly. I disassembled a simple C program and debugged it with gdb. One thing I've noticed is that values are moved around very frequently.
0x0000555555555231 <+0>: push rbp
0x0000555555555232 <+1>: mov rbp,rsp
0x0000555555555235 <+4>: sub rsp,0x20
0x0000555555555239 <+8>: mov QWORD PTR [rbp-0x18],rdi
0x000055555555523d <+12>: mov QWORD PTR [rbp-0x20],rsi
0x0000555555555241 <+16>: mov rax,QWORD PTR [rbp-0x18]
0x0000555555555245 <+20>: mov rdi,rax
0x0000555555555248 <+23>: call 0x5555555551d9 <get_file_size>
rdi's value gets moved to the stack at rbp-0x18 (at <+8>), and rsi's value to the stack at rbp-0x20 (at <+12>). Then the value at rbp-0x18 gets moved to rax (at <+16>), which is then moved to rdi again (at <+20>). Why is this done? Why not just use rdi, or at least move the value at rbp-0x18 to rdi directly instead of going via rax (at <+16>)? That would save the instruction at <+20>.
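Compiling without any optimisations enabled does not make much sense if you want to learn assembly from the output. Without optimisations the compiler stores every argument and local variable on the stack and reloads it for each statement, which is why you see so many redundant moves.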
Here is an example of how optimizations can change the generated code.
/*
 * Example function used to compare compiler output with and without
 * optimizations.
 *
 * dest: destination buffer; its original value is returned.
 * src:  source buffer that is read from.
 * len:  counter; nothing is done when it is 0.
 *
 * Returns the value dest had on entry (savedDest).
 *
 * NOTE(review): the while statement below ends with ';', so its body is
 * empty. index therefore stays 0 for the whole loop, and every iteration
 * copies src[0] into dest[0]. The loop stops when --len reaches 0 or the
 * copied byte is 0. index++ then runs exactly once, after the loop, and
 * its result is never used. This presumably was meant to be a bounded
 * string copy with index++ as the loop body -- confirm before reusing.
 */
char *foo(char *dest, const char *src, size_t len)
{
char *savedDest = dest;
size_t index = 0;
if(len)
{
/* Empty-bodied loop: the trailing ';' is the whole loop body. */
while(--len && (dest[index] = src[index]));
/* Executed once, after the loop has finished. */
index++;
/* Writes the terminator to dest[0], not to dest[index]. */
*dest = 0;
}
return savedDest;
}
No optimizations
# Unoptimized build of
#   char *foo(char *dest, const char *src, size_t len)
# Arguments arrive in rdi (dest), rsi (src) and rdx (len).
# Every argument and local lives in a stack slot addressed from rbp and is
# reloaded from memory before each use:
#   [rbp-8]  = savedDest    [rbp-16] = index
#   [rbp-24] = dest         [rbp-32] = src       [rbp-40] = len
# No "sub rsp" is emitted, so the slots sit below rsp (presumably the
# System V red zone, since this function calls nothing -- confirm target ABI).
foo:
push rbp
mov rbp, rsp
mov QWORD PTR [rbp-24], rdi    # spill dest
mov QWORD PTR [rbp-32], rsi    # spill src
mov QWORD PTR [rbp-40], rdx    # spill len
mov rax, QWORD PTR [rbp-24]
mov QWORD PTR [rbp-8], rax    # savedDest = dest
mov QWORD PTR [rbp-16], 0    # index = 0
cmp QWORD PTR [rbp-40], 0    # if (len) ...
je .L2    # len == 0: skip straight to the return
.L4:
# Loop condition, first half: --len
sub QWORD PTR [rbp-40], 1
cmp QWORD PTR [rbp-40], 0
je .L3    # len reached 0: leave the loop
# Loop condition, second half: (dest[index] = src[index])
mov rdx, QWORD PTR [rbp-32]
mov rax, QWORD PTR [rbp-16]
add rdx, rax    # rdx = src + index
mov rcx, QWORD PTR [rbp-24]
mov rax, QWORD PTR [rbp-16]    # index is reloaded although rax already held it
add rax, rcx    # rax = dest + index
movzx edx, BYTE PTR [rdx]    # load src[index]
mov BYTE PTR [rax], dl    # store it to dest[index]
movzx eax, BYTE PTR [rax]    # read the stored byte back to test it
test al, al
jne .L4    # copied byte != 0: evaluate the condition again
.L3:
add QWORD PTR [rbp-16], 1    # index++ -- placed after the loop, runs once
mov rax, QWORD PTR [rbp-24]
mov BYTE PTR [rax], 0    # *dest = 0
.L2:
mov rax, QWORD PTR [rbp-8]    # return savedDest
pop rbp
ret
And optimized for size:
# Size-optimized build of
#   char *foo(char *dest, const char *src, size_t len)
# Everything stays in registers; no stack frame is set up.
#   rdi = dest (on entry)   rsi = src   rdx = len
#   rax = copy of dest, used both as the store address and the return value
#   cl  = byte being copied
# No index register appears: the loop only ever reads [rsi] and writes [rax],
# i.e. src[0] and dest[0].
foo:
mov rax, rdi    # return value = dest (savedDest)
test rdx, rdx    # if (len) ...
je .L2    # len == 0: return immediately
.L4:
dec rdx    # --len; dec sets ZF, so no separate compare is needed
je .L3    # len reached 0: leave the loop
mov cl, BYTE PTR [rsi]    # load src[0]
mov BYTE PTR [rax], cl    # store it to dest[0]
test cl, cl    # test the copied byte from the register, no reload
jne .L4    # copied byte != 0: evaluate the condition again
.L3:
mov BYTE PTR [rax], 0    # *dest = 0
.L2:
ret