I am trying to read the assembly generated for a piece of Rust code, but it is harder to read than the assembly generated by a C/C++ compiler. How should I analyze the assembly for the piece of Rust code below?
// Minimal example: coerce a non-capturing closure to a plain function pointer
// and print that pointer converted to an i32. The closure itself is never called.
fn main() {
// Closure that prints its single argument; it captures nothing from its environment.
let closure = |x| println!("{}", x);
// The explicit `fn(i32) -> ()` annotation coerces the closure to a function pointer.
let x: fn(x: i32) -> () = closure;
// `x as i32` casts the function pointer to an integer; on the x86-64 output shown
// for this example that keeps only the low 32 bits of the address.
println!("{}", x as i32);
}
The corresponding assembly code is shown below, with some comments of mine (I only pasted the main part; for the full version, please use this permalink: https://play.rust-lang.org/?version=nightly&mode=release&edition=2018&gist=e7ba4844f1ce6e881912dc074152988d):
playground::main: # @playground::main
# %bb.0:
subq $72, %rsp
leaq core::ops::function::FnOnce::call_once(%rip), %rax
movl %eax, 4(%rsp)
leaq 4(%rsp), %rax
movq %rax, 8(%rsp)
movq core::fmt::num::imp::<impl core::fmt::Display for i32>::fmt@GOTPCREL(%rip), %rax
movq %rax, 16(%rsp)
leaq .L__unnamed_2(%rip), %rax # the contents of rdx come from .L__unnamed_2(%rip), how to evaluate this part?
movq %rax, 24(%rsp) # the contents of rdi come from rax.
movq $2, 32(%rsp)
movq $0, 40(%rsp)
leaq 8(%rsp), %rax
movq %rax, 56(%rsp)
movq $1, 64(%rsp)
leaq 24(%rsp), %rdi # rdi should be the register holding the value passed to println!.
callq *std::io::stdio::_print@GOTPCREL(%rip)
addq $72, %rsp
retq
# -- End function
main: # @main
# %bb.0:
subq $8, %rsp
movq %rsi, %rcx
movslq %edi, %rdx
leaq playground::main(%rip), %rax
movq %rax, (%rsp)
leaq .L__unnamed_1(%rip), %rsi
movq %rsp, %rdi
callq *std::rt::lang_start_internal@GOTPCREL(%rip)
# kill: def $eax killed $eax killed $rax
popq %rcx
retq
# -- End function
.L__unnamed_1:
.quad core::ptr::drop_in_place<std::rt::lang_start<()>::{{closure}}>
.quad 8 # 0x8
.quad 8 # 0x8
.quad std::rt::lang_start::{{closure}}
.quad std::rt::lang_start::{{closure}}
.quad core::ops::function::FnOnce::call_once{{vtable.shim}}
.L__unnamed_3:
.L__unnamed_4:
.byte 10
.L__unnamed_2:
.quad .L__unnamed_3
.zero 8
.quad .L__unnamed_4
.asciz "\001\000\000\000\000\000\000"
I am also trying to find out how the Rust compiler treats a function pointer to a closure versus one to a normal function. I used a closure as an example here, but it seems I cannot find any assembly code that corresponds to the use of the variable "x".
There is no actual call to the closure, so no invocation code has been produced. The use of the x variable (the closure's parameter) is actually in a function not included in your post, which has the misleading name core::ops::function::FnOnce::call_once in the ASM output, but the much more mangled name @_ZN4core3ops8function6FnOnce9call_once17hefa1aa47132c4122E in the LLVM output of the same playground example. That function is the actual contents of the closure (the println!("{}", x)):
core::ops::function::FnOnce::call_once: # @core::ops::function::FnOnce::call_once
# %bb.0:
# allocate a bunch of stack space for variables and print arguments
subq $72, %rsp
# %edi has the value of x passed in to the closure, which we store in a new stack allocated variable
movl %edi, 4(%rsp)
# we then load the address of that variable into another variable
leaq 4(%rsp), %rax
movq %rax, 8(%rsp)
# the following is mostly populating the std::fmt::Arguments struct which is passed to print
movq core::fmt::num::imp::<impl core::fmt::Display for i32>::fmt@GOTPCREL(%rip), %rax
movq %rax, 16(%rsp)
leaq .L__unnamed_2(%rip), %rax
movq %rax, 24(%rsp)
movq $2, 32(%rsp)
movq $0, 40(%rsp)
# the address of the address of x is loaded into the arguments struct here
leaq 8(%rsp), %rax
movq %rax, 56(%rsp)
# finish populating the arguments and then call print
movq $1, 64(%rsp)
leaq 24(%rsp), %rdi
callq *std::io::stdio::_print@GOTPCREL(%rip)
addq $72, %rsp
retq
The playground::main function is where the closure is created, but the closure is not actually called there; like the function above, most of its code populates the complex std::fmt::Arguments struct:
playground::main: # @playground::main
# %bb.0:
subq $72, %rsp
# take the address of the closure's function; the movl below keeps only its low 32 bits (the `x as i32` value) in a stack slot
leaq core::ops::function::FnOnce::call_once(%rip), %rax
movl %eax, 4(%rsp)
# store the address of that 32-bit slot at 8(%rsp); paired with the Display fmt pointer stored at 16(%rsp) below, this appears to form the single format argument
leaq 4(%rsp), %rax
movq %rax, 8(%rsp)
# populate the std::fmt::Arguments struct
movq core::fmt::num::imp::<impl core::fmt::Display for i32>::fmt@GOTPCREL(%rip), %rax
movq %rax, 16(%rsp)
leaq .L__unnamed_2(%rip), %rax # the contents of rdx come from .L__unnamed_2(%rip), how to evaluate this part?
movq %rax, 24(%rsp) # the contents of rdi come from rax.
movq $2, 32(%rsp)
movq $0, 40(%rsp)
# store a pointer to the argument slot at 8(%rsp) in the std::fmt::Arguments struct (its count of 1 is stored just below)
leaq 8(%rsp), %rax
movq %rax, 56(%rsp)
# finish populating and call print
movq $1, 64(%rsp)
leaq 24(%rsp), %rdi # rdi should be the register holding the value passed to println!.
callq *std::io::stdio::_print@GOTPCREL(%rip)
addq $72, %rsp
retq
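From the LLVM output, std::fmt::Arguments is defined as %"std::fmt::Arguments" = type { [0 x i64], { [0 x { [0 x i8]*, i64 }]*, i64 }, [0 x i64], { i64*, i64 }, [0 x i64], { [0 x { i8*, i64* }]*, i64 }, [0 x i64] }
I don't know many of the internal details, so I'm not sure exactly why it references the static memory area .L__unnamed_2, but digging into std::fmt::Arguments might give some more clues. Judging from the data section in your post, .L__unnamed_2 looks like the target of the first field of that struct: two (pointer, length) pairs describing the literal pieces of the format string, namely an empty piece before the {} (.L__unnamed_3, length 0) and the trailing newline (.L__unnamed_4, byte 10, length 1). That would match the length of 2 stored at 32(%rsp) right after the pointer to .L__unnamed_2 is stored at 24(%rsp).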