Search code examples
Tags: assembly, optimization, rust, compiler-optimization, llvm-codegen

Why can the Rust compiler not optimize away the Err arm of Box::downcast?


I have a Box<dyn Any> and I know the underlying type so I want to optimize away the test in Box::downcast() (source).

First I tried with std::hint::unreachable_unchecked():

/// Downcasts the value returned by `any()` to `Box<i32>`, marking the failure
/// branch as unreachable instead of handling it.
///
/// # Safety
///
/// The value returned by `any()` must actually be an `i32`. If the downcast
/// fails, `unreachable_unchecked` is reached, which is undefined behavior.
pub unsafe fn downcast() -> Box<i32> {
    // presumably `any()` returns `Box<dyn Any>` — its definition is not shown here
    let value = any();
    if let Ok(value) = value.downcast() {
        value
    } else {
        // Hint to the optimizer that the `Err` arm can never be taken.
        std::hint::unreachable_unchecked()
    }
}

and

/// Downcasts the value returned by `any()` to `Box<i32>` using combinators:
/// the `Err` case is routed through a closure that calls
/// `unreachable_unchecked`, and the result is then unwrapped.
///
/// # Safety
///
/// The value returned by `any()` must actually be an `i32`; otherwise the
/// `map_err` closure runs and reaches `unreachable_unchecked`, which is
/// undefined behavior.
pub unsafe fn downcast() -> Box<i32> {
    any().downcast().map_err(|_| std::hint::unreachable_unchecked()).unwrap()
}

with rustc -C opt-level=3 both result in this (40 lines omitted):

example::downcast:
        push    rbx
        sub     rsp, 16
        call    any@PLT
        mov     rbx, rax
        mov     qword ptr [rsp], rax
        mov     qword ptr [rsp + 8], rdx
        mov     rdi, rax
        call    qword ptr [rdx + 24]
        mov     rax, rbx
        add     rsp, 16
        pop     rbx
        ret
        mov     rbx, rax
        mov     rdi, rsp
        call    core::ptr::drop_in_place
        mov     rdi, rbx
        call    _Unwind_Resume@PLT
        ud2

Since this is not the optimization I was looking for, I tried

/// Downcasts the value returned by `any()` to `Box<i32>` after telling the
/// optimizer to assume that the type check succeeds.
///
/// # Safety
///
/// The value returned by `any()` must actually be an `i32`; passing a false
/// condition to `assume` is undefined behavior.
pub unsafe fn downcast() -> Box<i32> {
    let value = any();
    // `std::intrinsics::assume` is a compiler intrinsic (not part of stable Rust).
    // Note that `value.is::<i32>()` is itself evaluated here, in addition to the
    // check performed inside `downcast()` below.
    std::intrinsics::assume(value.is::<i32>());
    value.downcast().unwrap()
}

but this got even worse (118 lines omitted):

example::downcast:
        push    r15
        push    r14
        push    rbx
        sub     rsp, 32
        call    any@PLT
        mov     rbx, rax
        mov     r14, rdx
        mov     qword ptr [rsp], rax
        mov     qword ptr [rsp + 8], rdx
        mov     r15, qword ptr [rdx + 24]
        mov     rdi, rax
        call    r15
        mov     qword ptr [rsp + 16], rbx
        mov     qword ptr [rsp + 24], r14
        mov     rdi, rbx
        call    r15
        movabs  rcx, -5015437470765251660     ;TypeId::of::<i32>()
        cmp     rax, rcx
        jne     .LBB5_7
        mov     rax, rbx
        add     rsp, 32
        pop     rbx
        pop     r14
        pop     r15
        ret
.LBB5_7:
        mov     rdi, rbx
        mov     rsi, r14
        call    core::result::unwrap_failed
        ud2
        mov     rbx, rax
        lea     rdi, [rsp + 16]
        call    core::ptr::drop_in_place
        mov     rdi, rbx
        call    _Unwind_Resume@PLT
        ud2
        mov     rbx, rax
        mov     rdi, rsp
        call    core::ptr::drop_in_place
        mov     rdi, rbx
        call    _Unwind_Resume@PLT
        ud2

I expected the compiler to generate code like this, which is the Ok arm from Box::downcast:

/// Converts the value returned by `any()` to `Box<i32>` without any type
/// check: the box is turned into a raw pointer, cast, and re-boxed.
///
/// # Safety
///
/// The value returned by `any()` must actually be an `i32`; nothing here
/// verifies it before the pointer is reinterpreted.
pub unsafe fn downcast() -> Box<i32> {
    let value = any();
    // Give up ownership to obtain the raw (fat) trait-object pointer.
    let raw: *mut dyn Any = Box::into_raw(value);
    // Cast to the concrete pointer type and take ownership back.
    Box::from_raw(raw as *mut i32)
}

which results in this (zero lines omitted):

example::downcast:
        push    rax             ; moves rsp down by 8 (presumably to keep the stack aligned for the call)
        call    any@PLT         ; no type-id lookup follows; rax from this call is returned as-is
        pop     rcx             ; undoes the push; rax is not modified
        ret

Why can the compiler not optimize the code in such a way?

All assembly generated by godbolt.


Solution

  • Let's try to optimize your code as well as we can manually. If we manually inline downcast(), we get the following:

    pub unsafe fn downcast() -> Box<i32> {
        let value = any();
        if value.is::<i32>() {
            let raw: *mut Any = Box::into_raw(value);
            Box::from_raw(raw as *mut i32)
        } else {
            std::hint::unreachable_unchecked()
        }
    }
    

    We can transform this:

    pub unsafe fn downcast() -> Box<i32> {
        let value = any();
        value.is::<i32>();
        let raw: *mut Any = Box::into_raw(value);
        Box::from_raw(raw as *mut i32)
    }
    

    value.is::<i32>() is unused! Can we remove it? Here lies the issue.

    The is method calls get_type_id on a dyn Any object. Because that is a dynamically dispatched call, the implementation that runs can only be determined at runtime, and it may have side effects. Thus the call cannot be removed.

    You can see the same lengthy assembly code as in your examples just from the following function:

    #![feature(get_type_id)]
    // Minimal reproduction: the result of `get_type_id` is discarded, yet the
    // call is made on a `dyn Any` value.
    // NOTE(review): `get_type_id` looks like the old unstable name of what is
    // now the stable `Any::type_id` — confirm before compiling on a current
    // toolchain.
    pub fn nop(any: Box<dyn Any>) {
        any.get_type_id();
    }
    

    Now you may argue that Any::get_type_id is universally defined by the compiler and cannot be overridden, but the compiler isn't smart enough to realize that.