Search code examples
c++optimization

Will C++ compilers optimize away an "identity" function?


I want to write code that applies different transformations to strings. One of the transformations is actually a no-op:

// Identity "transformation": takes the input by const reference and returns it
// unchanged. Because the return type is a std::string value, the return
// statement copy-constructs a new string from `in`.
std::string transform(const std::string& in) {
  return in;
}

Will the compiler understand that this is a no-op and optimize it away? In other words, is there a difference between this

// Baseline variant: streams the string literal straight to std::cout,
// without going through transform.
int main() {
  std::cout << "test";
}

and this:

// Variant under question: the literal is passed through transform first, and
// the std::string it returns is what gets streamed to std::cout.
int main() {
  std::cout << transform("test");
}

when transform is defined as above? Can the compiler compile the two to the same (assembly?) code?


Solution

  • This depends. Let's take this code as an example.

    #include <iostream>
    
    // Identity transformation used as the test case: the result is returned by
    // value, so the return statement copy-constructs a new std::string from `in`.
    std::string transform(std::string const& in)
    {
      return in;
    }
    
    // Calls transform with a string literal and streams the returned string to
    // std::cout. The literal has to be converted to a temporary std::string in
    // order to bind to transform's `std::string const&` parameter.
    int main(void)
    {
      std::cout << transform("Hello World!");
    }
    

    Using Clang/LLVM, we get this.

    define dso_local void @transform(std::__cxx11::basic_string<char, std::char_traits<char>, std::allocator<char>> const&)(ptr dead_on_unwind noalias writable sret(%"class.std::__cxx11::basic_string") align 8 %0, ptr noundef nonnull align 8 dereferenceable(32) %1) #0 !dbg !1559 {
      %3 = alloca ptr, align 8
      %4 = alloca ptr, align 8
      store ptr %0, ptr %3, align 8
      store ptr %1, ptr %4, align 8
        #dbg_declare(ptr %4, !1566, !DIExpression(), !1567)
      %5 = load ptr, ptr %4, align 8, !dbg !1568
      call void @std::__cxx11::basic_string<char, std::char_traits<char>, std::allocator<char>>::basic_string(std::__cxx11::basic_string<char, std::char_traits<char>, std::allocator<char>> const&)(ptr noundef nonnull align 8 dereferenceable(32) %0, ptr noundef nonnull align 8 dereferenceable(32) %5), !dbg !1568
      ret void, !dbg !1569
    }
    
    define dso_local noundef i32 @main() #2 personality ptr @__gxx_personality_v0 !dbg !1570 {
      %1 = alloca ptr, align 8
      %2 = alloca ptr, align 8
      %3 = alloca ptr, align 8
      %4 = alloca ptr, align 8
      %5 = alloca %"class.std::__cxx11::basic_string", align 8
      %6 = alloca %"class.std::__cxx11::basic_string", align 8
      %7 = alloca %"class.std::allocator", align 1
      %8 = alloca ptr, align 8
      %9 = alloca i32, align 4
      store ptr %7, ptr %4, align 8
        #dbg_declare(ptr %4, !1571, !DIExpression(), !1574)
      %10 = load ptr, ptr %4, align 8
      store ptr %10, ptr %1, align 8
        #dbg_declare(ptr %1, !1576, !DIExpression(), !1579)
      %11 = load ptr, ptr %1, align 8
      invoke void @std::__cxx11::basic_string<char, std::char_traits<char>, std::allocator<char>>::basic_string<std::allocator<char>>(char const*, std::allocator<char> const&)(ptr noundef nonnull align 8 dereferenceable(32) %6, ptr noundef @.str, ptr noundef nonnull align 1 dereferenceable(1) %7)
              to label %12 unwind label %17, !dbg !1581
    
    12:
      invoke void @transform(std::__cxx11::basic_string<char, std::char_traits<char>, std::allocator<char>> const&)(ptr dead_on_unwind writable sret(%"class.std::__cxx11::basic_string") align 8 %5, ptr noundef nonnull align 8 dereferenceable(32) %6)
              to label %13 unwind label %21, !dbg !1582
    
    13:
      %14 = invoke noundef nonnull align 8 dereferenceable(8) ptr @std::basic_ostream<char, std::char_traits<char>>& std::operator<<<char, std::char_traits<char>, std::allocator<char>>(std::basic_ostream<char, std::char_traits<char>>&, std::__cxx11::basic_string<char, std::char_traits<char>, std::allocator<char>> const&)(ptr noundef nonnull align 8 dereferenceable(8) @std::cout, ptr noundef nonnull align 8 dereferenceable(32) %5)
              to label %15 unwind label %25, !dbg !1583
    
    15:
      call void @std::__cxx11::basic_string<char, std::char_traits<char>, std::allocator<char>>::~basic_string()(ptr noundef nonnull align 8 dereferenceable(32) %5) #7, !dbg !1584
      call void @std::__cxx11::basic_string<char, std::char_traits<char>, std::allocator<char>>::~basic_string()(ptr noundef nonnull align 8 dereferenceable(32) %6) #7, !dbg !1584
      store ptr %7, ptr %3, align 8
        #dbg_declare(ptr %3, !1585, !DIExpression(), !1587)
      %16 = load ptr, ptr %3, align 8
      call void @std::__new_allocator<char>::~__new_allocator()(ptr noundef nonnull align 1 dereferenceable(1) %16) #7, !dbg !1589
      ret i32 0, !dbg !1591
    
    17:
      %18 = landingpad { ptr, i32 }
              cleanup, !dbg !1591
      %19 = extractvalue { ptr, i32 } %18, 0, !dbg !1591
      store ptr %19, ptr %8, align 8, !dbg !1591
      %20 = extractvalue { ptr, i32 } %18, 1, !dbg !1591
      store i32 %20, ptr %9, align 4, !dbg !1591
      br label %30, !dbg !1591
    
    21:
      %22 = landingpad { ptr, i32 }
              cleanup, !dbg !1591
      %23 = extractvalue { ptr, i32 } %22, 0, !dbg !1591
      store ptr %23, ptr %8, align 8, !dbg !1591
      %24 = extractvalue { ptr, i32 } %22, 1, !dbg !1591
      store i32 %24, ptr %9, align 4, !dbg !1591
      br label %29, !dbg !1591
    
    25:
      %26 = landingpad { ptr, i32 }
              cleanup, !dbg !1591
      %27 = extractvalue { ptr, i32 } %26, 0, !dbg !1591
      store ptr %27, ptr %8, align 8, !dbg !1591
      %28 = extractvalue { ptr, i32 } %26, 1, !dbg !1591
      store i32 %28, ptr %9, align 4, !dbg !1591
      call void @std::__cxx11::basic_string<char, std::char_traits<char>, std::allocator<char>>::~basic_string()(ptr noundef nonnull align 8 dereferenceable(32) %5) #7, !dbg !1584
      br label %29, !dbg !1584
    
    29:
      call void @std::__cxx11::basic_string<char, std::char_traits<char>, std::allocator<char>>::~basic_string()(ptr noundef nonnull align 8 dereferenceable(32) %6) #7, !dbg !1584
      br label %30, !dbg !1584
    
    30:
      store ptr %7, ptr %2, align 8
        #dbg_declare(ptr %2, !1585, !DIExpression(), !1592)
      %31 = load ptr, ptr %2, align 8
      call void @std::__new_allocator<char>::~__new_allocator()(ptr noundef nonnull align 1 dereferenceable(1) %31) #7, !dbg !1594
      br label %32, !dbg !1584
    
    32:
      %33 = load ptr, ptr %8, align 8, !dbg !1584
      %34 = load i32, ptr %9, align 4, !dbg !1584
      %35 = insertvalue { ptr, i32 } poison, ptr %33, 0, !dbg !1584
      %36 = insertvalue { ptr, i32 } %35, i32 %34, 1, !dbg !1584
      resume { ptr, i32 } %36, !dbg !1584
    }
    

    This output (technically LLVM IR rather than assembly, and generated without optimizations, but I won't get into the details of that) is super unreadable, so here's the pseudocode of it.

    function transform(string: pointer): pointer
    {
      let out: pointer = std::string::new(string);
      return out;
    }
    
    function main(): int32
    {
      let cstring: pointer = "Hello World!";
      let string: pointer = std::string::new(cstring);
      let out: pointer = call transform(string);
      call std::operator<<(std::cout, out);
      return 0;
    }
    

    Basically, "transform" is just a thin wrapper on top of the string copy constructor (std::string::string(std::string const&)), and that wrapper will disappear when you turn on optimization. What won't disappear, however, is the copy constructor call. The thing is, because that copy constructor is already compiled (std::basic_string<char> has already been instantiated), your compiler may be unable to prove that the copy constructor has no side effects (for example, it might print to the screen), so it can't just skip over it.

    The solution to this is to recompile the C++ standard library alongside your code. Obviously, that's ridiculous. Copy-constructing a string is such a cheap operation that, unless your string is an entire file, I wouldn't worry at all. Otherwise, you can pick faster options (like a raw char*, or a std::shared_ptr<std::string>) to pass things by reference instead of by value, which minimizes copy construction. If you do use std::shared_ptr, make sure to pass it by const& whenever possible to avoid reference-count increment/decrement operations (similar to the retain/release calls of ARC memory management). Alternatively, you can just use std::unique_ptr, which cannot be copied and so guarantees that no such operations happen, and which saves ~8 bytes (implementation dependent) of reference-count memory if you're doing a bunch of string stuff.

    In this particular case, the easiest thing is to just return a constant reference (const std::string&) instead of a copy. However, since you mentioned that the no-op is only one of several transformations, that's unlikely to work in a context where a value is expected.