I want to write code that applies different transformations to strings. One of the transformations is actually a no-op:
// Identity "transformation": returns a copy of the input string, unchanged.
std::string transform(const std::string& in) {
return in;
}
Will the compiler understand that this is a noop and optimize it away? In other words, is there a difference between this
// Baseline: streams the string literal to std::cout directly.
int main() {
std::cout << "test";
}
and this:
// Variant: passes the literal through transform() and streams the returned string.
int main() {
std::cout << transform("test");
}
when transform
is defined as above? Can the compiler compile the two to the same (assembly?) code?
This depends. Let's take this code as an example.
#include <iostream>
// Returns a copy of `in`. Because the return type is a value and the operand is
// a const reference, the result is produced by std::string's copy constructor.
std::string transform(std::string const& in)
{
return in;
}
// The literal is first converted to a temporary std::string so it can bind to
// transform()'s reference parameter; the returned copy is then streamed to std::cout.
int main(void)
{
std::cout << transform("Hello World!");
}
Using Clang/LLVM with optimizations turned off, we get this (symbol names are shown demangled).
; IR for transform(): the returned string is written through the sret pointer %0;
; %1 is the reference to the input string.
define dso_local void @transform(std::__cxx11::basic_string<char, std::char_traits<char>, std::allocator<char>> const&)(ptr dead_on_unwind noalias writable sret(%"class.std::__cxx11::basic_string") align 8 %0, ptr noundef nonnull align 8 dereferenceable(32) %1) #0 !dbg !1559 {
%3 = alloca ptr, align 8
%4 = alloca ptr, align 8
store ptr %0, ptr %3, align 8
store ptr %1, ptr %4, align 8
#dbg_declare(ptr %4, !1566, !DIExpression(), !1567)
%5 = load ptr, ptr %4, align 8, !dbg !1568
; Copy-construct the result in place at %0 from the input string %5.
call void @std::__cxx11::basic_string<char, std::char_traits<char>, std::allocator<char>>::basic_string(std::__cxx11::basic_string<char, std::char_traits<char>, std::allocator<char>> const&)(ptr noundef nonnull align 8 dereferenceable(32) %0, ptr noundef nonnull align 8 dereferenceable(32) %5), !dbg !1568
ret void, !dbg !1569
}
; IR for main(): constructs a temporary std::string from @.str, calls transform()
; on it, streams the result to std::cout, then destroys both strings.
define dso_local noundef i32 @main() #2 personality ptr @__gxx_personality_v0 !dbg !1570 {
%1 = alloca ptr, align 8
%2 = alloca ptr, align 8
%3 = alloca ptr, align 8
%4 = alloca ptr, align 8
; %5 receives transform()'s result; %6 is the temporary string built from @.str;
; %7 is the allocator object passed to that constructor.
%5 = alloca %"class.std::__cxx11::basic_string", align 8
%6 = alloca %"class.std::__cxx11::basic_string", align 8
%7 = alloca %"class.std::allocator", align 1
; %8 / %9 hold the exception pointer and selector saved by the landing pads.
%8 = alloca ptr, align 8
%9 = alloca i32, align 4
store ptr %7, ptr %4, align 8
#dbg_declare(ptr %4, !1571, !DIExpression(), !1574)
%10 = load ptr, ptr %4, align 8
store ptr %10, ptr %1, align 8
#dbg_declare(ptr %1, !1576, !DIExpression(), !1579)
%11 = load ptr, ptr %1, align 8
; Construct the temporary std::string %6 from the C string @.str.
invoke void @std::__cxx11::basic_string<char, std::char_traits<char>, std::allocator<char>>::basic_string<std::allocator<char>>(char const*, std::allocator<char> const&)(ptr noundef nonnull align 8 dereferenceable(32) %6, ptr noundef @.str, ptr noundef nonnull align 1 dereferenceable(1) %7)
to label %12 unwind label %17, !dbg !1581
12:
; Call transform(%6), writing its result into %5.
invoke void @transform(std::__cxx11::basic_string<char, std::char_traits<char>, std::allocator<char>> const&)(ptr dead_on_unwind writable sret(%"class.std::__cxx11::basic_string") align 8 %5, ptr noundef nonnull align 8 dereferenceable(32) %6)
to label %13 unwind label %21, !dbg !1582
13:
; Stream the result %5 to std::cout.
%14 = invoke noundef nonnull align 8 dereferenceable(8) ptr @std::basic_ostream<char, std::char_traits<char>>& std::operator<<<char, std::char_traits<char>, std::allocator<char>>(std::basic_ostream<char, std::char_traits<char>>&, std::__cxx11::basic_string<char, std::char_traits<char>, std::allocator<char>> const&)(ptr noundef nonnull align 8 dereferenceable(8) @std::cout, ptr noundef nonnull align 8 dereferenceable(32) %5)
to label %15 unwind label %25, !dbg !1583
15:
; Normal exit: destroy the result (%5), the temporary (%6), then the allocator (%7).
call void @std::__cxx11::basic_string<char, std::char_traits<char>, std::allocator<char>>::~basic_string()(ptr noundef nonnull align 8 dereferenceable(32) %5) #7, !dbg !1584
call void @std::__cxx11::basic_string<char, std::char_traits<char>, std::allocator<char>>::~basic_string()(ptr noundef nonnull align 8 dereferenceable(32) %6) #7, !dbg !1584
store ptr %7, ptr %3, align 8
#dbg_declare(ptr %3, !1585, !DIExpression(), !1587)
%16 = load ptr, ptr %3, align 8
call void @std::__new_allocator<char>::~__new_allocator()(ptr noundef nonnull align 1 dereferenceable(1) %16) #7, !dbg !1589
ret i32 0, !dbg !1591
; Landing pad for an exception from constructing %6: only the allocator is cleaned up (block 30).
17:
%18 = landingpad { ptr, i32 }
cleanup, !dbg !1591
%19 = extractvalue { ptr, i32 } %18, 0, !dbg !1591
store ptr %19, ptr %8, align 8, !dbg !1591
%20 = extractvalue { ptr, i32 } %18, 1, !dbg !1591
store i32 %20, ptr %9, align 4, !dbg !1591
br label %30, !dbg !1591
; Landing pad for an exception from transform(): %6 is destroyed in block 29.
21:
%22 = landingpad { ptr, i32 }
cleanup, !dbg !1591
%23 = extractvalue { ptr, i32 } %22, 0, !dbg !1591
store ptr %23, ptr %8, align 8, !dbg !1591
%24 = extractvalue { ptr, i32 } %22, 1, !dbg !1591
store i32 %24, ptr %9, align 4, !dbg !1591
br label %29, !dbg !1591
; Landing pad for an exception from operator<<: destroy %5 here, then continue to block 29.
25:
%26 = landingpad { ptr, i32 }
cleanup, !dbg !1591
%27 = extractvalue { ptr, i32 } %26, 0, !dbg !1591
store ptr %27, ptr %8, align 8, !dbg !1591
%28 = extractvalue { ptr, i32 } %26, 1, !dbg !1591
store i32 %28, ptr %9, align 4, !dbg !1591
call void @std::__cxx11::basic_string<char, std::char_traits<char>, std::allocator<char>>::~basic_string()(ptr noundef nonnull align 8 dereferenceable(32) %5) #7, !dbg !1584
br label %29, !dbg !1584
; Shared cleanup: destroy the temporary string %6.
29:
call void @std::__cxx11::basic_string<char, std::char_traits<char>, std::allocator<char>>::~basic_string()(ptr noundef nonnull align 8 dereferenceable(32) %6) #7, !dbg !1584
br label %30, !dbg !1584
; Shared cleanup: destroy the allocator %7.
30:
store ptr %7, ptr %2, align 8
#dbg_declare(ptr %2, !1585, !DIExpression(), !1592)
%31 = load ptr, ptr %2, align 8
call void @std::__new_allocator<char>::~__new_allocator()(ptr noundef nonnull align 1 dereferenceable(1) %31) #7, !dbg !1594
br label %32, !dbg !1584
; Rebuild the saved { exception pointer, selector } pair and resume unwinding.
32:
%33 = load ptr, ptr %8, align 8, !dbg !1584
%34 = load i32, ptr %9, align 4, !dbg !1584
%35 = insertvalue { ptr, i32 } poison, ptr %33, 0, !dbg !1584
%36 = insertvalue { ptr, i32 } %35, i32 %34, 1, !dbg !1584
resume { ptr, i32 } %36, !dbg !1584
}
This assembly (technically LLVM IR, but I won't get into the details of that) is super unreadable, but here's pseudocode for it.
// Pseudocode for transform(): copy-constructs a new string from the input
// string and hands the copy back to the caller.
function transform(string: pointer): pointer
{
let out: pointer = std::string::new(string);
return out;
}
// Pseudocode for main(): build a std::string from the literal, copy it through
// transform(), print the copy, then clean up both strings.
function main(): int32
{
let cstring: pointer = "Hello World!";
let string: pointer = std::string::new(cstring);
let out: pointer = call transform(string);
call print(std::cout, out);
// The destructors for out and string run here.
return 0;
}
Basically, "transform" is just a thin wrapper on top of the string copy constructor (std::string::string(std::string const &string)
), and that'll disappear when you turn on optimization. What won't disappear, however, is the copy constructor call. The thing is, since that copy constructor is already compiled (since std::basic_string<char>
has been instantiated before), your compiler can't be sure whether or not the copy constructor has side effects (for example, it might print to the screen) so it can't just skip over it.
The solution to this is to recompile the C++ standard library alongside your code. Obviously, that's ridiculous. Doing a string copy construction is such a cheap operation that, unless your string is an entire file, I wouldn't worry at all. Otherwise, you can pick faster options (like a raw char*
, or a std::shared_ptr<std::string>
) to do stuff by reference instead of by value, which minimizes copy construction. If you do use std::shared_ptr
, make sure to pass by const&
whenever possible to avoid retain
/release
calls (that is, atomic reference-count increments and decrements). Otherwise, you can just use std::unique_ptr
which ensures that you always do this, and saves ~8 bytes (implementation dependent) of reference count memory if you're doing a bunch of string stuff.
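In this particular case, the easiest thing is to just return a constant reference. However, since you mentioned it's intended as a no-op, that's likely to not work in a context where a value is expected. Also note that when the argument is a temporary (as in transform("test")), the returned reference is only valid until the end of the full expression, so it must be used immediately and not stored.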