As we know, on a machine with multi-byte words such as x86/x86_64, it is more efficient to copy/move a large block of memory word by word (4 or 8 bytes per step) than byte by byte.
I'm curious which of these approaches strncpy/memcpy/memmove actually use, and how they deal with word alignment.
char buf_A[8], buf_B[8];
// I often want to write this: one assignment through double* instead of a
// byte-by-byte copy (presumably relies on sizeof(double) == 8 to cover the
// whole buffer -- confirm for the target platform)
*(double*)buf_A = *(double*)buf_B;
// instead of this
// NOTE(review): the two are not equivalent -- strcpy stops after the first
// '\0' in buf_B, while the assignment above always copies sizeof(double) bytes.
strcpy(buf_A, buf_B);
// but it hurts the readability of my code.
I think all of the opinions and advice on this page are reasonable, but I decided to try a little experiment.
To my surprise, the fastest method isn't the one we would expect in theory.
I tried the following code.
#include <chrono>
#include <cstdlib>
#include <cstring>
#include <iostream>
#include <string>
using std::string;
using std::chrono::system_clock;
// Copies `s` doubles from `b` to `a`, one element per step, in increasing
// address order.
//
// a: destination; must have room for at least `s` doubles.
// b: source; must hold at least `s` doubles. It is only read, so it is taken
//    as pointer-to-const (callers passing a plain double* still compile).
// s: number of doubles (not bytes) to copy; 0 copies nothing.
inline void mycopy( double* a, const double* b, size_t s ) {
    for ( size_t i = 0; i < s; ++i )
        a[i] = b[i];
}
// Returns true when each of the `s` bytes starting at `a` equals 0xFF, i.e.
// every bit in the range is set. main() calls this after each timed loop on a
// destination it zeroed beforehand, so a true result shows that every byte was
// overwritten by the copy.
//
// a: first byte to inspect; only read, hence pointer-to-const.
// s: number of bytes to inspect; an empty range (s == 0) yields true.
bool assertAllTrue( const unsigned char* a, size_t s ) {
    for ( size_t i = 0; i < s; ++i ) {
        if ( a[i] != 0xFF )
            return false; // one cleared bit is enough to fail the check
    }
    return true;
}
int main( int argc, char** argv ) {
alignas( 16 ) char bufA[512], bufB[512];
memset( bufB, 0xFF, 512 ); // to prevent strncpy from stoping prematurely
system_clock::time_point startT;
memset( bufA, 0, sizeof( bufA ) );
startT = system_clock::now();
for ( int i = 0; i < 1024 * 1024; ++i )
strncpy( bufA, bufB, sizeof( bufA ) );
std::cout << "strncpy:" << ( system_clock::now() - startT ).count()
<< ", AllTrue:" << std::boolalpha
<< assertAllTrue( ( unsigned char* )bufA, sizeof( bufA ) )
<< std::endl;
memset( bufA, 0, sizeof( bufA ) );
startT = system_clock::now();
for ( int i = 0; i < 1024 * 1024; ++i )
memcpy( bufA, bufB, sizeof( bufA ) );
std::cout << "memcpy:" << ( system_clock::now() - startT ).count()
<< ", AllTrue:" << std::boolalpha
<< assertAllTrue( ( unsigned char* )bufA, sizeof( bufA ) )
<< std::endl;
memset( bufA, 0, sizeof( bufA ) );
startT = system_clock::now();
for ( int i = 0; i < 1024 * 1024; ++i )
memmove( bufA, bufB, sizeof( bufA ) );
std::cout << "memmove:" << ( system_clock::now() - startT ).count()
<< ", AllTrue:" << std::boolalpha
<< assertAllTrue( ( unsigned char* )bufA, sizeof( bufA ) )
<< std::endl;
memset( bufA, 0, sizeof( bufA ) );
startT = system_clock::now();
for ( int i = 0; i < 1024 * 1024; ++i )
mycopy( ( double* )bufA, ( double* )bufB, sizeof( bufA ) / sizeof( double ) );
std::cout << "mycopy:" << ( system_clock::now() - startT ).count()
<< ", AllTrue:" << std::boolalpha
<< assertAllTrue( ( unsigned char* )bufA, sizeof( bufA ) )
<< std::endl;
return EXIT_SUCCESS;
}
The result (one of many similar results):
strncpy:52840919, AllTrue:true
memcpy:57630499, AllTrue:true
memmove:57536472, AllTrue:true
mycopy:57577863, AllTrue:true
It looks like:
Funny, isn't it?