Has anyone tested this in release mode builds? Or are the implementations so similar there's no significant difference?
I'm interested in the speed to:
Create a new shared_ptr
Create a copy of the shared_ptr
De-reference the pointer to access the pointee
This would be in a release build optimized for speed with new shared_ptrs being created with make_shared()
Ok, so it doesn't look like anyone has done this. Here's what I found using the standard VC 10 optimized settings for a WIN32 console app:
Visual C++ 2010 SP1 std::make_shared and std::shared_ptr were faster than the Boost 1.46.1 equivalents when populating a vector of 10 million pointer entries (0.92 secs for Visual C++ versus 1.96 secs for Boost, averaged across 20 runs)
Boost 1.46.1 was slightly faster than Visual C++ 2010 SP1 when copying a vector of 10 million pointer entries (0.15 secs versus 0.17 secs, averaged over 20 runs)
Visual C++ 2010 SP1 was slightly faster than the Boost 1.46.1 equivalents when dereferencing a vector of 10 million pointer entries 20 times ( 0.72 secs versus 0.811 secs averaged over 20 runs)
CONCLUSION: There was a significant difference when creating shared_ptrs to populate a vector. The Visual C++ 2010 shared_ptr was roughly twice as fast, indicating a substantial difference in implementation compared to Boost 1.46.1.
The other tests didn't show a significant difference.
Here's the code I used:
#include "stdafx.h"
// Minimal payload type used as the pointee in every shared_ptr benchmark below.
// It holds a single immutable unsigned value that is fixed at construction.
struct A
{
    // Stores `value` in m_value.
    // The parameter deliberately does not reuse the struct's name, so `A`
    // still refers to the type inside the constructor.
    A(const unsigned value) : m_value(value)
    {
    }

    // Value supplied at construction; const, so it cannot be reassigned afterwards.
    const unsigned m_value;
};
// Shared-pointer aliases for the two implementations being compared.
// Both point at the same payload type A so the benchmarks differ only in the
// smart-pointer library used.
typedef std::shared_ptr<A> APtr;        // standard library shared_ptr to A
typedef boost::shared_ptr<A> ABoostPtr; // Boost shared_ptr to A
double TestSTLCreateSpeed()
{
const unsigned NUM_ENTRIES = 10000000;
std::vector<APtr> buffer;
buffer.reserve(NUM_ENTRIES);
boost::timer timer;
for( unsigned nEntry = 0; nEntry < NUM_ENTRIES; ++nEntry)
{
buffer.emplace_back( std::make_shared<A>(nEntry) );
}
const double timeTaken = timer.elapsed();
std::cout << "STL create test took " << timeTaken << " secs.\r\n";
return timeTaken;
}
double BoostSTLCreateSpeed()
{
const unsigned NUM_ENTRIES = 10000000;
std::vector<ABoostPtr> buffer;
buffer.reserve(NUM_ENTRIES);
boost::timer timer;
for( unsigned nEntry = 0; nEntry < NUM_ENTRIES; ++nEntry)
{
buffer.emplace_back( boost::make_shared<A>(nEntry) );
}
const double timeTaken = timer.elapsed();
std::cout << "BOOST create test took " << timeTaken << " secs.\r\n";
return timeTaken;
}
double TestSTLCopySpeed()
{
const unsigned NUM_ENTRIES = 10000000;
std::vector<APtr> buffer;
buffer.reserve(NUM_ENTRIES);
for( unsigned nEntry = 0; nEntry < NUM_ENTRIES; ++nEntry)
{
buffer.emplace_back( std::make_shared<A>(nEntry) );
}
boost::timer timer;
std::vector<APtr> buffer2 = buffer;
const double timeTaken = timer.elapsed();
std::cout << "STL copy test took " << timeTaken << " secs.\r\n";
return timeTaken;
}
double TestBoostCopySpeed()
{
const unsigned NUM_ENTRIES = 10000000;
std::vector<ABoostPtr> buffer;
buffer.reserve(NUM_ENTRIES);
for( unsigned nEntry = 0; nEntry < NUM_ENTRIES; ++nEntry)
{
buffer.emplace_back( boost::make_shared<A>(nEntry) );
}
boost::timer timer;
std::vector<ABoostPtr> buffer2 = buffer;
const double timeTaken = timer.elapsed();
std::cout << "BOOST copy test took " << timeTaken << " secs.\r\n";
return timeTaken;
}
double TestBoostDerefSpeed()
{
const unsigned NUM_ENTRIES = 10000000;
std::vector<ABoostPtr> buffer;
buffer.reserve(NUM_ENTRIES);
for( unsigned nEntry = 0; nEntry < NUM_ENTRIES; ++nEntry)
{
buffer.emplace_back( boost::make_shared<A>(nEntry) );
}
boost::timer timer;
unsigned total = 0;
for(unsigned nIter = 0; nIter < 20; ++nIter)
{
std::for_each( buffer.begin(), buffer.end(),
[&](const ABoostPtr& pA){
total += pA->m_value;
});
}
const double timeTaken = timer.elapsed();
std::cout << "BOOST deref total = " << total << ".\r\n";
std::cout << "BOOST deref test took " << timeTaken << " secs.\r\n";
return timeTaken;
}
double TestSTLDerefSpeed()
{
const unsigned NUM_ENTRIES = 10000000;
std::vector<APtr> buffer;
buffer.reserve(NUM_ENTRIES);
for( unsigned nEntry = 0; nEntry < NUM_ENTRIES; ++nEntry)
{
buffer.emplace_back( std::make_shared<A>(nEntry) );
}
boost::timer timer;
unsigned total = 0;
for(unsigned nIter = 0; nIter < 20; ++nIter)
{
std::for_each( buffer.begin(), buffer.end(),
[&](const APtr& pA){
total += pA->m_value;
});
}
const double timeTaken = timer.elapsed();
std::cout << "STL deref total = " << total << ".\r\n";
std::cout << "STL deref test took " << timeTaken << " secs.\r\n";
return timeTaken;
}
// Calls `test` `runs` times, sums the times it returns, and prints
// "<label> test took <average> secs average." to std::cout.
// `label` is printed verbatim, so callers control the exact wording.
static void ReportAverage(const char* label, double (*test)(), const unsigned runs)
{
    double totalTime = 0.0;
    for (unsigned nTest = 0; nTest < runs; ++nTest)
    {
        totalTime += test();
    }
    std::cout << label << " test took " << totalTime / runs << " secs average.\r\n";
}

// Program entry point. Runs each benchmark NUM_TESTS times, in the order
// Boost create, STL create, Boost copy, STL copy, Boost deref, STL deref,
// and prints the average time after each group. Always returns 0.
// argc and argv are not used.
int _tmain(int argc, _TCHAR* argv[])
{
    const unsigned NUM_TESTS = 20;

    ReportAverage("BOOST create", BoostSTLCreateSpeed, NUM_TESTS);
    ReportAverage("STL create", TestSTLCreateSpeed, NUM_TESTS);
    ReportAverage("BOOST copy", TestBoostCopySpeed, NUM_TESTS);
    ReportAverage("STL copy", TestSTLCopySpeed, NUM_TESTS);
    // This label is spelled "Boost" rather than "BOOST" to keep the program's
    // existing output unchanged.
    ReportAverage("Boost deref", TestBoostDerefSpeed, NUM_TESTS);
    ReportAverage("STL deref", TestSTLDerefSpeed, NUM_TESTS);

    return 0;
}
I'll wait a while and if no one has refuted my results or come up with some better conclusions I'll accept my own answer.