In the code below I unpack a msgpack file into a std::map. The process takes almost 85 seconds.
#include <map>
#include <vector>
#include <string>
#include <iostream>
#include <exception>
#include <msgpack.hpp>
#include <boost/variant.hpp>
#include <boost/filesystem.hpp>
using namespace std::literals;
namespace fs = boost::filesystem;
// Tags for the alternatives a MOVar can hold. The numeric values are compared
// against boost::variant::which() by the is_*() helpers, so they have to follow
// the order of the alternatives in the variant typedef.
enum class TYPE_MSG : int
{
    NULLPTR_ = 0,
    INT64_ = 1,
    DOUBLE_ = 2,
    STRING_ = 3,
    VECTOR_ = 4,
    MAP_ = 5
};
class MOVar;
// Underlying variant. The order of the alternatives must match TYPE_MSG,
// because the is_*() helpers compare which() against those enumerators.
typedef boost::variant<std::nullptr_t, int64_t, double, std::string, std::vector<MOVar>, std::map<std::string, MOVar>> MOVarST;
// Dynamically typed value: null, 64-bit integer, double, string, vector of
// MOVar, or string-keyed map of MOVar.
class MOVar : public MOVarST
{
public:
    // A default-constructed MOVar holds null.
    MOVar() : MOVarST(nullptr) {}
    MOVar(double b) { MOVarST::operator=(b); }
    MOVar(int64_t b) { MOVarST::operator=(b); }
    // Plain ints are widened so that they are stored as the int64_t alternative.
    MOVar(int b) : MOVar(static_cast<int64_t>(b)) {}
    // `b` is a named rvalue reference and therefore an lvalue inside the body;
    // it has to be moved explicitly, otherwise the string is copied.
    MOVar(std::string &&b) { MOVarST::operator=(std::move(b)); }
    // These two copy their argument into the variant.
    MOVar(std::vector<MOVar> &b) { MOVarST::operator=(b); }
    MOVar(std::map<std::string, MOVar> &b) { MOVarST::operator=(b); }

    const MOVar &operator=(const int64_t &b) { MOVarST::operator=(b); return *this; }
    const MOVar &operator=(std::string &&b) { MOVarST::operator=(std::move(b)); return *this; }
    // NOTE(review): this overload moves out of a non-const lvalue reference, so
    // the caller's string is left in a moved-from state after the assignment.
    const MOVar &operator=(std::string &b) { MOVarST::operator=(std::move(b)); return *this; }
    const MOVar &operator=(const double &b) { MOVarST::operator=(b); return *this; }
    const MOVar &operator=(std::vector<MOVar> &&b) { MOVarST::operator=(std::move(b)); return *this; }
    const MOVar &operator=(std::map<std::string, MOVar> &&b) { MOVarST::operator=(std::move(b)); return *this; }

    // Type queries: compare the index of the active alternative with TYPE_MSG.
    bool is_map() const { return which() == (int)TYPE_MSG::MAP_; }
    bool is_int64() const { return which() == (int)TYPE_MSG::INT64_; }
    bool is_nill() const { return which() == (int)TYPE_MSG::NULLPTR_; }
    bool is_double() const { return which() == (int)TYPE_MSG::DOUBLE_; }
    bool is_string() const { return which() == (int)TYPE_MSG::STRING_; }
    bool is_vector() const { return which() == (int)TYPE_MSG::VECTOR_; }

    // Typed accessors, implemented with the reference form of boost::get.
    const double &_as_double() const { return boost::get<double>(*this); }
    const int64_t &_as_int64() const { return boost::get<int64_t>(*this); }
    const std::string &_as_string() const { return boost::get<std::string>(*this); }
    const std::vector<MOVar> &_as_vector() const { return boost::get<std::vector<MOVar>>(*this); }
    const std::map<std::string, MOVar> &_as_map() const { return boost::get<std::map<std::string, MOVar>>(*this); }
private:
};
// Converts a msgpack object into a MOVar. Only declared here; the definition
// is not part of this listing.
void convert_msgpack_to_movar(msgpack::object const &o, MOVar &v);
namespace msgpack
{
MSGPACK_API_VERSION_NAMESPACE(MSGPACK_DEFAULT_API_NS)
{
namespace adaptor
{
// Non-intrusive msgpack adaptor for MOVar: object::convert() on a MOVar is
// forwarded to convert_msgpack_to_movar().
template <>
struct convert<MOVar>
{
msgpack::object const &operator()(msgpack::object const &o, MOVar &v) const
{
convert_msgpack_to_movar(o, v);
return o;
}
};
}
}
}
int main()
{
std::map<std::string, MOVar> map;
auto fileName = "big_map.msgpack"s;
auto startTime = std::chrono::high_resolution_clock::now();
{
std::ifstream file(fileName, std::ios::binary);
auto fileSize = fs::file_size(fileName);
std::vector<char> buffer(fileSize);
file.read(buffer.data(), fileSize);
msgpack::object_handle oh = msgpack::unpack(buffer.data(), fileSize);
msgpack::object deserialized = oh.get();
deserialized.convert(map);
}
auto endTime = std::chrono::high_resolution_clock::now();
auto duration = std::chrono::duration_cast<std::chrono::seconds>(endTime - startTime);
std::cout << "Duration: " << duration.count() << " seconds" << std::endl;
}
But when I replace std::map with boost::interprocess::map, the time increases almost 12-fold.
What am I doing wrong? Do I need to change the way I use Boost shared memory?
(I checked both maps from a second process, and the results were the same for both of them.)
#include <fstream>
#include <iostream>
#include <exception>
#include <msgpack.hpp>
#include <boost/variant.hpp>
#include <boost/filesystem.hpp>
#include <boost/interprocess/containers/map.hpp>
#include <boost/interprocess/containers/string.hpp>
#include <boost/interprocess/containers/vector.hpp>
#include <boost/interprocess/managed_shared_memory.hpp>
#include <boost/interprocess/allocators/allocator.hpp>
using namespace std::literals;
namespace fs = boost::filesystem;
namespace bip = boost::interprocess;
// Tags for the alternatives of the variant types in this listing. The numeric
// values are compared against boost::variant::which(), so they have to follow
// the order of the alternatives in the variant typedef.
enum class TYPE_MSG : int
{
    NULLPTR_ = 0,
    INT64_ = 1,
    DOUBLE_ = 2,
    STRING_ = 3,
    VECTOR_ = 4,
    MAP_ = 5
};
// Name of the shared-memory object that backs the managed segment.
auto sharedMemoryName = "MySharedMemory"s;
// Requested segment size in bytes (9.8 GiB); the floating-point product is
// converted to an integer on initialization.
unsigned long long shmSize = 9.8 * 1024 * 1024 * 1024ull;
// Global managed segment, opened or created during static initialization.
bip::managed_shared_memory segment(bip::open_or_create, sharedMemoryName.data(), shmSize);
// Allocator type bound to the segment manager of a managed_shared_memory.
template <typename T>
using Alloc = bip::allocator<T, bip::managed_shared_memory::segment_manager>;
// Global allocator instance; it must be defined after `segment`, whose segment
// manager it captures.
const Alloc<void> allocator(segment.get_segment_manager());
//MOVar class is like last example
class MOVarBip;
// Shared-memory counterparts of std::string, std::vector<MOVar> and
// std::map<std::string, MOVar>; all of them allocate through Alloc<>.
using STR = bip::basic_string<char, std::char_traits<char>, Alloc<char>>;
using PAIR = std::pair<const STR, MOVarBip>;
using MAP = bip::map<STR, MOVarBip, std::less<STR>, Alloc<PAIR>>;
using Vec = bip::vector<MOVarBip, Alloc<MOVarBip>>;
// Underlying variant of MOVarBip; the alternative order matches TYPE_MSG.
typedef boost::variant<std::nullptr_t, int64_t, double, STR, Vec, MAP> MOVarSTBIP;
// Copies one key/value pair of a heap-based map into the shared-memory map.
// The declaration has to follow the aliases because its signature names MAP,
// and it has to precede MOVarBip, whose member functions call it.
void Convertor(MAP &map, std::map<std::string, MOVar>::const_iterator &pair);
// Shared-memory variant value (null, int64, double, STR, Vec or MAP).
// The assignment operators taking std:: containers convert heap-based data
// (std::string, std::vector<MOVar>, std::map<std::string, MOVar>) into the
// shared-memory representation.
class MOVarBip : public MOVarSTBIP
{
public:
// A default-constructed MOVarBip holds null.
MOVarBip() : MOVarSTBIP(nullptr) {}
// NOTE(review): the constructors below take non-const lvalue references and
// move from them, so the argument is left in a moved-from state.
MOVarBip(int64_t &b) { MOVarSTBIP::operator=(std::move(b)); }
MOVarBip(double &b) { MOVarSTBIP::operator=(std::move(b)); }
MOVarBip(STR &b) { MOVarSTBIP::operator=(std::move(b)); }
MOVarBip(Vec &b) { MOVarSTBIP::operator=(std::move(b)); }
MOVarBip(MAP &b) { MOVarSTBIP::operator=(std::move(b)); }
const MOVarBip& operator=(int64_t&& b) { MOVarSTBIP::operator=(std::move(b)); return *this; }
const MOVarBip& operator=(double&& b) { MOVarSTBIP::operator=(std::move(b)); return *this; }
// Stores a heap string as STR. An anonymous STR is constructed in the segment,
// assigned from b.data() (a const char*, so the text ends at the first NUL
// byte) and then moved into the variant. The anonymous STR object itself is
// not destroyed in this function.
const MOVarBip& operator=(std::string&& b)
{
auto &tmpValue = *segment.construct<STR>(bip::anonymous_instance)(allocator);
tmpValue = b.data();
MOVarSTBIP::operator=(std::move(tmpValue));
return *this;
}
// Stores a heap vector as Vec. Only MAP_ and STRING_ elements are supported;
// any other element type throws std::logic_error. Every intermediate container
// is an anonymous segment instance that is not destroyed in this function.
const MOVarBip& operator=(std::vector<MOVar>&& value)
{
auto &vecBip = *segment.construct<Vec>(bip::anonymous_instance)(allocator);
for (auto &item : value)
{
switch (item.which())
{
case static_cast<int>(TYPE_MSG::MAP_):
{
// Convert the nested map entry by entry through Convertor().
auto &mapBip = *segment.construct<MAP>(bip::anonymous_instance)(allocator);
auto element = item._as_map().begin();
auto mapEnd = item._as_map().end();
for (; element != mapEnd; ++element)
{
Convertor(mapBip, element);
}
MOVarBip valueBip = mapBip;
vecBip.push_back(std::move(valueBip));
break;
}
case static_cast<int>(TYPE_MSG::STRING_):
{
auto &tmpValue = *segment.construct<STR>(bip::anonymous_instance)(allocator);
tmpValue = item._as_string().data();
MOVarBip valueBip = tmpValue;
vecBip.push_back(std::move(valueBip));
break;
}
default:
{
throw std::logic_error("The code doesn't support this scenario for Vec type!");
}
}
}
MOVarSTBIP::operator=(std::move(vecBip));
return *this;
}
// Stores a heap map as MAP by converting every entry through Convertor().
const MOVarBip& operator=(std::map<std::string, MOVar>&& value)
{
auto &mapBip = *segment.construct<MAP>(bip::anonymous_instance)(allocator);
auto itr = value.cbegin();
auto endPoint = value.cend();
for (; itr != endPoint; ++itr)
{
Convertor(mapBip, itr);
}
MOVarSTBIP::operator=(std::move(mapBip));
return *this;
}
// Type queries: compare the index of the active alternative with TYPE_MSG.
bool is_map() const { return which() == (int)TYPE_MSG::MAP_; }
bool is_int64() const { return which() == (int)TYPE_MSG::INT64_; }
bool is_nill() const { return which() == (int)TYPE_MSG::NULLPTR_; }
bool is_double() const { return which() == (int)TYPE_MSG::DOUBLE_; }
bool is_string() const { return which() == (int)TYPE_MSG::STRING_; }
bool is_vector() const { return which() == (int)TYPE_MSG::VECTOR_; }
// Typed accessors, implemented with the reference form of boost::get.
const double &_as_double() const { return boost::get<double>(*this); }
const int64_t &_as_int64() const { return boost::get<int64_t>(*this); }
const STR &_as_string() const { return boost::get<STR>(*this); }
const Vec &_as_vector() const { return boost::get<Vec>(*this); }
const MAP &_as_map() const { return boost::get<MAP>(*this); }
private:
};
// Copies the entry `pair` of a heap-based map into the shared-memory map `map`.
// The key and the value are first built as anonymous instances inside the
// global segment and then moved into the map; nested maps are converted
// recursively. Vectors may only contain maps or strings. Throws
// std::logic_error for unsupported types.
// NOTE(review): none of the anonymous instances constructed here is destroyed
// in this function.
void Convertor(MAP &map, std::map<std::string, MOVar>::const_iterator &pair)
{
auto &keyBip = *segment.construct<STR>(bip::anonymous_instance)(allocator);
// Assigned from a const char*, so the key ends at the first NUL byte.
keyBip = pair->first.data();
auto &value = pair->second;
switch (value.which())
{
case static_cast<int>(TYPE_MSG::NULLPTR_):
{
auto &valueBip = *segment.construct<MOVarBip>(bip::anonymous_instance)();
map.insert({std::move(keyBip), std::move(valueBip)});
break;
}
case static_cast<int>(TYPE_MSG::INT64_):
{
// Scalars also go through an anonymous segment object before being wrapped.
auto &tmpValue = *segment.construct<int64_t>(bip::anonymous_instance)();
tmpValue = value._as_int64();
MOVarBip valueBip = tmpValue;
map.insert({std::move(keyBip), std::move(valueBip)});
break;
}
case static_cast<int>(TYPE_MSG::DOUBLE_):
{
auto &tmpValue = *segment.construct<double>(bip::anonymous_instance)();
tmpValue = value._as_double();
MOVarBip valueBip = tmpValue;
map.insert({std::move(keyBip), std::move(valueBip)});
break;
}
case static_cast<int>(TYPE_MSG::STRING_):
{
auto &tmpValue = *segment.construct<STR>(bip::anonymous_instance)(allocator);
tmpValue = value._as_string().data();
MOVarBip valueBip = tmpValue;
map.insert({std::move(keyBip), std::move(valueBip)});
break;
}
case static_cast<int>(TYPE_MSG::VECTOR_):
{
auto &vecBip = *segment.construct<Vec>(bip::anonymous_instance)(allocator);
for (auto &item : value._as_vector())
{
switch (item.which())
{
case static_cast<int>(TYPE_MSG::MAP_):
{
// Nested map inside a vector: convert it entry by entry (recursion).
auto &mapBip = *segment.construct<MAP>(bip::anonymous_instance)(allocator);
auto element = item._as_map().begin();
auto mapEnd = item._as_map().end();
for (; element != mapEnd; ++element)
{
Convertor(mapBip, element);
}
MOVarBip valueBip = mapBip;
vecBip.push_back(std::move(valueBip));
break;
}
case static_cast<int>(TYPE_MSG::STRING_):
{
auto &tmpValue = *segment.construct<STR>(bip::anonymous_instance)(allocator);
tmpValue = item._as_string().data();
MOVarBip valueBip = tmpValue;
vecBip.push_back(std::move(valueBip));
break;
}
default:
{
throw std::logic_error("The code doesn't support this scenario for Vec type!");
}
}
}
MOVarBip valueBip = vecBip;
map.insert({std::move(keyBip), std::move(valueBip)});
break;
}
case static_cast<int>(TYPE_MSG::MAP_):
{
// Nested map: convert it entry by entry (recursion).
auto &mapBip = *segment.construct<MAP>(bip::anonymous_instance)(allocator);
auto itr = value._as_map().begin();
auto endPoint = value._as_map().end();
for (; itr != endPoint; ++itr)
{
Convertor(mapBip, itr);
}
MOVarBip valueBip = mapBip;
map.insert({std::move(keyBip), std::move(valueBip)});
break;
}
default:
{
throw std::logic_error("The code doesn't support this scenario!");
break;
}
}
}
// Non-intrusive msgpack adaptors for the shared-memory types STR, MOVarBip and MAP.
namespace msgpack
{
MSGPACK_API_VERSION_NAMESPACE(MSGPACK_DEFAULT_API_NS)
{
namespace adaptor
{
// STR accepts both msgpack BIN and STR payloads; any other type throws
// msgpack::type_error.
template <>
struct convert<STR>
{
msgpack::object const &operator()(msgpack::object const &o, STR &v) const
{
switch (o.type)
{
case msgpack::type::BIN:
v.assign(o.via.bin.ptr, o.via.bin.size);
break;
case msgpack::type::STR:
v.assign(o.via.str.ptr, o.via.str.size);
break;
default:
throw msgpack::type_error();
break;
}
return o;
}
};
// Maps every msgpack type onto a MOVarBip alternative. Strings, arrays and
// maps are first decoded into heap-based std:: types and then handed to the
// MOVarBip assignment operators, which copy them into the segment.
template <>
struct convert<MOVarBip>
{
msgpack::object const &operator()(msgpack::object const &o, MOVarBip &v) const
{
switch (o.type)
{
case msgpack::type::NIL:
v = MOVarBip();
break;
case msgpack::type::BOOLEAN:
v = (int64_t)(o.as<bool>());
break;
case msgpack::type::POSITIVE_INTEGER:
{
// Values that do not fit into int64_t are stored as their decimal string.
uint64_t temp = o.as<uint64_t>();
if (temp > (uint64_t)0x7FFFFFFFFFFFFFFF)
{
v = std::to_string(temp);
}
else
{
v = ((int64_t)temp);
}
break;
}
case msgpack::type::NEGATIVE_INTEGER:
v = (o.as<int64_t>());
break;
case msgpack::type::FLOAT32:
v = ((double)o.as<float>());
break;
case msgpack::type::FLOAT64:
v = (o.as<double>());
break;
case msgpack::type::STR:
v = o.as<std::string>();
break;
case msgpack::type::BIN:
v = o.as<std::string>();
break;
case msgpack::type::ARRAY:
v = o.as<std::vector<MOVar>>();
break;
case msgpack::type::MAP:
v = o.as<std::map<std::string, MOVar>>();
break;
case msgpack::type::EXT:
throw msgpack::type_error();
break;
}
return o;
}
};
// Builds the map in an anonymous segment instance and move-assigns it to `v`.
// Throws msgpack::type_error if the object is not a map.
// NOTE(review): the anonymous MAP and STR instances are not destroyed in this
// function.
template <>
struct convert<MAP>
{
msgpack::object const &operator()(msgpack::object const &o, MAP &v) const
{
if (o.type != msgpack::type::MAP)
{
throw msgpack::type_error();
}
msgpack::object_kv *p(o.via.map.ptr);
msgpack::object_kv *const pend(o.via.map.ptr + o.via.map.size);
auto &tmp = *segment.construct<MAP>(bip::anonymous_instance)(allocator);
for (; p != pend; ++p)
{
auto &key = *segment.construct<STR>(bip::anonymous_instance)(allocator);
p->key.convert(key);
// operator[] creates the entry; the value is converted in place.
p->val.convert(tmp[std::move(key)]);
}
v = std::move(tmp);
return o;
}
};
}
}
}
// Reads big_map.msgpack, converts it into the named shared-memory map
// "bip_map", prints the elapsed time in whole seconds and finally removes the
// shared-memory object.
int main()
{
    auto fileName = "big_map.msgpack"s;
    // The timing variables are declared here; they were previously assigned
    // without any declaration.
    auto startTime = std::chrono::high_resolution_clock::now();
    {
        std::ifstream file(fileName, std::ios::binary);
        auto fileSize = fs::file_size(fileName);
        std::vector<char> buffer(fileSize);
        file.read(buffer.data(), fileSize);
        auto &bip_map = *segment.construct<MAP>("bip_map")(allocator);
        msgpack::object_handle oh = msgpack::unpack(buffer.data(), fileSize);
        msgpack::object deserialized = oh.get();
        deserialized.convert(bip_map);
    }
    auto endTime = std::chrono::high_resolution_clock::now();
    auto duration = std::chrono::duration_cast<std::chrono::seconds>(endTime - startTime);
    std::cout << "Duration: " << duration.count() << " seconds" << std::endl;
    boost::interprocess::shared_memory_object::remove(sharedMemoryName.data());
}
This is the output of the code:
Duration: 72 seconds (for std::map)
Duration: 956 seconds (for boost::interprocess::map)
You are negating the allocators by manually constructing anonymous instances instead. Not only is this a recipe for leaks (which your code has a ton of), it's also less efficient.
All the handcoded conversions from MOVar to MOVarBip could be handled by a single function avoiding many many temporaries:
// Converts a heap-based value `v` (a MOVar or one of its alternatives) into
// its shared-memory representation, writing the outcome to `result`.
// Containers are converted recursively, element by element, directly into the
// destination.
// NOTE(review): assumes MOVarBip can be assigned from every alternative used
// below (nullptr, int64_t, double, STR, MAP, Vec) -- confirm against the
// MOVarBip definition in use.
template <typename T> void BipConvert(T const& v, MOVarBip& result) {
    struct Vis {
        MOVarBip& result;
        // Dispatch on the active alternative of the source variant.
        void operator()(MOVar const& v) const { boost::apply_visitor(*this, v); }
        void operator()(std::nullptr_t) const { result = {}; }
        void operator()(int64_t v) const { result = v; }
        // Without this overload a double would be routed to the int64_t overload
        // through an implicit conversion and lose its fractional part.
        void operator()(double v) const { result = v; }
        void operator()(std::string const& v) const {
            result = STR(v.c_str(), segment.get_segment_manager());
        }
        void operator()(std::map<std::string, MOVar> const& v) const {
            result = MAP(segment.get_segment_manager());
            // The range expression `v` is the parameter; the binding `v` of the
            // structured binding shadows it only inside the loop body.
            for (auto& map = boost::get<MAP>(result); auto& [k, v] : v)
                BipConvert(v, map.emplace(k).first->second);
        }
        void operator()(std::vector<MOVar> const& v) const {
            result = Vec(segment.get_segment_manager());
            for (auto& vec = boost::get<Vec>(result); auto& item : v)
                BipConvert(item, vec.emplace_back());
        }
    };
    Vis{result}(v);
}
Also, note that randomly moving from mutable lvalue references is a recipe for undefined-behaviour bugs, e.g.:
MOVarBip(STR &b) { MOVarSTBIP::operator=(std::move(b)); }
The easiest way to fix that is to take by value, which does the right thing regardless of the value category of the argument. Regardless, if you intend to support different segments, you shouldn't move unless the allocators are equivalent.
Reversing the above observations, I observe that really you want to have a global static allocator that refers to the global segment. Something like:
// Plain Boost.Interprocess allocator bound to a managed_shared_memory segment manager.
template <typename T> using BaseAlloc = bip::allocator<T, bip::managed_shared_memory::segment_manager>;
// Allocator that defaults to the segment manager of the global `segment`,
// which makes the allocator default-constructible.
template <typename T> struct SegmentAlloc : BaseAlloc<T> {
SegmentAlloc(bip::managed_shared_memory::segment_manager* mgr = segment->get_segment_manager())
: BaseAlloc<T>(mgr) {}
// Also expose the constructors of the underlying bip::allocator.
using BaseAlloc<T>::BaseAlloc;
// Rebinding yields another SegmentAlloc instead of the plain base allocator.
template <typename U> struct rebind {
using other = SegmentAlloc<U>;
};
};
Now you can define your variable type:
struct MOVarBip;
using STR = bip::basic_string<char, std::char_traits<char>, SegmentAlloc<char>>;
using PAIR = std::pair<const STR, MOVarBip>;
using VEC = bip::vector<MOVarBip, SegmentAlloc<MOVarBip>>;
using MAP = bip::map<STR, MOVarBip, std::less<STR>, SegmentAlloc<PAIR>>;
static void json_dump(MAP const& data, std::string filename);
using MOVarSTBIP = boost::variant<std::nullptr_t, int64_t, double, STR, VEC, MAP>;
struct MOVarBip : MOVarSTBIP {
using Base = MOVarSTBIP;
template <typename... Args> explicit MOVarBip(Args&&... args) : Base(std::forward<Args>(args)...) {}
using Base::operator=;
explicit MOVarBip(std::string_view s) : Base(STR(s.data(), s.size())) {}
explicit MOVarBip(std::string s) : Base(STR(s.data(), s.size())) {}
bool is_map() const { return which() == TYPE_MSG::MAP_; }
bool is_int64() const { return which() == TYPE_MSG::INT64_; }
bool is_nill() const { return which() == TYPE_MSG::NULLPTR_; }
bool is_double() const { return which() == TYPE_MSG::DOUBLE_; }
bool is_string() const { return which() == TYPE_MSG::STRING_; }
bool is_vector() const { return which() == TYPE_MSG::VECTOR_; }
double const& _as_double() const { return boost::get<double>(*this); }
int64_t const& _as_int64() const { return boost::get<int64_t>(*this); }
STR const& _as_string() const { return boost::get<STR>(*this); }
VEC const& _as_vector() const { return boost::get<VEC>(*this); }
MAP const& _as_map() const { return boost::get<MAP>(*this); }
double & _as_double() { return boost::get<double>(*this); }
int64_t & _as_int64() { return boost::get<int64_t>(*this); }
STR & _as_string() { return boost::get<STR>(*this); }
VEC & _as_vector() { return boost::get<VEC>(*this); }
MAP & _as_map() { return boost::get<MAP>(*this); }
};
I always prefer non-intrusive msgpack adaptation regardless, and here it is in all its glory:
// Non-intrusive msgpack adaptors that decode directly into the shared-memory
// types STR, MOVarBip, MAP and VEC.
namespace msgpack {
MSGPACK_API_VERSION_NAMESPACE(MSGPACK_DEFAULT_API_NS) {
namespace adaptor {
    // STR accepts msgpack BIN and STR payloads; anything else throws
    // msgpack::type_error.
    template <> struct convert<STR> {
        msgpack::object const& operator()(msgpack::object const& o, STR& v) const {
            switch (o.type) {
            case msgpack::type::BIN: v.assign(o.via.bin.ptr, o.via.bin.size); break;
            case msgpack::type::STR: v.assign(o.via.str.ptr, o.via.str.size); break;
            default: throw msgpack::type_error(); break;
            }
            return o;
        }
    };
    // Maps every msgpack type onto a MOVarBip alternative; EXT is rejected
    // with msgpack::type_error.
    template <> struct convert<MOVarBip> {
        msgpack::object const& operator()(msgpack::object const& o, MOVarBip& v) const {
            switch (o.type) {
            case msgpack::type::NIL: v = MOVarBip(); break;
            case msgpack::type::BOOLEAN: v = static_cast<int64_t>(o.as<bool>()); break;
            case msgpack::type::POSITIVE_INTEGER:
                // Values beyond the int64_t range are kept as a decimal string.
                if (uint64_t temp = o.as<uint64_t>(); temp > 0x7FFFFFFFFFFFFFFF)
                    v = MOVarBip(std::to_string(temp));
                else
                    v = static_cast<int64_t>(temp);
                break;
            case msgpack::type::NEGATIVE_INTEGER: v = o.as<int64_t>(); break;
            case msgpack::type::FLOAT32:
            case msgpack::type::FLOAT64: v = o.as<double>(); break;
            case msgpack::type::STR:
            case msgpack::type::BIN: v = o.as<STR>(); break;
            case msgpack::type::ARRAY: v = o.as<VEC>(); break;
            case msgpack::type::MAP: v = o.as<MAP>(); break;
            case msgpack::type::EXT: throw msgpack::type_error(); break;
            }
            return o;
        }
    };
    // Replaces the content of `m` with the entries of the msgpack map. Each
    // value is converted in place inside the freshly inserted node.
    template <> struct convert<MAP> {
        msgpack::object const& operator()(msgpack::object const& o, MAP& m) const {
            if (o.type != msgpack::type::MAP)
                throw msgpack::type_error();
            m.clear();
            for (auto p = o.via.map.ptr, pend = p + o.via.map.size; p != pend; ++p) {
                auto [it, uniqueOk] = m.emplace(p->key.as<STR>(), MOVarBip{});
                assert(uniqueOk);
                p->val.convert(it->second);
            }
            return o;
        }
    };
    // Replaces the content of `v` with the elements of the msgpack array.
    template <> struct convert<VEC> {
        msgpack::object const& operator()(msgpack::object const& o, VEC& v) const {
            if (o.type != msgpack::type::ARRAY)
                throw msgpack::type_error();
            // Start from an empty vector, consistent with convert<MAP>; without
            // this, converting into a non-empty vector would append to it.
            v.clear();
            for (auto p = o.via.array.ptr, pend = p + o.via.array.size; p != pend; ++p)
                p->convert(v.emplace_back());
            return o;
        }
    };
} // namespace adaptor
}
} // namespace msgpack
I assume it can be more optimized, but this is a good start at simplification, I hope.
Live On Compiler Explorer
// #define MSGPACK_USE_BOOST
#include <iostream>
#include <msgpack.hpp>
#include <boost/interprocess/allocators/allocator.hpp>
#include <boost/interprocess/containers/map.hpp>
#include <boost/interprocess/containers/string.hpp>
#include <boost/interprocess/containers/vector.hpp>
#include <boost/interprocess/managed_shared_memory.hpp>
#include <boost/variant.hpp>
using namespace std::literals;
namespace bip = boost::interprocess;
// Alternative tags; unscoped so that they compare directly with variant::which().
enum /*class*/ TYPE_MSG : int { NULLPTR_, INT64_, DOUBLE_, STRING_, VECTOR_, MAP_ };
// Name of the shared-memory object that backs the managed segment.
static auto sharedMemoryName = "MySharedMemory"s;
// Segment size in bytes (9.8 GiB). The product is a floating-point value, so
// it is converted explicitly; a plain `auto` would make the byte count a double.
static auto shmSize = static_cast<unsigned long long>(9.8 * 1024 * 1024 * 1024);
// Global segment, held by pointer so that it can be closed (reset) explicitly.
static auto segment = std::make_unique<bip::managed_shared_memory>(//
    bip::open_or_create, sharedMemoryName.data(), shmSize);
// Plain Boost.Interprocess allocator bound to a managed_shared_memory segment manager.
template <typename T> using BaseAlloc = bip::allocator<T, bip::managed_shared_memory::segment_manager>;
// Allocator that defaults to the segment manager of the global `segment`,
// which makes the allocator default-constructible.
template <typename T> struct SegmentAlloc : BaseAlloc<T> {
SegmentAlloc(bip::managed_shared_memory::segment_manager* mgr = segment->get_segment_manager())
: BaseAlloc<T>(mgr) {}
// Also expose the constructors of the underlying bip::allocator.
using BaseAlloc<T>::BaseAlloc;
// Rebinding yields another SegmentAlloc instead of the plain base allocator.
template <typename U> struct rebind {
using other = SegmentAlloc<U>;
};
};
struct MOVarBip;
using STR = bip::basic_string<char, std::char_traits<char>, SegmentAlloc<char>>;
using PAIR = std::pair<const STR, MOVarBip>;
using VEC = bip::vector<MOVarBip, SegmentAlloc<MOVarBip>>;
using MAP = bip::map<STR, MOVarBip, std::less<STR>, SegmentAlloc<PAIR>>;
static void json_dump(MAP const& data, std::string filename);
using MOVarSTBIP = boost::variant<std::nullptr_t, int64_t, double, STR, VEC, MAP>;
struct MOVarBip : MOVarSTBIP {
using Base = MOVarSTBIP;
template <typename... Args> explicit MOVarBip(Args&&... args) : Base(std::forward<Args>(args)...) {}
using Base::operator=;
explicit MOVarBip(std::string_view s) : Base(STR(s.data(), s.size())) {}
explicit MOVarBip(std::string s) : Base(STR(s.data(), s.size())) {}
bool is_map() const { return which() == TYPE_MSG::MAP_; }
bool is_int64() const { return which() == TYPE_MSG::INT64_; }
bool is_nill() const { return which() == TYPE_MSG::NULLPTR_; }
bool is_double() const { return which() == TYPE_MSG::DOUBLE_; }
bool is_string() const { return which() == TYPE_MSG::STRING_; }
bool is_vector() const { return which() == TYPE_MSG::VECTOR_; }
double const& _as_double() const { return boost::get<double>(*this); }
int64_t const& _as_int64() const { return boost::get<int64_t>(*this); }
STR const& _as_string() const { return boost::get<STR>(*this); }
VEC const& _as_vector() const { return boost::get<VEC>(*this); }
MAP const& _as_map() const { return boost::get<MAP>(*this); }
double & _as_double() { return boost::get<double>(*this); }
int64_t & _as_int64() { return boost::get<int64_t>(*this); }
STR & _as_string() { return boost::get<STR>(*this); }
VEC & _as_vector() { return boost::get<VEC>(*this); }
MAP & _as_map() { return boost::get<MAP>(*this); }
};
// Non-intrusive msgpack adaptors that decode directly into the shared-memory
// types STR, MOVarBip, MAP and VEC.
namespace msgpack {
MSGPACK_API_VERSION_NAMESPACE(MSGPACK_DEFAULT_API_NS) {
namespace adaptor {
    // STR accepts msgpack BIN and STR payloads; anything else throws
    // msgpack::type_error.
    template <> struct convert<STR> {
        msgpack::object const& operator()(msgpack::object const& o, STR& v) const {
            switch (o.type) {
            case msgpack::type::BIN: v.assign(o.via.bin.ptr, o.via.bin.size); break;
            case msgpack::type::STR: v.assign(o.via.str.ptr, o.via.str.size); break;
            default: throw msgpack::type_error(); break;
            }
            return o;
        }
    };
    // Maps every msgpack type onto a MOVarBip alternative; EXT is rejected
    // with msgpack::type_error.
    template <> struct convert<MOVarBip> {
        msgpack::object const& operator()(msgpack::object const& o, MOVarBip& v) const {
            switch (o.type) {
            case msgpack::type::NIL: v = MOVarBip(); break;
            case msgpack::type::BOOLEAN: v = static_cast<int64_t>(o.as<bool>()); break;
            case msgpack::type::POSITIVE_INTEGER:
                // Values beyond the int64_t range are kept as a decimal string.
                if (uint64_t temp = o.as<uint64_t>(); temp > 0x7FFFFFFFFFFFFFFF)
                    v = MOVarBip(std::to_string(temp));
                else
                    v = static_cast<int64_t>(temp);
                break;
            case msgpack::type::NEGATIVE_INTEGER: v = o.as<int64_t>(); break;
            case msgpack::type::FLOAT32:
            case msgpack::type::FLOAT64: v = o.as<double>(); break;
            case msgpack::type::STR:
            case msgpack::type::BIN: v = o.as<STR>(); break;
            case msgpack::type::ARRAY: v = o.as<VEC>(); break;
            case msgpack::type::MAP: v = o.as<MAP>(); break;
            case msgpack::type::EXT: throw msgpack::type_error(); break;
            }
            return o;
        }
    };
    // Replaces the content of `m` with the entries of the msgpack map. Each
    // value is converted in place inside the freshly inserted node.
    template <> struct convert<MAP> {
        msgpack::object const& operator()(msgpack::object const& o, MAP& m) const {
            if (o.type != msgpack::type::MAP)
                throw msgpack::type_error();
            m.clear();
            for (auto p = o.via.map.ptr, pend = p + o.via.map.size; p != pend; ++p) {
                auto [it, uniqueOk] = m.emplace(p->key.as<STR>(), MOVarBip{});
                assert(uniqueOk);
                p->val.convert(it->second);
            }
            return o;
        }
    };
    // Replaces the content of `v` with the elements of the msgpack array.
    template <> struct convert<VEC> {
        msgpack::object const& operator()(msgpack::object const& o, VEC& v) const {
            if (o.type != msgpack::type::ARRAY)
                throw msgpack::type_error();
            // Start from an empty vector, consistent with convert<MAP>; without
            // this, converting into a non-empty vector would append to it.
            v.clear();
            for (auto p = o.via.array.ptr, pend = p + o.via.array.size; p != pend; ++p)
                p->convert(v.emplace_back());
            return o;
        }
    };
} // namespace adaptor
}
} // namespace msgpack
#include <chrono>
#include <fstream>
static constexpr auto now = std::chrono::high_resolution_clock::now;
static void timed_load(std::string filename, MAP& into) {
auto startTime = now();
std::ifstream file(filename, std::ios::binary);
std::vector<char> const buffer(std::istreambuf_iterator<char>(file), {});
msgpack::object_handle oh = msgpack::unpack(buffer.data(), buffer.size());
msgpack::object const& deserialized = oh.get();
deserialized.convert(into);
std::cout << "Duration: " << (now() - startTime) / 1ms << "ms" << std::endl;
}
// Loads big_map.msgpack into the named object "bip_map" of the global segment,
// dumps it to big_map.json, then closes and removes the shared memory.
int main() {
{
// Finds the object named "bip_map" in the segment or constructs it.
auto& bip_map = *segment->find_or_construct<MAP>("bip_map")();
timed_load("big_map.msgpack", bip_map);
json_dump(bip_map, "big_map.json");
}
segment.reset(); // close before remove
bip::shared_memory_object::remove(sharedMemoryName.c_str());
}
#include <boost/json.hpp>
// Writes `data` to the file `filename` as JSON. Used to verify that the
// shared-memory structure is readable and complete.
void json_dump(MAP const& data, std::string filename) {
    // Visitor that turns each variant alternative into a boost::json::value.
    struct ToJson {
        using json_value = boost::json::value;

        // Variant: dispatch on the active alternative.
        json_value operator()(MOVarBip const& var) const { return boost::apply_visitor(*this, var); }

        // Scalars and strings.
        json_value operator()(std::nullptr_t) const { return nullptr; }
        json_value operator()(int64_t number) const { return number; }
        json_value operator()(double number) const { return number; }
        json_value operator()(STR const& text) const { return text.c_str(); }

        // Containers are converted recursively.
        json_value operator()(VEC const& elements) const {
            boost::json::array out;
            for (auto& element : elements)
                out.emplace_back((*this)(element));
            return out;
        }
        json_value operator()(MAP const& entries) const {
            boost::json::object out;
            for (auto& [key, mapped] : entries)
                out[key.c_str()] = (*this)(mapped);
            return out;
        }
    };

    std::ofstream sink(filename);
    sink << ToJson{}(data);
}
I've confirmed it clean under ASAN/UBSAN as well as leak-free. As you can see I threw in a json dump just to verify that the data is accessible and valid.
On my machine:
I imagine that this could be a good basis for further optimization, e.g. with reserve(), as you already found.
I found some time to optimize further using optimized containers. Using flat_map and small_vector allows us to reduce allocations for small numbers of elements, which are pretty prevalent (the most prevalent vector size is one, most maps have fewer than 10 elements, and many have 4).
Here it is, with some modifications to allow STR to be replaced with a small-vector-optimized type as well, and with an added adaptor for flat_map.
// #define MSGPACK_USE_BOOST
#include <iostream>
#include <msgpack.hpp>
#include <boost/container/flat_map.hpp>
#include <boost/container/small_vector.hpp>
#include <boost/container/string.hpp>
#include <boost/interprocess/allocators/allocator.hpp>
#include <boost/interprocess/managed_shared_memory.hpp>
#include <boost/variant.hpp>
#include <utility>
using namespace std::literals;
namespace bip = boost::interprocess;
// Alternative tags; unscoped so that they compare directly with variant::which().
enum /*class*/ TYPE_MSG : int { NULLPTR_, INT64_, DOUBLE_, STRING_, VECTOR_, MAP_ };
// Name of the shared-memory object that backs the managed segment.
static auto sharedMemoryName = "MySharedMemory"s;
// Segment size in bytes (9.8 GiB). The product is a floating-point value, so
// it is converted explicitly; a plain `auto` would make the byte count a double.
static auto shmSize = static_cast<unsigned long long>(9.8 * 1024 * 1024 * 1024);
// Global segment, held by pointer so that it can be closed (reset) explicitly.
static auto segment = std::make_unique<bip::managed_shared_memory>(//
    bip::open_or_create, sharedMemoryName.data(), shmSize);
// Plain Boost.Interprocess allocator bound to a managed_shared_memory segment manager.
template <typename T> using BaseAlloc = bip::allocator<T, bip::managed_shared_memory::segment_manager>;
// Allocator that defaults to the segment manager of the global `segment`,
// which makes the allocator default-constructible.
template <typename T> struct SegmentAlloc : BaseAlloc<T> {
SegmentAlloc(bip::managed_shared_memory::segment_manager* mgr = segment->get_segment_manager())
: BaseAlloc<T>(mgr) {}
// Also expose the constructors of the underlying bip::allocator.
using BaseAlloc<T>::BaseAlloc;
// Rebinding yields another SegmentAlloc instead of the plain base allocator.
template <typename U> struct rebind {
using other = SegmentAlloc<U>;
};
};
struct MOVarBip;
template <typename T, size_t N = 4> using SmallVec = boost::container::small_vector<T, N, SegmentAlloc<T>>;
template <typename K, typename V, typename Cmp = std::less<K>, typename P = std::pair<K, V>>
using SmallMap = boost::container::flat_map<K, V, Cmp, SmallVec<P>>;
// using SmallStr = boost::container::basic_string<char, std::char_traits<char>, SegmentAlloc<char>>;
using SmallStr = SmallVec<char, 32>; // many strings are 32-characters digests
using VarVec = SmallVec<MOVarBip>;
using VarMap = SmallMap<SmallStr, MOVarBip>;
using PVEC = std::unique_ptr<VarVec>;
using PMAP = std::unique_ptr<VarMap>;
static void json_dump(VarMap const& data, std::string filename);
using MOVarSTBIP = boost::variant<std::nullptr_t, int64_t, double, SmallStr, PVEC, PMAP>;
struct MOVarBip : MOVarSTBIP {
using Base = MOVarSTBIP;
template <typename... Args> explicit MOVarBip(Args&&... args) : Base(std::forward<Args>(args)...) {}
using Base::operator=;
explicit MOVarBip(std::string_view s) : Base(SmallStr(s.begin(), s.end())) {}
explicit MOVarBip(std::string s) : Base(SmallStr(s.begin(), s.end())) {}
bool is_map() const { return which() == TYPE_MSG::MAP_; }
bool is_int64() const { return which() == TYPE_MSG::INT64_; }
bool is_nill() const { return which() == TYPE_MSG::NULLPTR_; }
bool is_double() const { return which() == TYPE_MSG::DOUBLE_; }
bool is_string() const { return which() == TYPE_MSG::STRING_; }
bool is_vector() const { return which() == TYPE_MSG::VECTOR_; }
double const& _as_double() const { return boost::get<double>(*this); }
int64_t const& _as_int64() const { return boost::get<int64_t>(*this); }
SmallStr const& _as_string() const { return boost::get<SmallStr>(*this); }
VarVec const& _as_vector() const { return *boost::get<PVEC>(*this); }
VarMap const& _as_map() const { return *boost::get<PMAP>(*this); }
double& _as_double() { return boost::get<double>(*this); }
int64_t& _as_int64() { return boost::get<int64_t>(*this); }
SmallStr& _as_string() { return boost::get<SmallStr>(*this); }
VarVec& _as_vector() { return *boost::get<PVEC>(*this); }
VarMap& _as_map() { return *boost::get<PMAP>(*this); }
};
// Non-intrusive msgpack adaptors for SmallStr, MOVarBip, PMAP, PVEC and for
// boost::container::flat_map in general.
namespace msgpack {
MSGPACK_API_VERSION_NAMESPACE(MSGPACK_DEFAULT_API_NS) {
namespace adaptor {
// SmallStr accepts msgpack BIN and STR payloads (copied as a character range);
// anything else throws msgpack::type_error.
template <> struct convert<SmallStr> {
msgpack::object const& operator()(msgpack::object const& o, SmallStr& v) const {
switch (o.type) {
case msgpack::type::BIN:
v.assign(o.via.bin.ptr, o.via.bin.ptr + o.via.bin.size);
break;
case msgpack::type::STR:
v.assign(o.via.str.ptr, o.via.str.ptr + o.via.str.size);
break;
default: throw msgpack::type_error(); break;
}
return o;
}
};
// Maps every msgpack type onto a MOVarBip alternative; EXT is rejected with
// msgpack::type_error.
template <> struct convert<MOVarBip> {
msgpack::object const& operator()(msgpack::object const& o, MOVarBip& v) const {
switch (o.type) {
case msgpack::type::NIL: v = MOVarBip(); break;
case msgpack::type::BOOLEAN: v = static_cast<int64_t>(o.as<bool>()); break;
case msgpack::type::POSITIVE_INTEGER:
// Values beyond the int64_t range are kept as a decimal string.
if (uint64_t temp = o.as<uint64_t>(); temp > 0x7FFFFFFFFFFFFFFF)
v = MOVarBip(std::to_string(temp));
else
v = static_cast<int64_t>(temp);
break;
case msgpack::type::NEGATIVE_INTEGER: v = o.as<int64_t>(); break;
case msgpack::type::FLOAT32:
case msgpack::type::FLOAT64: v = o.as<double>(); break;
case msgpack::type::STR:
case msgpack::type::BIN: v = o.as<SmallStr>(); break;
case msgpack::type::ARRAY: v = o.as<PVEC>(); break;
case msgpack::type::MAP: v = o.as<PMAP>(); break;
case msgpack::type::EXT: throw msgpack::type_error(); break;
}
return o;
}
};
// Fills the map behind `pm`, reusing an existing map (cleared first) or
// creating a new one. Throws msgpack::type_error if the object is not a map.
// NOTE(review): std::make_unique allocates the VarMap object itself outside of
// SegmentAlloc -- confirm that this is acceptable for data meant to be shared.
template <> struct convert<PMAP> {
msgpack::object const& operator()(msgpack::object const& o, PMAP& pm) const {
if (o.type != msgpack::type::MAP)
throw msgpack::type_error();
if (pm)
pm->clear();
else
pm = std::make_unique<VarMap>();
pm->reserve(o.via.map.size);
for (auto p = o.via.map.ptr, pend = p + o.via.map.size; p != pend; ++p) {
// Insert a null placeholder, then convert the value in place.
auto [it, uniqueOk] = pm->emplace(p->key.as<SmallStr>(), MOVarBip{});
assert(uniqueOk);
p->val.convert(it->second);
}
return o;
}
};
// Fills the vector behind `pv`, reusing an existing vector (cleared first) or
// creating a new one. Throws msgpack::type_error if the object is not an array.
// NOTE(review): as above, std::make_unique allocates the VarVec object itself
// outside of SegmentAlloc.
template <> struct convert<PVEC> {
msgpack::object const& operator()(msgpack::object const& o, PVEC& pv) const {
if (o.type != msgpack::type::ARRAY)
throw msgpack::type_error();
if (pv)
pv->clear();
else
pv = std::make_unique<VarVec>();
pv->reserve(o.via.array.size);
for (auto p = o.via.array.ptr, pend = p + o.via.array.size; p != pend; ++p)
p->convert(pv->emplace_back());
return o;
}
};
} // namespace adaptor
namespace adaptor { // adapt for boost::container::flat_map
// Generic adaptor: builds a temporary flat_map from the msgpack map and
// move-assigns it to `v`. Throws msgpack::type_error if the object is not a map.
template <typename K, typename V, typename Compare, typename Alloc>
struct convert<boost::container::flat_map<K, V, Compare, Alloc>> {
msgpack::object const& operator()(msgpack::object const& o,
boost::container::flat_map<K, V, Compare, Alloc>& v) const {
if (o.type != msgpack::type::MAP) throw msgpack::type_error();
boost::container::flat_map<K, V, Compare, Alloc> tmp;
// tmp.reserve(o.via.map.size); // seems to slow it down ever so slightly
for (auto p = o.via.map.ptr, pend = p + o.via.map.size; p != pend; ++p)
tmp.emplace(p->key.as<K>(), p->val.as<V>());
v = std::move(tmp);
return o;
}
};
} // namespace adaptor
}
} // namespace msgpack
#include <chrono>
#include <fstream>
static constexpr auto now = std::chrono::high_resolution_clock::now;
static void timed_load(std::string filename, VarMap& into) {
auto startTime = now();
std::ifstream file(filename, std::ios::binary);
std::vector<char> const buffer(std::istreambuf_iterator<char>(file), {});
msgpack::object_handle oh = msgpack::unpack(buffer.data(), buffer.size());
msgpack::object const& deserialized = oh.get();
deserialized.convert(into);
std::cout << "Duration: " << (now() - startTime) / 1ms << "ms" << std::endl;
}
// Loads big_map.msgpack into the named object "bip_map" of the global segment,
// dumps it to big_map.json, then closes and removes the shared memory.
int main() {
{
// Finds the object named "bip_map" in the segment or constructs it.
auto& bip_map = *segment->find_or_construct<VarMap>("bip_map")();
timed_load("big_map.msgpack", bip_map);
json_dump(bip_map, "big_map.json");
}
segment.reset(); // close before remove
bip::shared_memory_object::remove(sharedMemoryName.c_str());
}
#include <boost/json.hpp>
// Writes `data` to the file `filename` as JSON. Used to verify that the
// converted structure is readable and complete.
void json_dump(VarMap const& data, std::string filename) {
    // Visitor that turns each variant alternative into a boost::json::value.
    struct ToJson {
        using json_value = boost::json::value;

        // Variant: dispatch on the active alternative.
        json_value operator()(MOVarBip const& var) const { return boost::apply_visitor(*this, var); }

        // Scalars.
        json_value operator()(std::nullptr_t) const { return nullptr; }
        json_value operator()(int64_t number) const { return number; }
        json_value operator()(double number) const { return number; }

        // SmallStr is not NUL-terminated, hence the explicit length.
        json_value operator()(SmallStr const& text) const {
            // std::cerr << "STR size is\t" << text.size() << "\n"; // manual profiling
            return boost::json::string_view(text.data(), text.size());
        }

        // Containers are converted recursively.
        json_value operator()(VarVec const& elements) const {
            // std::cerr << "VarVec size is\t" << elements.size() << "\n"; // manual profiling
            boost::json::array out;
            for (auto& element : elements)
                out.emplace_back((*this)(element));
            return out;
        }
        json_value operator()(VarMap const& entries) const {
            // std::cerr << "VarMap size is\t" << entries.size() << "\n"; // manual profiling
            boost::json::object out;
            for (auto& [key, mapped] : entries)
                out[boost::json::string_view(key.data(), key.size())] = (*this)(mapped);
            return out;
        }

        // The variant stores containers behind pointers; look through them.
        json_value operator()(PVEC const& p) const { return (*this)(*p); }
        json_value operator()(PMAP const& p) const { return (*this)(*p); }
    };

    std::ofstream sink(filename);
    sink << ToJson{}(data);
}
With my local demo, note the difference when switching the string representation:
// using SmallStr = boost::container::basic_string<char, std::char_traits<char>, SegmentAlloc<char>>;
using SmallStr = SmallVec<char, 32>; // many strings are 32-characters digests
It brings the time down from 10ms, which is not a lot slower than the 7ms of the original non-shared-memory version.