Search code examples
c++serializationboostbinary

Correct boost binary serialize/deserialize with data from HEAP


I am using Boost library and want to save serialized objects to binary file and read it from there. I have different objects and I want to have one function to save data and another one function to read. For example I have 2 different objects (MyCustomData and MyOtherCustomData). I am trying to pass void pointer to both write and read functions. Save function saveData not works correct because data from HEAP not serialized. How to make it correct for both saveData and loadData?

My example code like this:

main.cpp

#include <iostream>
#include <fstream>

#include <boost/archive/binary_oarchive.hpp>
#include <boost/archive/binary_iarchive.hpp>
#include <boost/serialization/binary_object.hpp>


// some data
struct MyInfo
{
    int x;
    char z[10];
};

class MyCustomData
{
protected:
    MyInfo* _data;
    std::string _str1;

public:
    MyCustomData(const std::string& s1, int x, const char* z)
        : _str1(s1)
    {
        _data = new MyInfo();
        _data->x = x;
        strcpy_s(_data->z, sizeof(_data->z), z);
    }
    
    virtual ~MyCustomData()
    {
        delete _data;
    }

    virtual std::string print()
    {
        std::string res(_str1);
        res += ", data: " + std::to_string(_data->x) + ", ";
        res.append(_data->z);
        return res;
    }
};

class MyOtherCustomData : public MyCustomData
{
    std::string _str2;

public:
    MyOtherCustomData(const std::string& s1, const std::string& s2, int x, const char* z)
        : MyCustomData(s2, x, z)
        , _str2(s1)
    {
    }

    ~MyOtherCustomData()
    {
    }

    std::string print() override
    {
        std::string res(_str2);
        res += ", " + _str1;
        res += ", data: " + std::to_string(_data->x) + ", ";
        res.append(_data->z);
        return res;
    }
};

// save serialized data to file
bool saveData(std::string path, void* data, std::size_t datasize, std::string& err)
{
    bool res = false;
    std::ofstream fout;

    fout.open(path, std::ios::trunc);
    if (!fout.is_open())
    {
        err = "can't open file " + path;
    }
    else
    {
        boost::archive::binary_oarchive archive(fout);
        archive << boost::serialization::make_binary_object(data, datasize);
        res = true;
    }

    fout.close();
    return res;
}

// load serialized data from file
bool loadData(std::string path, void* data, std::size_t datasize, std::string& err)
{
    bool res = false;
    std::ifstream fin;

    fin.open(path, std::ios::in | std::ios::out);
    if (!fin.is_open())
    {
        err = "can't open file " + path;
    }
    else
    {
        // How to make correct read???
        auto obj = boost::serialization::make_binary_object(data, datasize);
        boost::archive::binary_iarchive archive(fin);
        archive >> obj;
        
        res = true;
    }

    fin.close();
    return res;
}

int main()
{
    std::string err;

    // SAVE
    {
        MyCustomData obj1("my info 1", 1, "info1");
        MyOtherCustomData obj2("other", "my info 2", 3, "info2");

        // save to file
        if (!saveData("data1.dat", &obj1, sizeof(MyCustomData), err))
            std::cout << "save error: " << err.c_str() << std::endl;
        else
            std::cout << "saved: " << obj1.print().c_str() << std::endl;

        // save to file
        if (!saveData("data2.dat", &obj2, sizeof(MyOtherCustomData), err))
            std::cout << "save error: " << err.c_str() << std::endl;
        else
            std::cout << "saved: " << obj2.print().c_str() << std::endl;
    }

    // READ
    {
        MyCustomData obj3("temp 1", 10, "temp1");
        MyOtherCustomData obj4("other temp", "temp 2", 20, "temp2");

        // read from file
        if (!loadData("data1.dat", &obj3, sizeof(MyCustomData), err))
            std::cout << "load error: " << err.c_str() << std::endl;
        else
            std::cout << "loaded: " << obj3.print().c_str() << std::endl;

        // read from file
        if (!loadData("data2.dat", &obj4, sizeof(MyOtherCustomData), err))
            std::cout << "load error: " << err.c_str() << std::endl;
        else
            std::cout << "loaded: " << obj4.print().c_str() << std::endl;
    }
}

Solution

  • You're completely bypassing all the serialization support from Boost Serialization by dealing with your object as raw data (make_binary_object on a void*).

    That's UB because non of your types are trivial and bitwise-copyable. It's also useless, because you could just use ostream/istream directly and be vastly more efficient even if it could work.

    I have the strongest suspicion you have experience programming C, but not with C++. The above signs (not realizing C++ classes cannot be bitwise copied) and others (not correctly managing lifetime with constructors/destructors, superfluous .c_str() calls, non-const-correct print member functions (instead of operator<<) etc reinforce that impression.

    For that reason I'll sketch up your example in C++ style so you can compare notes. First off, use Boost Serialization to serialize your types!

    struct MyInfo {
        int  x;
        char z[10]; // TODO prefer `std::array` over C-style arrays
        
        template <typename Ar>
        void serialize(Ar& ar, unsigned /*version*/) {
            ar & x & boost::serialization::make_array(z, sizeof(z));
        }
    };
    

    Likewise, for the other types:

    class MyCustomData {
      protected:
        MyInfo*     _data = nullptr;
        std::string _str1;
    
        friend struct boost::serialization::access;
        template <typename Ar> void serialize(Ar& ar, unsigned) { ar& _data& _str1; }
    
    // ....
    
    class MyOtherCustomData : public MyCustomData {
        std::string _str2;
    
        friend struct boost::serialization::access;
        template <typename Ar> void serialize(Ar& ar, unsigned) {
            ar & boost::serialization::base_object<MyCustomData>(*this) & _str2;
        }
    

    Note how simple! With just that in place you can already simplify the save/load functions:

    template <typename T>
    bool saveData(std::string const& path, T const& data, std::string& err) {
        if (std::ofstream fout{path}) {
            boost::archive::binary_oarchive archive(fout);
            archive << data;
            return true;
        } else {
            err = "can't open file " + path;
            return false;
        }
    }
    
    template <typename T>
    bool loadData(std::string const& path, T& data, std::string& err) {
        if (std::ifstream fin{path}) {
            boost::archive::binary_iarchive archive(fin);
            archive >> data;
            return true;
        } else {
            err = "can't open file " + path;
            return false;
        }
    }
    

    Which are already far easier and safer to use: Live On Coliru

    {
        MyCustomData      obj1("my info 1", 1, "info1");
        MyOtherCustomData obj2("other", "my info 2", 3, "info2");
    
        saveData ("data1.dat", obj1, err);
        std::cout << "saved: " << quoted(err) << std::endl;
    
        saveData ("data2.dat", obj2, err);
        std::cout << "saved: " << quoted(err) << std::endl;
    }
    
    // READ
    {
        MyCustomData      obj3;
        MyOtherCustomData obj4;
    
        // read from file
        loadData("data1.dat", obj3, err);
        std::cout << "loaded: " << obj3 << " (" << quoted(err) << ")" << std::endl;
    
        // read from file
        loadData("data2.dat", obj4, err);
        std::cout << "loaded: " << obj4 << " (" << quoted(err) << ")" << std::endl;
    }
    

    Printing

    saved: ""
    saved: ""
    loaded: my info 1, data: 1, info1 ("")
    loaded: other, my info 2, data: 3, info2 ("")
    

    Fixing The Rest

    There are still problems!

    1. You seem to be treating the char[10] as a small string. In reality, std::string already has the allocation optimization (Meaning of acronym SSO in the context of std::string for details), so you should probably just use it.

      Right now, it incurs a whole bunch of problems, in that

      • you print it, just assuming there will be a NUL-character terminating the data in the first 10 bytes
      • you copy 10 bytes, regardless of the source. At best you get indeterminate data, with some luck you get UB again if you accidentally (indirectly) depend on the values. On some platforms reading beyond the bounds of the static literal data ("info1" for example) just terminates the program with a segment violation.
    2. Also, ironically, you seem to want to avoid the dynamic allocation associated with std::string, but you introduce one unncessarily for new MyInfo at the same time?

    3. The error-reporting looks like a prime situation to use exceptions. Indeed, you needn't raise them yourself, because Boost Serialization also uses exceptions:

      template <typename T> void saveData(std::string const& path, T const& data) {
          std::ofstream                   fout{path};
          boost::archive::binary_oarchive archive(fout);
          archive << data;
      }
      
      template <typename T> void loadData(std::string const& path, T& data) {
          std::ifstream                   fin{path};
          boost::archive::binary_iarchive archive(fin);
          archive >> data;
      }
      
    4. Your MyCustomData type is copyable, but when you do it will result in double-free of _data. Oops, just use Rule of Zero:

    Live On Coliru

    #include <cstdlib>
    #include <fstream>
    #include <iomanip>
    #include <iostream>
    
    #include <boost/archive/binary_iarchive.hpp>
    #include <boost/archive/binary_oarchive.hpp>
    #include <boost/serialization/access.hpp>
    #include <boost/serialization/array.hpp>
    #include <boost/serialization/unique_ptr.hpp>
    
    // some data
    struct MyInfo {
        int                  x;
        std::array<char, 10> z;
    
        template <typename Ar> void serialize(Ar& ar, unsigned /*version*/) { ar & x & z; }
    };
    
    class MyCustomData {
      protected:
        std::unique_ptr<MyInfo> _data = nullptr;
        std::string             _str1;
    
        friend struct boost::serialization::access;
        template <typename Ar> void serialize(Ar& ar, unsigned) { ar & _data & _str1; }
    
        friend std::ostream& operator<<(std::ostream& os, MyCustomData const& mcd) { return os << mcd.to_string(); }
    
      public:
        MyCustomData(std::string s1 = {}, int x = {}, std::string_view z = {})
            : _data(new MyInfo{x, {}})
            , _str1(std::move(s1)) //
        {
            auto n = std::min(z.size(), _data->z.size());
            std::copy_n(z.data(), n, _data->z.data());
        }
    
        virtual ~MyCustomData() = default;
    
        virtual std::string to_string() const {
            std::string res(_str1);
            res += ", data: " + std::to_string(_data->x) + ", ";
            auto safe_n = strnlen(_data->z.data(), _data->z.size());
            res.append(_data->z.data(), safe_n);
            return res;
        }
    };
    
    class MyOtherCustomData : public MyCustomData {
        std::string _str2;
    
        friend struct boost::serialization::access;
        template <typename Ar> void serialize(Ar& ar, unsigned) {
            ar & boost::serialization::base_object<MyCustomData>(*this) & _str2;
        }
    
      public:
        MyOtherCustomData(std::string const& a = {}, std::string const& b = {}, int x = {}, std::string_view z = {})
            : MyCustomData(b, x, z)
            , _str2(a) {}
    
        std::string to_string() const override {
            return _str2 + ", " + MyCustomData::to_string();
        }
    };
    
    template <typename T> void saveData(std::string const& path, T const& data) {
        std::ofstream                   fout{path};
        boost::archive::binary_oarchive archive(fout);
        archive << data;
    }
    
    template <typename T> void loadData(std::string const& path, T& data) {
        std::ifstream                   fin{path};
        boost::archive::binary_iarchive archive(fin);
        archive >> data;
    }
    
    int main()
    {
        std::string err;
    
        try {
            // SAVE
            {
                MyCustomData      obj1("my info 1", 1, "info1");
                MyOtherCustomData obj2("other", "my info 2", 3, "info2");
    
                saveData ("data1.dat", obj1);
                std::cout << "data1.data written\n";
    
                saveData ("data2.dat", obj2);
                std::cout << "data2.data written\n";
            }
    
            {
                MyCustomData      obj3;
                loadData("data1.dat", obj3);
                std::cout << "loaded: " << obj3 << std::endl;
            }
    
            {
                MyOtherCustomData obj4;
                loadData("data2.dat", obj4);
                std::cout << "loaded: " << obj4 << std::endl;
            }
        } catch(std::exception const& e) {
            std::cerr << "Error: " << e.what() << std::endl;
        }
    }
    

    Of course, leveraging C++'s standard library is a far better idea than rolling your own "SSO string" and adding unnecessary allocation in the process. Down to 92 LoC and having none of the issues previously existing (no more manual string bounds checking, copying etc):

    #include <boost/archive/binary_iarchive.hpp>
    #include <boost/archive/binary_oarchive.hpp>
    #include <boost/serialization/access.hpp>
    #include <fstream>
    #include <iostream>
    
    // some data
    struct MyInfo {
        int         x = 0;
        std::string z;
    
        template <typename Ar> void serialize(Ar& ar, unsigned /*version*/) { ar& x& z; }
    };
    
    class MyCustomData {
      protected:
        MyInfo      _data = {};
        std::string _str1;
    
        friend struct boost::serialization::access;
        template <typename Ar> void serialize(Ar& ar, unsigned) { ar & _data & _str1; }
    
      public:
        MyCustomData(std::string s1 = {}, int x = {}, std::string_view z = {})
            : _data{x, std::string{z}}
            , _str1(std::move(s1)) {}
    
        virtual ~MyCustomData() = default; // keep for deletion through base class!
        virtual std::string to_string() const {
            return _str1 + ", data: " + std::to_string(_data.x) + ", " + _data.z;
        }
    };
    
    class MyOtherCustomData : public MyCustomData {
        std::string _str2;
    
        friend struct boost::serialization::access;
        template <typename Ar> void serialize(Ar& ar, unsigned) {
            ar & boost::serialization::base_object<MyCustomData>(*this) & _str2;
        }
    
      public:
        MyOtherCustomData(std::string const& a = {}, std::string const& b = {}, int x = {}, std::string_view z = {})
            : MyCustomData(b, x, z)
            , _str2(a) {}
    
        std::string to_string() const override { return _str2 + ", " + MyCustomData::to_string(); }
    };
    
    template <typename T> void saveData(std::string const& path, T const& data) {
        std::ofstream                   fout{path};
        boost::archive::binary_oarchive archive(fout);
        archive << data;
    }
    
    template <typename T> void loadData(std::string const& path, T& data) {
        std::ifstream                   fin{path};
        boost::archive::binary_iarchive archive(fin);
        archive >> data;
    }
    
    static inline std::ostream& operator<<(std::ostream& os, MyCustomData const& mcd) {
        return os << mcd.to_string();
    }
    
    int main() try {
        {
            MyCustomData obj1("my info 1", 1, "info1");
            saveData("data1.dat", obj1);
            std::cout << "data1.data written\n";
        }
    
        {
            MyOtherCustomData obj2("other", "my info 2", 3, "info2");
            saveData("data2.dat", obj2);
            std::cout << "data2.data written\n";
        }
    
        {
            MyCustomData obj3;
            loadData("data1.dat", obj3);
            std::cout << "loaded: " << obj3 << std::endl;
        }
    
        {
            MyOtherCustomData obj4;
            loadData("data2.dat", obj4);
            std::cout << "loaded: " << obj4 << std::endl;
        }
    } catch (std::exception const& e) {
        std::cerr << "Error: " << e.what() << std::endl;
    }
    

    Still printing

    data1.data written
    data2.data written
    loaded: my info 1, data: 1, info1
    loaded: other, my info 2, data: 3, info2