Search code examples
c++ · xml · boost · boost-propertytree · boost-parameter

boost::property_tree: Parsing of a complex XML structure


I want to parse the XML structure below using Boost property_tree.

<?xml version="1.0" encoding="UTF-8" standalone="yes"?>
<Txn ver="1.0">
    <TOpts tCount="1"  tformat="0"  ttimeout="10" />
    <TData>
        <Tvalue date="YYMMDD" time="HHMM" Ref="100"/>
    </TData>
    <TCustOpts>
        <Param name="SALE" value="xyz" />
    </TCustOpts>
</Txn>

I am able to parse the first field of the XML above, TOpts, but for the TData and TCustOpts fields I cannot work out the right way to iterate, and my code throws an exception. Can someone show me the right approach for parsing TData and TCustOpts? My code is below for reference.

// Loads the XML text held in `xml` into a property tree, then walks the
// direct children of <Txn> and prints selected attributes.
stringstream ssString;
boost::property_tree::ptree pt1;
ssString << xml;
boost::property_tree::read_xml(ssString, pt1);

// Attributes of an element live under its special "<xmlattr>" child.
string TxnVer = pt1.get<string>("Txn.<xmlattr>.ver");

// `formats` is declared by value, so this copies the <Txn> subtree.
boost::property_tree::ptree formats = pt1.get_child("Txn");
BOOST_FOREACH(boost::property_tree::ptree::value_type const& node, formats) {
    if (node.first == "TOpts") {
        // <TOpts> carries its attributes directly, so "<xmlattr>" exists here.
        const boost::property_tree::ptree & attributes = node.second.get_child("<xmlattr>");
        BOOST_FOREACH(boost::property_tree::ptree::value_type const& v, attributes) {
            if (v.first == "tCount") {
                std::cout << " tCount " << v.second.data() << endl;
            }
            else if (v.first == "tformat") {
                std::cout << " tformat" << v.second.data() << endl;
            }
            else if (v.first == "ttimeout") {
                std::cout << " ttimeout " << v.second.data() << endl;
            }
          }
    }
    // NOTE(review): this repeats the "TOpts" test above and has no body, so the
    // snippet does not compile as written; presumably a "TData" branch was
    // intended here -- confirm.
    else if (node.first == "TOpts")

    else if (node.first == "TCustOpts") {
        // NOTE(review): in the sample XML, <TCustOpts> has no attributes of its
        // own (they sit on the nested <Param> element), so this get_child() has
        // no "<xmlattr>" child to find and throws -- likely the reported exception.
        const boost::property_tree::ptree & attributes1 = node.second.get_child("<xmlattr>");
        BOOST_FOREACH(boost::property_tree::ptree::value_type const& s, attributes1) {
            if (s.first == "name"){
                std::cout << "name " << s.second.data() << endl;
            }
            else if (s.first == "value") {
                std::cout << "value " << s.second.data() << endl;
            }
        }
    }
}

Solution

  • Ok, the usual anti-pattern when using Property Tree to parse information is "loop frenzy".

    The whole idea of storing key-value pairs in a tree format is to avoid having to loop low-level structures, instead using convenient addressing (using paths).

    Another anti-pattern is to have all the parsing in one big function. I'd split things up.

    Define Some Data Types

    Let's start with defining some data-types to keep our data manageable:

    namespace Domain {
        // Plain value types mirroring the elements of the <Txn> document.
        // Numeric members carry in-class initializers so that a
        // default-constructed object is fully defined even when the
        // corresponding XML element is absent and never parsed.

        /// Options read from the attributes of <TOpts>.
        struct TOpts {
            size_t count = 0;
            std::string format;
            size_t timeout = 0;
        };

        /// Values read from the attributes of <TData>/<Tvalue>.
        struct TData {
            std::string date; // YYMMDD
            std::string time; // HHMM
            size_t ref = 0;
        };

        /// Name/value pairs collected from the <Param> children of <TCustOpts>.
        struct TCustOpts {
            // multimap: the same parameter name may occur more than once.
            std::multimap<std::string, std::string> params;
        };

        /// The whole transaction: version attribute plus the three sections.
        struct Txn {
            std::string version;
            TOpts opts;
            TData data;
            TCustOpts custom_opts;
        };
    }
    

    This is our make-shift "Domain Layer".

    Let's Parse!

    So, here's how I'd write the parsing code:

    namespace Parsing {
        // Each parse() overload fills one Domain type from the matching subtree.
        // Attributes are addressed by path ("<xmlattr>.<name>"); the second
        // argument of get() is the fallback returned when the path is missing
        // or the stored text cannot be converted.

        /// Reads tCount / tformat / ttimeout from the attributes of a <TOpts> node.
        void parse(Domain::TOpts& v, ptree const& pt) {
            // The defaults are spelled as size_t so get() extracts a size_t.
            // With a plain `0` it would extract an int and silently fall back
            // to the default for values that do not fit in int.
            v.count   = pt.get("<xmlattr>.tCount", size_t{0});
            v.format  = pt.get("<xmlattr>.tformat", "0");
            v.timeout = pt.get("<xmlattr>.ttimeout", size_t{0});
        }

        /// Reads date / time / Ref from the <Tvalue> child of a <TData> node.
        void parse(Domain::TData& v, ptree const& pt) {
            v.date = pt.get("Tvalue.<xmlattr>.date", "YYMMDD");
            v.time = pt.get("Tvalue.<xmlattr>.time", "HHMM");
            v.ref  = pt.get("Tvalue.<xmlattr>.Ref", size_t{0});
        }

        /// Collects every <Param name=".." value=".."/> child of a <TCustOpts> node.
        void parse(Domain::TCustOpts& v, ptree const& pt) {
            for (auto const& child : pt) {
                // Skip anything that is not a <Param> element.
                if (child.first != "Param")
                    continue;

                v.params.emplace(
                    child.second.get("<xmlattr>.name", "(anon)"),
                    child.second.get("<xmlattr>.value", ""));
            }
        }

        // make any parse helper available optionally
        /// Parses into `v` only when the optional subtree is present;
        /// otherwise `v` is left untouched.
        template <typename T>
        void parse_optional(T& v, boost::optional<ptree const&> pt) {
            if (pt) parse(v, *pt);
        }

        /// Reads the `ver` attribute and each (possibly absent) section of a <Txn> node.
        void parse(Domain::Txn& v, ptree const& pt) {
            v.version = pt.get("<xmlattr>.ver", "0.0");
            parse_optional(v.opts,        pt.get_child_optional("TOpts"));
            parse_optional(v.data,        pt.get_child_optional("TData"));
            parse_optional(v.custom_opts, pt.get_child_optional("TCustOpts"));
        }
    }
    

    The only not-so-straight-forward thing is parse_optional to deal with subtrees that might be absent.

    Using it:

    int main() {
        boost::property_tree::ptree pt;
        {
            extern char const* xml;
            std::stringstream ss(xml);
            read_xml(ss, pt);
        }
    
        Domain::Txn transaction;
        Parsing::parse(transaction, pt.get_child("Txn"));
    
        std::cout << transaction; // complete roundtrip
    }
    

    BONUS: Roundtrip

    Let's also save the same "Domain" classes back to a property tree, so we can verify it works:

    namespace Writing { // for DEBUG/demo only
        // Every serialize() overload writes one Domain type into the tree it is
        // handed, using the same attribute paths the parser reads.

        /// Stores the TOpts fields as attributes of `pt`.
        void serialize(Domain::TOpts const& v, ptree& pt) {
            pt.put("<xmlattr>.tCount",   v.count);
            pt.put("<xmlattr>.tformat",  v.format);
            pt.put("<xmlattr>.ttimeout", v.timeout);
        }

        /// Stores the TData fields as attributes of a <Tvalue> child of `pt`.
        void serialize(Domain::TData const& v, ptree& pt) {
            pt.put("Tvalue.<xmlattr>.date", v.date);
            pt.put("Tvalue.<xmlattr>.time", v.time);
            pt.put("Tvalue.<xmlattr>.Ref",  v.ref);
        }

        /// Appends one <Param> child to `pt` per stored name/value pair.
        void serialize(Domain::TCustOpts const& v, ptree& pt) {
            for (auto const& entry : v.params) {
                // Build the node first, then append it; add_child (unlike
                // put_child) keeps earlier <Param> siblings.
                ptree node;
                node.put("<xmlattr>.name",  entry.first);
                node.put("<xmlattr>.value", entry.second);
                pt.add_child("Param", node);
            }
        }

        /// Appends a <Txn> element holding the version and all three sections to `pt`.
        void serialize(Domain::Txn const& v, ptree& pt) {
            ptree& root = pt.add_child("Txn", ptree{});
            root.put("<xmlattr>.ver", v.version);

            // Sections are added in document order: TOpts, TData, TCustOpts.
            ptree opts;
            serialize(v.opts, opts);
            root.add_child("TOpts", opts);

            ptree data;
            serialize(v.data, data);
            root.add_child("TData", data);

            ptree custom;
            serialize(v.custom_opts, custom);
            root.add_child("TCustOpts", custom);
        }
    }
    

    FULL DEMO

    This demo shows your original XML parsed and serialized back:

    Live On Coliru

    #include <boost/property_tree/xml_parser.hpp>
    #include <iostream>
    #include <map>
    
    using boost::property_tree::ptree;
    
    namespace Domain {
        // Plain value types mirroring the elements of the <Txn> document.
        // Numeric members carry in-class initializers so that a
        // default-constructed object is fully defined even when the
        // corresponding XML element is absent and never parsed.

        /// Options read from the attributes of <TOpts>.
        struct TOpts {
            size_t count = 0;
            std::string format;
            size_t timeout = 0;
        };

        /// Values read from the attributes of <TData>/<Tvalue>.
        struct TData {
            std::string date; // YYMMDD
            std::string time; // HHMM
            size_t ref = 0;
        };

        /// Name/value pairs collected from the <Param> children of <TCustOpts>.
        struct TCustOpts {
            // multimap: the same parameter name may occur more than once.
            std::multimap<std::string, std::string> params;
        };

        /// The whole transaction: version attribute plus the three sections.
        struct Txn {
            std::string version;
            TOpts opts;
            TData data;
            TCustOpts custom_opts;
        };
    }
    
    namespace Parsing {
        // Each parse() overload fills one Domain type from the matching subtree.
        // Attributes are addressed by path ("<xmlattr>.<name>"); the second
        // argument of get() is the fallback returned when the path is missing
        // or the stored text cannot be converted.

        /// Reads tCount / tformat / ttimeout from the attributes of a <TOpts> node.
        void parse(Domain::TOpts& v, ptree const& pt) {
            // The defaults are spelled as size_t so get() extracts a size_t.
            // With a plain `0` it would extract an int and silently fall back
            // to the default for values that do not fit in int.
            v.count   = pt.get("<xmlattr>.tCount", size_t{0});
            v.format  = pt.get("<xmlattr>.tformat", "0");
            v.timeout = pt.get("<xmlattr>.ttimeout", size_t{0});
        }

        /// Reads date / time / Ref from the <Tvalue> child of a <TData> node.
        void parse(Domain::TData& v, ptree const& pt) {
            v.date = pt.get("Tvalue.<xmlattr>.date", "YYMMDD");
            v.time = pt.get("Tvalue.<xmlattr>.time", "HHMM");
            v.ref  = pt.get("Tvalue.<xmlattr>.Ref", size_t{0});
        }

        /// Collects every <Param name=".." value=".."/> child of a <TCustOpts> node.
        void parse(Domain::TCustOpts& v, ptree const& pt) {
            for (auto const& child : pt) {
                // Skip anything that is not a <Param> element.
                if (child.first != "Param")
                    continue;

                v.params.emplace(
                    child.second.get("<xmlattr>.name", "(anon)"),
                    child.second.get("<xmlattr>.value", ""));
            }
        }

        // make any parse helper available optionally
        /// Parses into `v` only when the optional subtree is present;
        /// otherwise `v` is left untouched.
        template <typename T>
        void parse_optional(T& v, boost::optional<ptree const&> pt) {
            if (pt) parse(v, *pt);
        }

        /// Reads the `ver` attribute and each (possibly absent) section of a <Txn> node.
        void parse(Domain::Txn& v, ptree const& pt) {
            v.version = pt.get("<xmlattr>.ver", "0.0");
            parse_optional(v.opts,        pt.get_child_optional("TOpts"));
            parse_optional(v.data,        pt.get_child_optional("TData"));
            parse_optional(v.custom_opts, pt.get_child_optional("TCustOpts"));
        }
    }
    
    namespace Writing { // for DEBUG/demo only
        // Every serialize() overload writes one Domain type into the tree it is
        // handed, using the same attribute paths the parser reads.

        /// Stores the TOpts fields as attributes of `pt`.
        void serialize(Domain::TOpts const& v, ptree& pt) {
            pt.put("<xmlattr>.tCount",   v.count);
            pt.put("<xmlattr>.tformat",  v.format);
            pt.put("<xmlattr>.ttimeout", v.timeout);
        }

        /// Stores the TData fields as attributes of a <Tvalue> child of `pt`.
        void serialize(Domain::TData const& v, ptree& pt) {
            pt.put("Tvalue.<xmlattr>.date", v.date);
            pt.put("Tvalue.<xmlattr>.time", v.time);
            pt.put("Tvalue.<xmlattr>.Ref",  v.ref);
        }

        /// Appends one <Param> child to `pt` per stored name/value pair.
        void serialize(Domain::TCustOpts const& v, ptree& pt) {
            for (auto const& entry : v.params) {
                // Build the node first, then append it; add_child (unlike
                // put_child) keeps earlier <Param> siblings.
                ptree node;
                node.put("<xmlattr>.name",  entry.first);
                node.put("<xmlattr>.value", entry.second);
                pt.add_child("Param", node);
            }
        }

        /// Appends a <Txn> element holding the version and all three sections to `pt`.
        void serialize(Domain::Txn const& v, ptree& pt) {
            ptree& root = pt.add_child("Txn", ptree{});
            root.put("<xmlattr>.ver", v.version);

            // Sections are added in document order: TOpts, TData, TCustOpts.
            ptree opts;
            serialize(v.opts, opts);
            root.add_child("TOpts", opts);

            ptree data;
            serialize(v.data, data);
            root.add_child("TData", data);

            ptree custom;
            serialize(v.custom_opts, custom);
            root.add_child("TCustOpts", custom);
        }
    }
    
    namespace { // for debug/demo only
        std::ostream& operator<<(std::ostream& os, Domain::Txn const& v) {
            ptree tmp;
            Writing::serialize(v, tmp);
            write_xml(os, tmp, boost::property_tree::xml_writer_make_settings<std::string>(' ', 4));
            return os;
        }
    }
    
    int main() {
        boost::property_tree::ptree pt;
        {
            extern char const* xml;
            std::stringstream ss(xml);
            read_xml(ss, pt);
        }
    
        Domain::Txn transaction;
        Parsing::parse(transaction, pt.get_child("Txn"));
    
        std::cout << transaction; // complete roundtrip
    }
    
    // Sample document consumed by main() (declared there via `extern`).
    // Written as a raw string literal so the XML needs no escaping.
    char const* xml = R"(<?xml version="1.0" encoding="UTF-8" standalone="yes"?>
    <Txn ver="1.0">
        <TOpts tCount="1"  tformat="0"  ttimeout="10" />
        <TData>
            <Tvalue date="YYMMDD" time="HHMM" Ref="100"/>
        </TData>
        <TCustOpts>
            <Param name="SALE" value="xyz" />
        </TCustOpts>
    </Txn>
    )";
    

    Which prints:

    <?xml version="1.0" encoding="utf-8"?>
    <Txn ver="1.0">
        <TOpts tCount="1" tformat="0" ttimeout="10"/>
        <TData>
            <Tvalue date="YYMMDD" time="HHMM" Ref="100"/>
        </TData>
        <TCustOpts>
            <Param name="SALE" value="xyz"/>
        </TCustOpts>
    </Txn>