Search code examples
c++xmlparsingboostboost-propertytree

Parse XML with Boost access and populate property tree


I have an XML file:

<expressions>
    <addition id="1">
        <item>2</item>
        <item>3</item>
        <item>4</item>
    </addition>
    <subtraction id="2">
        <minuend>3</minuend>
        <subtrahend>2</subtrahend>
    </subtraction>
    <multiplication id="3">
        <factor>5</factor>
        <factor>6</factor>
        <factor>8</factor>
    </multiplication>
    <division id="4">
        <dividend>54</dividend>
        <divisor>9</divisor> 
    </division>
</expressions>

I need to parse it and produce the results as another XML document:

<expressions>
    <result id="1">9</result>
    <result id="2">1</result>
    <result id="3">240</result>
    <result id="4">6</result>
</expressions>

Currently I'm investigating Boost, specifically ptree and read_xml. Please advise where additional information can be found. Thanks in advance.

My current investigation results are:

I have a class that defines an expression, with a pure virtual function that evaluates it. Lower in the inheritance tree this function is overridden and implemented for the specific expression type.

class AbstractExpression
{
public:
    AbstractExpression(ExpressionType aType){}
    virtual ~AbstractExpression() {}

    // Evaluates expression (must be overrided by child)
    virtual int  evalExpr() const = 0;

};

The derived classes are additionExpression, substractionExpression, multiplicationExpression and divisionExpression.

Besides this I have implemented methods of data manipulation in every inherited class.

Finally, I've written some code that evaluates the expressions in this XML:

using boost::property_tree::ptree;
    // Load the document into a property tree and take its <expressions> root.
    // read_xml and get_child report failures by throwing.
    ptree pt;
    read_xml("/PATH_TO/valuator.xml", pt);
    const ptree & expressions = pt.get_child("expressions");

    // Every direct child of <expressions> is one expression; its element name
    // selects the expression class used to evaluate it.
    //
    // Notes that apply to all four branches:
    //  * The expression objects have automatic storage, so they are released
    //    even if addVal()/evalExpr() throws, and no downcast is needed to
    //    reach the type-specific setters.
    //  * read_xml stores attributes and comments as pseudo-children named
    //    "<xmlattr>" and "<xmlcomment>". Operands are therefore selected by
    //    element name; otherwise e.g. a comment starting with a number would
    //    be fed into the calculation.
    //  * evalExpr() is still invoked through the AbstractExpression interface.
    BOOST_FOREACH(const ptree::value_type & currExpr, expressions){
        const std::string & readExprType = currExpr.first;
        std::cout << "currExpr = " << readExprType << std::endl;

        if (readExprType == "addition") {
            additionExpression addExpr;
            BOOST_FOREACH(const ptree::value_type & additionNodeEl, currExpr.second){
                if (additionNodeEl.first != "item")
                    continue; // not an operand (attribute/comment pseudo-node or unknown element)
                addExpr.addVal( atoi(additionNodeEl.second.data().c_str()) );
            }
            const AbstractExpression & addResult = addExpr;
            std::cout << "addition result = " << addResult.evalExpr() << std::endl;
        } else if (readExprType == "multiplication") {
            multiplicationExpression multExpr;
            BOOST_FOREACH(const ptree::value_type &multNodeEl, currExpr.second) {
                if (multNodeEl.first != "factor")
                    continue; // not an operand (attribute/comment pseudo-node or unknown element)
                const std::string & val = multNodeEl.second.data();
                // An empty <factor/> is skipped (as before) instead of multiplying by 0.
                if (!val.empty())
                    multExpr.addVal( atoi(val.c_str()) );
            }
            const AbstractExpression & multResult = multExpr;
            std::cout << "multiplication node result = " << multResult.evalExpr() << std::endl;
        } else if (readExprType == "subtraction") {
            substractionExpression substrExpr;
            BOOST_FOREACH(const ptree::value_type &substNodeEl, currExpr.second) {
                const std::string & elTypeName = substNodeEl.first;
                const std::string & val = substNodeEl.second.data();
                if (elTypeName == "minuend") {
                    substrExpr.setMinuend( atoi(val.c_str()) );
                } else if (elTypeName == "subtrahend") {
                    substrExpr.setSubtrahend( atoi(val.c_str()) );
                }
            }
            const AbstractExpression & substrResult = substrExpr;
            std::cout << "subtraction node result = " << substrResult.evalExpr() << std::endl;
        } else if (readExprType == "division") {
            divisionExpression divExpr;
            BOOST_FOREACH(const ptree::value_type &divNodeEl, currExpr.second) {
                const std::string & elTypeName = divNodeEl.first;
                const std::string & val = divNodeEl.second.data();
                if ( elTypeName == "dividend" ) {
                    divExpr.setDividend( atoi(val.c_str()) );
                } else if ( elTypeName == "divisor" ) {
                    divExpr.setDivisor( atoi(val.c_str()) );
                }
            }
            const AbstractExpression & divResult = divExpr;
            std::cout << "dividend node result = " << divResult.evalExpr() << std::endl;
        }

    }

Now I need to write all these results to XML.


Solution

  • Really, use a proper XML library (TinyXML, RapidXML, PugiXML, libxml2 etc.).

    If you really care only about a very specific subset of XML, here's a quick & dirty parser based on Boost Spirit V2: Live On Coliru

    namespace /*parser*/
    {
        namespace qi = boost::spirit::qi;
    
        /// Qi grammar for the restricted XML dialect of the expression files.
        ///
        /// Parses `<expressions> ... </expressions>` into an ast::expressions.
        /// Most sequence points use the expectation operator `>`, so a
        /// malformed document is reported by a qi::expectation_failure<It>
        /// exception rather than by a plain "no match".
        ///
        /// @tparam It       iterator type of the input.
        /// @tparam Skipper  skip parser used between tokens (defaults to qi::space_type).
        template <typename It, typename Skipper = qi::space_type>
            struct grammar : qi::grammar<It, ast::expressions(), Skipper>
        {
            grammar() : grammar::base_type(expressions_)
            {
                using namespace qi;
    
                // Shorthand for the lexeme directive: L[p] matches p without skipping inside it.
                static const lexeme_type L;
    
                simplevalue_ = auto_; // parses into Value, whatever it was declared as
                // id="<int>" -- exposes the integer.
                id_attr      = eps >> L["id"]      > '=' > '"' > int_        > '"';
                // complex="<anything but a quote>" -- accepted, but the rule exposes no attribute.
                complex_attr = eps >> L["complex"] > '=' > '"' > *~char_('"') > '"';
                // Opening/closing tag of the element whose name is passed as inherited attribute _r1.
                // The opening tag may carry the optional id and complex attributes, in that order.
                expr_open    = eps >> '<' >> L[lit(_r1)] > -id_attr > -complex_attr > '>';
                expr_close   = eps >> '<' >> '/' > L[lit(_r1)] > '>';
    
                // expression element parsing
                // (the unary + decays each string literal to const char* before it is
                //  handed to the rule as its element-name argument)
                addition_       = expr_open(+"addition") > +subexpr_(+"item")                                  > expr_close(+"addition");
                subtraction_    = expr_open(+"subtraction") > (subexpr_(+"minuend") > subexpr_(+"subtrahend")) > expr_close(+"subtraction");
                multiplication_ = expr_open(+"multiplication") > +subexpr_(+"factor")                          > expr_close(+"multiplication");
                division_       = expr_open(+"division") > (subexpr_(+"dividend") > subexpr_(+"divisor"))      > expr_close(+"division");
                // An expression is either a plain value or one of the four compound elements,
                // which lets compound expressions nest inside operand elements.
                expression_     = simplevalue_ | addition_ | subtraction_ | multiplication_ | division_;
    
                // Operand wrapper: <name> expression </name>, with no attributes allowed on the tag.
                subexpr_ = eps >> '<' >> L[lit(_r1)] > '>' > expression_ > '<' > '/' > L[lit(_r1)] > '>';
    
                // Document root: <expressions> followed by any number of expressions.
                expressions_ = eps
                    > '<' > L["expressions"] > '>'
                    > *expression_
                    > expr_close(+"expressions");
    
                BOOST_SPIRIT_DEBUG_NODES((simplevalue_)(expr_open)(expr_close)(subexpr_)(addition_)(subtraction_)(multiplication_)(division_)(expression_)(expressions_))
            }
          private:
            // All rules share the grammar's iterator and skipper. Using the Skipper
            // template parameter (instead of a hard-coded qi::space_type) keeps the
            // rules compatible with the grammar base when a non-default skipper is chosen.
            template <typename... T>
                using Rule = qi::rule<It, T..., Skipper>;
    
            // tags/primitives
            Rule<> complex_attr;
            Rule<int()> id_attr;
            Rule<ast::Value()> simplevalue_;
            Rule<ast::Id(std::string element_name)> expr_open;
            Rule<void(std::string element_name)> expr_close;
    
            Rule<ast::expression(std::string element_name )> subexpr_;
    
            // compounds
            Rule<ast::addition()>       addition_;
            Rule<ast::subtraction()>    subtraction_;
            Rule<ast::multiplication()> multiplication_;
            Rule<ast::division()>       division_;
            Rule<ast::expression()>     expression_;
    
            // start rule
            Rule<ast::expressions()> 
                expressions_;
        };
    
    } /*parser*/
    

    It parses the input into an abstract syntax tree that can be visited to evaluate the (sub)expressions. The test driver:

    /// Test driver: parses "expressions.xml" and prints one <result> element
    /// per top-level expression that carries an id.
    ///
    /// @return 0 on success, 1 if the file cannot be opened or cannot be parsed.
    int main()
    {
        std::ifstream ifs("expressions.xml");
        if (!ifs) {
            // Report the real cause instead of a confusing parse error on empty input.
            std::cerr << "Could not open expressions.xml\n";
            return 1;
        }
        // noskipws: whitespace must reach the parser, which applies its own skipper.
        It f(ifs >> std::noskipws), l;
    
        try {
            ast::expressions parsed;
    
            grammar<It> g;
            // parsing
            bool ok = phrase_parse(f, l, g, qi::space, parsed);
    
            if (!ok) {
                std::cerr << "Failed to parse expressions.xml\n";
                return 1;
            }
    
            // output results
            eval::machine machine;
    
            std::cout << "<expressions>\n";
            for (auto& expr : parsed)
                if (get_id(expr)) // expressions without an id produce no <result>
                    std::cout << "\t<result id=\"" << *get_id(expr) << "\">" << machine.evaluate(expr) << "</result>\n";
            std::cout << "</expressions>\n";
        } catch(qi::expectation_failure<It> const& e) {
            std::cout << "Expected " << e.what_ << " at '" << std::string(e.first,e.last) << "'\n";
            return 1; // signal the failure to the caller as well
        }
        return 0;
    }
    

    Prints

    <expressions>
        <result id="1">9</result>
        <result id="2">1</result>
        <result id="3">240</result>
        <result id="4">6</result>
    </expressions>
    

    Live On Coliru

    Note: this doesn't handle comments, Unicode, processing instructions, namespaces, PCDATA, character entity references, etc. In short: this is not an XML parser.