Search code examples
c++c++14boost-spiritboost-phoenix

Why is my Expression value lost after a phoenix::bind function call?


I have a working parser that parses IF-ELSE statements.

//#define BOOST_SPIRIT_DEBUG 1
#include <boost/fusion/adapted.hpp>
#include <boost/phoenix.hpp>
#include <boost/spirit/include/qi.hpp>
#include <iomanip>
namespace qi = boost::spirit::qi;
namespace px = boost::phoenix;

namespace Ast {
    using boost::recursive_wrapper;

    // Char-traits type parameterised on a tag. It adds nothing to std::char_traits<char>;
    // the tag only makes every String<Tag> below a distinct, non-interchangeable type.
    template <typename> struct custom_string : std::char_traits<char> {};
    template <typename Tag>
    using String = std::basic_string<char, custom_string<Tag> >;

    // Strongly-typed string flavours used as AST leaves and operator names.
    using Identifier = String<struct TagId>;
    using Literal = String<struct TagLiteral>;
    using Variant = String<struct TagVariant>;
    using Word = String<struct TagWord>;
    using Obj = String<struct TagObj>;
    using BinOp = String<struct TagOp>;

    using Datatype = String<struct TagDatatype>;
    struct Base {
        Identifier id;
        Literal literal;
    };

    using Ids = std::vector<Identifier>;

    using Number = double;
    // A leaf value. Literal is the first alternative, so a default-constructed Value
    // (and therefore a default-constructed Expression) holds an empty Literal.
    using Value = boost::variant<Literal, Number, Identifier, Variant>;

    // Expression block
    struct Bioperator;
    struct IEBlock;

    // Either a leaf Value or a (recursively wrapped) binary operation.
    using Expression = boost::variant<Value, recursive_wrapper<Bioperator>>;

    // `assign <var> <expr>` statement.
    struct Assign {
        Variant     var;
        Expression  value;
    };

    // Binary operation: `var op value`, plus a flag telling whether the operation was
    // written inside parentheses.
    struct Bioperator {
        // All arguments are defaulted so the type stays default-constructible; the
        // parameters are taken by value and moved into place.
        Bioperator(Expression l = {}, BinOp o = {}, Expression r = {}, bool g = false)
            : var(std::move(l))
            , op(std::move(o))
            , value(std::move(r))
            , group(g) {}
        Expression var;   // left operand
        BinOp      op;    // operator text
        Expression value; // right operand
        bool group;       // true when the operation was parenthesised
    };

    // Returns `e` with Bioperator::group set to true when `e` holds a Bioperator;
    // any other expression is returned unchanged.
    Expression set_group_flag(Expression e) {
        // The pointer form of boost::get yields nullptr unless the variant currently
        // holds a Bioperator, so the flag is flipped in place and the operand
        // subtrees are never copied.
        if (auto* bo = boost::get<Bioperator>(&e))
            bo->group = true;
        return e;
    }

    // A statement is a nested list of statements, an assignment, or an if/else block.
    using Statement = boost::make_recursive_variant<
        std::vector<boost::recursive_variant_>,
        Assign,
        recursive_wrapper<IEBlock>
    >::type;

    using Statements = std::vector<Statement>;

    // if / elseif / else block; false_stmt is empty when there is no else part.
    struct IEBlock {
        Expression                 condition;
        Statement                  true_stmt;
        boost::optional<Statement> false_stmt;
    };

    // Top-level parse result: the sequence of task items.
    using Task = std::vector<boost::variant<Statement>>;
} // namespace Ast

// Expressions
BOOST_FUSION_ADAPT_STRUCT(Ast::Assign, var, value);
BOOST_FUSION_ADAPT_STRUCT(Ast::Bioperator, var, op, value);
BOOST_FUSION_ADAPT_STRUCT(Ast::IEBlock, condition, true_stmt, false_stmt);

namespace Parser {
    // Spirit.Qi grammar that turns a sequence of ';'-terminated statements into an
    // Ast::Task. The public start rule has no skipper and wraps the real grammar in
    // skip(space), so it is meant to be driven with qi::parse.
    template <typename It> struct Task : qi::grammar<It, Ast::Task()> {
        Task() : Task::base_type(start) {
            using namespace qi;

            start = skip(space)[task_];

            // lexemes (their rules are declared without a skipper):
            id_ = raw[alpha >> *(alnum | '_' | ':')];
            literal_ = '"' > *('\\' >> char_ | ~char_('"')) > '"';
            variant_ = raw[(alpha|'_') >> *(alnum | '_')];

            task_ = *task_item > eoi;
            task_item = statement_;
            value_ = literal_ | number_ | id_ | variant_;
            number_ = double_;

            // Statements: every statement must be followed by ';'
            statement_ = (assign_ | ifel_block_) > ';'; // expr_
            stmt_block_ = *statement_;
            
            assign_ = no_case["assign"] >> variant_ >> expr_;

            // Binary expression layers, loosest binding first:
            //   expr_     : ||  &&
            //   boolterm_ : ==  !=  >  >=  <  <=
            //   binterm_  : |   &   *
            //   term_     : +   -
            //   factor_   : value or parenthesised expression
            // Each layer seeds _val with its first operand and then folds every
            // following (operator, operand) pair into a new Bioperator whose left
            // operand is the previous _val, i.e. the chains associate to the left.
            expr_ = boolterm_[_val = _1]
                >> *(boolop_ >> boolterm_)[_val = px::construct<Ast::Bioperator>(_val, _1, _2)]
                ;

            boolterm_ = binterm_[_val = _1]
                >> *(boolfacop_ >> binterm_)[_val = px::construct<Ast::Bioperator>(_val, _1, _2)]
                ;

            binterm_ = term_[_val = _1]
                >> *(bintermop_ >> term_)[_val = px::construct<Ast::Bioperator>(_val, _1, _2)]
                ;

            term_ = factor_[_val = _1]
                >> *(termop_ >> factor_)[_val = px::construct<Ast::Bioperator>(_val, _1, _2)]
                ;

            // No semantic action here, so the attribute propagates automatically.
            factor_ = value_
                | '(' >> expr_ >> ')'
                ;

            // if (...) ... [elseif (...) ...]* [else ...] endif
            // An `elseif` recurses into condtion_core_, so it ends up as an IEBlock
            // stored in the false_stmt of the enclosing block.
            ifel_block_ = no_case["if"] >> condtion_core_ >> no_case["endif"];
            condtion_core_ = '(' >> expr_ >> ')'
                >> stmt_block_                                        // true block
                >> -(no_case["elseif"] >> condtion_core_ | elsepart_) // false block
                ;                                                //
            elsepart_ = no_case["else"] >> stmt_block_;

            // Operator tables; raw[] exposes the matched operator text as the attribute.
            bin_ops += "==", "!=", ">", ">=", "<", "<=";
            boolfacop_ = raw[bin_ops];

            bool_ops += "||", "&&";
            boolop_ = raw[bool_ops];

            binterm_ops += "|", "&", "*";
            bintermop_ = raw[binterm_ops];

            term_ops += "+", "-";
            termop_ = raw[term_ops];

            BOOST_SPIRIT_DEBUG_NODES(
                (task_)(task_item)
                (id_)(literal_)(variant_)(value_)(number_)
                (assign_)(ifel_block_)(condtion_core_)
                (expr_)(term_)(binterm_)(boolterm_)(factor_)
                (boolfacop_)(boolop_)(bintermop_)(termop_)
                (statement_)(stmt_block_)
            )
        }

    private:
        qi::rule<It, Ast::Task()> start;
        qi::symbols<char> bin_ops, bool_ops, binterm_ops, term_ops;

        using Skipper = qi::space_type;
        qi::rule<It, Ast::Task(), Skipper> task_, task_item;
        qi::rule<It, Ast::Assign(), Skipper> assign_;
        qi::rule<It, Ast::Statement(), Skipper> statement_;
        qi::rule<It, Ast::Expression(), Skipper> expr_, term_, binterm_, boolterm_, factor_;
        qi::rule<It, Ast::IEBlock(), Skipper> ifel_block_, condtion_core_;
        qi::rule<It, Ast::Statement(), Skipper> elsepart_;
        qi::rule<It, Ast::Statements(), Skipper> stmt_block_;

        // lexemes:
        qi::rule<It, Ast::Identifier()> id_;
        qi::rule<It, Ast::Literal()>    literal_;
        qi::rule<It, Ast::Value()>      value_;
        qi::rule<It, Ast::Number()>     number_;
        qi::rule<It, Ast::Variant()>    variant_;
        qi::rule<It, Ast::BinOp()>      boolfacop_, boolop_, bintermop_, termop_;
    };
}

#include <pugixml.hpp>
namespace Generate {
    using namespace Ast;

    // Function object that serialises an AST node (and everything below it) as child
    // elements of a pugixml node.
    struct XML {
        using Node = pugi::xml_node;

        // callable for variant visiting:
        template <typename T> void operator()(Node parent, T const& node) const { apply(parent, node); }

    private:
        // Variants are dispatched to the overload matching the currently held alternative.
        template <typename... Ts>
        void apply(Node parent, boost::variant<Ts...> const& v) const {
            using std::placeholders::_1;
            boost::apply_visitor(std::bind(*this, parent, _1), v);
        }

        // Leaves: one element per value, with the value as its text.
        void apply(Node parent, Number const& num) const {
            create_child(parent, "num").text().set(num);
        }
        void apply(Node parent, Identifier const& id) const {
            create_child(parent, "identifier").text().set(id.c_str());
        }
        void apply(Node parent, Variant const& v) const {
            create_child(parent, "variant").text().set(v.c_str());
        }
        void apply(Node parent, Literal const& literal) const {
            create_child(parent, "literal").text().set(literal.c_str());
        }

        // An empty optional produces no output.
        template <typename T> void apply(Node parent, boost::optional<T> const& opt) const {
            if (opt)
                apply(parent, *opt);
        }

        void apply(Node parent, Assign const& a) const {
            auto asn_ = create_child(parent, "assign");
            apply(asn_, a.var);
            apply(asn_, a.value);
        }

        // <bioperator> carries the operator as text followed by both operands; it is
        // wrapped in a <group> element when the operation is flagged as grouped.
        void apply(Node parent, Bioperator const& bo) const {
            auto use_parent = bo.group ? create_child(parent, "group") : parent;
            auto botag = create_child(use_parent, "bioperator");
            botag.text().set(bo.op.c_str());
            apply(botag, bo.var);
            apply(botag, bo.value);
        }

        // Emits an if block. `name` is the element name: "if" for the outermost block,
        // "elseif" for a nested block reached through false_stmt.
        void apply(Node parent, IEBlock const& c, char const* name = "if") const {
            auto if_stmt = create_child(parent, name);
            apply(if_stmt, c.condition);

            // Compare the characters, not the pointers: whether two identical string
            // literals share an address is up to the compiler.
            bool const is_if = std::string(name) == "if";
            auto use_parent = is_if ? create_child(if_stmt, "then") : if_stmt;
            apply(use_parent, c.true_stmt);  // only show then for if block

            if (c.false_stmt) {
                // make sure elseif is always a child of if stmt.
                use_parent = find_parent_by_name(if_stmt, "if");
                if (auto nested = is_ifel_block(*c.false_stmt)) {
                    apply(use_parent, *nested, "elseif");
                }
                else {
                    apply(create_child(use_parent, "else"), *c.false_stmt);
                }
            }
        }

        void apply(Node parent, Statements const& b) const {
            if (b.size() == 1) // simplify single-statement block
                return apply(parent, b.front());

            for (auto& s : b)
                apply(parent, s);
        }

        // <task> with one <item> per task entry.
        void apply(Node parent, Task const& t) const {
            auto task = create_child(parent, "task");
            for (auto& item : t)
                apply(create_child(task, "item"), item);
        }

    private:
        // Appends a new child element called `name` to `parent` and returns it.
        Node create_child(Node parent, std::string const& name) const {
            auto child = parent.append_child();
            child.set_name(name.c_str());
            return child;
        }

        // Returns `node` itself or its nearest ancestor whose element name equals `name`.
        // The walk stops once it runs off the top of the tree (null handle); in that
        // case `node` is returned unchanged so that output is still attached somewhere
        // instead of the search never terminating.
        Node find_parent_by_name(Node node, std::string const& name) const {
            for (Node cur = node; !cur.empty(); cur = cur.parent()) {
                if (name == cur.name())
                    return cur;
            }
            return node;
        }

        // is_ifel_block: returns a pointer to the IEBlock an AST node boils down to
        // (directly, through a variant, or as the only statement of a block), or
        // nullptr when the node is anything else.
        static IEBlock const* is_ifel_block(IEBlock const& c) { return &c; }
        static IEBlock const* is_ifel_block(Statements const& b) {
            return b.size() == 1 ? is_ifel_block(b.front()) : nullptr;
        }
        template <typename... Ts> static IEBlock const* is_ifel_block(boost::variant<Ts...> const& v) {
            return boost::apply_visitor([](auto const& ast) { return is_ifel_block(ast); }, v);
        }
        template <typename T> static IEBlock const* is_ifel_block(T const&) { return nullptr; }
    };
} // namespace Generate

// Sample inputs fed to the parser by main(), one program per entry. The raw string
// literals are passed through verbatim, including their leading newline and indentation.
static const std::string cases[] = {
        R"(
          If ((Var3 == "A" && Var4 == 20) || (Var4 == "B" && Var5 > 0))
            Assign VarName (start + end + 1);
          EndIf;
    )",
};

int main() {
    using It = std::string::const_iterator;
    static const Parser::Task<It> p;
    static const Generate::XML to_xml;

    int i = 0;
    for (std::string const& input : cases) {
        try {
            Ast::Task t;

            std::cout << "*** Sample #" << ++i << std::endl;
            if (qi::parse(begin(input), end(input), p, t)) {
                pugi::xml_document doc;
                to_xml(doc.root(), t);
                doc.print(std::cout, "  ", pugi::format_default);
                std::cout << std::endl;
            }
            else {
                std::cout << " -> INVALID" << std::endl;
            }
        }
        catch (qi::expectation_failure<It> const& ef) {
            auto f = begin(input);
            auto p = ef.first - input.begin();
            //#pragma GCC diagnostic push
            //#pragma GCC diagnostic ignored "-Wsign-conversion"
            auto bol = input.find_last_of("\r\n", p) + 1;
            auto line = std::count(f, f + bol, '\n') + 1;
            auto eol = input.find_first_of("\r\n", p);

            std::cerr << " -> EXPECTED " << ef.what_ << " in line:" << line << "\n"
                << input.substr(bol, eol - bol) << "\n"
                << std::setw(static_cast<int>(p - bol)) << ""
                << "^--- here" << std::endl;
            //#pragma GCC diagnostic pop
        }
    }
}

This produces the following XML output:

<task>
  <item>
    <if>
      <bioperator>||<bioperator>&amp;&amp;<bioperator>==<identifier>Var3</identifier>
            <literal>A</literal>
          </bioperator>
          <bioperator>==<identifier>Var4</identifier>
            <num>20</num>
          </bioperator>
        </bioperator>
        <bioperator>&amp;&amp;<bioperator>==<identifier>Var4</identifier>
            <literal>B</literal>
          </bioperator>
          <bioperator>&gt;<identifier>Var5</identifier>
            <num>0</num>
          </bioperator>
        </bioperator>
      </bioperator>
      <then>
        <assign>
          <variant>VarName</variant>
          <bioperator>+<bioperator>+<identifier>start</identifier>
              <identifier>end</identifier>
            </bioperator>
            <num>1</num>
          </bioperator>
        </assign>
      </then>
    </if>
  </item>
</task>

All good. Now I want to put a <group> tag around an expression when it is enclosed in parentheses. So I changed the factor_ rule from:

factor_ = value_
  | '(' >> expr_ >> ')'
  ;

to:

factor_ = value_
  | '(' >> expr_[_val = px::bind(&Ast::set_group_flag, _1)] >> ')'
  ;

to call this function

    // Returns `e` with Bioperator::group set to true when `e` holds a Bioperator;
    // any other expression is returned unchanged.
    Expression set_group_flag(Expression e) {
        // The pointer form of boost::get yields nullptr unless the variant currently
        // holds a Bioperator, so the flag is flipped in place and the operand
        // subtrees are never copied.
        if (auto* bo = boost::get<Bioperator>(&e))
            bo->group = true;
        return e;       // return the expression
    }

The code compiles fine, but the values of Bioperator.var and Bioperator.value are lost. The updated factor_ rule produces the result below:

<task>
  <item>
    <if>
      <bioperator>||<group>
          <bioperator>&amp;&amp;<bioperator>==<literal></literal>
              <literal></literal>
            </bioperator>
            <bioperator>==<literal></literal>
              <literal></literal>
            </bioperator>
          </bioperator>
        </group>
        <group>
          <bioperator>&amp;&amp;<bioperator>==<literal></literal>
              <literal></literal>
            </bioperator>
            <bioperator>&gt;<literal></literal>
              <literal></literal>
            </bioperator>
          </bioperator>
        </group>
      </bioperator>
      <then>
        <assign>
          <variant>VarName</variant>
          <group>
            <bioperator>+<bioperator>+<literal></literal>
                <literal></literal>
              </bioperator>
              <literal></literal>
            </bioperator>
          </group>
        </assign>
      </then>
    </if>
  </item>
</task>

Notice that all the bioperator operands are now empty <literal></literal> tags.

My question is: is this the right way to set the group flag? Did I do something wrong that makes the expression values disappear like this?


Solution

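  • Semantic actions suppress automatic attribute propagation. As soon as one alternative of factor_ has a semantic action, the attribute of value_ in the other alternative is no longer assigned to the rule's result, so every leaf stays a default-constructed (empty) Literal. Either state the propagation explicitly: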

    factor_ = value_ [_val = _1]
      | '(' >> expr_ [_val = px::bind(&Ast::set_group_flag, _1)] >> ')'
      ;
    

    Or use operator %= to initialize the rule (see e.g. Boost.Spirit: Difference between operators "%=" and "=" or the documentation):

    Auto Rules It is typical to see rules like:

    r = p[_val = _1];
    

    [...]

    NOTE r %= p and r = p are equivalent if there are no semantic actions associated with p.


    That said, I urge you not to duct-tape your precedence problem. For one thing, a "group flag" is a boolean and not suited to express nesting. More importantly, your AST already reflects the relations between the nodes. Instead, fix your parser to reflect the associativity/precedence if necessary, and emit the grouping only where it is required. E.g. add(2, mul(3, 2)) is 2+3*2 (8), and mul(2, add(3, 1)) is 2*(3+1) (note the added parentheses here).