Search code examples
c++parsingboostebnfboost-spirit-x3

Boost Spirit X3: Collapsing one-element lists


Say I have a (simplified) recursive grammar like this:

OrExpr    := AndExpr % "or"
AndExpr   := Term % "and"
Term      := ParenExpr | String
ParenExpr := '(' >> OrExpr >> ')'
String    := lexeme['"' >> *(char_ - '"') >> '"']

So this works, but the problem is that it will wrap everything in multiple layers of expression. For example, the string "hello" and ("world" or "planet" or "globe") would parse as OrExpr(AndExpr("hello", OrExpr(AndExpr("world"), AndExpr("planet"), AndExpr("globe")))) (playing fast and loose with the syntax, but hopefully you understand). What I'd like is for the one-element nodes to be collapsed into their parent, so it would end up as AndExpr("hello", OrExpr("world", "parent", "globe"))

This can be solved with actions and using a state machine that only constructs the outer object if there's more than one child inside it. But I'm wondering if there's a way to fix this problem without using parser actions?


EDIT: Almost minimal example

Coliru

#include <boost/spirit/home/x3.hpp>
#include <boost/spirit/home/x3/support/ast/variant.hpp>
#include <boost/fusion/include/adapt_struct.hpp>
#include <iostream>

namespace x3 = boost::spirit::x3;

namespace burningmime::setmatch::ast
{
    // an expression node (either an AND or an OR)
    struct Expr;

    // child of an expression -- either another expression, or a terminal
    struct Node : x3::variant<std::string, x3::forward_ast<Expr>>
    {
        using base_type::base_type;
        using base_type::operator=;
    };

    // tags for expression type
    enum OPER
    {
        OPER_AND = 1,
        OPER_OR = 2
    };

    // see above
    struct Expr
    {
        OPER op;
        std::vector<Node> children;
    };

    // for debugging purposes; this will print all the expressions
    struct AstPrinter
    {
        void operator()(const Expr& node) const
        {
            std::cout << (node.op == OPER_AND ? "And(" : "Or(");
            bool first = true;
            for(const auto& child : node.children)
            {
                if(!first) std::cout << ", ";
                first = false;
                boost::apply_visitor(*this, child);
            }
            std::cout << ")";
        }

        void operator()(const std::string& node) const
        {
            std::cout << node;
        }
    };
}

 // these need to be at top-level scope
 // basically this adds compile-time type information, so the parser knows where to put various attributes
BOOST_FUSION_ADAPT_STRUCT(burningmime::setmatch::ast::Expr, op, children)

#define DECLARE_RULE(NAME, TYPE) static const x3::rule<class NAME, TYPE> NAME = #NAME;
#define KEYWORD(X) static const auto kw_##X = x3::no_case[#X];
#define DEFINE_RULE(NAME, GRAMMAR) \
    static const auto NAME##_def = GRAMMAR; \
    BOOST_SPIRIT_DEFINE(NAME)

namespace burningmime::setmatch::parser
{
    // we need to pre-declare the rules so they can be used recursively
    DECLARE_RULE(Phrase, std::string)
    DECLARE_RULE(Term, ast::Node)
    DECLARE_RULE(AndExpr, ast::Expr)
    DECLARE_RULE(OrExpr, ast::Expr)
    DECLARE_RULE(ParenExpr, ast::Expr)

    // keywords
    KEYWORD(and)
    KEYWORD(or)

    static const auto lparen = x3::lit('(');
    static const auto rparen = x3::lit(')');

    // helper parsers
    static const auto keywords = kw_and | kw_or | lparen | rparen;
    static const auto word = x3::lexeme[+(x3::char_ - x3::ascii::space - lparen - rparen)];
    static const auto bareWord = word - keywords;
    static const auto quotedString = x3::lexeme[x3::char_('"') >> *(x3::char_ - '"') >> x3::char_('"')];

    DEFINE_RULE(Phrase,     quotedString | bareWord)
    DEFINE_RULE(Term,       ParenExpr | Phrase)
    DEFINE_RULE(ParenExpr,  lparen >> OrExpr >> rparen)
    DEFINE_RULE(AndExpr,    x3::attr(ast::OPER_AND) >> (Term % kw_and))
    DEFINE_RULE(OrExpr,     x3::attr(ast::OPER_OR) >> (AndExpr % kw_or))
}

namespace burningmime::setmatch
{
    void parseRuleFluent(const char* buf)
    {
        ast::Expr root;
        auto start = buf, end = start + strlen(buf);
        bool success = x3::phrase_parse(start, end, parser::OrExpr, x3::ascii::space, root);
        if(!success || start != end)
            throw std::runtime_error(std::string("Could not parse rule: ") + buf);
        printf("Result of parsing: %s\n=========================\n", start);
        ast::Node root2(root);
        boost::apply_visitor(ast::AstPrinter(), root2);
    }
}

int main()
{
    burningmime::setmatch::parseRuleFluent(R"#("hello" and ("world" or "planet" or "globe"))#");
}

Solution

  • #include <boost/spirit/home/x3.hpp>
    #include <boost/spirit/home/x3/support/ast/variant.hpp>
    #include <boost/fusion/include/adapt_struct.hpp>
    #include <iostream>
    
    namespace x3 = boost::spirit::x3;
    
    namespace burningmime::setmatch::ast
    {
        // an expression node (either an AND or an OR)
        struct Expr;
    
        // child of an expression -- either another expression, or a terminal
        struct Node : x3::variant<std::string, x3::forward_ast<Expr>>
        {
            using base_type::base_type;
            using base_type::operator=;
        };
    
        // tags for expression type
        enum OPER
        {
            OPER_AND = 1,
            OPER_OR = 2
        };
    
        // see above
        struct Expr
        {
            OPER op;
            std::vector<Node> children;
        };
    
        // for debugging purposes; this will print all the expressions
        struct AstPrinter
        {
            void operator()(const Expr& node) const
            {
                std::cout << (node.op == OPER_AND ? "And(" : "Or(");
                bool first = true;
                for(const auto& child : node.children)
                {
                    if(!first) std::cout << ", ";
                    first = false;
                    boost::apply_visitor(*this, child);
                }
                std::cout << ")";
            }
    
            void operator()(const std::string& node) const
            {
                std::cout << node;
            }
        };
    }
    
     // these need to be at top-level scope
     // basically this adds compile-time type information, so the parser knows where to put various attributes
    BOOST_FUSION_ADAPT_STRUCT(burningmime::setmatch::ast::Expr, op, children)
    
    #define DECLARE_RULE(NAME, TYPE) static const x3::rule<class NAME##_r, TYPE> NAME = #NAME;
    #define KEYWORD(X) static const auto kw_##X = x3::no_case[#X];
    #define DEFINE_RULE(NAME, GRAMMAR) \
        static const auto NAME##_def = GRAMMAR; \
        BOOST_SPIRIT_DEFINE(NAME)
    
    namespace burningmime::setmatch::parser
    {
        // we need to pre-declare the rules so they can be used recursively
        DECLARE_RULE(Phrase,    std::string)
        DECLARE_RULE(Term,      ast::Node)
        DECLARE_RULE(AndExpr,   ast::Node)
        DECLARE_RULE(OrExpr,    ast::Node)
        DECLARE_RULE(ParenExpr, ast::Node)
    
        // keywords
        KEYWORD(and)
        KEYWORD(or)
    
        static const auto lparen = x3::lit('(');
        static const auto rparen = x3::lit(')');
    
        // helper parsers
        static const auto keywords = kw_and | kw_or | lparen | rparen;
        static const auto word = x3::lexeme[+(x3::char_ - x3::ascii::space - lparen - rparen)];
        static const auto bareWord = word - keywords;
        static const auto quotedString = x3::lexeme[x3::char_('"') >> *(x3::char_ - '"') >> x3::char_('"')];
    
        DEFINE_RULE(Phrase,     quotedString | bareWord)
        DEFINE_RULE(Term,       ParenExpr | Phrase)
        DEFINE_RULE(ParenExpr,  lparen >> OrExpr >> rparen)
        template <ast::OPER Op>
        struct make_node
        {
            template <typename Context >
            void operator()(Context const& ctx) const
            {
                if (_attr(ctx).size() == 1)
                    _val(ctx) = std::move(_attr(ctx)[0]);
                else
                    _val(ctx) = ast::Expr{ Op, std::move(_attr(ctx)) };
            }
        };
        DEFINE_RULE(AndExpr,    (Term % kw_and)[make_node<ast::OPER_AND>{}])
        DEFINE_RULE(OrExpr,     (AndExpr % kw_or)[make_node<ast::OPER_OR>{}])
    }
    
    namespace burningmime::setmatch
    {
        void parseRuleFluent(const char* buf)
        {
            ast::Node root;
            auto start = buf, end = start + strlen(buf);
            bool success = x3::phrase_parse(start, end, parser::OrExpr, x3::ascii::space, root);
            if (!success || start != end)
                throw std::runtime_error(std::string("Could not parse rule: ") + buf);
            printf("Result of parsing: %s\n=========================\n", start);
            boost::apply_visitor(ast::AstPrinter(), root);
        }
    }
    
    int main()
    {
        burningmime::setmatch::parseRuleFluent(R"#("hello" and ("world" or "planet" or "globe"))#");
    }
    

    https://wandbox.org/permlink/kMSHOHG0pgwGr0zv

    Output:

    Result of parsing: 
    =========================
    And("hello", Or("world", "planet", "globe"))