Compilation performance issues with boost spirit x3

This is a much too delayed follow on question to something I have previously asked. Basically, I have a fairly parameterized parser for a series of languages written in boost spirit x3. @sehe greatly helped me get even that far: Boost Spirit x3 -- Parameterizing Parsers with other Parsers

Now that I am there, though, I have some strange (to me) exploding compilation times. I cannot post the original code, so have been working on a 'minimal' example and have finally found at least one of the culprits. Here's my code:

#include <boost/spirit/home/x3.hpp>
#include <boost/spirit/home/x3/support/ast/variant.hpp>
#include <boost/fusion/adapted.hpp>
#include <iomanip>
#include <iostream>

namespace x3 = boost::spirit::x3;
using x3::char_;
using x3::int_;
using x3::lexeme;

struct boring1 { std::string name; int i; };
struct boring2 { std::string name; int i; };

struct sub1 { std::string name; int i; };
struct sub2 { std::string name; int i; };
struct sub3 { std::string name; int i; };
struct sub4 { std::string name; int i; };

struct sub : x3::variant<sub1,sub2,sub3,sub4> {
  sub()=default;
  sub(const sub&)=default;
  sub& operator=(const sub&)=default;
  using base_type::base_type;
  using base_type::operator=;
};

struct leaf1 { char tag; int i; };
struct leaf2 { char tag; int i, j, k; };
struct leaf3 { std::string name; int i, j; };

struct leaf23 : x3::variant<leaf2, leaf3> {
  leaf23(const leaf23&)=default;
  leaf23()=default;
  leaf23& operator=(const leaf23&)=default;
  using base_type::base_type;
  using base_type::operator=;
};

template <typename T> struct node1 { std::string name; T leaf; };
template <typename T> struct node2 { std::string name; T leaf; };
template <typename T> struct node3 { std::string name; T leaf; };
template <typename T> struct node4 { std::string name; std::vector<T> leafs; };
template <typename T> struct node5 { std::string name; std::vector<T> leafs; };
template <typename T> struct node6 { std::string name; std::vector<T> leafs; };

template <typename T> struct cont1 { int i; std::string name; std::vector<T> ts; };
template <typename T> struct cont2 { unsigned i; char t; std::string name; std::vector<T> ts; };
template <typename T, typename U> struct cont3 {
  std::vector<T> ts; std::string name; std::vector<U> us;
};
template <typename T, typename U> struct cont4 {
  std::string name; std::vector<U> us; std::vector<T> ts;
};

template <typename T> struct program { int name; std::vector<T> stmts; };

BOOST_FUSION_ADAPT_STRUCT(boring1, name, i)
BOOST_FUSION_ADAPT_STRUCT(boring2, name, i)

BOOST_FUSION_ADAPT_STRUCT(sub1, name, i)
BOOST_FUSION_ADAPT_STRUCT(sub2, name, i)
BOOST_FUSION_ADAPT_STRUCT(sub3, name, i)
BOOST_FUSION_ADAPT_STRUCT(sub4, name, i)

BOOST_FUSION_ADAPT_STRUCT(leaf1, tag, i)
BOOST_FUSION_ADAPT_STRUCT(leaf2, tag, i, j, k)
BOOST_FUSION_ADAPT_STRUCT(leaf3, name, i, j)

BOOST_FUSION_ADAPT_TPL_STRUCT((T), (node1)(T), name, leaf)
BOOST_FUSION_ADAPT_TPL_STRUCT((T), (node2)(T), name, leaf)
BOOST_FUSION_ADAPT_TPL_STRUCT((T), (node3)(T), name, leaf)
BOOST_FUSION_ADAPT_TPL_STRUCT((T), (node4)(T), name, leafs)
BOOST_FUSION_ADAPT_TPL_STRUCT((T), (node5)(T), name, leafs)
BOOST_FUSION_ADAPT_TPL_STRUCT((T), (node6)(T), name, leafs)

BOOST_FUSION_ADAPT_TPL_STRUCT((T), (cont1)(T), i, name, ts)
BOOST_FUSION_ADAPT_TPL_STRUCT((T), (cont2)(T), i, t, name, ts)
BOOST_FUSION_ADAPT_TPL_STRUCT((T) (U), (cont3)(T)(U), ts, name, us)
BOOST_FUSION_ADAPT_TPL_STRUCT((T) (U), (cont4)(T)(U), name, us, ts)

BOOST_FUSION_ADAPT_TPL_STRUCT((T), (program)(T), name, stmts)

template <typename T> struct as_type {
  template <typename Expr>
  auto operator[](Expr&& expr) const {
    return x3::rule<struct _, T>{"as"} = x3::as_parser(std::forward<Expr>(expr));
  }
};
template <typename T> static const as_type<T> as = {};

namespace Generic {
  x3::rule<class boring1, boring1> const boring1_p = "boring 1 parser";
  auto const boring1_p_def = lexeme[ +x3::alnum ] >> int_;

  x3::rule<class boring2, boring2> const boring2_p = "boring 2 parser";
  auto const boring2_p_def = lexeme[ +x3::alnum ] >> int_;

  BOOST_SPIRIT_DEFINE( boring1_p, boring2_p )

  x3::rule<class sub1, sub1> const sub1_p = "sub 1 parser";
  auto const sub1_p_def = lexeme[ +x3::alnum ] >> int_;
  x3::rule<class sub2, sub2> const sub2_p = "sub 2 parser";
  auto const sub2_p_def = lexeme[ +x3::alnum ] >> int_;
  x3::rule<class sub3, sub3> const sub3_p = "sub 3 parser";
  auto const sub3_p_def = lexeme[ +x3::alnum ] >> int_;
  x3::rule<class sub4, sub4> const sub4_p = "sub 4 parser";
  auto const sub4_p_def = lexeme[ +x3::alnum ] >> int_;
  x3::rule<class sub, sub> const sub_p = "combined sub parser";
  auto const sub_p_def = sub1_p | sub2_p | sub3_p | sub4_p;

  BOOST_SPIRIT_DEFINE( sub1_p, sub2_p, sub3_p, sub4_p, sub_p )

  template <typename T> auto leaf_p = x3::eps(false);

  template <typename T> auto leaves() {
    return as<std::vector<T>>[ leaf_p<T> % ',' ];
  }

  template <> auto leaf_p<leaf1> = as<leaf1>[lexeme[ (char_('t') | char_('u')) >> int_ ]];
  template <> auto leaf_p<leaf2> = as<leaf2>[lexeme[ char_('t') >> int_ >> "_" >> int_ >> "_" >> int_]];
  template <> auto leaf_p<leaf3> = as<leaf3>[lexeme[ (+x3::alnum) >> "_" >> int_ >> "_" >> int_]];

  template <> auto leaf_p<leaf23> = as<leaf23>[leaf_p<leaf2> | leaf_p<leaf3>];
    
  template <typename T> auto parseNode1() {
    return as<node1<T>>[ (x3::string("xx") | x3::string("yy") | x3::string("zz")) >> leaf_p<T>];
  }

  template <typename T> auto parseNode2() {
    return as<node2<T>>[ x3::string("node1") >> leaf_p<T>];
  }

  template <typename T> auto parseNode3() {
    return as<node3<T>>[ x3::string("node3") >> leaf_p<T>];
  }

  template <typename T> auto parseNode4() {
    return as<node4<T>>[ x3::string("node4") >> leaves<T>()];
  }

  template <typename T> auto parseNode5() {
    return as<node5<T>>[ x3::string("node5") >> leaves<T>()];
  }

  template <typename T> auto parseNode6() {
    return as<node6<T>>[ x3::string("node6") >> leaves<T>()];
  }

  struct With1 {};
  struct With2 {};

  x3::rule<class header1,std::string> const header1 = "header 1";
  auto const header1_def = lexeme[+x3::alnum][([](auto& ctx) {
    std::string v = x3::_attr(ctx);
    x3::get<With1>(ctx) = v;
    x3::_val(ctx) = v;
  })];

  x3::rule<class footer1,std::string> const footer1 = "footer 1";
  auto const footer1_def = lexeme[+x3::alnum][([](auto& ctx) {
    std::string v = x3::get<With1>(ctx);
    x3::_pass(ctx) = v == x3::_attr(ctx);
  })];

  x3::rule<class header2,std::string> const header2 = "header 2";
  auto const header2_def = lexeme[+x3::alnum][([](auto& ctx) {
    std::string v = x3::_attr(ctx);
    x3::get<With2>(ctx) = v;
    x3::_val(ctx) = v;
  })];

  x3::rule<class footer2,std::string> const footer2 = "footer 2";
  auto const footer2_def = lexeme[+x3::alnum][([](auto& ctx) {
    std::string v = x3::get<With2>(ctx);
    x3::_pass(ctx) = v == x3::_attr(ctx);
  })];

  BOOST_SPIRIT_DEFINE(header1, footer1, header2, footer2);

  template <typename T>
  auto const parseCont1(x3::rule<class c1, T> const& parser) {
    return as<cont1<T>>[ int_ >> x3::string("cont1") >> "{" >> +parser >> "}" ];
  }

  template <typename T>
  auto const parseCont2(x3::rule<class c1, T> const& parser) {
    return as<cont2<T>>[ x3::bin >> char_ >> header1 >> "{" >> +parser >> "}" >> x3::omit[footer1] ];
  }

  template <typename T, typename U>
  auto const parseCont3(x3::rule<class c1, U> const& parser) {
    return as<cont3<T,U>>[ leaves<T>() >> x3::string("cont3") >> "{" >> +parser >> "}" ];
  }

  auto vectorize = [](auto &ctx){ _val(ctx) = { _attr(ctx) }; };

  template <typename stmt>
  auto cont4_simple_body(x3::rule<class c1, stmt> const& parser) {
    return as<std::vector<stmt>>[parser[ vectorize ]];
  }

  /*  What follows are three different possible implementations of cont4_body, which
   *  is used in the parseCont4 rule below.
   *   - the first parses wrong, but is fast to compile (I don't understand why this is
   *     the case, but in tests from my real language, it produces empty vectors,
   *     always
   *   - the second is correct, but slower (maybe 2x)
   *   - the third is also correct, but slower than the above (another 2-3x)
   */

  // wrong but fast compile:  ~64s when just Language1 is enabled below in main
  // template <typename stmt>
  // auto cont4_body(x3::rule<class c1, stmt> const& parser) {
  //   return 
  //     as<std::vector<stmt>>
  //     [ "{" >> +parser >> "}"
  //     | parser[vectorize]
  //     ];
  // }
  
  // correct, slower compile: ~189s when just Language1 is enabled below in main
  // template <typename stmt>
  // auto cont4_body(x3::rule<class c1, stmt> const& parser) {
  //   return 
  //     as<std::vector<stmt>>
  //     [ "{" >> +parser >> "}"
  //     | cont4_simple_body(parser)
  //     ];
  // }


  // correct, even slower compile: ~424s when just Language1 is enabled below in main
  template <typename stmt>
  auto cont4_body(x3::rule<class c1, stmt> const& parser) {
    return 
        as<std::vector<stmt>>["{" >> + parser >> "}"]
      | as<std::vector<stmt>>[cont4_simple_body(parser)]
      ;
  }

  template <typename T, typename U>
  auto const parseCont4(x3::rule<class c1, U> const& parser) {
    return as<cont4<T,U>>[ header2  >> cont4_body(parser) >> leaves<T>() >> x3::omit[footer2]];
  }
}

namespace Language1 {

  struct lang1_stmt;

  struct lang1_stmt : x3::variant<
    boring1,
    boring2,
    sub,
    node1<leaf1>,
    node2<leaf1>,
    node3<leaf1>,
    node4<leaf1>,
    node5<leaf1>,
    node6<leaf1>,
    x3::forward_ast<cont1<lang1_stmt>>,
    x3::forward_ast<cont2<lang1_stmt>>,
    x3::forward_ast<cont3<leaf1, lang1_stmt>>,
    x3::forward_ast<cont4<leaf1, lang1_stmt>>
    > {
    lang1_stmt(const lang1_stmt&)=default;
    lang1_stmt()=default;
    lang1_stmt& operator=(const lang1_stmt&)=default;
    using base_type::base_type;
    using base_type::operator=;
  };

  x3::rule<class Generic::c1, lang1_stmt> lang1_stmt_p = "lang1 stmt parser";
  auto const lang1_stmt_p_def =
    Generic::boring1_p
    | Generic::boring2_p
    | Generic::sub_p
    | Generic::parseNode1<leaf1>()
    | Generic::parseNode2<leaf1>()
    | Generic::parseNode3<leaf1>()
    | Generic::parseNode4<leaf1>()
    | Generic::parseNode5<leaf1>()
    | Generic::parseNode6<leaf1>()
    | Generic::parseCont1<lang1_stmt>( lang1_stmt_p )
    | Generic::parseCont2<lang1_stmt>( lang1_stmt_p )
    | Generic::parseCont3<leaf1, lang1_stmt>( lang1_stmt_p )
    | Generic::parseCont4<leaf1, lang1_stmt>( lang1_stmt_p )
    ;

  BOOST_SPIRIT_DEFINE( lang1_stmt_p );

  std::string with1;
  std::string with2;

  auto const stmts
    = x3::rule<class grammar_stmts1, std::vector<lang1_stmt>> { "Lang1 statements" }
    = x3::with<Generic::With1>(with1)
       [x3::with<Generic::With2>(with2)[ +lang1_stmt_p ]];

  auto const grammar
    = x3::rule<class grammar1, program<lang1_stmt>> { "Lang1 grammar" }
    = x3::skip( x3::space )[x3::int_ >> stmts];
}

namespace Language2 {
  struct lang2_stmt;

  struct lang2_stmt : x3::variant<
    boring1,
    boring2,
    sub,
    node1<leaf23>,
    node2<leaf23>,
    node3<leaf23>,
    node4<leaf23>,
    node5<leaf23>,
    node6<leaf23>,
    x3::forward_ast<cont1<lang2_stmt>>,
    x3::forward_ast<cont2<lang2_stmt>>,
    x3::forward_ast<cont3<leaf23, lang2_stmt>>,
    x3::forward_ast<cont4<leaf23, lang2_stmt>>
    > {
    lang2_stmt(const lang2_stmt&)=default;
    lang2_stmt()=default;
    lang2_stmt& operator=(const lang2_stmt&)=default;
    using base_type::base_type;
    using base_type::operator=;
  };

  x3::rule<class Generic::c1, lang2_stmt> lang2_stmt_p = "lang2 stmt parser";
  auto const lang2_stmt_p_def =
    Generic::boring1_p
    | Generic::boring2_p
    | Generic::sub_p
    | Generic::parseNode1<leaf23>()
    | Generic::parseNode2<leaf23>()
    | Generic::parseNode3<leaf23>()
    | Generic::parseNode4<leaf23>()
    | Generic::parseNode5<leaf23>()
    | Generic::parseNode6<leaf23>()
    | Generic::parseCont1<lang2_stmt>( lang2_stmt_p )
    | Generic::parseCont2<lang2_stmt>( lang2_stmt_p )
    | Generic::parseCont3<leaf23, lang2_stmt>( lang2_stmt_p )
    | Generic::parseCont4<leaf23, lang2_stmt>( lang2_stmt_p )
    ;

  BOOST_SPIRIT_DEFINE( lang2_stmt_p );

  std::string with1;
  std::string with2;

  auto const stmts
    = x3::rule<class grammar_stmts2, std::vector<lang2_stmt>> { "Lang2 statements" }
    = x3::with<Generic::With1>(with1)
       [x3::with<Generic::With2>(with2)[ +lang2_stmt_p ]];

  auto const grammar
    = x3::rule<class grammar2, program<lang2_stmt>> { "Lang2 grammar" }
    = x3::skip( x3::space )[x3::int_ >> stmts];
}

void test(auto const& grammar, std::string text, auto ast) {
  auto f = text.begin(), l = text.end();
  std::cout << "\nParsing: " << std::quoted(text, '\'') << "\n";
  if (boost::spirit::x3::parse(f, l, grammar, ast)) {
    std::cout << "Success!\n";
  } else {
    std::cout << " -- Failed " << std::quoted(text, '\'') << "\n";
  }
}

int main() {
  test( Language1::grammar,
        "",
        program<Language1::lang1_stmt>{});

  // test(
  //      Language2::grammar,
  //      "",
  //      program<Language2::lang2_stmt>{});
}

You can see in this example that I have defined two languages. They are only different in the leaves of the ast, where targets of the language in Language2 are more complex than in Language1. I have replicated all of the features I am using in my real parser, but have created stupid fake parsers for different statement types. Of note: the performance problems I am seeing are greatly exaggerated in Language2. I have effectively only linked to Language1 in my main function to save time in testing.

The problem seems to be in my encoding of conditional statements, in particular cond4. Here, a conditional can optionally have braces. If there are none, then only a single statement is enclosed. Notice the 3 different implementations of the parser for the conditional body I have included. The first actually incorrectly parses (I mean, the parse succeeds, but the vectors are always empty!). The next two parse correctly but greatly increase compile times.

Any ideas what is going on here?

Solution

The exploding compile times usually hit me when I'm using locally-defined "anonymous" rules; by which I mean rules that are not tied to their parser implementation using the BOOST_SPRIIT_* macros via their type tag parameter, but rather implicitly.

The catch is that the "implicit" rule definition is carried into the context type, and that makes for bad worst-case template instantiation behaviour especially in the face of

recursive rules
locally changing skippers (lexeme[]. skip[], noskip[])
any other context decorating directive (x3::with<Tag>(expr) [p] or anything that builds on it)

So, in principle a good way to ward off the explosion is to diligently separate the rule instantion from the declaration site (using the macros), a process usually known as "compilation firewalling" - for the reason that it contains implementation details inside separate translation units.

There's mild tension between this approach and the idea of generating parsers entirely functionally, because a prototypical parser factory looks a bit like

 auto makeParser(auto arg) {
       return 
           x3::rule<struct tag_type, Attribute> {"aarg"}
           = something >> arg >> x3::eps;
 }

indeed, returning precisely such an "unnamed" rule that is tightly coupled to its definition.

I don't immediately see a big reason why you could not have your cake and eat it too by "allowing" a good deal of parsers to be composed in this way - accepting the higher compilation overhead - and decoupling at set points (high-level rules that are entry points for recursion, e.g.). Just keep in mind that these points will hardcode the context - including any skippers and/or (custom) directive state - so it may alter the behaviour. I'd wager if that happens, the result will just be more clear and easier to maintain anyways.