Search code examples
boost, boost-spirit

Parsing complex logs with boost spirit


I am having problems parsing a complex log with boost::spirit. I am not able to get the data the way I want, mostly because the blank skipper messes everything up.

I have the following text file called log.txt:

1:[2017-Feb-18 01:57:55.341100] <INFO, SIMULATING> => CPU | Name: CAR (ID: 0) - ID: 1
2:[2017-Feb-18 01:57:55.344100] <INFO, SENDING_DATA> => IO | Io_out - ABS: 1
3:[2017-Feb-18 01:57:55.344100] <INFO, SIMULATING> => CPU | Status: Ok
4:[2017-Feb-18 01:57:55.346100] <INFO, SIMULATING> => MSS | Random Number: 0x4D080020
5:[2017-Feb-18 01:57:55.346100] <INFO, SIMULATING> => CPU | Entering mode: AUTO
6:[2017-Feb-18 01:57:59.583342] <INFO, SENDING_DATA> => IO | Io_in - BRK: 1
7:[2017-Feb-18 01:58:24.604773] <INFO, RECEIVING_DATA> => DET | Point: 004811
8:[2017-Feb-18 01:58:24.844787] <INFO, SENDING_DATA> => PC | Send msg 1: 0101000000000000
9:[2017-Feb-18 01:58:26.204865] <INFO, RECEIVING_DATA> => PC2 | Receive msg 8: 0801000000000000
10:[2017-Feb-18 01:58:28.706008] <INFO, RECEIVING_DATA> => PC1 | Receive msg 2: 0201000000000000
11:[2017-Feb-18 01:58:29.345045] <INFO, SENDING_DATA> => PC | Send msg 3: 0301000000000000
12:[2017-Feb-18 01:58:29.706065] <INFO, RECEIVING_DATA> => PC1 | Receive msg 4: 04010000F8B8C1A7
13:[2017-Feb-18 01:58:29.846073] <INFO, SENDING_DATA> => PC | Send msg 5: 05010000F8B8C1A7
14:[2017-Feb-18 01:58:32.206208] <INFO, RECEIVING_DATA> => PC1 | Receive msg 6: 06010001F8B8C1A8
15:[2017-Feb-18 01:58:32.366217] <INFO, SENDING_DATA> => PC | Send msg 7: 07010001F8B8C1A8
17:[2017-Feb-18 01:58:32.406220] <INFO, RECEIVING_DATA> => PC2 | Receive msg 6: 06010001F8B8C1A8
18:[2017-Feb-18 01:58:32.875246] <INFO, SENDING_DATA> => PC | Send msg 7: 07010001F8B8C1A9
19:[2017-Feb-18 01:58:32.906248] <INFO, RECEIVING_DATA> => PC1 | Receive msg 6: 06010001F8B8C1A9
20:[2017-Feb-18 01:58:33.386276] <INFO, SENDING_DATA> => PC | Send msg 7: 07010001F8B8C1AA

And I am using the following code to parse it into a Boost.Fusion-adapted struct:

#include <fstream>

#include <boost/config/warning_disable.hpp>
#include <boost/fusion/include/adapt_struct.hpp>
#include <boost/spirit/include/qi.hpp>
#include <boost/spirit/repository/include/qi_seek.hpp>


// One extracted log record. Every field keeps the raw text captured by the
// grammar; nothing is converted to a numeric or date/time type.
struct Message
{
    std::string line;   // digits in front of ":[" (the log line number)
    std::string date;   // meant to receive the date part of the bracketed timestamp
    std::string time;   // meant to receive the time part of the bracketed timestamp
    char id{};          // character following "PC"; initialised so that a
                        // default-constructed Message never holds an indeterminate byte
    std::string hex;    // hex digits following "Receive msg 6:"
};

// Adapt Message as a Boost.Fusion sequence. The members are listed in the
// same order in which the struct declares them: line, date, time, id, hex.
// NOTE(review): presumably this is what allows the Spirit grammar in main()
// to fill a Message from its five captured fields in that order - confirm
// against the Boost.Fusion / Spirit attribute documentation.
BOOST_FUSION_ADAPT_STRUCT(
    Message,
    (std::string, Message::line)
    (std::string, Message::date)
    (std::string, Message::time)
    (char, Message::id)
    (std::string, Message::hex)
)

// Destination container that main() hands to qi::phrase_parse.
std::vector<Message> messages;

// Short aliases for the Boost.Spirit namespaces used by the grammar.
namespace qi = boost::spirit::qi;
namespace repo = boost::spirit::repository;
namespace ascii = boost::spirit::ascii;

// Opens C:/log.txt and runs the Spirit.Qi grammar over it, collecting the
// matches in the global `messages` vector.
//
// Returns 0 when qi::phrase_parse reports success, and 1 when the file cannot
// be opened or the parse fails. (`void main()` is not valid standard C++, so
// the entry point now returns int.)
//
// NOTE(review): the grammar is deliberately left as posted. Two things look
// suspicious and should be confirmed against the Spirit.Qi documentation:
//   * with ascii::blank as the skipper, the adjacent +char_ fields for date
//     and time are presumably not split at the space between them, because
//     the skipper appears to run between repetitions as well; wrapping each
//     field in qi::lexeme[] is the usual remedy;
//   * the leading qi::eol presumably prevents the very first line of the
//     file from matching, since no line break precedes it.
int main()
{
    std::ifstream in("C:/log.txt", std::ios_base::in);
    in >> std::noskipws;//No white space skipping

    if (!in)
    {
        std::cerr << "Error: Could not open input file: " << std::endl;
        return 1;
    }//if

    boost::spirit::istream_iterator first(in);
    boost::spirit::istream_iterator last;

    const bool result = qi::phrase_parse(first, last, 
        *repo::seek[qi::eol
            >> +ascii::char_("0-9") 
            >> ":["
            >> +ascii::char_("0-9a-fA-F-")
            >> +ascii::char_("0-9.:")
            >> "] <INFO, RECEIVING_DATA> => PC"
            >> ascii::char_('1', '2')
            >> "| Receive msg 6:"
            >> +ascii::char_("0-9a-fA-F")
            >> qi::eol],
        ascii::blank,
        messages);

    // Surface the parser's verdict through the exit code instead of dropping it.
    return result ? 0 : 1;
}

When I execute the code, the data ends up badly formatted within the struct. Could someone help me with this issue?


Solution

  • After I added a lexeme, I was able to parse the following one-line log file:

    14:[2017-Feb-18 01:58:32.206208] <INFO, RECEIVING_DATA> => PC1 | Receive msg 6: 06010001F8B8C1A8
    

    This is my code. It still fails to read the original log.txt, but I'm unsure what you are trying to achieve.

    #include <fstream>
    
    #include <boost/config/warning_disable.hpp>
    #include <boost/fusion/include/adapt_struct.hpp>
    #include <boost/spirit/include/qi.hpp>
    #include <boost/spirit/repository/include/qi_seek.hpp>
    #include <boost/spirit/include/phoenix.hpp>
    #include <boost/spirit/include/phoenix_operator.hpp>
    
    // One extracted log record. Every field keeps the raw text captured by the
    // grammar; nothing is converted to a numeric or date/time type.
    struct Message
    {
        std::string line;   // digits in front of ":[" (the log line number)
        std::string date;   // date part of the bracketed timestamp
        std::string time;   // time part of the bracketed timestamp
        char id{};          // character following "PC"; initialised so that a
                            // default-constructed Message never holds an indeterminate byte
        std::string hex;    // hex digits following "Receive msg 6:"
    };
    
    // Adapt Message as a Boost.Fusion sequence. The members are listed in the
    // same order in which the struct declares them: line, date, time, id, hex.
    // NOTE(review): presumably this is what allows the Spirit grammar in main()
    // to fill a Message from its five captured fields in that order - confirm
    // against the Boost.Fusion / Spirit attribute documentation.
    BOOST_FUSION_ADAPT_STRUCT(
        Message,
        (std::string, Message::line)
        (std::string, Message::date)
        (std::string, Message::time)
        (char, Message::id)
        (std::string, Message::hex)
    )
    
    // Destination container that main() hands to qi::phrase_parse and then prints.
    std::vector<Message> messages;
    
    // Short aliases for the Boost.Spirit and Boost.Phoenix namespaces.
    namespace qi = boost::spirit::qi;
    namespace repo = boost::spirit::repository;
    namespace ascii = boost::spirit::ascii;
    namespace ph=boost::phoenix;
    
    // Parses C:/temp/log2.txt as a list of "Receive msg 6" records separated by
    // line breaks, stores them in the global `messages` vector and prints each
    // record as one comma-separated line on stdout.
    //
    // Returns 0 when at least one record was parsed, and 1 when the file cannot
    // be opened or nothing matches. A warning is written to stderr if the parser
    // stopped before consuming the whole input. (`void main()` is not valid
    // standard C++, so the entry point now returns int.)
    int main()
    {
        std::ifstream in("C:/temp/log2.txt", std::ios_base::in);
        in >> std::noskipws;//No white space skipping

        if (!in)
        {
            std::cerr << "Error: Could not open input file: " << std::endl;
            return 1;
        }//if

        boost::spirit::istream_iterator first(in);
        boost::spirit::istream_iterator last;

        // The record grammar is unchanged: qi::lexeme[] around the date keeps the
        // blank skipper out of that field, and `% qi::eol` expects one record per
        // line. NOTE(review): the time and hex fields are still outside lexeme[],
        // and any line that is not a "Receive msg 6" record presumably stops the
        // list - confirm against the Spirit.Qi documentation.
        const bool result = qi::phrase_parse(first, last,
            (+ascii::char_("0-9")
            >> qi::lexeme[":[" >> +ascii::char_("0-9a-fA-F-")]
            >> +ascii::char_("0-9.:")
            >> "] <INFO, RECEIVING_DATA> => PC"
            >> ascii::char_('1', '2')
            >> "| Receive msg 6:"
            >> +ascii::char_("0-9a-fA-F") )
            % qi::eol,
            ascii::blank,
            messages);

        if (!result)
        {
            std::cerr << "Error: No matching log line could be parsed" << '\n';
            return 1;
        }//if

        if (first != last)
        {
            // Not fatal: whatever was parsed up to this point is still printed.
            std::cerr << "Warning: Parsing stopped before the end of the input" << '\n';
        }//if

        // Iterate by const reference so the four strings in each Message are not copied.
        for (const auto& msg : messages) {
            std::cout << msg.line << ", " << msg.date << ", " << msg.time << ", " << msg.id << ", " << msg.hex << '\n';
        }
        return 0;
    }