Search code examples
Tags: node.js, email, gmail, gmail-imap, email-parsing

Incorrect gmail parsing with node-imap and mailparser


I'm writing a Node.js script that retrieves unread emails from my gmail inbox using node-imap, parses them with mailparser, and then does some work with the parsed emails. I'm running into a problem where the raw email being received doesn't seem to be parsed correctly by mailparser. I'm not sure if I've done something wrong in calling node-imap or mailparser, or if the email itself is in a bad format for some reason. I've included the code I'm running as well as the output produced.

var Imap = require("imap"),
    MailParser = require("mailparser").MailParser,
    Promise = require("bluebird"),
    request = require("request-promise").defaults({jar: true}),
    log = require("winston"),
    _ = require("underscore"),
    config = require("config").jobs;

// Work on a copy of the e-mail job's logging options so the filename
// override below is applied to the copy, not to the loaded config object.
var logConfig = _.clone(config.logConfig.email);
if (process.env.LOG_DIR) {
    // When LOG_DIR is set, prefix the configured log filename with it.
    logConfig.filename = process.env.LOG_DIR + "/" + logConfig.filename;
}
// Register a winston File transport with those options.
// NOTE(review): the `|| config.logConfig` fallback only takes effect when
// LOG_DIR is unset; with LOG_DIR set, a missing `logConfig` has already been
// dereferenced in the block above.
log.add(log.transports.File, logConfig || config.logConfig);

// `Promise` is bluebird here (see the requires above), not the native one.
Promise.longStackTraces();

var imap = new Imap(config.emailConfig);
// Provides the `*Async` promise-returning variants (openBoxAsync,
// searchAsync) that execute() relies on.
Promise.promisifyAll(imap);

// Handlers are attached before connect() is called; execute() runs once
// when the connection reports "ready".
imap.once("ready", execute);
imap.once("error", function (err) {
    log.error("Connection error: " + err.stack);
});
imap.connect();

/**
 * Opens the INBOX, searches for unseen messages and hands each fetched
 * message to processMessage. The IMAP connection is closed when fetching
 * finishes, when there is nothing to fetch, or when any step fails.
 *
 * Takes no arguments and returns nothing; outcomes are reported via `log`.
 */
function execute() {
    // Second argument is node-imap's openReadOnly flag: false = read-write.
    imap.openBoxAsync("INBOX", false)
        .then(function () {
            return imap.searchAsync(["UNSEEN"]);
        })
        .then(function (results) {
            // node-imap's fetch() throws on an empty id list, so treat
            // "nothing unread" as a normal outcome rather than an error.
            if (!results || results.length === 0) {
                log.info("No unseen messages to fetch.");
                return null;
            }

            // Wrap the fetch emitter in a promise so that a fetch error
            // rejects this chain and reaches the .catch() below. Returning a
            // rejected promise from inside an event handler would be dropped
            // by the emitter and surface only as an unhandled rejection.
            return new Promise(function (resolve, reject) {
                // Two parts are requested per message: the FROM/SUBJECT
                // header fields and the TEXT body.
                var f = imap.fetch(results, {bodies: ["HEADER.FIELDS (FROM SUBJECT)", "TEXT"]});
                f.on("message", processMessage);
                f.once("error", reject);
                f.once("end", function () {
                    log.info("Done fetching all unseen messages.");
                    resolve();
                });
            });
        })
        .then(function () {
            imap.end();
        })
        .catch(function (err) {
            log.error("Error fetching messages: " + err.stack);
            imap.end();
        });
}

/**
 * Feeds one fetched message into a fresh MailParser and logs what the parser
 * reports (headers as they are parsed, then from/subject/text/html).
 *
 * @param {Object} msg - message emitter from the fetch; emits "body" with a
 *     readable stream for each requested part, then "end".
 * @param {number} seqno - message sequence number, used only in log lines.
 */
function processMessage(msg, seqno) {
    log.info("Processing msg #" + seqno);

    var parser = new MailParser();
    parser.on("headers", function (headers) {
        log.info("Header: " + JSON.stringify(headers));
    });
    // Parameter is named `mail` so it does not shadow the outer `msg` emitter.
    parser.on("end", function (mail) {
        log.info("From: " + mail.from);
        log.info("Subject: " + mail.subject);
        log.info("Text: " + mail.text);
        log.info("Html: " + mail.html);
    });

    msg.on("body", function (stream) {
        stream.on("data", function (chunk) {
            // Pass the raw chunk through untouched. Decoding every chunk to a
            // UTF-8 string on its own corrupts any multi-byte character that
            // happens to be split across two chunks.
            parser.write(chunk);
        });
    });
    // All requested parts have been delivered; let the parser finish.
    msg.once("end", function () {
        log.info("Finished msg #" + seqno);
        parser.end();
    });
}

Output:

info: Processing msg #1
info: Finished msg #1
info: Done fetching all unseen messages.
info: Header: {"--001a11339690da942a051d866a04":"","content-type":"text/plain; charset=UTF-8"}
info: From: undefined
info: Subject: undefined
info: Text: Test app mail body!

- Jared

--001a11339690da942a051d866a04
Content-Type: text/html; charset=UTF-8

<div dir="ltr"><div>Test app mail body!<br><br></div>- Jared<br></div>

--001a11339690da942a051d866a04--
Subject: Here is a test appmail email
From: Jared Wolinsky <[email protected]>


info: Html: undefined

Solution

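  • You're fetching only the raw body of the email (`'TEXT'`), with the selected header fields delivered as a separate part, so the parser never sees a well-formed message. mailparser expects the full email — headers followed by the body — in a single stream. To fetch that instead, request the entire message by specifying an empty string `''` in `bodies` rather than `'TEXT'` (for example, `bodies: ''`).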