Search code examples
jakarta-mail mime-message

Extract MIME encoded content from email using JavaMail


I have an email message that has contentType: TEXT/PLAIN; charset="=?utf-8?B?ICJVVEYtOCI=?="

What do I need to change in extractContent to eliminate the java.io.UnsupportedEncodingException: =?utf-8?B?ICJVVEYtOCI=?=

I have tried the following:

import java.io.IOException;
import javax.mail.BodyPart;
import javax.mail.Message;
import javax.mail.MessagingException;
import javax.mail.internet.MimeMultipart;

/**
 * Extracts the first textual body found in a (possibly nested) MIME multipart.
 */
public class ExtractContentText
{
    /**
     * Walks the parts of the given multipart in order, descending into nested
     * multiparts, and returns the first part content that is a {@link String}.
     *
     * @param mimeMultipartContent the multipart whose parts are searched
     * @return the first String content found; never {@code null}
     * @throws MessagingException if no part yields String content, or if reading
     *         a part fails (the original failure is attached as the cause)
     */
    private static String extractContent(MimeMultipart mimeMultipartContent) throws MessagingException
    {
        String msgContentText = null;

        Exception cause = null;

        try
        {
            int numParts = mimeMultipartContent.getCount();

            for (int partNum = 0; msgContentText == null
                    && partNum < numParts; partNum++)
            {
                BodyPart part = mimeMultipartContent.getBodyPart(partNum);
                System.out.println("BodyContent.PartNum: "
                        + partNum + " has contentType:  " + part.getContentType());

                // TODO: Eliminate java.io.UnsupportedEncodingException: =?utf-8?B?ICJVVEYtOCI=?=
                // Fetch the content once and branch on its runtime type, rather than
                // fetching it again and relying on a ClassCastException.
                Object partContent = part.getContent();
                if (partContent instanceof MimeMultipart)
                {
                    try
                    {
                        System.out.println("Processing inner MimeMultipart");
                        msgContentText = extractContent((MimeMultipart) partContent);
                        System.out.println("Using content found in inner MimeMultipart");
                    }
                    catch (MessagingException e)
                    {
                        // Best effort: a nested multipart without usable text must not
                        // stop the search through the remaining sibling parts.
                        System.out.println("Ignoring failure while trying to extract message content for inner MimeMultipart: "
                                + e.getMessage());
                    }
                }
                else if (partContent instanceof String)
                {
                    msgContentText = (String) partContent;
                    System.out.println("PartNum: "
                            + partNum + " content [" + msgContentText + "]");
                }
                else
                {
                    // Not a String (e.g. an attachment stream): keep looking.
                    String contentType = (partContent == null)
                            ? "null" : partContent.getClass().getName();
                    System.out.println("Ignoring Non-String message content: "
                            + contentType);
                }
            }
        }
        catch (MessagingException | IOException e)
        {
            cause = e;
            System.out.println("Failure while trying to extract message content: "
                    + e.getMessage());
        }

        if (msgContentText != null)
        {
            return msgContentText;
        }

        // Fail if content could not be extracted. This is done after the try/catch
        // rather than in a finally block, so that an unexpected unchecked exception
        // propagates as-is instead of being replaced by this one.
        MessagingException ex;
        if (cause == null)
        {
            ex = new MessagingException("Message content could not be extracted");
        }
        else
        {
            ex = new MessagingException("Message content could not be extracted - "
                    + cause.getMessage(), cause);
        }
        System.out.println(ex);
        throw ex;
    }

    /**
     * Example entry point: prints the extracted text of a message.
     *
     * @param args unused
     * @throws MessagingException if the content could not be extracted
     * @throws IOException if the message content could not be read
     */
    public static void main(String[] args) throws MessagingException, IOException
    {
        // Placeholder: 'm' must be replaced with a real Message before running;
        // as written it is null and the call below cannot succeed.
        Message m = null;
        System.out.println(extractContent((MimeMultipart) m.getContent()));
    }
}

Solution

  • See the JavaMail FAQ: Why do I get the UnsupportedEncodingException when I invoke getContent() on a bodypart that contains text data? You can use the javax.mail.Part.getInputStream() to gain access to the raw bytes and perform your own decoding.

    To repair an invalid content type header you can use javax.mail.internet.ContentType to extract the parameter and javax.mail.internet.MimeUtility.decodeText to decode unstructured headers.

    /**
     * Demonstration only: parses a hard-coded sample Content-Type whose charset
     * parameter is an encoded word, and prints the base type, the raw charset
     * parameter and the decoded charset parameter.
     *
     * @param mp          the part the content type belongs to (not used by this demo)
     * @param contentType the content type header value (returned unchanged)
     * @return {@code contentType}, unmodified
     */
    public static String cleanContentType(MimePart mp, String contentType) {
        String ct = "TEXT/PLAIN; charset=\"=?utf-8?B?ICJVVEYtOCI=?=\"";
        try {
            ContentType content = new ContentType(ct);
            String rawCharset = content.getParameter("charset");
            System.out.println(content.getBaseType());
            System.out.println(rawCharset);
            System.out.println(MimeUtility.decodeText(rawCharset));
        } catch (MessagingException | UnsupportedEncodingException ex) {
            // Parsing and decoding both throw checked exceptions; report and carry on.
            System.out.println("Could not parse sample content type: " + ex.getMessage());
        }
        // The method is declared to return a String, so hand back the input as-is.
        return contentType;
    }
    

    The javax.mail.internet package documentation lists properties that can be used to change some of the default behaviors. You can set the system property mail.mime.parameters.strict to false to relax some of the rules on content type. You can also set mail.mime.contenttypehandler to the fully qualified name of a class that can fix problems with the content type. The custom class must contain a method with the following signature:

        /**
         * Repairs a Content-Type header whose {@code charset} parameter was
         * wrongly written as an encoded word, e.g.
         * {@code charset="=?utf-8?B?ICJVVEYtOCI=?="}.
         *
         * @param mp          the part the header belongs to (not used)
         * @param contentType the raw Content-Type header value; may be {@code null}
         * @return the content type with a decoded, unquoted and trimmed charset,
         *         or {@code contentType} unchanged when there is nothing to repair
         *         or the header cannot be parsed
         */
        public static String cleanContentType(MimePart mp, String contentType) {
            if (contentType == null) {
                // No header present: nothing to clean.
                return null;
            }
            try {
                ContentType content = new ContentType(contentType);
                String rawCharset = content.getParameter("charset");
                if (rawCharset == null) {
                    // No charset parameter: decodeText(null) would fail, so leave as-is.
                    return contentType;
                }
                // The sample value decodes to ' "UTF-8"' (leading space plus quotes),
                // so strip the quotes AND the surrounding whitespace.
                String charset = MimeUtility.decodeText(rawCharset).replace("\"", "").trim();
                if (charset.isEmpty()) {
                    return contentType;
                }
                content.setParameter("charset", charset);
                return content.toString();
            } catch (MessagingException | UnsupportedEncodingException ex) {
                // Unparseable header or undecodable word: fall back to the original value.
                return contentType;
            }
        }