Search code examples
python email imaplib

Read email in python 3.7 using imaplib with HTML body and attachments in the email


I would really appreciate if someone can help me with this issue.

I have implemented the below code to read "unread emails from gmail inbox". I need to print "To", "From", "Subject", "Body" and "save attachments in a specified location"

I have 2 issues here.

  1. If an email has attachments, the body is printed as a list of message objects instead of text: Body: [<email.message.Message object at 0x026D1050>, <email.message.Message object at 0x02776B70>]. Everything else is printed and the attachments are saved, but the body text itself is NOT printed.

This works fine if no attachment is included.

  2. If the email body contains any styling such as bold, italic, underline or colour, it is not printed as plain readable text.

Example: "Python" is printed as "Python=C2=A0i=", and sometimes differently styled pieces of text are separated by "*".

def get_body(email_message):
    """Return the decoded text body of *email_message*.

    Walks the whole MIME tree, so a body nested inside
    ``multipart/alternative`` (the usual layout once attachments are
    present) is found instead of returning a list of sub-messages.

    The first ``text/plain`` part that is not an attachment wins. If the
    message has no such part, the first ``text/html`` part is returned as
    raw HTML markup. If there is no text part at all, ``''`` is returned.

    The payload is transfer-decoded (quoted-printable / base64) and then
    decoded with the part's declared charset, so sequences such as
    ``=C2=A0`` come back as real characters.
    """
    html_fallback = None
    for part in email_message.walk():
        # Container parts carry no text of their own.
        if part.get_content_maintype() == 'multipart':
            continue
        # A text file sent as an attachment is not the message body.
        if part.get_content_disposition() == 'attachment':
            continue
        content_type = part.get_content_type()
        if content_type not in ('text/plain', 'text/html'):
            continue
        raw = part.get_payload(decode=True)
        if raw is None:
            continue
        charset = part.get_content_charset() or 'utf-8'
        try:
            text = raw.decode(charset, errors='replace')
        except LookupError:
            # The message declares a charset Python does not know.
            text = raw.decode('utf-8', errors='replace')
        if content_type == 'text/plain':
            return text
        if html_fallback is None:
            html_fallback = text
    return html_fallback if html_fallback is not None else ''
def read_email(server, uname, pwd):
    """Print the newest unread inbox message and save its attachments.

    Connects to *server* over IMAP-SSL, logs in with *uname* / *pwd*,
    searches the inbox for UNSEEN messages and fetches the last UID in the
    result. Attachments are written below ``location``; To, From, Subject,
    Date and the body are printed. Prints ``No new email`` when the search
    returns nothing or the fetch yields no message.

    Exceptions raised by ``imaplib`` (for example a failed login)
    propagate to the caller; the connection is logged out either way.
    """
    # The context manager logs out even when login/search/fetch raises.
    with imaplib.IMAP4_SSL(server) as mail:
        mail.login(uname, pwd)
        mail.select("inbox")

        result, data = mail.uid('search', None, '(UNSEEN)')
        inbox_item_list = data[0].split() if data and data[0] else []
        if not inbox_item_list:
            print("No new email")
            return

        most_recent = inbox_item_list[-1]
        result2, email_data = mail.uid('fetch', most_recent, '(RFC822)')
        # A successful fetch yields (envelope, raw_bytes) as first item.
        if not email_data or not isinstance(email_data[0], tuple):
            print("No new email")
            return

        # Parse the raw bytes directly: a message is not guaranteed to be
        # valid UTF-8, so decoding it up front can raise UnicodeDecodeError.
        email_message = email.message_from_bytes(email_data[0][1])

    for part in email_message.walk():
        if part.get_content_maintype() == 'multipart':
            continue
        if part.get('Content-Disposition') is None:
            continue
        filename = part.get_filename()
        if not filename:
            # Inline parts may carry a disposition but no file name.
            continue
        # The name is chosen by the sender; keep only its last component
        # so it cannot point outside the download directory.
        safe_name = os.path.basename(filename.replace('\\', '/'))
        if not safe_name:
            continue
        # NOTE(review): `location` is not defined in the visible snippet;
        # presumably a module-level download directory. Confirm.
        att_path = os.path.join(location, safe_name)

        if not os.path.isfile(att_path):
            content = part.get_payload(decode=True)
            if content is None:
                continue
            with open(att_path, 'wb') as fp:
                fp.write(content)
            print('Downloaded file:', safe_name)

    print('To:\t\t', email_message['To'])
    print('From:\t', email_message['From'])
    print('Subject:', email_message['Subject'])
    print('Date:\t', email_message['Date'])
    if email_message.is_multipart():
        print('Body:\t', get_body(email_message))
    else:
        # Single-part message: the payload itself is the body.
        raw_body = email_message.get_payload(decode=True) or b''
        charset = email_message.get_content_charset() or 'utf-8'
        try:
            body = raw_body.decode(charset, errors='replace')
        except LookupError:
            # The message declares a charset Python does not know.
            body = raw_body.decode('utf-8', errors='replace')
        print('Body:\t', body)
# Poll the mailbox forever, pausing between checks.
while True:
    read_email("imap.gmail.com", "s@gmail.com", "spassword")
    # The sleep must be inside the loop body; otherwise it is never
    # reached and the server is polled continuously with no delay.
    time.sleep(10)

Many thanks


Solution

  • I am new to Python, and this is the complete working code I wrote to read unseen emails. You can print whichever elements you need. It works for Gmail and Office 365, and the script runs every 10 seconds. It may also work for other email providers if you pass their server name and credentials. Hope this helps.

    import email
    import imaplib
    import os
    import html2text
    import time
    detach_dir = 'locationWhereYouWantToSaveYourAttachments'
    
    
    def get_body(email_message):
        """Return the decoded text body of *email_message*.

        Walks the whole MIME tree, so a body nested inside
        ``multipart/alternative`` is found instead of returning a list of
        sub-messages, and a non-multipart message is handled as well.

        The first ``text/plain`` part that is not an attachment wins. If
        the message has no such part, the first ``text/html`` part is
        returned as raw HTML markup. If there is no text part at all,
        ``''`` is returned.

        The payload is transfer-decoded (quoted-printable / base64) and
        then decoded with the part's declared charset.
        """
        html_fallback = None
        for part in email_message.walk():
            # Container parts carry no text of their own.
            if part.get_content_maintype() == 'multipart':
                continue
            # A text file sent as an attachment is not the message body.
            if part.get_content_disposition() == 'attachment':
                continue
            content_type = part.get_content_type()
            if content_type not in ('text/plain', 'text/html'):
                continue
            raw = part.get_payload(decode=True)
            if raw is None:
                continue
            charset = part.get_content_charset() or 'utf-8'
            try:
                text = raw.decode(charset, errors='replace')
            except LookupError:
                # The message declares a charset Python does not know.
                text = raw.decode('utf-8', errors='replace')
            if content_type == 'text/plain':
                return text
            if html_fallback is None:
                html_fallback = text
        return html_fallback if html_fallback is not None else ''
     def two_way_email(server, uname, pwd):
         """Print the newest unread inbox message and save its attachments.

         Connects to *server* over IMAP-SSL, logs in with *uname* / *pwd*,
         searches the inbox for UNSEEN messages and fetches the last UID in
         the result. Attachments are written below ``detach_dir``. To,
         From, Subject and Date are printed, followed by the body:

         * multipart message: every ``text/plain`` part without a
           Content-Disposition header, or, when there is none, the
           ``text/html`` parts converted to text with ``html2text``;
         * single-part message: the Thread-Index header and the payload
           (converted with ``html2text`` when it is ``text/html``).

         Prints ``No new email`` when the search returns nothing or the
         fetch yields no message. Exceptions raised by ``imaplib`` (for
         example a failed login) propagate to the caller; the connection
         is logged out either way.
         """
         # The context manager logs out even when login/search/fetch raises.
         with imaplib.IMAP4_SSL(server) as mail:
             mail.login(uname, pwd)
             mail.select("inbox")

             result, data = mail.uid('search', None, '(UNSEEN)')
             inbox_item_list = data[0].split() if data and data[0] else []
             if not inbox_item_list:
                 print("No new email")
                 return

             most_recent = inbox_item_list[-1]
             result2, email_data = mail.uid('fetch', most_recent, '(RFC822)')
             # A successful fetch yields (envelope, raw_bytes) as first item.
             if not email_data or not isinstance(email_data[0], tuple):
                 print("No new email")
                 return

             # Parse the raw bytes directly: a message is not guaranteed to
             # be valid UTF-8, so decoding it up front can raise.
             email_message = email.message_from_bytes(email_data[0][1])

         _save_attachments(email_message)

         print('To:\t\t', email_message['To'])
         print('From:\t', email_message['From'])
         print('Subject:', email_message['Subject'])
         print('Date:\t', email_message['Date'])

         if email_message.is_multipart():
             plain_bodies = []
             html_bodies = []
             for part in email_message.walk():
                 # Parts with a disposition are attachments/inline files.
                 if part.get('Content-Disposition') is not None:
                     continue
                 content_type = part.get_content_type()
                 if content_type == 'text/plain':
                     plain_bodies.append(_decode_text_part(part))
                 elif content_type == 'text/html':
                     html_bodies.append(_html_to_text(_decode_text_part(part)))
             # Prefer the plain-text alternative; fall back to converted
             # HTML so HTML-only mails still show a body.
             for body in plain_bodies or html_bodies:
                 print('Body:\t', body)
         else:
             print('Thread-Index:\t', email_message['Thread-Index'])
             text = _decode_text_part(email_message)
             if email_message.get_content_type() == 'text/html':
                 text = _html_to_text(text)
             print(text)


     def _decode_text_part(part):
         """Return *part*'s payload as ``str``, transfer- and charset-decoded."""
         raw = part.get_payload(decode=True)
         if raw is None:
             return ''
         charset = part.get_content_charset() or 'utf-8'
         try:
             return raw.decode(charset, errors='replace')
         except LookupError:
             # The message declares a charset Python does not know.
             return raw.decode('utf-8', errors='replace')


     def _html_to_text(html):
         """Convert an HTML string to plain text with links suppressed."""
         converter = html2text.HTML2Text()
         converter.ignore_links = True
         return converter.handle(html)


     def _save_attachments(email_message):
         """Write each named attachment of *email_message* into ``detach_dir``."""
         for part in email_message.walk():
             if part.get_content_maintype() == 'multipart':
                 continue
             if part.get('Content-Disposition') is None:
                 continue
             filename = part.get_filename()
             if not filename:
                 # Inline parts may carry a disposition but no file name.
                 continue
             # The name is chosen by the sender; keep only its last
             # component so it cannot point outside detach_dir.
             safe_name = os.path.basename(filename.replace('\\', '/'))
             if not safe_name:
                 continue
             att_path = os.path.join(detach_dir, safe_name)
             # Existing files are left untouched, as before.
             if os.path.isfile(att_path):
                 continue
             content = part.get_payload(decode=True)
             if content is None:
                 continue
             with open(att_path, 'wb') as fp:
                 fp.write(content)
             print('Downloaded file:', safe_name)
    # Poll both mailboxes forever, pausing 10 seconds between rounds.
    # NOTE(review): credentials are hard-coded placeholders; load real ones
    # from the environment or a secrets store rather than source code.
    while True:
        two_way_email("outlook.office365.com", "yourOffice365EmailAddressHere", "yourpassword")
        two_way_email("imap.gmail.com", "yourGmailAddressHere", "yourPassword")
        time.sleep(10)