Search code examples
python email url hyperlink imaplib

How do I retrieve a hyperlink URL from email within python?


I have the following code to get an HTML email as an email.Message object, but I am unsure how to proceed from here to find the URL of a hyperlink given its display name. I have located the URL in payload(0). In addition, the link appears in the email as href=3D, so it doesn't work if I just copy the link and paste it into a browser.

import sys
import imaplib 
import getpass
import email
import datetime
import email


# Open an SSL IMAP connection to Gmail and log in (placeholder credentials).
M = imaplib.IMAP4_SSL('imap.gmail.com')
M.login('email@email.com','password123')

# NOTE: the list() result is never used; rv and boxes are overwritten by select().
rv,boxes = M.list()
rv,boxes = M.select('Inbox/Test1')

# The search result (data) is not used below; message '1' is fetched directly.
rv, data = M.search(None, 'ALL')
typ, msg_data = M.fetch('1', '(RFC822)')

# Parse the fetched message text into an email message object.
msg = email.message_from_string(msg_data[0][1])

# Display text of the hyperlink whose URL is wanted (not used yet).
url_name = 'Click Here'
# Payload item 0 of the message; per the question, the URL is located in it.
html_text = msg.get_payload(0)

Solution

  • This will show every href in the message. You can update the parseLinks class to pick out only the link whose display text you are interested in.

    import imaplib 
    import email
    import quopri
    import HTMLParser
    
    class parseLinks(HTMLParser.HTMLParser):
        """HTML parser that prints the href attribute of every <a> start tag."""

        def handle_starttag(self, tag, attrs):
            """Print the attribute name and its URL for each href on an anchor.

            tag is the tag name and attrs is the list of (name, value)
            pairs that the parser passes in for each start tag.
            """
            # Only anchor tags are of interest; skip everything else early.
            if tag != 'a':
                return
            for name, value in attrs:
                if name == 'href':
                    # Single-argument print(...) produces the same output
                    # under both Python 2 and Python 3.
                    print(name)
                    print(value)
    
    
    # Open an SSL IMAP connection and log in (placeholder credentials).
    M = imaplib.IMAP4_SSL('imap.gmail.com')
    try:
        M.login('email@email.com','password123')
        M.select('Inbox/Test1')

        # The search result is not used here; message '1' is fetched directly.
        rv, data = M.search(None, 'ALL')
        typ, msg_data = M.fetch('1', '(RFC822)')
    finally:
        # Always end the IMAP session, even if a command above fails.
        M.logout()

    msg = email.message_from_string(msg_data[0][1])

    # Look up the first payload item once and reuse it.
    first_part = msg.get_payload(0)

    # Undo quoted-printable encoding (e.g. "href=3D" -> "href=") so the
    # parser sees usable URLs.
    decoded_html = quopri.decodestring(str(first_part))

    linkParser = parseLinks()
    linkParser.feed(decoded_html)