I am a new programmer who has been writing scripts to automate some of my work responsibilities.
Scope of Problem:
I receive bi-monthly Excel reports from an outside vendor via email. This vendor encrypts its mail with ZixMail, which my company does not use. As a result, I have to read these emails through a Secure Mail Center website, logging in with my username and password. I am trying to establish a connection to this server and download the attached files programmatically.
What I have tried:
Tried an IMAP connection to the "server" (I am not sure whether the website is actually a mail server)
Accessing the site via HTTP using sessions.
The source code from the website where you download the file looks like:
<a rel="external" href="/s/attachment?name=Random Letters and Numbers=emdeon" title="File Title.xlsx">
The href looks nothing like a normal URL and does not end in .xlsx or any other file extension, unlike most of the examples I have seen.
I guess I am really just looking for any ideas, thoughts, help, or solutions.
Here is my HTTP connection code
import requests
import urllib.request
import shutil
import os
# Login-form fields that Main() posts to the login URL; replace the
# placeholder values with your own details.
payload = dict(
    em='Username',
    passphrase='Password',
    validationKey='Key',
)
# Reports whether the URL serves a file rather than a text/HTML page.
def is_downloadable(URL_D, session=None):
    """Return True unless the URL's Content-Type mentions text or html.

    Args:
        URL_D: URL to probe with a HEAD request (redirects are followed).
        session: optional ``requests.Session`` to send the request with.
            Pass the logged-in session so its cookies accompany the
            request; when omitted, the module-level ``requests`` API is
            used, which carries no login state.

    Returns:
        False when the Content-Type header contains ``text`` or ``html``,
        True otherwise (including when the header is absent).
    """
    client = requests if session is None else session
    response = client.head(URL_D, allow_redirects=True)
    # The header may be missing entirely; fall back to '' instead of
    # calling .lower() on None.
    content_type = (response.headers.get('content-type') or '').lower()
    return 'text' not in content_type and 'html' not in content_type


def download_file(URL_D, session=None):
    """Stream the body of URL_D into the module-level ``FileName`` path.

    Args:
        URL_D: URL of the attachment to download.
        session: optional ``requests.Session`` to send the request with
            (see ``is_downloadable``); defaults to the plain ``requests``
            API.

    Returns:
        The path that was written (``FileName``).

    Raises:
        requests.HTTPError: if the server answers with an error status.
    """
    client = requests if session is None else session
    with client.get(URL_D, stream=True) as r:
        r.raise_for_status()
        # The with-block closes the file; no explicit close() is needed.
        with open(FileName, 'wb') as f:
            for chunk in r.iter_content(chunk_size=None):
                if chunk:
                    f.write(chunk)
    return FileName


def Main():
    """Log in, then probe and download the attachment on the same session."""
    with requests.Session() as s:
        login = s.post(URL, data=payload, allow_redirects=True)
        # Stop on an HTTP-level login failure rather than saving an error page.
        login.raise_for_status()
        # Both requests must go through `s`: the cookies set by the login
        # response live on the session, and a bare requests.get()/head()
        # would be sent without them.
        print(is_downloadable(URL_D, session=s))
        download_file(URL_D, session=s)
if __name__ == '__main__':
    # Destination folder and file name for the downloaded attachment.
    Path = "<path>"
    FileName = os.path.join(Path, "Testing File.xlsx")
    # Placeholders for the login form URL and the attachment URL.
    # NOTE: URL_D must be an absolute URL; a relative href taken from the
    # page (e.g. "/s/attachment?name=...") has to be joined onto the site's
    # base URL first, for instance with urllib.parse.urljoin.
    URL = 'login URL'
    URL_D = 'Attachment URL'  # was opened with ' and closed with " (SyntaxError)
    Main()
`is_downloadable(URL_D)` returns False, and the downloaded Excel file is empty and corrupted.
Here is my code for the IMAP attempt:
import email
import imaplib
import os
class FetchEmail():
    """Fetch messages over IMAP4-SSL and save their attachments to disk."""

    # imaplib.IMAP4_SSL instance; assigned in __init__.
    connection = None
    # Set to a message string when a search does not return "OK".
    error = None

    def __init__(self, mail_server, username, password):
        """Connect to mail_server on port 993, log in and select the inbox.

        Raises whatever imaplib / the socket layer raises when the host is
        unreachable or the login is rejected.
        """
        self.connection = imaplib.IMAP4_SSL(mail_server, port=993)
        self.connection.login(username, password)
        # Read-write selection so fetched mails can be flagged as seen.
        self.connection.select('inbox', readonly=False)

    def close_connection(self):
        """
        Close the selected mailbox and log out of the IMAP server
        """
        try:
            self.connection.close()
        finally:
            # close() only leaves the mailbox; logout() ends the session.
            self.connection.logout()

    def save_attachment(self, msg, download_folder):
        """Write every named attachment of one message into download_folder.

        Args:
            msg: a single ``email.message.Message`` (not a list of them).
            download_folder: existing directory to write the files into.

        Returns:
            Path of the last attachment seen, or the string
            "No attachment found." when the message has none. Files that
            already exist at the target path are not overwritten.
        """
        att_path = "No attachment found."
        for part in msg.walk():
            if part.get_content_maintype() == 'multipart':
                continue
            if part.get('Content-Disposition') is None:
                continue
            filename = part.get_filename()
            if not filename:
                # A part can carry a disposition without a filename;
                # os.path.join() would raise on None.
                continue
            # Keep only the final path component so a crafted filename
            # cannot escape download_folder.
            att_path = os.path.join(download_folder, os.path.basename(filename))
            if not os.path.isfile(att_path):
                with open(att_path, 'wb') as fp:
                    fp.write(part.get_payload(decode=True))
        return att_path

    def fetch_messages(self, criteria="(ON 20-Nov-2020)"):
        """Return the messages matching an IMAP search, flagging each as seen.

        Args:
            criteria: IMAP SEARCH criteria string; the default keeps the
                date this script was originally written for.

        Returns:
            A list of ``email.message.Message`` objects (possibly empty).
            When the search itself fails, ``self.error`` is set and an
            empty list is returned.

        Raises:
            SystemExit: if fetching a message fails; the connection is
                closed first.
        """
        emails = []
        result, messages = self.connection.search(None, criteria)
        if result != "OK":
            self.error = "Failed to retreive emails."
            return emails

        # imaplib returns the ids as one bytes object; split() with no
        # argument works on bytes and yields [] for an empty result.
        message_ids = messages[0].split() if messages and messages[0] else []
        for message in message_ids:
            try:
                ret, data = self.connection.fetch(message, '(RFC822)')
            except (imaplib.IMAP4.error, OSError):
                print("No emails to read for date.")
                self.close_connection()
                raise SystemExit
            msg = email.message_from_bytes(data[0][1])
            emails.append(msg)
            self.connection.store(message, '+FLAGS', '\\Seen')
        return emails
def Main():
    """Fetch the matching emails and save the attachments of each one.

    Reads the module-level ``mail_server``, ``username``, ``password`` and
    ``download_folder`` values.
    """
    p = FetchEmail(mail_server, username, password)
    # fetch_messages() returns a list, while save_attachment() walks a
    # single message, so each message is handed over individually.
    for msg in p.fetch_messages():
        p.save_attachment(msg, download_folder)
    p.close_connection()
if __name__ == "__main__":
    # Placeholder connection settings for the IMAP server.
    mail_server, username, password = "Server", "username", "password"
    # Reuses the Path value assigned by the HTTP script's __main__ block.
    download_folder = Path
    Main()
Error Message: TimeoutError: [WinError 10060] A connection attempt failed because the connected party did not properly respond after a period of time, or established connection failed because connected host has failed to respond
Even if I wrote the IMAP script incorrectly, I also tried connecting over IMAP from the command prompt and got the same result.
To recap, all I am looking for is some pointers and ideas to solve this problem. Thank you!
For anyone who stumbles upon this because of a similar issue (probably not many, since I have a really weird habit of making simple things complicated):
I was able to solve the problem by using Selenium WebDriver to log in to the website and navigate through it using the "click" mechanism. This was the only way I was able to download the reports successfully.
import time
import os
import re
import datetime
from selenium.webdriver import Chrome
from selenium.webdriver.chrome.options import Options
# Date values computed once at import time.
today = datetime.date.today()
# First day of the current month.
first = datetime.date(today.year, today.month, 1)
# Current year as digits and current month as its full name.
year = format(today, '%Y')
month = format(today, '%B')
# Abbreviated name of the previous month: one day before the 1st always
# falls in the month before.
lastMonth = format(first - datetime.timedelta(days=1), '%b')
def _is_cutoff_date(date_text, last_month_abbr):
    """Return True when date_text is on or before the 15th of last_month_abbr.

    The month is taken as the first run of three non-digit characters and
    the day as the first run of one or two digits. Text in which either is
    missing is reported as not being the cutoff.
    """
    month_match = re.search(r"(\D{3})", date_text)
    day_match = re.search(r"(\d{1,2})", date_text)
    if month_match is None or day_match is None:
        return False
    return int(day_match.group()) <= 15 and month_match.group(1) == last_month_abbr


def Main():
    """Log in with Chrome and click through the inbox to download reports.

    Visits up to 50 message rows. Rows whose subject contains "Bi Monthly"
    but not "PR" are opened and their xlsx link is clicked, until a row
    dated on or before the 15th of last month is reached.

    NOTE: this uses the Selenium 3 API (``executable_path``,
    ``find_element_by_*``), matching the rest of the script.
    """
    # Local import: only needed to detect the end of the message list.
    from selenium.common.exceptions import NoSuchElementException

    chrome_options = Options()
    chrome_options.add_experimental_option("detach", True)
    # The options must be handed to Chrome, otherwise "detach" is ignored.
    s = Chrome(executable_path="path to chrome extension", options=chrome_options)
    s.get("Website login page")
    s.find_element_by_id("loginname").send_keys('username')
    s.find_element_by_id("password").send_keys('password')
    s.find_element_by_class_name("button").click()

    for i in range(50):
        # Reload the inbox each time: the previous pass may have navigated
        # into a message.
        s.get("landing page post login")
        n = str(i)
        try:
            title = s.find_element_by_id("mailsubject" + n).text
            date = s.find_element_by_id("mailsent" + n).text
        except NoSuchElementException:
            # Presumably the inbox lists fewer than 50 messages; stop
            # instead of crashing on the first missing row.
            break

        if not re.search("Bi Monthly", title) or re.search("PR", title):
            continue

        if _is_cutoff_date(date, lastMonth):
            print("All up to date files have been dowloaded")
            break

        s.find_element_by_id("messageItem" + n).click()
        s.find_element_by_partial_link_text("xlsx").click()

    # Presumably gives the browser time to finish the downloads.
    time.sleep(45)


if __name__ == "__main__":
    Main()