I am trying to extract attachments (zip) from email messages (.msg) and then extract the only one doc (xls) from the zip file.
After extracting the xls i would like to rename it based on keywords in the .msg (i.e. if it contains 'Alipay' then append '_alipay' in the xls file name else '_tng')
import os
import extract_msg
import fnmatch
import zipfile
import glob
Tk().withdraw()
directory = askdirectory(title='Yo select your folder please')
input_dir = directory + "/"
os.chdir(input_dir)
pwd = '123'
keyword = '*Alipay*'
for email in os.listdir(input_dir):
if email.endswith('.msg'):
email_path = os.path.join(input_dir, email)
if fnmatch.fnmatch(email, keyword):
trans_type = '_alipay'
else:
trans_type = '_tng'
msg = extract_msg.Message(email)
msg.save_attachments()
msg.close()
for em_zip in glob.glob('*.zip'):
zip_path = os.path.join(input_dir, em_zip)
with zipfile.ZipFile(zip_path, 'r') as zf:
zf.extractall(pwd=bytes(pwd, 'utf-8'))
os.rename(zip_path, os.path.splitext(zip_path)[0] + trans_type + '.xls')
The error message i got is
Traceback (most recent call last): File "C:\Users\cheeh\Desktop\PyCharmPortable\PycharmProjects\ocrpdf\Alipay.py", line 71, in os.rename(zip_path, os.path.splitext(zip_path)[0] + trans_type + '.xls') PermissionError: [WinError 32] The process cannot access the file because it is being used by another process: 'C:/Users/cheeh/Desktop/cimb/CDFSB60006039760617520201221.zip' -> 'C:/Users/cheeh/Desktop/cimb/CDFSB60006039760617520201221_alipay.xls'
Thanks
I went for a manual and longer way to achieve the outcome, though will appreciate if any pros can optimize the code as it takes a lot of time if there are lots of emails
keyword = '*Alipay*'
os.makedirs(os.path.join(input_dir, '_Alipay'))
os.makedirs(os.path.join(input_dir, '_TnG'))
for email in glob.iglob('*.msg'):
email_path = os.path.join(input_dir, email)
if fnmatch.fnmatch(email_path, keyword):
trans_type = '_Alipay'
else:
trans_type = '_TnG'
shutil.move(email_path, os.path.join(input_dir, trans_type))
for emails in glob.iglob('*/*.msg', recursive=True):
emails_path = os.path.join(input_dir, emails)
if fnmatch.fnmatch(emails_path, keyword):
trans_type = '_Alipay'
else:
trans_type = '_TnG'
msg = extract_msg.Message(emails_path)
msg.save_attachments(customPath=os.path.join(input_dir, trans_type))
msg.close()
for zips in glob.glob('*/*.zip', recursive=True):
zips_path = os.path.join(input_dir, zips)
if fnmatch.fnmatch(zips_path, keyword):
trans_type = '_Alipay'
else:
trans_type = '_TnG'
dst_folder = os.path.join(input_dir, trans_type)
with zipfile.ZipFile(zips, 'r') as zf:
zf.extractall(path=dst_folder, pwd=bytes(pwd, 'utf-8'))
for xls in glob.glob('*/*.xls', recursive=True):
xls_path = os.path.join(input_dir, xls)
if fnmatch.fnmatch(xls_path, keyword):
trans_type = '_Alipay'
else:
trans_type = '_TnG'
os.rename(xls_path, os.path.splitext(xls_path)[0] + trans_type + '.xls')
for xls in glob.glob('*/*.xls', recursive=True):
xls_path = os.path.join(input_dir, xls)
if fnmatch.fnmatch(xls_path, keyword):
trans_type = '_Alipay'
else:
trans_type = '_TnG'
XLS2XLSX(xls_path).to_xlsx(os.path.splitext(xls_path)[0] + '.xlsx')
for xlsx in glob.glob('*/*.xlsx', recursive=True):
shutil.move(xlsx, input_dir)