Search code examples
Tags: python, for-loop, rename, zip

Renaming extracted zip file


I am trying to extract the attachments (.zip) from email messages (.msg) and then extract the single document (.xls) contained in each zip file.

After extracting the .xls, I would like to rename it based on a keyword in the .msg file name (i.e. if the name contains 'Alipay', append '_alipay' to the .xls file name; otherwise append '_tng').

import os
import extract_msg
import fnmatch
import zipfile
import glob

# Ask the user for the working folder via a Tk directory dialog.
# NOTE(review): Tk and askdirectory are not imported in this snippet —
# presumably they come from tkinter / tkinter.filedialog; confirm against the full script.
Tk().withdraw()
directory = askdirectory(title='Yo select your folder please')
input_dir = directory + "/"
# Make the chosen folder the current directory; the relative names used
# below (Message(email), glob('*.zip'), extractall()) resolve against it.
os.chdir(input_dir)

pwd = '123'  # password passed to ZipFile.extractall below
keyword = '*Alipay*'  # fnmatch pattern tested against each .msg file name

for email in os.listdir(input_dir):
    if email.endswith('.msg'):
        # NOTE(review): email_path is computed but never used; the code below
        # opens the relative name `email` instead.
        email_path = os.path.join(input_dir, email)

        # Choose the suffix for the renamed file from the email's file name.
        if fnmatch.fnmatch(email, keyword):
            trans_type = '_alipay'
        else:
            trans_type = '_tng'

        # Save the message's attachments (no target path is given), then
        # close the message.
        msg = extract_msg.Message(email)
        msg.save_attachments()
        msg.close()


        # Process every .zip matched in the current directory. This runs once
        # per email, so archives saved by earlier emails are matched again.
        for em_zip in glob.glob('*.zip'):
            zip_path = os.path.join(input_dir, em_zip)

                # NOTE(review): as pasted, this `with` block is indented one
                # level deeper than the statement above it, which Python
                # rejects as an unexpected indent.
                with zipfile.ZipFile(zip_path, 'r') as zf:
                    zf.extractall(pwd=bytes(pwd, 'utf-8'))

                    # NOTE(review): this renames the .zip archive itself
                    # (zip_path), not the extracted .xls, and it does so inside
                    # the `with` block while `zf` still has the archive open —
                    # consistent with the WinError 32 reported below.
                    os.rename(zip_path, os.path.splitext(zip_path)[0] + trans_type + '.xls')

The error message I got is:

Traceback (most recent call last):
  File "C:\Users\cheeh\Desktop\PyCharmPortable\PycharmProjects\ocrpdf\Alipay.py", line 71, in <module>
    os.rename(zip_path, os.path.splitext(zip_path)[0] + trans_type + '.xls')
PermissionError: [WinError 32] The process cannot access the file because it is being used by another process: 'C:/Users/cheeh/Desktop/cimb/CDFSB60006039760617520201221.zip' -> 'C:/Users/cheeh/Desktop/cimb/CDFSB60006039760617520201221_alipay.xls'

Thanks


Solution

  • I went for a manual, longer way to achieve the outcome. I would appreciate it if anyone more experienced could optimize the code, as it takes a long time when there are lots of emails.

    # Pipeline: sort the .msg files into '_Alipay' / '_TnG' sub-folders, save
    # their attachments there, unzip them, tag each extracted .xls with the
    # folder's suffix, convert it to .xlsx and move the .xlsx back to input_dir.
    #
    # Expects `input_dir` and `pwd` to be defined already. The relative glob
    # patterns assume the current working directory is input_dir
    # (e.g. via os.chdir(input_dir)) — TODO confirm in the full script.
    keyword = '*Alipay*'
    zip_password = bytes(pwd, 'utf-8')  # encoded once instead of once per archive

    def _trans_type(rel_path):
        """Return '_Alipay' if rel_path matches keyword, otherwise '_TnG'.

        rel_path must be relative to input_dir (a glob result). Matching the
        absolute path instead would classify every file as '_Alipay' whenever
        input_dir itself happens to contain the keyword.
        """
        return '_Alipay' if fnmatch.fnmatch(rel_path, keyword) else '_TnG'

    # exist_ok=True lets the script be re-run on a folder it already processed.
    for folder in ('_Alipay', '_TnG'):
        os.makedirs(os.path.join(input_dir, folder), exist_ok=True)

    # 1) Sort the emails into the two folders by file name. glob.glob builds
    #    the full list up front, so moving files while looping is safe.
    for email in glob.glob('*.msg'):
        shutil.move(os.path.join(input_dir, email),
                    os.path.join(input_dir, _trans_type(email)))

    # 2) Save each email's attachments next to it. From here on the category
    #    comes from the sub-folder name that is part of the relative path.
    for emails in glob.glob('*/*.msg'):
        msg = extract_msg.Message(os.path.join(input_dir, emails))
        try:
            msg.save_attachments(customPath=os.path.join(input_dir, _trans_type(emails)))
        finally:
            # Always release the .msg file handle, even if saving fails.
            msg.close()

    # 3) Unpack every saved archive into its category folder.
    for zips in glob.glob('*/*.zip'):
        dst_folder = os.path.join(input_dir, _trans_type(zips))
        with zipfile.ZipFile(os.path.join(input_dir, zips), 'r') as zf:
            zf.extractall(path=dst_folder, pwd=zip_password)

    # 4) Append the category suffix to each spreadsheet and convert it in the
    #    same pass (previously two separate glob passes over the same files).
    for xls in glob.glob('*/*.xls'):
        xls_path = os.path.join(input_dir, xls)
        tagged_stem = os.path.splitext(xls_path)[0] + _trans_type(xls)
        os.rename(xls_path, tagged_stem + '.xls')
        XLS2XLSX(tagged_stem + '.xls').to_xlsx(tagged_stem + '.xlsx')

    # 5) Collect the converted workbooks in the top-level folder.
    for xlsx in glob.glob('*/*.xlsx'):
        shutil.move(xlsx, input_dir)