Search code examples
python email-attachments eml

Unable to download all documents from eml file


I have a .eml file with 3 attachments in it. I was able to download one of the attachments, but I am unable to download all of them.

import os
import email
import base64
# Script overview: for every regular file in the current directory whose name
# ends with ".eml", take the text that follows the last occurrence of `.docx"`,
# treat it as base64, decode it, and write the result to output/<stem>.docx.

# Get list of all files (regular files only, current directory, not recursive)
files = [f for f in os.listdir('.') if os.path.isfile(f)]
# Create output directory
if os.path.exists("output"):
    pass
else:
    os.makedirs("output")

for eml_file in files:
    if eml_file.endswith(".eml"):
        with open(eml_file) as f:
            # NOTE(review): this rebinds the name `email`, so the imported
            # `email` module is no longer reachable under that name.
            email = f.read()

        ext=".docx"

        # NOTE(review): `is not` compares identity, not equality; `ext != ""`
        # is the equality test. `ext` is the constant ".docx" here, so the
        # `else` branch at the bottom is not expected to run.
        if ext is not "":
            # Extract the base64 encoding part of the eml file.
            # `[-1]` keeps only the text after the LAST `.docx"` in the file,
            # so at most one attachment per .eml is ever processed.
            encoding = email.split(ext+'"')[-1]
            if encoding:
                # Remove all whitespaces
                encoding = "".join(encoding.strip().split())
                # Keep only what precedes the first "=" character
                encoding = encoding.split("=", 1)[0]
                # Pad with "=" until the length is a multiple of 4, then
                # decode the base64 text to bytes
                if len(encoding) % 4 != 0: #check if multiple of 4
                   while len(encoding) % 4 != 0:
                       encoding = encoding + "="
                try:
                    decoded = base64.b64decode(encoding)
                except:
                    # NOTE(review): bare except; on failure `decoded` is not
                    # assigned in this iteration, so the write below uses a
                    # value left over from an earlier file or raises NameError.
                    print(encoding)
                    for i in range(100):
                        print('\n')
                # Save it as docx, named after the .eml file without its extension
                path = os.path.splitext(eml_file)[0]
                if path:
                    path = os.path.join("output", path + ext)
                    # Delete any previous output first; a missing file is ignored
                    try:
                        os.remove(path)
                    except OSError:
                        pass
                    with open(path, "wb") as f:
                        f.write(decoded)
        else:
            # Paired with `if ext is not ""` above
            print("File not done: " + eml_file)

How can I download all the attachments? Edit: I have initialized eml_file, but it still does not download all the files.


Solution

  • You import the email module. So why do you ignore it and try to write an email parser yourself? In addition:

    1. You can use glob to list all files with a given extension.
    2. You should have used the not operator in the condition (if not os.path.exists("output"): os.makedirs("output")), but even this is not necessary, because makedirs has an exist_ok parameter.
    import os
    import glob
    import email
    from email import policy
    
    indir = '.'
    outdir = os.path.join(indir, 'output')


    def save_attachments(eml_file, outdir):
        """Write every attachment of one .eml file into *outdir*.

        Args:
            eml_file: Path of the message file to parse.
            outdir: Existing directory that receives the attachments.

        Returns:
            The list of paths written, in attachment order.
        """
        # Binary mode lets the parser honour each part's declared charset
        # instead of the locale encoding, and the `with` block closes the
        # handle as soon as parsing is done.
        # This will not work in Python 2
        with open(eml_file, 'rb') as fp:
            msg = email.message_from_binary_file(fp, policy=policy.default)

        written = []
        used_names = set()
        for index, att in enumerate(msg.iter_attachments(), start=1):
            # get_filename() returns None when the part declares no name.
            # Tabs may be added for indentation and not stripped automatically
            raw_name = (att.get_filename() or '').replace('\t', '')
            # Keep only the last path component so that a name such as
            # '../../x' cannot be written outside outdir.
            base_name = os.path.basename(raw_name.replace('\\', '/'))
            if base_name in ('', '.', '..'):
                base_name = 'attachment-{}'.format(index)

            # Attachments of one message may share a name; give later ones a
            # numeric prefix rather than overwrite the earlier file.
            filename = base_name
            suffix = 1
            while filename in used_names:
                suffix += 1
                filename = '{}-{}'.format(suffix, base_name)
            used_names.add(filename)

            if att.is_multipart():
                # A container part (e.g. an attached message/rfc822) has no
                # single payload; serialise the parts it contains.
                data = b''.join(part.as_bytes() for part in att.iter_parts())
            else:
                # decode=True undoes the transfer encoding and yields bytes
                # for text parts too, where get_content() would return str
                # and fail on a file opened in 'wb' mode.
                data = att.get_payload(decode=True)

            path = os.path.join(outdir, filename)
            with open(path, 'wb') as f:
                f.write(data)
            written.append(path)
        return written


    os.makedirs(outdir, exist_ok=True)
    files = glob.glob(os.path.join(indir, '*.eml'))

    for eml_file in files:
        save_attachments(eml_file, outdir)