I have a Python script that gets the email message IDs of all the emails in the inbox folder. However, Outlook throws an out-of-memory exception when it reaches several thousand emails.
Exception:
Printing emails...
Traceback (most recent call last):
File "print_emails.py", line 53, in main
print_emails()
File "print_emails.py", line 43, in print_emails
primary_emails, primary_email_ids = get_emails_and_ids(primary_source_folder)
File "print_emails.py", line 29, in get_emails_and_ids
property_accessor = item.PropertyAccessor
File "C:\Program Files\Python38\lib\site-packages\win32com\client\__init__.py", line 474, in __getattr__
return self._ApplyTypes_(*args)
File "C:\Program Files\Python38\lib\site-packages\win32com\client\__init__.py", line 467, in _ApplyTypes_
self._oleobj_.InvokeTypes(dispid, 0, wFlags, retType, argTypes, *args),
pywintypes.com_error: (-2147352567, 'Exception occurred.', (4096, 'Microsoft Outlook', 'Out of memory or system resources. Close some windows or programs and try again.', None, 0, -2147024882), None)
Press enter to exit...
I have tried two different methods: iterating once (`get_emails_and_ids`) and iterating twice (`get_emails` followed by `get_email_ids`).
The problem seems to be related to getting the `PropertyAccessor` several thousand times. If I only collect the emails using `get_emails`, it runs fine with 38,000 emails, but once I start getting the IDs through `PropertyAccessor` thousands of times, Outlook runs out of memory. Do I have to release the old property accessors?
Install:
pip install -U pypiwin32
Code:
#!/usr/bin/env python
from typing import Any, List, Tuple, Set
import traceback
import win32com.client
# Property schema URL passed to PropertyAccessor.GetProperty to read each
# email's message ID.
PidTagInternetMessageId = "http://schemas.microsoft.com/mapi/proptag/0x1035001F"
# Key used to look up the account's root folder in namespace.Folders.
# NOTE(review): the value looks like a redacted placeholder - set it to the
# real account address before running.
primary_account_email = "[email protected]"
# Name of the folder, under that account, whose emails are read.
primary_source_folder_name = "Inbox"
def get_emails(folder) -> List:
    """Return the items of *folder* whose type name contains ``_MailItem``.

    Items of any other type are skipped. The order of ``folder.Items`` is
    preserved.
    """
    mail_items = []
    for entry in folder.Items:
        # The type's string form is inspected rather than using isinstance,
        # exactly as the rest of the script does.
        if "_MailItem" in str(type(entry)):
            mail_items.append(entry)
    return mail_items
def get_email_ids(emails) -> Set[str]:
    """Return the set of non-empty message IDs of the given emails.

    For each email the ``PidTagInternetMessageId`` property is read through
    its ``PropertyAccessor``; empty values are left out of the result.
    """
    message_ids = set()
    for email in emails:
        message_id = email.PropertyAccessor.GetProperty(PidTagInternetMessageId)
        if len(message_id) > 0:
            message_ids.add(message_id)
    return message_ids
def get_emails_and_ids(folder) -> Tuple[List, Set[str]]:
    """Collect the mail items of *folder* and their message IDs in one pass.

    Args:
        folder: An object whose ``Items`` attribute is iterable. Only items
            whose type name contains ``_MailItem`` are considered.

    Returns:
        A ``(emails, email_ids)`` tuple: the list of mail items in folder
        order, and the set of their non-empty ``PidTagInternetMessageId``
        values.

    Note:
        Every mail item is kept alive in the returned list. The author of
        this script reports that doing so while also reading each item's
        ``PropertyAccessor`` made Outlook run out of memory on large
        folders, so prefer collecting only the IDs when the items
        themselves are not needed.
    """
    emails = []
    email_ids = set()
    for item in folder.Items:
        if "_MailItem" not in str(type(item)):
            continue
        emails.append(item)
        email_id = item.PropertyAccessor.GetProperty(PidTagInternetMessageId)
        # Skip blank IDs so the result matches what get_email_ids returns
        # for the same emails, which leaves empty values out.
        if len(email_id) > 0:
            email_ids.add(email_id)
    return emails, email_ids
def print_emails() -> None:
    """Print the set of message IDs found in the configured source folder.

    Connects to Outlook through COM, looks up the account named by
    ``primary_account_email`` and, inside it, the folder named by
    ``primary_source_folder_name``, then prints the IDs returned by
    ``get_emails_and_ids``.
    """
    app = win32com.client.gencache.EnsureDispatch("Outlook.Application")
    mapi = app.GetNamespace("MAPI")
    account_root = mapi.Folders[primary_account_email]
    source_folder = account_root.Folders[primary_source_folder_name]
    # Single-pass variant. The two-pass alternative would be:
    #   primary_emails = get_emails(source_folder)
    #   primary_email_ids = get_email_ids(primary_emails)
    _emails, message_ids = get_emails_and_ids(source_folder)
    print(message_ids)
def main(*args: Tuple[Any, ...]) -> None:
    """Run ``print_emails``, report any error, then wait for Enter.

    Any ``Exception`` raised while printing is shown as a traceback rather
    than propagated. The positional arguments are accepted but not used.
    """
    try:
        print("Printing emails...")
        print_emails()
        print()
        print("Done.")
    except Exception:
        # Top-level boundary: show the failure and fall through to the
        # pause below instead of terminating immediately.
        traceback.print_exc()
    # Pause before returning - presumably so a console window opened just
    # for this script stays visible until the user has read the output.
    print()
    print("Press enter to exit...")
    input()
# Run main() only when this file is executed as a script, not when imported.
if __name__ == "__main__":
    main()
My solution was to not store all the emails (`MailItem` objects) in a list. If I do need emails in a list, I should `list.pop()` or otherwise remove each email from the list as soon as I have processed it. Using the `PropertyAccessor` while keeping the email in the list causes Outlook to keep the objects in memory, and eventually Outlook runs out of memory.
I got rid of the `get_emails` and `get_emails_and_ids` functions and rewrote the `get_email_ids` function to store only the email message IDs, without storing the email objects in a list:
def get_email_ids(folder) -> Tuple[Set[str], int]:
    """Collect the non-empty message IDs of the mail items in *folder*.

    Only the ID strings are retained; the mail items themselves are not
    stored. A progress line is printed after every 500 mail items.

    Returns:
        A ``(email_ids, count)`` tuple: the set of non-empty
        ``PidTagInternetMessageId`` values, and the number of mail items
        visited (including those whose ID was empty).
    """
    mail_count = 0
    unique_ids = set()
    for entry in folder.Items:
        # Only items whose type name contains "_MailItem" are counted.
        if "_MailItem" not in str(type(entry)):
            continue
        mail_count += 1
        message_id = entry.PropertyAccessor.GetProperty(PidTagInternetMessageId)
        if len(message_id) > 0:
            unique_ids.add(message_id)
        if mail_count % 500 == 0:
            print(f" Retrieved {mail_count} email IDs.")
    return unique_ids, mail_count
My other script is a lot faster now, although it still takes at least 10 minutes. Before, it processed only a couple of emails per second and took hours.