I've filtered all the messages I want to request into a label in Gmail and I'm successfully getting mail back by using this bit of code in their quickstart.py script:
# My Code
# Lists messages under one Gmail label, then fetches each message's
# subject metadata and prints its snippet.
# NOTE(review): the API silently caps maxResults at 500, so the huge value
# below does not fetch everything — pagination via nextPageToken is needed.
results = service.users().messages().list(userId='me', labelIds='{Label_id}', maxResults='10000000').execute()
messages = results.get('messages', [])
for message in messages:
    # format='metadata' with metadataHeaders restricts the payload to the
    # Subject header; msg still carries the snippet field printed here.
    msg = service.users().messages().get(userId='me', id=message['id'], format='metadata', metadataHeaders=['subject']).execute()
    print(msg['snippet'].encode('utf-8').strip())
I first listed all the labels, and their ids, in an earlier request, and substitute that in where it says {Label_id}. And then I ask for just the subject metadata field. The problem is that the response only ever returns exactly 1 MB of data. I know this because I redirect the output into a file and do an ls -latr --block-size=MB
. Moreover, I can see there are way more (older) messages in that label than what it's returning based on the dates. The request always stops at exactly the same message. None of them have any attachment.
Per their API reference I should be allowed:
Daily Usage 1,000,000,000 quota units per day
Per User Rate Limit 250 quota units per user per second
I don't think that's what I'm hitting, but maybe I'm wrong: each message has 1-3 replies to it, which I can see coming in, and perhaps each of those counts as 5 quota units? Not sure. I've tried playing with the maxResults parameter, but that hasn't seemed to change anything.
Am I hitting a cap here, or is it in my request logic?
EDIT 1
from __future__ import print_function
import pickle
import os.path
import base64
from googleapiclient.discovery import build
from google_auth_oauthlib.flow import InstalledAppFlow
from google.auth.transport.requests import Request
## If modifying these scopes, delete the file token.pickle.
SCOPES = ['https://mail.google.com/']
def main():
"""Shows basic usage of the Gmail API.
Lists the user's Gmail labels.
"""
creds = None
# The file token.pickle stores the user's access and refresh tokens, and is
# created automatically when the authorization flow completes for the first
# time.
if os.path.exists('token.pickle'):
with open('token.pickle', 'rb') as token:
creds = pickle.load(token)
# If there are no (valid) credentials available, let the user log in.
if not creds or not creds.valid:
if creds and creds.expired and creds.refresh_token:
creds.refresh(Request())
else:
flow = InstalledAppFlow.from_client_secrets_file(
'credentials.json', SCOPES)
creds = flow.run_local_server()
# Save the credentials for the next run
with open('token.pickle', 'wb') as token:
pickle.dump(creds, token)
service = build('gmail', 'v1', credentials=creds)
messageArray = []
pageToken = None
while True:
results = service.users().messages().list(userId='me',labelIds = '{Label_ID}', maxResults=500, pageToken=pageToken).execute()
messages = results.get('messages', [])
for message in messages:
msg = service.users().messages().get(userId='me', id=message['id'], format='metadata', metadataHeaders=['subject']).execute()
messageArray.append(msg)
pageToken = results.get('nextPageToken', None)
if not pageToken:
print('[%s]' % ', '.join(map(str, messageArray)))
break
if __name__ == '__main__':
main()
EDIT 2
This is the final script that I went with. It produces a much nicer, cleaner format that I simply redirect to a file, and it is easy to parse.
from __future__ import print_function
import pickle
import os.path
import base64
from googleapiclient.discovery import build
from google_auth_oauthlib.flow import InstalledAppFlow
from google.auth.transport.requests import Request
## If modifying these scopes, delete the file token.pickle.
SCOPES = ['https://mail.google.com/']
def main():
"""Shows basic usage of the Gmail API.
Lists the user's Gmail labels.
"""
creds = None
# The file token.pickle stores the user's access and refresh tokens, and is
# created automatically when the authorization flow completes for the first
# time.
if os.path.exists('token.pickle'):
with open('token.pickle', 'rb') as token:
creds = pickle.load(token)
# If there are no (valid) credentials available, let the user log in.
if not creds or not creds.valid:
if creds and creds.expired and creds.refresh_token:
creds.refresh(Request())
else:
flow = InstalledAppFlow.from_client_secrets_file(
'credentials.json', SCOPES)
creds = flow.run_local_server()
# Save the credentials for the next run
with open('token.pickle', 'wb') as token:
pickle.dump(creds, token)
service = build('gmail', 'v1', credentials=creds)
pageToken = None
while True:
results = service.users().messages().list(userId='me',labelIds = '{Label_ID}', maxResults=500, pageToken=pageToken).execute()
messages = results.get('messages', [])
for message in messages:
msg = service.users().messages().get(userId='me', id=message['id'], format='metadata', metadataHeaders=['subject']).execute()
print(msg['snippet'].encode('utf-8').strip())
pageToken = results.get('nextPageToken', None)
if not pageToken:
break
if __name__ == '__main__':
main()
The maximum value of maxResults is 500. If you set it higher, you'll still only get 500 messages in the results. You can confirm this with a len check of messages.
You need to implement pagination.
# Accumulate message stubs from every page; maxResults caps at 500 per call.
messages = []
page_token = None
while True:
    results = service.users().messages().list(
        userId='me', labelIds='{Label_id}', maxResults=500,
        pageToken=page_token).execute()
    # Bug fixes vs. the original: the key must be the STRING 'messages'
    # (results.get(messages, []) passed the list object as the key), and
    # extend() flattens each page into one list instead of nesting
    # page-lists via append().
    messages.extend(results.get('messages', []))
    page_token = results.get('nextPageToken', None)
    if not page_token:
        break
If you just want the raw, unparsed email message, try using:
# at top of file
from base64 import urlsafe_b64decode

# format='raw' returns the full message body base64url-encoded in the
# 'raw' field, so it must be decoded before printing.
msg = service.users().messages().get(userId='me', id=message['id'], format='raw').execute()
print(urlsafe_b64decode(msg['raw']))