I have 3000 emails in my Gmail account. I want to create an aggregated list of all the senders so that I can clean up my inbox more effectively. I don't need to download the message bodies or the attachments.
I used this sample to get started (https://developers.google.com/gmail/api/quickstart/dotnet), but now I can't figure out how to return more than 100 message IDs when I execute this code:
using System;
using System.Collections.Generic;
using System.IO;
using System.Linq;
using System.Net;
using System.Net.Http;
using System.Runtime.InteropServices;
using System.Text;
using System.Threading;
using System.Threading.Tasks;
using Google.Apis.Auth.OAuth2;
using Google.Apis.Gmail.v1;
using Google.Apis.Gmail.v1.Data;
using Google.Apis.Requests;
using Google.Apis.Services;
using Google.Apis.Util;
using Google.Apis.Util.Store;
namespace GmailQuickstart
{
/// <summary>
/// Console tool that lists every message in the Gmail inbox, downloads only the
/// From/Date/Subject headers of each one, and writes a per-sender message count
/// to disk. Intermediate results are written to (and re-read from) JSON files so
/// that each stage can be tested without repeating the earlier API calls.
/// </summary>
class Program
{
    static readonly string[] Scopes = { GmailService.Scope.GmailReadonly };
    static readonly string ApplicationName = "Gmail API .NET Quickstart";

    // Users.Messages.List returns 100 ids per page by default; 500 is the documented maximum.
    private const int MaxListPageSize = 500;

    // Gmail documents a limit of 100 calls per batch request (the generic client
    // library allows queuing more, but the Gmail endpoint does not accept them).
    private const int MaxBatchSize = 100;

    // Pause between consecutive batches to stay under the per-user rate limit.
    private const int BatchPauseMilliseconds = 5000;

    // The only headers the sender summary needs; requesting just these avoids
    // downloading message bodies and attachments.
    private static readonly string[] SummaryHeaders = { "From", "Date", "Subject" };

    static void Main(string[] args)
    {
        // All intermediate files live here; make sure the folder exists before the first write.
        Directory.CreateDirectory(@"C:\000");

        var service = CreateService();

        ////get all of the message ids for the messages in the inbox
        var messageList = ListInboxMessages(service);

        //once the list of all the message ids is built, serialize it to JSON and write it to a file
        //so the next portions can be tested without having to make the calls above each time
        var serializedMessageIdList = Newtonsoft.Json.JsonConvert.SerializeObject(messageList);
        System.IO.File.WriteAllText(@"C:\000\MessageIds.json", serializedMessageIdList);

        //read in the serialized list and rehydrate it to test the next portion
        var mIdList = Newtonsoft.Json.JsonConvert.DeserializeObject<List<Message>>(System.IO.File.ReadAllText(@"C:\000\MessageIds.json"));

        //take those message ids and get the message metadata from the api for each of them
        var messages = BatchDownloadEmails(service, mIdList.Select(m => m.Id), MaxBatchSize);

        //again serialize the message list and write it to a file
        var serializedMessageList = Newtonsoft.Json.JsonConvert.SerializeObject(messages);
        System.IO.File.WriteAllText(@"C:\000\Messages.json", serializedMessageList);

        //and then read it in and rehydrate the list to test the next portion
        var mList = Newtonsoft.Json.JsonConvert.DeserializeObject<IList<Message>>(System.IO.File.ReadAllText(@"C:\000\Messages.json"));

        //then loop through each message and pull the wanted values out of the payload headers
        var emailList = new List<EmailItem>();
        foreach (var message in mList)
        {
            if (message == null)
            {
                continue;
            }

            emailList.Add(new EmailItem()
            {
                From = GetHeaderValue(message, "From"),
                Subject = GetHeaderValue(message, "Subject"),
                Date = GetHeaderValue(message, "Date")
            });
        }

        //serialize this list as well
        var serializedEmailItemList = Newtonsoft.Json.JsonConvert.SerializeObject(emailList);
        System.IO.File.WriteAllText(@"C:\000\EmailItems.json", serializedEmailItemList);

        //rehydrate for testing
        var eiList = Newtonsoft.Json.JsonConvert.DeserializeObject<List<EmailItem>>(System.IO.File.ReadAllText(@"C:\000\EmailItems.json"));

        //the actual aggregation: which senders have sent the most email
        var senderSummary = eiList.GroupBy(g => g.From).Select(g => new { Sender = g.Key, Count = g.Count() }).OrderByDescending(g => g.Count);

        //serialize and output the results
        var serializedSummaryList = Newtonsoft.Json.JsonConvert.SerializeObject(senderSummary);
        System.IO.File.WriteAllText(@"C:\000\SenderSummary.json", serializedSummaryList);
    }

    /// <summary>
    /// Authorizes with the client secrets in credentials.json (caching the token in
    /// token.json) and returns a Gmail service bound to that credential.
    /// </summary>
    private static GmailService CreateService()
    {
        UserCredential credential;
        using (var stream = new FileStream("credentials.json", FileMode.Open, FileAccess.Read))
        {
            string credPath = "token.json";
            // Blocking on the task is acceptable here: this is a console app entry path.
            credential = GoogleWebAuthorizationBroker.AuthorizeAsync(
                GoogleClientSecrets.Load(stream).Secrets,
                Scopes,
                "user",
                CancellationToken.None,
                new FileDataStore(credPath, true)).Result;
            Console.WriteLine("Credential file saved to: " + credPath);
        }

        // Create Gmail API service.
        return new GmailService(new BaseClientService.Initializer()
        {
            HttpClientInitializer = credential,
            ApplicationName = ApplicationName,
        });
    }

    /// <summary>
    /// Pages through Users.Messages.List for the INBOX label until no next page
    /// token is returned, logging one line per request to the console and to log.txt.
    /// </summary>
    /// <param name="service">Authorized Gmail service.</param>
    /// <returns>One entry per inbox message returned by the list call (empty if there are none).</returns>
    private static List<Message> ListInboxMessages(GmailService service)
    {
        var listRequest = service.Users.Messages.List("me");
        listRequest.LabelIds = "INBOX";
        listRequest.MaxResults = MaxListPageSize;

        var found = new List<Message>();
        var requestNumber = 0;
        do
        {
            ListMessagesResponse page = listRequest.Execute();

            // Messages is null (not empty) when a page has no results, e.g. an empty inbox.
            if (page.Messages != null)
            {
                found.AddRange(page.Messages);
            }

            var output = $"Request {requestNumber} - Message Count: {found.Count} Page Token: {listRequest.PageToken} - Next Page Token: {page.NextPageToken}";
            Console.WriteLine(output);
            // Terminate each entry so the log file gets one line per request.
            System.IO.File.AppendAllText(@"C:\000\log.txt", output + Environment.NewLine);

            listRequest.PageToken = page.NextPageToken;
            requestNumber++;
        } while (!String.IsNullOrEmpty(listRequest.PageToken));

        return found;
    }

    /// <summary>
    /// Returns the value of the first header called <paramref name="name"/>
    /// (compared case-insensitively), or null when the message has no payload,
    /// no headers, or no such header.
    /// </summary>
    private static string GetHeaderValue(Message message, string name)
    {
        var headers = message.Payload?.Headers;
        if (headers == null)
        {
            return null;
        }

        // FirstOrDefault rather than SingleOrDefault: a repeated header must not throw.
        return headers.FirstOrDefault(h => string.Equals(h.Name, name, StringComparison.OrdinalIgnoreCase))?.Value;
    }

    /// <summary>
    /// Downloads the From/Date/Subject metadata of the given messages using batch requests.
    /// </summary>
    /// <param name="service">Authorized Gmail service.</param>
    /// <param name="messageIds">Ids of the messages to fetch.</param>
    /// <param name="chunkSize">
    /// Requested number of calls per batch; must be positive. Values above the Gmail
    /// batch limit of 100 are reduced to that limit.
    /// </param>
    /// <returns>
    /// The messages that were fetched successfully. Requests that failed are reported
    /// on standard error and left out of the result.
    /// </returns>
    /// <exception cref="ArgumentNullException"><paramref name="service"/> or <paramref name="messageIds"/> is null.</exception>
    /// <exception cref="ArgumentOutOfRangeException"><paramref name="chunkSize"/> is zero or negative.</exception>
    public static IList<Message> BatchDownloadEmails(GmailService service, IEnumerable<string> messageIds, int chunkSize)
    {
        if (service == null)
        {
            throw new ArgumentNullException(nameof(service));
        }
        if (messageIds == null)
        {
            throw new ArgumentNullException(nameof(messageIds));
        }
        if (chunkSize <= 0)
        {
            throw new ArgumentOutOfRangeException(nameof(chunkSize), chunkSize, "Chunk size must be greater than zero.");
        }

        var messages = new List<Message>();

        // Split the ids once, up front; re-enumerating the lazy chunk sequence
        // would regroup the whole id list on every pass.
        var batchSize = Math.Min(chunkSize, MaxBatchSize);
        var batches = messageIds.ChunkBy(batchSize).Select(chunk => chunk.ToList()).ToList();

        for (int i = 0; i < batches.Count; i++)
        {
            Console.WriteLine($"list: {i}...");
            var request = new BatchRequest(service);
            foreach (var messageId in batches[i])
            {
                var messageRequest = service.Users.Messages.Get("me", messageId);
                // Metadata format returns headers only, restricted to the ones we need.
                messageRequest.Format = UsersResource.MessagesResource.GetRequest.FormatEnum.Metadata;
                messageRequest.MetadataHeaders = SummaryHeaders;
                request.Queue<Message>(messageRequest,
                    (content, error, index, message) =>
                    {
                        if (error != null || content == null)
                        {
                            Console.Error.WriteLine($"Failed to download message {messageId}: {error?.Message}");
                            return;
                        }
                        messages.Add(content);
                    });
            }
            Console.WriteLine("");
            Console.WriteLine("ExecuteAsync");
            //execute all the requests in the queue
            request.ExecuteAsync().Wait();

            // Throttle between batches only; there is nothing to wait for after the last one.
            if (i < batches.Count - 1)
            {
                Thread.Sleep(BatchPauseMilliseconds);
            }
        }
        return messages;
    }
}
/// <summary>
/// The header values kept for one email: who sent it, its subject line and its date.
/// All three are stored as the raw header text and may be null when the header is absent.
/// </summary>
public class EmailItem
{
    /// <summary>Value of the message's "From" header.</summary>
    public string From
    {
        get;
        set;
    }

    /// <summary>Value of the message's "Subject" header.</summary>
    public string Subject
    {
        get;
        set;
    }

    /// <summary>Value of the message's "Date" header, unparsed.</summary>
    public string Date
    {
        get;
        set;
    }
}
/// <summary>
/// Extension methods for <see cref="IEnumerable{T}"/>.
/// </summary>
public static class IEnumerableExtensions
{
    /// <summary>
    /// Splits <paramref name="source"/> into consecutive chunks of at most
    /// <paramref name="chunkSize"/> elements, preserving order. Every chunk except
    /// possibly the last holds exactly <paramref name="chunkSize"/> elements; an
    /// empty source yields no chunks.
    /// </summary>
    /// <typeparam name="T">Element type.</typeparam>
    /// <param name="source">Sequence to split.</param>
    /// <param name="chunkSize">Maximum number of elements per chunk; must be positive.</param>
    /// <returns>A lazily produced sequence of chunks.</returns>
    /// <exception cref="ArgumentNullException"><paramref name="source"/> is null.</exception>
    /// <exception cref="ArgumentOutOfRangeException"><paramref name="chunkSize"/> is zero or negative.</exception>
    public static IEnumerable<IEnumerable<T>> ChunkBy<T>(this IEnumerable<T> source, int chunkSize)
    {
        // Validate here, not inside the iterator, so bad arguments fail at the call
        // site instead of on first enumeration.
        if (source == null)
        {
            throw new ArgumentNullException(nameof(source));
        }
        if (chunkSize <= 0)
        {
            throw new ArgumentOutOfRangeException(nameof(chunkSize), chunkSize, "Chunk size must be greater than zero.");
        }

        return ChunkIterator(source, chunkSize);
    }

    // Streams the source once, handing out each chunk as soon as it is full.
    private static IEnumerable<IEnumerable<T>> ChunkIterator<T>(IEnumerable<T> source, int chunkSize)
    {
        var chunk = new List<T>();
        foreach (var item in source)
        {
            chunk.Add(item);
            if (chunk.Count == chunkSize)
            {
                yield return chunk;
                // Start a fresh list so chunks already handed out are never mutated.
                chunk = new List<T>();
            }
        }

        // Trailing partial chunk, if the element count is not a multiple of chunkSize.
        if (chunk.Count > 0)
        {
            yield return chunk;
        }
    }
}
}
The research I've done says I need to use a batch request, but based on the information I've found I'm not able to adapt it to what I'm trying to accomplish. My understanding is that I would use the batch request to get all of the message IDs and then make 3000 individual calls to get the actual from, subject, and date received for each email in my inbox?
You can use paging to get the full list.
Pass the next page token returned by the previous call into the next call to Users.Messages.List
(don't pass a token into the first call, which gets things started). Detect the end when the response has no next page token; a response that contains no messages also means there is nothing more to read.
This allows you to get all the messages in the mailbox.
NB: I suggest you make the code async. If there are more than a few messages to read, it can take an appreciable amount of time to get them all.