Search code examples
pythonpython-3.xpandasgmailgmail-api

How to save month-wise mails and aslo divide mails based on time using gmail api and save the output to csv or convert it into df?


This code below gets us the messages count for every 30 days from the time period of message sent.

This code gets us:(In-detail)

1.Amazon first mail to my mail with a particular phase(here first order).

2.Convert that epoch format into time date and using timedelta And getting count of mails sent in the interval of 30 days.

The output for this code will be like this:

Amazon first order:

1534476682000

Amazon total orders between 2018-08-01 and 2018-09-01: 20

Amazon total orders between 2018-09-01 and 2018-10-01: 11

Amazon total orders between 2018-10-01 and 2018-11-01: 15

Amazon total orders between 2018-11-01 and 2018-12-01: 7

Amazon total orders between 2018-12-01 and 2019-01-01: 19

Amazon total orders between 2019-01-01 and 2019-02-01: 23

Amazon total orders between 2019-02-01 and 2019-03-01: 12

Code:

#amazonfirstorder
from googleapiclient.discovery import build
from httplib2 import Http
from oauth2client import file, client, tools
from dateutil.relativedelta import relativedelta
from datetime import datetime


SCOPES = 'https://www.googleapis.com/auth/gmail.readonly'

def main():

    store = file.Storage('token.json')
    creds = store.get()
if not creds or creds.invalid:
    flow = client.flow_from_clientsecrets('credentials.json', SCOPES)
    creds = tools.run_flow(flow, store)
service = build('gmail', 'v1', http=creds.authorize(Http()))

results = service.users().messages().list(userId='me', q='from:[email protected] subject:(your amazon.in order of )',labelIds = ['INBOX']).execute()

messages = results.get('messages', [])


print('\nFilpkart first order:')
if not messages:
    print (" ")
else:
    print (" ")

    msg = service.users().messages().get(userId='me', id=messages[-1]['id']).execute()
    #print(msg['snippet'])
    a=(msg['internalDate'])
    ts = int(a)
    ts /= 1000
    year=int(datetime.utcfromtimestamp(ts).strftime('%Y'))
    month=int(datetime.utcfromtimestamp(ts).strftime('%m'))
    #print(year)
    #print(month)
    print(msg['internalDate'])

    log_results = []
    start_date = datetime(year,month,1)
#start_date = datetime(2016,1,1)
    end_date = datetime.today()
    increment = relativedelta(months=1)
    target_date = start_date + increment

    while target_date <= end_date:

        timestamp_after = int(start_date.timestamp())  # timestamp of start day
        timestamp_before = int(target_date.timestamp())  # timestamp of start day + 30 days

        query = f'from:([email protected]) subject:(your amazon.in order of ) after:{timestamp_after} before:{timestamp_before}'
        results = service.users().messages().list(userId='me', q=query, labelIds=['INBOX']).execute()

        messages = results.get('messages', [])
        orders = len(messages)
        start_date_str = start_date.strftime('%Y-%m-%d')
        target_date_str = target_date.strftime('%Y-%m-%d')
        print(f"\nFlipkart total orders between {start_date.strftime('%Y-%m-%d')} and {target_date.strftime('%Y-%m-%d')}: {orders}")

        log_results.append(dict(start=start_date_str, end=target_date_str, orders=orders))

    # update interval
        start_date += increment
        target_date += increment

    return log_results



if __name__ == '__main__':
    log_results = main()    

Now i have two problems :

First

How to save the output of that code into csv file.

Second:

The above code gets us 30 days mails count,What i need is i need the count of mails i received before 12.00 PM in month-wise and after 12 PM in month-wise and save them in csv.

OUTPUT i need for 2nd Problem :

Amazon total orders between 2018-09-01 and 2018-10-01 before 12:00 PM : 11

Amazon total orders between 2018-10-01 and 2018-11-01 before 12:00 PM : 15

Amazon total orders between 2018-11-01 and 2018-12-01 before 12:00 PM : 7

Amazon total orders between 2018-12-01 and 2019-01-01 before 12:00 PM : 19

Amazon total orders between 2018-09-01 and 2018-10-01 after 12:00 PM : 3

Amazon total orders between 2018-10-01 and 2018-11-01 after 12:00 PM : 6

Amazon total orders between 2018-11-01 and 2018-12-01 after 12:00 PM : 88

Amazon total orders between 2018-12-01 and 2019-01-01 after 12:00 PM : 26

Solution

  • Similar to what already proposed, but in this case you would calculate the increment as exactly one month instead of 30 days (see the use of relativedelta instead of timedelta):

    from googleapiclient.discovery import build
    from httplib2 import Http
    from oauth2client import file, client, tools
    from dateutil.relativedelta import relativedelta
    from datetime import datetime
    
    SCOPES = 'https://www.googleapis.com/auth/gmail.readonly'
    
    def main():
    
        store = file.Storage('token.json')
        creds = store.get()
        if not creds or creds.invalid:
            flow = client.flow_from_clientsecrets('credentials.json', SCOPES)
            creds = tools.run_flow(flow, store)
        service = build('gmail', 'v1', http=creds.authorize(Http()))
    
        log_results = []
    
        start_date = datetime(2016, 1, 1)
        end_date = datetime.today()
        increment = relativedelta(months=1)
        target_date = start_date + increment
    
        while target_date <= end_date:
    
            timestamp_after = int(start_date.timestamp())  # timestamp of start day
            timestamp_before = int(target_date.timestamp())  # timestamp of start day + 30 days
    
            query = f'from:([email protected]) subject:(your amazon.in order of ) after:{timestamp_after} before:{timestamp_before}'
            results = service.users().messages().list(userId='me', q=query, labelIds=['INBOX']).execute()
    
            messages = results.get('messages', [])
            orders = len(messages)
            start_date_str = start_date.strftime('%Y-%m-%d')
            target_date_str = target_date.strftime('%Y-%m-%d')
            print(f"\nAmazon total orders between {start_date.strftime('%Y-%m-%d')} and {target_date.strftime('%Y-%m-%d')}: {orders}")
    
            log_results.append(dict(start=start_date_str, end=target_date_str, orders=orders))
    
            # update interval
            start_date += increment
            target_date += increment
    
        return log_results
    
    
    
    if __name__ == '__main__':
        log_results = main()
        # Write to csv
        import pandas as pd
        df = pd.DataFrame(log_results)
        df.to_csv('orders.csv')