Search code examples
pythongoogle-drive-apigoogle-sheets-apigoogle-docsgoogle-docs-api

Download a spreadsheet from Google Drive / Workspace using Python


Can you produce a Python example of how to download a Google Sheets spreadsheet given its key and worksheet ID (gid)? I can't.

I've scoured versions 1, 2 and 3 of the API. I'm having no luck, I can't figure out their compilcated ATOM-like feeds API, the gdata.docs.service.DocsService._DownloadFile private method says that I'm unauthorized, and I don't want to write an entire Google Login authentication system myself. I'm about to stab myself in the face due to frustration.

I have a few spreadsheets and I want to access them like so:

username = '[email protected]'
password = getpass.getpass()

def get_spreadsheet(key, gid=0):
    ... (help!) ...

for row in get_spreadsheet('5a3c7f7dcee4b4f'):
    cell1, cell2, cell3 = row
    ...

Please save my face.


Update 1: I've tried the following, but no combination of Download() or Export() seems to work. (Docs for DocsService here)

import gdata.docs.service
import getpass
import os
import tempfile
import csv

def get_csv(file_path):
  return csv.reader(file(file_path).readlines())

def get_spreadsheet(key, gid=0):
  gd_client = gdata.docs.service.DocsService()
  gd_client.email = '[email protected]'
  gd_client.password = getpass.getpass()
  gd_client.ssl = False
  gd_client.source = "My Fancy Spreadsheet Downloader"
  gd_client.ProgrammaticLogin()

  file_path = tempfile.mktemp(suffix='.csv')
  uri = 'http://docs.google.com/feeds/documents/private/full/%s' % key
  try:
    entry = gd_client.GetDocumentListEntry(uri)

    # XXXX - The following dies with RequestError "Unauthorized"
    gd_client.Download(entry, file_path)

    return get_csv(file_path)
  finally:
    try:
      os.remove(file_path)
    except OSError:
      pass

Solution

  • In case anyone comes across this looking for a quick fix, here's another (currently) working solution that doesn't rely on the gdata client library:

    #!/usr/bin/python
    
    import re, urllib, urllib2
    
    class Spreadsheet(object):
        def __init__(self, key):
            super(Spreadsheet, self).__init__()
            self.key = key
    
    class Client(object):
        def __init__(self, email, password):
            super(Client, self).__init__()
            self.email = email
            self.password = password
    
        def _get_auth_token(self, email, password, source, service):
            url = "https://www.google.com/accounts/ClientLogin"
            params = {
                "Email": email, "Passwd": password,
                "service": service,
                "accountType": "HOSTED_OR_GOOGLE",
                "source": source
            }
            req = urllib2.Request(url, urllib.urlencode(params))
            return re.findall(r"Auth=(.*)", urllib2.urlopen(req).read())[0]
    
        def get_auth_token(self):
            source = type(self).__name__
            return self._get_auth_token(self.email, self.password, source, service="wise")
    
        def download(self, spreadsheet, gid=0, format="csv"):
            url_format = "https://spreadsheets.google.com/feeds/download/spreadsheets/Export?key=%s&exportFormat=%s&gid=%i"
            headers = {
                "Authorization": "GoogleLogin auth=" + self.get_auth_token(),
                "GData-Version": "3.0"
            }
            req = urllib2.Request(url_format % (spreadsheet.key, format, gid), headers=headers)
            return urllib2.urlopen(req)
    
    if __name__ == "__main__":
        import getpass
        import csv
    
        email = "" # (your email here)
        password = getpass.getpass()
        spreadsheet_id = "" # (spreadsheet id here)
    
        # Create client and spreadsheet objects
        gs = Client(email, password)
        ss = Spreadsheet(spreadsheet_id)
    
        # Request a file-like object containing the spreadsheet's contents
        csv_file = gs.download(ss)
    
        # Parse as CSV and print the rows
        for row in csv.reader(csv_file):
            print ", ".join(row)