Search code examples
pythongoogle-apigoogle-drive-apigoogle-sheets-apigoogle-api-python-client

Downloading Google Sheet as CSV with Python yielding CSV that's only one line


I've created a fairly basic program that takes a Google Sheet and converts it into a CSV file and then spits said CSV in a given directory. That part works as intended, but the output .CSV has no line breaks or newlines, so when it's loaded into Excel, it's read as one line, with \r\n instead of newlines.

This program has some unneeded imports and other issues, but I just haven't cleaned those up yet.

Here's the program:

# Package Imports
from __future__ import print_function
import re
import os.path
import io
import csv
import ctypes
import google.auth.exceptions
from googleapiclient.discovery import build
from google_auth_oauthlib.flow import InstalledAppFlow
from google.auth.transport.requests import Request
from google.oauth2.credentials import Credentials
from googleapiclient.http import MediaIoBaseDownload

# Function Runners (for test purposes)

# Other Programmable Strings
EXPORT_DIRECTORY = 'DIREC'
CREDS_DIRECTORY = 'DIREC'
GSHEET_ID = 'ID'
FILE_NAME = 'NAME' + '.csv'
# When editing this URL, token.json MUST be deleted to reauthorize with Google.
SCOPES = 'https://www.googleapis.com/auth/drive'

# Define Global Variables
MessageBox = ctypes.windll.user32.MessageBoxW

def main():
    creds = None

    if os.path.exists('token.json'):
        creds = Credentials.from_authorized_user_file('token.json', SCOPES)

    # If there are no (valid) credentials available, directs user to OAuth login through default browser.
    if not creds or not creds.valid:
        if creds and creds.expired and creds.refresh_token:
            try:
            
                creds.refresh(Request())

            except google.auth.exceptions.RefreshError:
                MessageBox(None, 'There was an error authorizing with Google. You will be redirected to a login page.', 'Google Sheet to CSV: Fatal Error', 0)

        else:
            flow = InstalledAppFlow.from_client_secrets_file(
                'C:\\Users\AeroG\OneDrive\Desktop\SHEETSCSV\credentials.json', SCOPES) # Sometimes requires a specific path to credentials.json.
            creds = flow.run_local_server(port=0)

        # Save the credentials for the next run
        with open('token.json', 'w') as token:
            token.write(creds.to_json())

    service = build('drive', 'v3', credentials=creds)

Basically all you need to look at is this part below.

    # Call the Drive v3 API and compile CSV file from provided ID
    file_id = GSHEET_ID
    request = service.files().export_media(fileId=file_id,
                                             mimeType='text/csv')
    fh = io.BytesIO()
    downloader = MediaIoBaseDownload(fh, request)
    done = False
    while done is False:
        status, done = downloader.next_chunk()
        print("Download %d%%." % int(status.progress() * 100))
    
    fh.seek(0)
    with open('C:\\Users\AeroG\OneDrive\Desktop\SHEETSCSV\\' + FILE_NAME, 'w', newline='\r\n') as f:
        f.write(str(fh.read()))
        f.close()

if __name__ == '__main__':
    main()

I really need help here. In a nutshell, what's happening is the output CSV I'm getting is this:

b' ,Jurisdiction,SHIPPING_ID,Combined,JobTitle,Shipping_Address,City,State,Zip,Phone,Email,Bill to addresses,,JobTitle,Address,City,State,Zip,Phone,Email\r\nDATA,DATA,DATA,DATA,DATA,DATA,DATA,DATA,DATA,,,,,,,,,\r\nDATA,DATA,DATA,DATA,DATA,DATA,DATA,DATA,DATA,DATA,DATA,,,,,,,,,\r\nDATA,DATA,DATA,DATA

instead of the desired output, which is this:

b' ,Jurisdiction,SHIPPING_ID,Combined,JobTitle,Shipping_Address,City,State,Zip,Phone,Email,Bill to addresses,,JobTitle,Address,City,State,Zip,Phone,Email DATA,DATA,DATA,DATA,DATA,DATA,DATA,DATA,DATA,,,,,,,,, DATA,DATA,DATA,DATA,DATA,DATA,DATA,DATA,DATA,DATA,DATA,,,,,,,,, DATA,DATA,DATA,DATA

I understand \r\n is supposed to translate to a newline, but it's not. Any help or something to steer me in the right direction is greatly appreciated. I'm cool with any solution, even if it involves some workaround method.


Solution

  • This is the example from the documentation. I question whether or not the issue isn't with how you are writing the bites to the file.

    file_id = '1ZdR3L3qP4Bkq8noWLJHSr_iBau0DNT4Kli4SxNc2YEo'
    request = drive_service.files().export_media(fileId=file_id,
                                                 mimeType='application/pdf')
    fh = io.BytesIO()
    downloader = MediaIoBaseDownload(fh, request)
    done = False
    while done is False:
        status, done = downloader.next_chunk()
        print "Download %d%%." % int(status.progress() * 100)
    

    Why not go a bit more simple.

    with open('C:\\Users\AeroG\OneDrive\Desktop\SHEETSCSV\\' + FILE_NAME, "wb") as f:
        f.write(fh.getbuffer())