Search code examples
pythongoogle-apigoogle-drive-apigoogle-api-python-clienthttp-error

Google drive API returns 400 Bad request on using PageToken


I am trying to access the next page files in google drive API v3. But this particular snippet fails.

I checked out https://stackoverflow.com/a/38479234/11705040 but my query query_string="'{0}' in parents".format(item["id"]) seems fine by this post.

temp = (
       self.service.files().list(
           q=query_string,
           pageToken=nextPageToken,
           pageSize=5,
           fields="nextPageToken, files(id, name, mimeType, size, parents, modifiedTime)",
       ).execute()
   )

Error is this:

File "D:\Mac\Sites\project-reconnaissance\cronjob\env\lib\site-packages\googleapiclient\_helpers.py", line 134, in positional_wrapper
    return wrapped(*args, **kwargs)
  File "D:\Mac\Sites\project-reconnaissance\cronjob\env\lib\site-packages\googleapiclient\http.py", line 907, in execute
    raise HttpError(resp, content, uri=self.uri)
googleapiclient.errors.HttpError: <HttpError 400 when requesting https://www.googleapis.com/drive/v3/files?q=%271a49i6ivoZ_ErAin7KvvG9VBZOyeeImqc%27+in+parents&pageToken=~%21%21~AI9FV7TwnBwo_MYMR8Rap94OgbFCPDWcy3n6C7prOb_V7MjhT6tjVHHP58QBSyROFNI4HtgmMW6o1AznGcmN1yF_wKr5SSlSgiDocew78RYuyZXDdGO6WOnhrkNeuYW3RyD1nSSWKXNro-skWfZgMuKpQ9P1QfnAxQbdiXAVQ8lO6J2b_xzwpMhAjLWV1kOMCsTqVP9wI61SYJGa1qRJwhUzwL6OEaqffuhXaH3Aa6aXUWg6aIwzAYqGuwTDu1S9BIAqSE3qXDKpRebmvC-fXz0iiDlqlPsuJA-MWuzSNIk0_XlgRpRavrGN9c5miN32i9JML6VQNKs9c_mdb3Ggwzm8KUlPO8eeYg8rdM9gwg6asch22HjLNeKc5kqIRjG9OjVA0RVovpvE&pageSize=5&fields=nextPageToken%2C+files%28id%2C+name%2C+mimeType%2C+size%2C+parents%2C+modifiedTime%29&alt=json returned "Invalid Value">

EDIT Adding a minimum reproducible example: This issue is faced when I'm trying to run the query in a sub folder

import os
import pickle
from google.auth.transport.requests import Request
from google_auth_oauthlib.flow import InstalledAppFlow
from googleapiclient.discovery import build

ID = "1a49i6ivoZ_ErAin7KvvG9VBZOyeeImqc"

SCOPES = ["https://www.googleapis.com/auth/drive.readonly"]


def get_gdrive_service():
    """Gets the service object of google drive"""
    creds = None
    # The file token.pickle stores the user's access and refresh tokens, and is
    # created automatically when the authorization flow completes for the first
    # time.
    if os.path.exists("token.pickle"):
        with open("token.pickle", "rb") as token:
            creds = pickle.load(token)
    # If there are no (valid) credentials available, let the user log in.
    if not creds or not creds.valid:
        if creds and creds.expired and creds.refresh_token:
            creds.refresh(Request())
        else:
            flow = InstalledAppFlow.from_client_secrets_file(
                "credentials.json", SCOPES
            )
            creds = flow.run_local_server(port=0)
        # Save the credentials for the next run
        with open("token.pickle", "wb") as token:
            pickle.dump(creds, token)
    # return Google Drive API service
    return build("drive", "v3", credentials=creds)


print("connecting to google drive...")
service = get_gdrive_service()
print("connected to google drive!")


def list_files(query_string, nextPageToken=None):
    """List the google drive files as returned by Google Drive API."""

    # Get the first set of results
    # BUG here when coming from line 68
    items = (
        service.files().list(
            q=query_string,
            pageToken=nextPageToken,
            pageSize=5,
            fields="nextPageToken, files(id, name, mimeType, size, parents, modifiedTime)",
        )
        .execute()
    )

    if items['files'] != []:
        print("get first set of results", items)

    nextPageToken = items.get("nextPageToken")
    items = items.get("files", [])

    # Do this for all internal folders as well
    for item in items:
        print("Doing item:", item["name"])
        # This works fine
        list_files(
            query_string="'{0}' in parents".format(item["id"])
        )

    # If there is a nextPage Token, go further to get next set of items
    if nextPageToken:
        print("Fetching next set of results")
        q = "'{0}' in parents".format(ID)
        # SOME BUG in line 50 when going from here, fails with HTTP 400
        list_files(q, nextPageToken=nextPageToken)


list_files("'{0}' in parents".format(ID))

Solution

  • I was getting the same thing. The issue was the query can't change at all. So add the next page token but otherwise you need to send everything else exactly the way it was when it generated the token.