There is a huge binary file uploaded to Google Drive. I am developing a Tornado-based HTTP proxy server that serves a binary stream of that same file. It seems natural to proxy the huge file in multiple chunks (download each chunk using PyDrive, then send it to the client with self.write(chunk) or something similar).
The problem is that there seems to be only one choice for downloading chunked binary files from Google Drive: googleapiclient.http.MediaIoBaseDownload. However, this class only accepts file descriptors or io.Base objects as its first argument.
My code looks something like this:
import tornado.httpserver
import tornado.ioloop
import tornado.web
from googleapiclient.http import MediaIoBaseDownload
import io
class VideoContentHandler(tornado.web.RequestHandler):
    """Handle GET requests for a video stored in a Google Drive folder.

    The PyDrive client and the folder id are read from the application
    object (``self.application.drive`` and
    ``self.application.google_drive_folder_id``).
    """

    def get(self, googledrive_id):
        """Download the requested Drive file in 2 MiB chunks.

        At present the chunks are written to a local temporary file and
        nothing is written to the HTTP response yet.

        Args:
            googledrive_id: The captured path segment, including its
                leading "/". It is None when the URL carries no id,
                because the route registered in main() makes the capture
                group optional.

        Raises:
            tornado.web.HTTPError: 404 when no id was captured.
        """
        # Without this guard a bare /videocontents request would fail with
        # a TypeError on the slice below.
        if not googledrive_id:
            raise tornado.web.HTTPError(404)
        googledrive_id = googledrive_id[1:]  # drop the leading "/"

        query = "'{}' in parents and trashed=false".format(
            self.application.google_drive_folder_id)
        file_list = self.application.drive.ListFile({'q': query}).GetList()

        # io.FileIO will save the chunks to a local file!
        # This is not what I want.
        # Using something different may solve the problem?
        with io.FileIO('/tmp/bigvid-from-pydrive.mp4', mode='wb') as local_file:
            for f in file_list:
                if f['id'] != googledrive_id:
                    continue
                # Named file_id so the builtin id() is not shadowed.
                file_id = f.metadata.get('id')
                request = self.application.drive.auth.service.files().get_media(
                    fileId=file_id)
                downloader = MediaIoBaseDownload(
                    local_file, request, chunksize=2048 * 1024)
                done = False
                while not done:
                    status, done = downloader.next_chunk()
                    # Flush buffer and self.write(chunk)?
def main():
    """Authenticate with Google Drive and start the proxy on port 8888.

    The Drive client and the folder id are attached to the Application
    object, because VideoContentHandler reads them through
    ``self.application``.
    """
    # NOTE(review): GoogleAuth, GoogleDrive and `handlers` are not imported
    # in the visible snippet -- presumably pydrive.auth / pydrive.drive and a
    # project package; confirm the imports exist in the real module.
    gauth = GoogleAuth()
    gauth.CommandLineAuth()  # Already done

    app = tornado.web.Application([
        (r"^/videocontents(/.+)?$", handlers.api.VideoContentHandler),
    ])
    # main() is a plain function, so there is no `self` to assign to here.
    # The shared state lives on the application, where handlers look it up.
    app.drive = GoogleDrive(gauth)
    app.google_drive_folder_id = '<GOOGLE_DRIVE_FOLDER_ID>'

    http_server = tornado.httpserver.HTTPServer(app)
    http_server.listen(8888)
    tornado.ioloop.IOLoop.instance().start()


if __name__ == "__main__":
    main()
When should I call self.write(chunk)?
You can use io.BytesIO instead of io.FileIO: it keeps each chunk in memory instead of writing it to disk, so it will be faster.
I haven't tested it, but this is how your code would look (read the comments for explanation):
from tornado import gen
# You need to make your get method a coroutine so that it can yield
# while each chunk is flushed to the client.
@gen.coroutine
def get(self, googledrive_id):
    ...
    # with io.FileIO(...) <<<< YOU DON'T NEED THIS LINE NOW
    for f in file_list:
        ...
        buffer = io.BytesIO() # create an in-memory BytesIO object
        downloader = MediaIoBaseDownload(buffer, request, chunksize=2048*1024)
        # Now is the time to set appropriate headers
        self.set_header('Content-Type', 'video/mp4')
        # if you know the size of the video,
        # write the Content-Length header
        self.set_header('Content-Length', <size of file in bytes>)
        # if not, it's still ok
        done = False
        while done is False:
            # NOTE(review): next_chunk() looks like a synchronous call, so it
            # would block the IOLoop while each chunk downloads -- confirm,
            # and consider running it in an executor.
            status, done = downloader.next_chunk()
            # at this point, downloader has written
            # the chunk to buffer
            # we'll read that data and write it to the response
            self.write(buffer.getvalue())
            # now flush the data to the socket
            yield self.flush()
            # we'll also need to empty the buffer,
            # otherwise it will eat up all the RAM
            buffer.truncate(0)
            # truncate() leaves the stream position unchanged, so seek back
            # to the beginning or else the next chunk will be messed up
            buffer.seek(0)