The following function works well for copying a single file to Google Cloud Storage.
#!/usr/bin/python3.5
import googleapiclient.discovery
from google.cloud import storage
def upload_blob(bucket_name, source_file_name, destination_blob_name, project):
    """Upload a single local file to a Google Cloud Storage bucket.

    Args:
        bucket_name: Name of the destination GCS bucket.
        source_file_name: Path of the local file to upload.
        destination_blob_name: Object name to create in the bucket.
        project: Google Cloud project that owns the bucket.
    """
    storage_client = storage.Client(project=project)
    # get_bucket fetches the bucket handle from the API before uploading.
    bucket = storage_client.get_bucket(bucket_name)
    blob = bucket.blob(destination_blob_name)
    blob.upload_from_filename(source_file_name)
    print('File {} uploaded to {}.'.format(
        source_file_name,
        destination_blob_name))
Now, instead of giving a filename, I tried passing a directory name: upload_blob('mybucket', '/data/inputdata/', 'myapp/inputdata/', 'myapp')
but then i get this error:
AttributeError: 'str' object has no attribute 'read'
Do I need to pass any additional parameters when calling blob.upload_from_file()
to copy a directory?
Here's some code you can use to accomplish this:
import os
import glob
def copy_local_directory_to_gcs(local_path, bucket, gcs_path):
    """Recursively copy a directory of files to GCS.

    Args:
        local_path: Local directory to copy (a trailing slash is tolerated).
        bucket: A google.cloud.storage Bucket object (see snippet below).
        gcs_path: Destination prefix inside the bucket, without trailing slash.

    Raises:
        ValueError: If local_path is not an existing directory.
    """
    if not os.path.isdir(local_path):
        # Raise instead of assert: asserts are stripped under `python -O`.
        raise ValueError('local_path must be an existing directory: %r' % (local_path,))
    # NOTE: '**' only descends into subdirectories when recursive=True;
    # without it the pattern matches a single directory level only.
    pattern = os.path.join(local_path, '**')
    for local_file in glob.glob(pattern, recursive=True):
        if not os.path.isfile(local_file):
            continue  # skip directories; their contained files match directly
        # Build the object name with forward slashes explicitly — GCS object
        # names always use '/', while os.path.join would insert '\' on Windows.
        rel = os.path.relpath(local_file, local_path).replace(os.sep, '/')
        remote_path = gcs_path.rstrip('/') + '/' + rel if gcs_path else rel
        blob = bucket.blob(remote_path)
        blob.upload_from_filename(local_file)
Use it like so:
copy_local_directory_to_gcs('path/to/foo', bucket, 'remote/path/to/foo')
where bucket
is the usual object from the Google Cloud Storage API:
# Build a GCS client bound to your project and fetch the bucket handle;
# the resulting `bucket` is what copy_local_directory_to_gcs expects.
from google.cloud import storage
client = storage.Client(project='your-project')
bucket = client.get_bucket('bucket-name')