I'm trying to create a page for uploading images to the Google App Engine Blobstore, using AngularJS and ng-flow.
The upload itself seems to work fine, except that every blob is stored as 'application/octet-stream' and named 'blob'. How can I get the Blobstore to recognise the filename and content type?
This is the code I use to upload the files.
Inside FlowEventsCtrl:
$scope.$on('flow::filesSubmitted', function (event, $flow, files) {
    $http.get('/files/upload/create').then(function (resp) {
        $flow.opts.target = resp.data.url;
        $flow.upload();
    });
});
Inside view.html:
<div flow-init="{testChunks:false, singleFile:true}"
     ng-controller="FlowEventsCtrl">
    <div class="panel">
        <span flow-btn>Upload File</span>
    </div>
    <div class="show-files">...</div>
</div>
The server side is as specified in the Blobstore documentation.
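For reference, the documented flow is roughly the following (the handler and route names here are my assumption, chosen to match the '/files/upload/create' endpoint used above):

import json
import webapp2
from google.appengine.ext import blobstore
from google.appengine.ext.webapp import blobstore_handlers

class CreateUploadUrlHandler(webapp2.RequestHandler):
    def get(self):
        # Hand the client a one-shot Blobstore upload URL; Blobstore POSTs
        # the stored blob's metadata to '/files/upload' when the upload ends.
        self.response.headers['Content-Type'] = 'application/json'
        self.response.out.write(json.dumps(
            {'url': blobstore.create_upload_url('/files/upload')}))

class BlobUploadHandler(blobstore_handlers.BlobstoreUploadHandler):
    def post(self):
        upload = self.get_uploads('file')[0]  # BlobInfo of the uploaded file
        # ... store upload.key() and respond ...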
Thanks
I've solved my problem, and in retrospect the answer seems obvious: Flow.js and the Blobstore upload URL do different things. I'll leave my explanation below for anyone making the same naive mistake I did.
The Blobstore upload URL expects a form field containing the file. That field carries the filename and content type of the uploaded data, and its contents are stored as the file in the Blobstore. By default this field is named 'file'.
Flow.js uploads the data in chunks and includes a number of extra form fields with the filename and other metadata. The actual chunk data is uploaded in a field that declares its filename as 'blob' and its content type as 'application/octet-stream'. The server is expected to store the chunks and reassemble them into the file; because a chunk is only part of the file, it is neither named after the file nor given the file's content type. By default this field is also named 'file'.
So the answer to the question is: the files were stored as 'application/octet-stream' and named 'blob' because I was storing individual chunks instead of the complete file. That I was able to store anything at all was only because both sides happen to use the same default field name.
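Concretely, each chunk POST from Flow.js arrives at the server looking roughly like this (the field names come from the handler below; the values are purely illustrative):

# Illustrative form fields of a single Flow.js chunk POST.
# The binary part is named 'file', with filename 'blob' and
# content type 'application/octet-stream'.
{
    'flowChunkNumber': '3',
    'flowChunkSize': '1048576',
    'flowCurrentChunkSize': '1048576',
    'flowTotalSize': '7340032',
    'flowTotalChunks': '7',
    'flowIdentifier': '7340032-examplejpg',
    'flowFilename': 'example.jpg',
    'file': '<chunk 3 of the raw file data>',
}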
As a result the solution was to write my own handler for Flow requests:
class ImageUploadHandler(webapp2.RequestHandler):
    def post(self):
        # Flow.js metadata fields accompanying each chunk.
        chunk_number = int(self.request.params.get('flowChunkNumber'))
        chunk_size = int(self.request.params.get('flowChunkSize'))
        current_chunk_size = int(self.request.params.get('flowCurrentChunkSize'))
        total_size = int(self.request.params.get('flowTotalSize'))
        total_chunks = int(self.request.params.get('flowTotalChunks'))
        identifier = str(self.request.params.get('flowIdentifier'))
        filename = str(self.request.params.get('flowFilename'))
        data = self.request.params.get('file')

        # Persist this chunk; once all chunks are present, assemble the file.
        f = ImageFile(filename, identifier, total_chunks, chunk_size, total_size)
        f.write_chunk(chunk_number, current_chunk_size, data)

        if f.ready_to_build():
            info = f.build()
            if info:
                self.response.headers['Content-Type'] = 'application/json'
                self.response.out.write(json.dumps(info.as_dict()))
            else:
                self.error(500)
        else:
            self.response.headers['Content-Type'] = 'application/json'
            self.response.out.write(json.dumps({
                'chunkNumber': chunk_number,
                'chunkSize': chunk_size,
                'message': 'Chunk ' + str(chunk_number) + ' written'
            }))
Where ImageFile is a class that writes to Google Cloud Storage.
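A minimal wiring sketch (the route is my choice; with testChunks disabled in flow-init, only POST requests arrive):

# Hypothetical wiring; '/files/upload/create' from the question would now
# return this handler's URL instead of a Blobstore upload URL.
app = webapp2.WSGIApplication([
    ('/files/upload', ImageUploadHandler),
])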
Edit:
Below is the ImageFile class. The only thing missing is the ImageInfo class, a simple model that stores the generated serving URL together with the filename.
class ImageFile:
    def __init__(self, filename, identifier, total_chunks, chunk_size, total_size):
        self.bucket_name = os.environ.get('BUCKET_NAME', app_identity.get_default_gcs_bucket_name())
        self.original_filename = filename
        self.filename = '/' + self.bucket_name + '/' + self.original_filename
        self.identifier = identifier
        self.total_chunks = total_chunks
        self.chunk_size = chunk_size
        self.total_size = total_size
        self.stat = None
        self.chunks = []
        self.load_stat()
        self.load_chunks(identifier, total_chunks)

    def load_stat(self):
        try:
            self.stat = gcs.stat(self.filename)
        except gcs.NotFoundError:
            self.stat = None

    def load_chunks(self, identifier, number_of_chunks):
        for n in range(1, number_of_chunks + 1):
            self.chunks.append(Chunk(self.bucket_name, identifier, n))

    def exists(self):
        return self.stat is not None

    def content_type(self):
        # Derive the content type from the file extension.
        if self.filename.lower().endswith('.jpg'):
            return 'image/jpeg'
        elif self.filename.lower().endswith('.jpeg'):
            return 'image/jpeg'
        elif self.filename.lower().endswith('.png'):
            return 'image/png'
        elif self.filename.lower().endswith('.gif'):
            return 'image/gif'
        else:
            return 'binary/octet-stream'

    def ready(self):
        # The assembled file exists and has the expected total size.
        return self.exists() and self.stat.st_size == self.total_size

    def ready_chunks(self):
        for c in self.chunks:
            if not c.exists():
                return False
        return True

    def delete_chunks(self):
        for c in self.chunks:
            c.delete()

    def ready_to_build(self):
        return not self.ready() and self.ready_chunks()

    def write_chunk(self, chunk_number, current_chunk_size, data):
        chunk = self.chunks[int(chunk_number) - 1]
        chunk.write(current_chunk_size, data)

    def build(self):
        try:
            log.info('File \'' + self.filename + '\': assembling chunks.')
            write_retry_params = gcs.RetryParams(backoff_factor=1.1)
            gcs_file = gcs.open(self.filename,
                                'w',
                                content_type=self.content_type(),
                                options={'x-goog-meta-identifier': self.identifier},
                                retry_params=write_retry_params)
            for c in self.chunks:
                log.info('Writing chunk ' + str(c.chunk_number) + ' of ' + str(self.total_chunks))
                c.write_on(gcs_file)
            gcs_file.close()
        except Exception, e:
            log.error('File \'' + self.filename + '\': Error during assembly - ' + e.message)
        else:
            # Assembly succeeded: drop the chunk objects and register a
            # serving URL for the finished file.
            self.delete_chunks()
            key = blobstore.create_gs_key('/gs' + self.filename)
            url = images.get_serving_url(key)
            info = ImageInfo(name=self.original_filename, url=url)
            info.put()
            return info
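The ImageInfo model used in build() might look roughly like this (a minimal sketch; ndb and the exact properties are my assumption, inferred from how the handler calls it):

from google.appengine.ext import ndb

class ImageInfo(ndb.Model):
    # Original filename plus the generated serving URL.
    name = ndb.StringProperty()
    url = ndb.StringProperty()

    def as_dict(self):
        return {'name': self.name, 'url': self.url}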
The Chunk class:
class Chunk:
    def __init__(self, bucket_name, identifier, chunk_number):
        self.chunk_number = chunk_number
        self.filename = '/' + bucket_name + '/' + identifier + '-chunk-' + str(chunk_number)
        self.stat = None
        self.load_stat()

    def load_stat(self):
        try:
            self.stat = gcs.stat(self.filename)
        except gcs.NotFoundError:
            self.stat = None

    def exists(self):
        return self.stat is not None

    def write(self, size, data):
        # Store the raw chunk as its own GCS object.
        write_retry_params = gcs.RetryParams(backoff_factor=1.1)
        gcs_file = gcs.open(self.filename, 'w', retry_params=write_retry_params)
        for c in data.file:
            gcs_file.write(c)
        gcs_file.close()
        self.load_stat()

    def write_on(self, stream):
        # Copy this chunk's contents onto the open destination file.
        gcs_file = gcs.open(self.filename)
        try:
            data = gcs_file.read()
            while data:
                stream.write(data)
                data = gcs_file.read()
        except gcs.Error, e:
            log.error('Error writing data to chunk: ' + e.message)
        finally:
            gcs_file.close()

    def delete(self):
        try:
            gcs.delete(self.filename)
            self.stat = None
        except gcs.NotFoundError:
            pass
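For completeness, the snippets above assume roughly the following imports and logger setup (inferred from the code; gcs is the App Engine Cloud Storage client library, GoogleAppEngineCloudStorageClient):

import json
import logging
import os

import cloudstorage as gcs
import webapp2

from google.appengine.api import app_identity, images
from google.appengine.ext import blobstore

log = logging.getLogger(__name__)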