Tags: python, angularjs, google-app-engine, file-upload, flow-js

Files uploaded with ng-flow to the GAE blobstore are always named 'blob'


I'm trying to create a page for uploading images to the Google App Engine blobstore. I'm using AngularJS and ng-flow to achieve this.

The uploading part seems to work fine, except that all blobs are stored as 'application/octet-stream' and named 'blob'. How can I get the blobstore to recognise the filename and content type?

This is the code I use to upload the files.

Inside FlowEventsCtrl:

$scope.$on('flow::filesSubmitted', function (event, $flow, files) {
    $http.get('/files/upload/create').then(function (resp) {
        $flow.opts.target = resp.data.url;
        $flow.upload();
    });
});

Inside view.html:

<div flow-init="{testChunks:false, singleFile:true}" 
     ng-controller="FlowEventsCtrl">
    <div class="panel">
        <span flow-btn>Upload File</span>
    </div>
    <div class="show-files">...</div>
</div>

The server side is as specified in the blobstore documentation.
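For reference, the '/files/upload/create' endpoint the controller calls just hands back a blobstore upload URL, roughly as in the documentation's example (the handler class name and the '/files/upload' success path here are illustrative):

import json
import webapp2
from google.appengine.ext import blobstore

class CreateUploadUrlHandler(webapp2.RequestHandler):
    """Returns a one-time blobstore upload URL as JSON (illustrative name)."""
    def get(self):
        # The blobstore will POST the finished upload to '/files/upload'.
        upload_url = blobstore.create_upload_url('/files/upload')
        self.response.headers['Content-Type'] = 'application/json'
        self.response.out.write(json.dumps({'url': upload_url}))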

Thanks


Solution

  • I've solved my problem, and in retrospect the answer seems obvious. Flow.js and the blobstore upload URL do different things. I'll leave my explanation below for anyone making the same naive mistake I did.

    The blobstore expects a single multipart field containing the file. This field carries the filename and content type of the uploaded data, and its contents are stored as the file in the blobstore. By default this field is named 'file'.
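    To illustrate, the upload handler from the blobstore documentation reads that field through get_uploads; a minimal sketch, with route and class names of my choosing:

    from google.appengine.ext.webapp import blobstore_handlers

    class PhotoUploadHandler(blobstore_handlers.BlobstoreUploadHandler):
        def post(self):
            # get_uploads('file') yields the BlobInfo records created from
            # the multipart part named 'file'.
            upload = self.get_uploads('file')[0]
            # upload.filename and upload.content_type come from that part's
            # headers -- exactly the two things a Flow chunk does not carry.
            self.redirect('/photos/%s' % upload.key())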

    Flow uploads the data in chunks and includes a number of extra form fields for the filename and other metadata. The actual chunk data is uploaded in a field whose filename is 'blob' and whose content type is 'application/octet-stream'. The server is expected to store the chunks and reassemble them into the file. Because each chunk is only part of the file, it is neither named after the file nor given the file's content type. By default this field is also named 'file'.
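    Concretely, the form fields of a single chunk POST look roughly like this (shown as a Python dict for illustration; the values are made up):

    # Illustrative shape of one Flow chunk request's form fields.
    flow_chunk_fields = {
        'flowChunkNumber': '1',
        'flowChunkSize': '1048576',
        'flowCurrentChunkSize': '1048576',
        'flowTotalSize': '3145728',
        'flowIdentifier': '3145728-photojpg',
        'flowFilename': 'photo.jpg',
        'flowRelativePath': 'photo.jpg',
        'flowTotalChunks': '3',
        # plus the multipart part named 'file', sent with
        # filename="blob" and Content-Type: application/octet-stream
    }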

    So the answer to the question is: the files are stored as 'application/octet-stream' and named 'blob' because I was storing chunks instead of the actual file. That I was able to store anything at all was a result of both sides using the same default field name.

    As a result, the solution was to write my own handler for Flow requests:

    import json

    import webapp2

    class ImageUploadHandler(webapp2.RequestHandler):
        def post(self):
            # Flow sends its metadata as plain form fields next to the chunk.
            chunk_number = int(self.request.params.get('flowChunkNumber'))
            chunk_size = int(self.request.params.get('flowChunkSize'))
            current_chunk_size = int(self.request.params.get('flowCurrentChunkSize'))
            total_size = int(self.request.params.get('flowTotalSize'))
            total_chunks = int(self.request.params.get('flowTotalChunks'))
            identifier = str(self.request.params.get('flowIdentifier'))
            filename = str(self.request.params.get('flowFilename'))
            data = self.request.params.get('file')  # the chunk payload (a FieldStorage)

            f = ImageFile(filename, identifier, total_chunks, chunk_size, total_size)
            f.write_chunk(chunk_number, current_chunk_size, data)

            if f.ready_to_build():
                # All chunks are in place: assemble the final file.
                info = f.build()
                if info:
                    self.response.headers['Content-Type'] = 'application/json'
                    self.response.out.write(json.dumps(info.as_dict()))
                else:
                    self.error(500)
            else:
                # Acknowledge this chunk; Flow keeps sending the rest.
                self.response.headers['Content-Type'] = 'application/json'
                self.response.out.write(json.dumps({
                    'chunkNumber': chunk_number,
                    'chunkSize': chunk_size,
                    'message': 'Chunk ' + str(chunk_number) + ' written'
                }))
    

    Where ImageFile is a class that writes to Google Cloud Storage.
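    Wired into the app, it looks roughly like this; the '/files/upload' route is my choice, and the client's $flow.opts.target must point at it:

    import webapp2

    # Assumed route; $flow.opts.target on the client must match it.
    app = webapp2.WSGIApplication([
        ('/files/upload', ImageUploadHandler),
    ], debug=True)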

    Edit:

    Below is the ImageFile class. The only thing missing is the ImageInfo class, a simple model that stores the generated URL with the filename; a minimal sketch of it follows after the Chunk class.

    import logging
    import os

    import cloudstorage as gcs
    from google.appengine.api import app_identity, images
    from google.appengine.ext import blobstore

    log = logging.getLogger(__name__)

    class ImageFile:
        def __init__(self, filename, identifier, total_chunks, chunk_size, total_size):
            self.bucket_name = os.environ.get('BUCKET_NAME', app_identity.get_default_gcs_bucket_name())
            self.original_filename = filename
            self.filename = '/' + self.bucket_name + '/' + self.original_filename
            self.identifier = identifier
            self.total_chunks = total_chunks
            self.chunk_size = chunk_size
            self.total_size = total_size
            self.stat = None
            self.chunks = []
            self.load_stat()
            self.load_chunks(identifier, total_chunks)

        def load_stat(self):
            try:
                self.stat = gcs.stat(self.filename)
            except gcs.NotFoundError:
                self.stat = None

        def load_chunks(self, identifier, number_of_chunks):
            for n in range(1, number_of_chunks + 1):
                self.chunks.append(Chunk(self.bucket_name, identifier, n))

        def exists(self):
            return self.stat is not None

        def content_type(self):
            # Derive the content type from the original file extension.
            name = self.filename.lower()
            if name.endswith('.jpg') or name.endswith('.jpeg'):
                return 'image/jpeg'
            elif name.endswith('.png'):
                return 'image/png'
            elif name.endswith('.gif'):
                return 'image/gif'
            else:
                return 'application/octet-stream'

        def ready(self):
            # The assembled file exists and has the expected total size.
            return self.exists() and self.stat.st_size == self.total_size

        def ready_chunks(self):
            for c in self.chunks:
                if not c.exists():
                    return False
            return True

        def delete_chunks(self):
            for c in self.chunks:
                c.delete()

        def ready_to_build(self):
            return not self.ready() and self.ready_chunks()

        def write_chunk(self, chunk_number, current_chunk_size, data):
            chunk = self.chunks[int(chunk_number) - 1]
            chunk.write(current_chunk_size, data)

        def build(self):
            try:
                log.info('File \'' + self.filename + '\': assembling chunks.')
                write_retry_params = gcs.RetryParams(backoff_factor=1.1)
                gcs_file = gcs.open(self.filename,
                                    'w',
                                    content_type=self.content_type(),
                                    options={'x-goog-meta-identifier': self.identifier},
                                    retry_params=write_retry_params)
                for c in self.chunks:
                    log.info('Writing chunk ' + str(c.chunk_number) + ' of ' + str(self.total_chunks))
                    c.write_on(gcs_file)
                gcs_file.close()
            except Exception as e:
                log.error('File \'' + self.filename + '\': Error during assembly - ' + str(e))
            else:
                # Assembly succeeded: drop the chunks and create a serving URL
                # for the assembled object via the blobstore API.
                self.delete_chunks()
                key = blobstore.create_gs_key('/gs' + self.filename)
                url = images.get_serving_url(key)
                info = ImageInfo(name=self.original_filename, url=url)
                info.put()
                return info
    

    The Chunk class:

    class Chunk:
        def __init__(self, bucket_name, identifier, chunk_number):
            self.chunk_number = chunk_number
            self.filename = '/' + bucket_name + '/' + identifier + '-chunk-' + str(chunk_number)
            self.stat = None
            self.load_stat()

        def load_stat(self):
            try:
                self.stat = gcs.stat(self.filename)
            except gcs.NotFoundError:
                self.stat = None

        def exists(self):
            return self.stat is not None

        def write(self, size, data):
            # 'data' is the FieldStorage for the 'file' field; copy its payload
            # into this chunk's own GCS object in fixed-size blocks.
            write_retry_params = gcs.RetryParams(backoff_factor=1.1)
            gcs_file = gcs.open(self.filename, 'w', retry_params=write_retry_params)
            block = data.file.read(1024 * 1024)
            while block:
                gcs_file.write(block)
                block = data.file.read(1024 * 1024)
            gcs_file.close()
            self.load_stat()

        def write_on(self, stream):
            # Append this chunk's contents to an already-open GCS file.
            gcs_file = gcs.open(self.filename)

            try:
                data = gcs_file.read(1024 * 1024)
                while data:
                    stream.write(data)
                    data = gcs_file.read(1024 * 1024)
            except gcs.Error as e:
                log.error('Error writing data to chunk: ' + str(e))
            finally:
                gcs_file.close()

        def delete(self):
            try:
                gcs.delete(self.filename)
                self.stat = None
            except gcs.NotFoundError:
                pass
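
    For completeness, a minimal sketch of the ImageInfo model used in build(), assuming ndb; the answer only needs a name, a url, and the as_dict() method the upload handler calls:

    from google.appengine.ext import ndb

    class ImageInfo(ndb.Model):
        # Maps the original filename to its generated serving URL.
        name = ndb.StringProperty(required=True)
        url = ndb.StringProperty(required=True)

        def as_dict(self):
            return {'name': self.name, 'url': self.url}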