Search code examples
pythondjangodjango-formsdjango-file-upload

clean() method causes files to lose data using POST form


I have set up a form and view to upload multiple *.gpx files to my website at once. These files are validated using a clean() method on the form and then, once validated, passed to a function for processing.

When I upload some invalid files the clean() method catches them and informs the user as expected.

When I upload some valid files the processing function crashes with an error saying the files are empty.

If I comment out the clean() method then the valid files are uploaded fine.

What can be happening to the form during the clean() method that means the files are being blanked?

here is my form:

class UploadGpxForm(forms.Form):
    """Form that accepts one or more GPX files in a single submission."""

    # The ``multiple`` attribute on the widget lets the browser select
    # several files for this one field.
    gpx_file = forms.FileField(
        widget=forms.ClearableFileInput(attrs=dict(multiple=True)),
    )

here is my view:

class UploadGpxView(FormView):
    """Handle the GPX upload page: validate the form, then process each file."""

    form_class = UploadGpxForm
    template_name = 'dashboard/upload.html' # Replace with your template.
    success_url = reverse_lazy('dashboard:index')  # Replace with your URL or reverse().

    def post(self, request, *args, **kwargs):
        """Validate the submitted form and hand every uploaded file on.

        An invalid form goes to ``form_invalid``. Otherwise each file posted
        under ``gpx_file`` is passed to ``SaveGPXtoPostGIS`` and the response
        from ``form_valid`` is returned.
        """
        form = self.get_form(self.get_form_class())

        # Guard clause: nothing is processed unless the whole form validates.
        if not form.is_valid():
            return self.form_invalid(form)

        for uploaded in request.FILES.getlist('gpx_file'):
            SaveGPXtoPostGIS(uploaded)
        return self.form_valid(form)

Here is my clean method for the UploadGpxForm:

    def clean(self):
        """Validate every uploaded GPX file and report all problems at once.

        Each file submitted under ``gpx_file`` is checked for:

        * a name containing exactly one full stop;
        * a content type listed in ``settings.DASHBOARD_UPLOAD_FILE_TYPES``;
        * a size not exceeding ``settings.DASHBOARD_UPLOAD_FILE_MAX_SIZE``
          (only checked when the content type is allowed);
        * an MD5 hash that no existing ``gpxTrack`` row already has.

        Returns:
            The list of uploaded files, each rewound to its first byte so
            that later consumers can read the full content.

        Raises:
            ValidationError: wrapping one error per failed check, if any
                check failed for any file.
        """
        file_errors = []

        files = list(self.files.getlist('gpx_file'))

        for f in files:
            # Check the file name has only one full stop in it.
            if len(f.name.split('.')) != 2:
                file_errors.append(ValidationError(
                    _('%(file_name)s has not been uploaded: '
                      'File type is not supported'),
                    params={'file_name': f.name},
                    code='file_type',
                ))

            if f.content_type in settings.DASHBOARD_UPLOAD_FILE_TYPES:
                # Allowed type: check it doesn't breach the size limit from
                # settings. ``size`` is the public attribute; ``_size`` was a
                # private implementation detail.
                if f.size > settings.DASHBOARD_UPLOAD_FILE_MAX_SIZE:
                    file_errors.append(ValidationError(
                        _('%(file_name)s has not been uploaded: File too big. '
                          'Please keep filesize under %(setting_size)s. '
                          'Current filesize %(file_size)s'),
                        params={
                            'file_name': f.name,
                            'setting_size': filesizeformat(
                                settings.DASHBOARD_UPLOAD_FILE_MAX_SIZE),
                            'file_size': filesizeformat(f.size),
                        },
                        code='file_size',
                    ))
            else:
                # Not one of our allowed file types.
                file_errors.append(ValidationError(
                    _('%(file_name)s has not been uploaded: '
                      'File type is not supported'),
                    params={'file_name': f.name},
                    code='file_type',
                ))

            # Next check the file hasn't been uploaded before, by MD5 hash.
            md5hash = md5()
            for chunk in f.chunks():
                md5hash.update(chunk)
            file_hash = md5hash.hexdigest()
            # Hashing consumed the stream; rewind it, otherwise whatever
            # processes the file after validation reads it as empty.
            f.seek(0)

            if gpxTrack.objects.filter(file_hash=file_hash).exists():
                file_errors.append(ValidationError(
                    _('%(file_name)s has not been uploaded as a identical file '
                      'has already been uploaded previously'),
                    params={'file_name': f.name},
                    code='file_hash',
                ))

        # Finally raise the errors if there are any.
        if file_errors:
            raise ValidationError(file_errors)

        # NOTE(review): Django's convention is for Form.clean() to return the
        # cleaned_data dict (or None). The file list is returned here to keep
        # the existing behaviour - confirm nothing relies on
        # form.cleaned_data being a dict.
        return files

Solution

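  • Reading the file content (to calculate the MD5 hash) leaves the file object’s position at the end of the file, so anything that reads the file afterwards gets no data. Once the hash has been calculated, move the file object’s position back to the beginning (0th byte) using file.seek: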

    # Hash the upload chunk by chunk; this reads the file through to its end.
    md5hash = md5()
    for chunk in f.chunks():
        md5hash.update(chunk)
    file_hash = md5hash.hexdigest()
    # Rewind so code that runs after validation reads the file from the start.
    f.seek(0)  #<-- add this line