Search code examples
django, django-models, django-uploads

Django: Validate file type of uploaded file


I have an app that lets people upload files, represented as UploadedFiles. However, I want to make sure that users only upload xml files. I know I can do this using magic, but I don't know where to put this check - I can't put it in the clean function since the file is not yet uploaded when clean runs, as far as I can tell.

Here's the UploadedFile model:

class UploadedFile(models.Model):
    """A file that has been uploaded to the server, plus its processing state.

    Each row ties one stored file to a ``Project`` and tracks where the file
    is in the workflow through ``status``.
    """

    # Workflow states stored in ``status``.
    # NOTE(review): the value 3 is skipped; left as-is because existing rows
    # may already store 4 for "Processed".
    STATE_UPLOADED = 0
    STATE_ANNOTATED = 1
    STATE_PROCESSING = 2
    STATE_PROCESSED = 4
    STATES = (
        (STATE_UPLOADED, "Uploaded"),
        (STATE_ANNOTATED, "Annotated"),
        (STATE_PROCESSING, "Processing"),
        (STATE_PROCESSED, "Processed"),
    )

    status = models.SmallIntegerField(choices=STATES,
        default=STATE_UPLOADED, blank=True, null=True)
    file = models.FileField(upload_to=settings.XML_ROOT)
    project = models.ForeignKey(Project)

    def __unicode__(self):
        """Return the stored file name (relative to the storage root)."""
        return self.file.name

    def name(self):
        """Return only the base name of the stored file, without directories."""
        return os.path.basename(self.file.name)

    def save(self, *args, **kwargs):
        """Save the row, defaulting a missing status to ``STATE_UPLOADED``."""
        # ``status`` is nullable; both None and 0 are falsy here and both end
        # up as STATE_UPLOADED (which is 0).
        if not self.status:
            self.status = self.STATE_UPLOADED
        super(UploadedFile, self).save(*args, **kwargs)

    def delete(self, *args, **kwargs):
        """Delete the stored file (if any) and then the database row."""
        # Let the storage backend remove the file. Calling os.remove() on
        # ``self.file.path`` first would delete it twice, and would raise if
        # the file is already gone, if no file is attached, or if the storage
        # backend has no local filesystem path.
        if self.file:
            # save=False: the row is about to be deleted, do not re-save it.
            self.file.delete(False)
        super(UploadedFile, self).delete(*args, **kwargs)

    def get_absolute_url(self):
        """Return the URL path of this upload's page, built from ``self.id``."""
        return u'/upload/projects/%d' % self.id

    def clean(self):
        """Reject files whose content libmagic does not identify as XML.

        Raises:
            ValidationError: if the detected file type does not contain "XML".
        """
        if not self.file:
            # No file attached: the required-field check reports that error,
            # so there is nothing to sniff here.
            return
        # Inspect the content itself rather than ``self.file.url``: a URL is
        # not a filesystem path, and a pending upload is not on disk yet.
        # Only the leading bytes are needed for type detection.
        self.file.seek(0)
        header = self.file.read(2048)
        # Rewind so whoever stores the file afterwards sees it from the start.
        self.file.seek(0)
        if "XML" not in magic.from_buffer(header):
            raise ValidationError(u'Not an xml file.')

class UploadedFileForm(forms.ModelForm):
    """Model form for ``UploadedFile`` that leaves out the ``project`` field."""

    class Meta:
        # ``project`` is excluded from the form, so it is never rendered or
        # validated as user input.
        exclude = ("project",)
        model = UploadedFile

Solution

  • For posterity: the solution is to read the uploaded file's bytes with its read() method and pass them to magic.from_buffer.

    class UploadedFileForm(ModelForm):
        """Model form for ``UploadedFile`` that only accepts XML uploads."""

        def clean_file(self):
            """Validate that the uploaded ``file`` is detected as XML.

            Returns:
                The cleaned value of the ``file`` field, unchanged.

            Raises:
                ValidationError: if libmagic's description of the content
                    does not contain "XML".
            """
            uploaded = self.cleaned_data.get("file", False)
            if not uploaded:
                # Nothing to inspect (e.g. optional or cleared field); hand
                # the empty value back instead of calling .read() on it.
                return uploaded
            # Sniff only the leading bytes: type detection does not need the
            # whole upload, and reading all of it could exhaust memory.
            uploaded.seek(0)
            header = uploaded.read(2048)
            # Rewind so later consumers read the file from the beginning.
            uploaded.seek(0)
            if "XML" not in magic.from_buffer(header):
                raise ValidationError("File is not XML.")
            return uploaded

        class Meta:
            model = models.UploadedFile
            exclude = ('project',)