Determine whether a file is a zip or a docx/xlsx based only on its base64 string

Is there any way to determine whether an uploaded file is a zip file or a docx/xlsx file based only on its base64 string? I am asking specifically because I do not have the file extension.

So far I have only found solutions that also rely on the file extension, like this one:

     // The four leading bytes this excerpt tests for: 80, 75, 3, 4 are ASCII 'P', 'K', 0x03, 0x04.
     // The name suggests plain .zip and .docx files share this prefix, which is why the method
     // below falls back to the file extension to tell them apart.
     private static readonly byte[] ZIP_DOCX = { 80, 75, 3, 4 };

     // Picks a MIME type for `file` by checking its first four bytes against ZIP_DOCX and then
     // using the extension of `fileName` to distinguish a .docx document from any other zip.
     // NOTE(review): this is an excerpt -- `mime` is never declared and nothing is returned here;
     // presumably the surrounding original code handles both.
     public static string GetMimeType(byte[] file, string fileName)
     {
        // Upper-cased extension (including the leading dot), or empty when GetExtension yields null.
        string extension = Path.GetExtension(fileName) == null
                           ? string.Empty
                           : Path.GetExtension(fileName).ToUpper();

         // Signature match only proves "zip container"; the extension decides docx vs. generic zip.
         if (file.Take(4).SequenceEqual(ZIP_DOCX))
         {
            mime = extension == ".DOCX" ? "application/vnd.openxmlformats-officedocument.wordprocessingml.document" : "application/x-zip-compressed";
         }
      }

But as I said, this solution does not work for me because I do not have the file's extension. Any ideas?

Solution

This sounded interesting, so I played around with it. It isn't based on a spec, so it is not super reliable: I just extracted one Excel file and one Word file and guessed at what the identifying characteristics might be. It seems to work.

/// <summary>
/// Result of classifying a payload by its container format.
/// </summary>
public enum FileKind
{
    /// <summary>The bytes could not be opened as a ZIP archive.</summary>
    NotZip,

    /// <summary>A ZIP archive that was not recognised as a Word or Excel package.</summary>
    OtherZip,

    /// <summary>A package whose content types mention SpreadsheetML.</summary>
    Xlsx,

    /// <summary>A package whose content types mention WordprocessingML.</summary>
    Docx
}

/// <summary>
/// Heuristically classifies a base64-encoded file by inspecting the
/// <c>[Content_Types].xml</c> entry of the ZIP archive it may contain.
/// </summary>
public static class FileKindDecoder
{
    private const string ContentTypesEntryName = "[Content_Types].xml";

    private static readonly XNamespace ContentTypesNamespace =
        "http://schemas.openxmlformats.org/package/2006/content-types";

    /// <summary>
    /// Determines whether the decoded bytes are a docx, an xlsx, some other zip, or not a zip at all.
    /// </summary>
    /// <param name="base64">The file content encoded as a base64 string.</param>
    /// <returns>
    /// <see cref="FileKind.NotZip"/> when the bytes are not a ZIP archive;
    /// <see cref="FileKind.Xlsx"/> or <see cref="FileKind.Docx"/> when the first matching
    /// <c>Override</c> content type contains "spreadsheetml" or "wordprocessingml";
    /// otherwise <see cref="FileKind.OtherZip"/>.
    /// </returns>
    /// <exception cref="ArgumentNullException"><paramref name="base64"/> is null.</exception>
    /// <exception cref="FormatException"><paramref name="base64"/> is not valid base64.</exception>
    public static FileKind DetermineFileKind(string base64)
    {
        var bytes = Convert.FromBase64String(base64);

        using var stream = new MemoryStream(bytes);
        using var zip = OpenZip(stream);
        if (zip == null)
        {
            return FileKind.NotZip;
        }

        var contentTypesEntry = zip.GetEntry(ContentTypesEntryName);
        if (contentTypesEntry == null)
        {
            return FileKind.OtherZip;
        }

        var contentTypesXml = ReadXmlFromZip(contentTypesEntry);
        if (contentTypesXml == null || contentTypesXml.Name != ContentTypesNamespace + "Types")
        {
            return FileKind.OtherZip;
        }

        foreach (var overrideElement in contentTypesXml.Elements(ContentTypesNamespace + "Override"))
        {
            var contentType = overrideElement.Attribute("ContentType")?.Value;
            if (contentType == null)
            {
                continue;
            }

            // Within one Override the spreadsheet check wins; across Overrides the first hit wins.
            if (contentType.Contains("spreadsheetml"))
            {
                return FileKind.Xlsx;
            }

            if (contentType.Contains("wordprocessingml"))
            {
                return FileKind.Docx;
            }
        }

        return FileKind.OtherZip;
    }

    /// <summary>Opens <paramref name="stream"/> as a ZIP archive, or returns null when it is not one.</summary>
    private static ZipArchive? OpenZip(Stream stream)
    {
        try
        {
            return new ZipArchive(stream);
        }
        catch (InvalidDataException)
        {
            return null;
        }
    }

    /// <summary>
    /// Parses a ZIP entry as XML and returns its root element, or null when the entry
    /// cannot be decompressed or is not acceptable XML.
    /// </summary>
    private static XElement? ReadXmlFromZip(ZipArchiveEntry entry)
    {
        // The payload is an untrusted upload: refuse DTDs (entity expansion / external
        // entities) instead of relying on the permissive defaults of XElement.Load(Stream).
        var settings = new XmlReaderSettings
        {
            DtdProcessing = DtdProcessing.Prohibit,
            XmlResolver = null,
            IgnoreWhitespace = true
        };

        try
        {
            // Open() and the decompressing reads can both throw InvalidDataException for a
            // damaged or unsupported entry, so they belong inside the try block.
            using var entryStream = entry.Open();
            using var reader = XmlReader.Create(entryStream, settings);
            return XElement.Load(reader);
        }
        catch (XmlException)
        {
            return null;
        }
        catch (InvalidDataException)
        {
            return null;
        }
    }
}

A simplified approach that only reads the archive's list of entry names; it does not extract any entry or parse any XML.

/// <summary>
/// Result of classifying a payload by its container format.
/// </summary>
public enum FileKind
{
    /// <summary>The bytes could not be opened as a ZIP archive.</summary>
    NotZip,

    /// <summary>A ZIP archive that was not recognised as a Word or Excel package.</summary>
    OtherZip,

    /// <summary>A package that contains <c>xl/workbook.xml</c>.</summary>
    Xlsx,

    /// <summary>A package that contains <c>word/document.xml</c>.</summary>
    Docx
}

/// <summary>
/// Heuristically classifies a base64-encoded file using only the entry names listed
/// in the ZIP archive it may contain; no entry is decompressed.
/// </summary>
public static class FileKindDecoder
{
    /// <summary>
    /// Determines whether the decoded bytes are a docx, an xlsx, some other zip, or not a zip at all.
    /// </summary>
    /// <param name="base64">The file content encoded as a base64 string.</param>
    /// <returns>
    /// <see cref="FileKind.NotZip"/> when the bytes are not a ZIP archive;
    /// <see cref="FileKind.Docx"/> or <see cref="FileKind.Xlsx"/> when the archive holds
    /// <c>[Content_Types].xml</c> together with <c>word/document.xml</c> or <c>xl/workbook.xml</c>
    /// (the Word check is made first); otherwise <see cref="FileKind.OtherZip"/>.
    /// </returns>
    /// <exception cref="ArgumentNullException"><paramref name="base64"/> is null.</exception>
    /// <exception cref="FormatException"><paramref name="base64"/> is not valid base64.</exception>
    public static FileKind DetermineFileKind(string base64)
    {
        var bytes = Convert.FromBase64String(base64);

        HashSet<string> partNames;
        try
        {
            using var stream = new MemoryStream(bytes);
            using var zip = new ZipArchive(stream);

            // Part names in an Open Packaging Conventions package are compared
            // case-insensitively, so "Word/Document.xml" must match as well.
            partNames = new HashSet<string>(
                zip.Entries.Select(entry => entry.FullName),
                StringComparer.OrdinalIgnoreCase);
        }
        catch (InvalidDataException)
        {
            return FileKind.NotZip;
        }

        if (!partNames.Contains("[Content_Types].xml"))
        {
            return FileKind.OtherZip;
        }

        if (partNames.Contains("word/document.xml"))
        {
            return FileKind.Docx;
        }

        if (partNames.Contains("xl/workbook.xml"))
        {
            return FileKind.Xlsx;
        }

        return FileKind.OtherZip;
    }
}