Is there any way to determine whether an uploaded file is a ZIP file or a DOCX/XLSX file based only on its Base64 string? I am specifying this because I do not have the file extension.
So far I have only found solutions that also rely on the file extension, like this one:
// Bytes 'P', 'K', 0x03, 0x04: the signature that starts a ZIP local file header.
// DOCX/XLSX packages are ZIP archives, so they begin with the same four bytes.
private static readonly byte[] ZIP_DOCX = { 80, 75, 3, 4 };

/// <summary>
/// Guesses a MIME type from the leading bytes of a file, using the file
/// name's extension to tell a Word document apart from a plain ZIP archive.
/// </summary>
/// <param name="file">Raw file content; only the first four bytes are inspected.</param>
/// <param name="fileName">
/// Original file name. Only its extension is used; it may be <c>null</c> or
/// have no extension, in which case a ZIP payload is reported as a plain ZIP.
/// </param>
/// <returns>
/// The Word (DOCX) MIME type when the content starts with the ZIP signature
/// and the extension is <c>.docx</c> (any casing);
/// <c>application/x-zip-compressed</c> for any other ZIP-signed content;
/// <c>application/octet-stream</c> when the signature does not match.
/// </returns>
public static string GetMimeType(byte[] file, string fileName)
{
    // Generic binary fallback for content that does not carry the ZIP signature.
    string mime = "application/octet-stream";

    // Path.GetExtension returns null for a null path. Upper-case with the
    // invariant culture so the comparison does not depend on the current locale.
    string extension = Path.GetExtension(fileName)?.ToUpperInvariant() ?? string.Empty;

    if (file.Take(4).SequenceEqual(ZIP_DOCX))
    {
        mime = extension == ".DOCX"
            ? "application/vnd.openxmlformats-officedocument.wordprocessingml.document"
            : "application/x-zip-compressed";
    }

    return mime;
}
But as I said, this solution does not work for me since I do not have the file's extension. Any ideas?
This sounded interesting, so I played around with it. It isn't based on a spec, so it is not super reliable. I just extracted one Excel file and one Word file and guessed at what the identifying characteristics might be. It seems to work.
/// <summary>
/// Result of classifying a Base64-encoded payload with
/// <see cref="FileKindDecoder.DetermineFileKind"/>.
/// </summary>
public enum FileKind
{
    /// <summary>The bytes could not be opened as a ZIP archive.</summary>
    NotZip,

    /// <summary>A ZIP archive that was not recognised as a Word or Excel package.</summary>
    OtherZip,

    /// <summary>A ZIP archive whose content-type manifest mentions "spreadsheetml".</summary>
    Xlsx,

    /// <summary>A ZIP archive whose content-type manifest mentions "wordprocessingml".</summary>
    Docx
}

/// <summary>
/// Classifies a Base64-encoded file as non-ZIP, generic ZIP, XLSX or DOCX by
/// inspecting the <c>[Content_Types].xml</c> entry inside the archive.
/// </summary>
public static class FileKindDecoder
{
    /// <summary>
    /// Decodes <paramref name="base64"/> and determines what kind of file it holds.
    /// </summary>
    /// <param name="base64">The file content encoded as a Base64 string.</param>
    /// <returns>
    /// <see cref="FileKind.NotZip"/> when the bytes are not a readable ZIP archive;
    /// <see cref="FileKind.Xlsx"/> or <see cref="FileKind.Docx"/> according to the
    /// first <c>Override</c> element whose <c>ContentType</c> contains
    /// "spreadsheetml" or "wordprocessingml"; otherwise
    /// <see cref="FileKind.OtherZip"/> (including when the manifest entry is
    /// missing, unreadable, or not the expected XML).
    /// </returns>
    /// <exception cref="ArgumentNullException"><paramref name="base64"/> is null.</exception>
    /// <exception cref="FormatException"><paramref name="base64"/> is not valid Base64.</exception>
    public static FileKind DetermineFileKind(string base64)
    {
        XElement? contentTypesXml;

        var bytes = Convert.FromBase64String(base64);

        using (var stream = new MemoryStream(bytes))
        using (var zip = OpenZip(stream))
        {
            if (zip == null)
                return FileKind.NotZip;

            var contentTypesEntry = zip.GetEntry(@"[Content_Types].xml");
            if (contentTypesEntry == null)
                return FileKind.OtherZip;

            // Parse while the archive is still open; the XML is held in memory afterwards.
            contentTypesXml = ReadXmlFromZip(contentTypesEntry);
        }

        if (contentTypesXml == null)
            return FileKind.OtherZip;

        XNamespace ns = @"http://schemas.openxmlformats.org/package/2006/content-types";

        if (contentTypesXml.Name != ns + "Types")
            return FileKind.OtherZip;

        foreach (var overrideElement in contentTypesXml.Elements(ns + "Override"))
        {
            var contentType = overrideElement.Attribute("ContentType")?.Value;
            if (contentType == null)
                continue;

            if (contentType.Contains("spreadsheetml"))
                return FileKind.Xlsx;

            if (contentType.Contains("wordprocessingml"))
                return FileKind.Docx;
        }

        return FileKind.OtherZip;

        // Opens the stream as a ZIP archive, or returns null when it is not one.
        static ZipArchive? OpenZip(Stream stream)
        {
            try
            {
                return new ZipArchive(stream);
            }
            catch (InvalidDataException)
            {
                return null;
            }
        }

        // Loads an archive entry as XML, or returns null when the entry cannot be
        // opened, cannot be decompressed, or does not contain well-formed XML.
        static XElement? ReadXmlFromZip(ZipArchiveEntry entry)
        {
            try
            {
                // Open() validates the entry's local header and can throw
                // InvalidDataException even though the archive itself opened,
                // so it must sit inside the try block.
                using var stream = entry.Open();
                return XElement.Load(stream);
            }
            catch (XmlException)
            {
                return null;
            }
            catch (InvalidDataException)
            {
                // Corrupt or unsupported entry data: the file is a ZIP, but not one we can identify.
                return null;
            }
        }
    }
}
Here is a simplified approach that doesn't unzip any file or parse any XML:
/// <summary>
/// Outcome of <see cref="FileKindDecoder.DetermineFileKind"/>.
/// </summary>
public enum FileKind
{
    /// <summary>The payload could not be read as a ZIP archive.</summary>
    NotZip,

    /// <summary>A ZIP archive without the entries that mark it as DOCX or XLSX.</summary>
    OtherZip,

    /// <summary>A ZIP archive containing <c>[Content_Types].xml</c> and <c>xl/workbook.xml</c>.</summary>
    Xlsx,

    /// <summary>A ZIP archive containing <c>[Content_Types].xml</c> and <c>word/document.xml</c>.</summary>
    Docx
}

/// <summary>
/// Classifies a Base64-encoded file purely from the entry names listed in its
/// ZIP directory; no entry is decompressed and no XML is parsed.
/// </summary>
public static class FileKindDecoder
{
    /// <summary>
    /// Decodes <paramref name="base64"/> and reports which kind of file it holds.
    /// </summary>
    /// <param name="base64">The file content encoded as a Base64 string.</param>
    /// <returns>
    /// <see cref="FileKind.NotZip"/> if reading the archive raises
    /// <see cref="InvalidDataException"/>; <see cref="FileKind.Docx"/> or
    /// <see cref="FileKind.Xlsx"/> when the marker entries are present (the Word
    /// marker is checked first); otherwise <see cref="FileKind.OtherZip"/>.
    /// </returns>
    public static FileKind DetermineFileKind(string base64)
    {
        var entryNames = new HashSet<string>();

        try
        {
            byte[] payload = Convert.FromBase64String(base64);

            using (var buffer = new MemoryStream(payload))
            using (var archive = new ZipArchive(buffer))
            {
                // Only the names are needed, so collect them and let the archive close.
                foreach (ZipArchiveEntry entry in archive.Entries)
                {
                    entryNames.Add(entry.FullName);
                }
            }
        }
        catch (InvalidDataException)
        {
            return FileKind.NotZip;
        }

        // Without the content-type manifest the archive is treated as a plain ZIP.
        if (!entryNames.Contains("[Content_Types].xml"))
        {
            return FileKind.OtherZip;
        }

        if (entryNames.Contains("word/document.xml"))
        {
            return FileKind.Docx;
        }

        return entryNames.Contains("xl/workbook.xml")
            ? FileKind.Xlsx
            : FileKind.OtherZip;
    }
}