Mime-Detective
Mime-Detective copied to clipboard
PDF files detected as text/plain
This example PDF file is detected as text/plain when only the first MaxHeaderSize
bytes are used for the detection: http://www.orimi.com/pdf-test.pdf
I would run the file signature detection before checking for plain text files.
/// <summary>
/// Determines the file type from the leading bytes of a file.
/// Known header signatures are tried first; the plain-text heuristic is only
/// applied when no signature matched.
/// </summary>
/// <param name="fileHeaderReadFunc">Callback that returns the header bytes to inspect.</param>
/// <param name="fileFullName">
/// Optional file name. When it is non-empty and the header matches ZIP, the
/// decision is delegated to <c>CheckForDocxAndXlsx</c>.
/// </param>
/// <returns>
/// The detected <see cref="FileType"/>, <c>TXT</c> when nothing matched and the
/// header holds no zero byte, or <c>null</c> when the type could not be determined.
/// </returns>
public static FileType GetFileType(Func<byte[]> fileHeaderReadFunc, string fileFullName = "")
{
    // Fetch the header bytes once; every check below works on this buffer.
    byte[] headerBytes = fileHeaderReadFunc();

    // Stays null unless one of the checks below recognises the header.
    FileType detected = null;

    // Signature pass: the first stored type whose complete header matches wins.
    foreach (FileType candidate in types)
    {
        int matchedBytes = GetFileMatchingCount(headerBytes, candidate);
        if (matchedBytes != candidate.Header.Length)
        {
            continue; // partial match only, try the next known type
        }

        // A ZIP header is refined further only when a file name is available.
        // Callers may not have a name, or writing a temp file just to obtain
        // a FileInfo may be impractical.
        bool refineZip = candidate.Equals(ZIP) && !String.IsNullOrEmpty(fileFullName);
        detected = refineZip
            ? CheckForDocxAndXlsx(candidate, fileFullName)
            : candidate;
        break;
    }

    if (detected == null)
    {
        // No signature matched; fall back to a rough "is it text?" heuristic:
        // a header without any zero byte is treated as plain text.
        // NOTE: not exact - UTF-16 / UTF-32 text is not expected to pass this check.
        bool containsNoZeroByte = headerBytes.All(b => b != 0);
        if (containsNoZeroByte)
        {
            detected = TXT;
        }

        // This would be the place to add detection based on file extension, e.g. .csv
    }

    return detected;
}
Thanks, my tests are green after this patch.