Validate file content type for txt, log, JSON file in C#

Surajit Kumar Shah 0 Reputation points
2025-01-08T17:16:17.0733333+00:00

/// <summary>
/// Checks whether the uploaded file begins with any byte signature registered in
/// <c>_fileSignatures</c>, regardless of which extension the signature was registered under.
/// </summary>
/// <param name="file">The uploaded file whose leading bytes are inspected.</param>
/// <returns>
/// <c>true</c> when the start of the file equals at least one known signature; otherwise <c>false</c>.
/// </returns>
public static bool IsFileValid(IFormFile file)
{
    using var headerReader = new BinaryReader(file.OpenReadStream());

    // Flatten the per-extension lists into one list of candidate signatures.
    List<byte[]> knownSignatures = _fileSignatures.Values.SelectMany(group => group).ToList();

    // Read only as many bytes as the longest signature requires.
    int longestSignature = _fileSignatures.Max(entry => entry.Value.Max(sig => sig.Length));
    byte[] leadingBytes = headerReader.ReadBytes(longestSignature);

    // Only the bytes at the very start of the stream are compared against each signature.
    foreach (byte[] candidate in knownSignatures)
    {
        if (leadingBytes.Take(candidate.Length).SequenceEqual(candidate))
        {
            return true;
        }
    }

    return false;
}

/// <summary>
/// Known file signatures ("magic numbers"), keyed by file extension (including the leading dot).
/// Each extension maps to one or more byte sequences that a file of that type may contain.
/// </summary>
private static readonly Dictionary<string, List<byte[]>> _fileSignatures = new()
{
    { ".gif", new List<byte[]> { new byte[] { 0x47, 0x49, 0x46, 0x38 } } },                             // "GIF8"
    { ".png", new List<byte[]> { new byte[] { 0x89, 0x50, 0x4E, 0x47, 0x0D, 0x0A, 0x1A, 0x0A } } },
    { ".jpeg", new List<byte[]>
        {
            new byte[] { 0xFF, 0xD8, 0xFF, 0xE0 },
            new byte[] { 0xFF, 0xD8, 0xFF, 0xE2 },
            new byte[] { 0xFF, 0xD8, 0xFF, 0xE3 },
            new byte[] { 0xFF, 0xD8, 0xFF, 0xEE },
            new byte[] { 0xFF, 0xD8, 0xFF, 0xDB },
        }
    },
    { ".jpeg2000", new List<byte[]> { new byte[] { 0x00, 0x00, 0x00, 0x0C, 0x6A, 0x50, 0x20, 0x20, 0x0D, 0x0A, 0x87, 0x0A } } },
    { ".jpg", new List<byte[]>
        {
            new byte[] { 0xFF, 0xD8, 0xFF, 0xE0 },
            new byte[] { 0xFF, 0xD8, 0xFF, 0xE1 },
            new byte[] { 0xFF, 0xD8, 0xFF, 0xE8 },
            new byte[] { 0xFF, 0xD8, 0xFF, 0xEE },
            new byte[] { 0xFF, 0xD8, 0xFF, 0xDB },
        }
    },
    { ".zip", new List<byte[]> // also docx, xlsx, pptx, ...
        {
            new byte[] { 0x50, 0x4B, 0x03, 0x04 },
            new byte[] { 0x50, 0x4B, 0x4C, 0x49, 0x54, 0x45 },             // "PKLITE"
            new byte[] { 0x50, 0x4B, 0x53, 0x70, 0x58 },                   // "PKSpX"
            new byte[] { 0x50, 0x4B, 0x05, 0x06 },
            new byte[] { 0x50, 0x4B, 0x07, 0x08 },
            new byte[] { 0x57, 0x69, 0x6E, 0x5A, 0x69, 0x70 },             // "WinZip"
        }
    },
    { ".pdf", new List<byte[]> { new byte[] { 0x25, 0x50, 0x44, 0x46 } } },                             // "%PDF"
    { ".z", new List<byte[]>
        {
            new byte[] { 0x1F, 0x9D },
            new byte[] { 0x1F, 0xA0 }
        }
    },
    // NOTE(review): in the POSIX ustar format the "ustar" magic is stored at byte offset 257 of the
    // archive, not at its start, so a check that compares only leading bytes will not match these
    // entries. Confirm how the consumer of this table is expected to handle tar files.
    { ".tar", new List<byte[]>
        {
            new byte[] { 0x75, 0x73, 0x74, 0x61, 0x72, 0x00, 0x30, 0x30 }, // "ustar\0" + "00"
            new byte[] { 0x75, 0x73, 0x74, 0x61, 0x72, 0x20, 0x20, 0x00 }, // "ustar  \0"
        }
    },
    { ".tar.z", new List<byte[]>
        {
            new byte[] { 0x1F, 0x9D },
            new byte[] { 0x1F, 0xA0 }
        }
    },
    { ".tif", new List<byte[]>
        {
            new byte[] { 0x49, 0x49, 0x2A, 0x00 },                         // "II*\0"
            new byte[] { 0x4D, 0x4D, 0x00, 0x2A }                          // "MM\0*"
        }
    },
    { ".tiff", new List<byte[]>
        {
            new byte[] { 0x49, 0x49, 0x2A, 0x00 },                         // "II*\0"
            new byte[] { 0x4D, 0x4D, 0x00, 0x2A }                          // "MM\0*"
        }
    },
    { ".rar", new List<byte[]>
        {
            new byte[] { 0x52, 0x61, 0x72, 0x21, 0x1A, 0x07, 0x00 },       // "Rar!" ...
            new byte[] { 0x52, 0x61, 0x72, 0x21, 0x1A, 0x07, 0x01, 0x00 },
        }
    },
    { ".7z", new List<byte[]>
        {
            new byte[] { 0x37, 0x7A, 0xBC, 0xAF, 0x27, 0x1C },             // "7z" ...
        }
    },
    // Text files have no signature of their own; these are Unicode byte order marks, so a text
    // file saved without a BOM will not match any of them.
    { ".txt", new List<byte[]>
        {
            new byte[] { 0xEF, 0xBB, 0xBF },                               // UTF-8 BOM
            new byte[] { 0xFF, 0xFE },                                     // UTF-16 LE BOM
            new byte[] { 0xFE, 0xFF },                                     // UTF-16 BE BOM
            new byte[] { 0x00, 0x00, 0xFE, 0xFF },                         // UTF-32 BE BOM
        }
    },
    { ".mp3", new List<byte[]>
        {
            new byte[] { 0xFF, 0xFB },
            new byte[] { 0xFF, 0xF3 },
            new byte[] { 0xFF, 0xF2 },
            // ID3v2 tag identifier "ID3". The last byte was previously 0x43 ("IDC"), which is not
            // a valid identifier and prevented ID3-tagged MP3 files from matching.
            new byte[] { 0x49, 0x44, 0x33 },
        }
    },
};

Hello, I found the above code to validate file content in C#. It is helpful for validating the content of some known file types: the signature comparison works fine for known file types like jpeg, gif, mp3, doc, docx, etc. However, the logic doesn't work for file types like txt, log, and JSON. Is there any solution to validate the content type of txt, log, JSON files ? I tried to match the signatures of txt, log, and JSON files, but they are always different for different files.

Developer technologies ASP.NET ASP.NET Core
Microsoft 365 and Office Development Office JavaScript API
Developer technologies .NET Other
Developer technologies ASP.NET Other
Developer technologies C#
0 comments No comments
{count} votes

2 answers

Sort by: Most helpful
  1. Bruce (SqlWork.com) 77,686 Reputation points Volunteer Moderator
    2025-01-08T17:51:35.44+00:00

    txt and log files have no defined format and can contain any characters, so they are hard to validate.

    To validate a JSON file you need to read the entire file to see if it's valid. If you just want to validate the start characters, a JSON file has a couple of formats:

    • an object, then the file starts with a <whitespace>{ and ends with a }<whitespace>
    • an array, then the file starts with a <whitespace>[ and ends with ]<whitespace>
    • bool value, then <whitespace>true<whitespace> or <whitespace>false<whitespace>
    • null value, then <whitespace>null<whitespace>
    • string value, then <whitespace>" and ends with "<whitespace>
    • numeric value, then <whitespace><numeric string><whitespace>. A numeric string can be in integer, decimal, or exponent format (JSON does not allow hex).

    see:

    https://www.json.org/json-en.html

    0 comments No comments

  2. SurferOnWww 4,631 Reputation points
    2025-01-09T00:54:31.8266667+00:00

    Is there any solution to validate the content type of txt, log, JSON files ?

    No, there is no practical solution since they are all text files. If they have a BOM you will be able to guess that they are text files. However, there is no way to differentiate among txt, log, and JSON files.


Your answer

Answers can be marked as Accepted Answers by the question author, which helps users to know the answer solved the author's problem.