Distinguish Compound File Binary Format

StewartBW 1,905 Reputation points
2024-08-22T18:35:45.23+00:00

Hello experts

Without knowing the file extension, and using only the file contents, I read the first bytes of the file. If the first 8 bytes are:

D0 CF 11 E0 A1 B1 1A E1

then the file is in Compound File Binary Format, which can be .msg, .oft, .doc, .xls, .ppt, .msi, .msp, etc.

The question: how can I distinguish .msg / .oft from all the other formats?

Are there additional constant magic numbers specific to each format?

Thanks.

  • VB.net with .net framework 4.0 full
Developer technologies | VB
Developer technologies | C#
Developer technologies | C#
An object-oriented and type-safe programming language that has its roots in the C family of languages and includes support for component-oriented programming.
{count} votes

Answer accepted by question author
  1. Castorix31 91,511 Reputation points
    2024-08-22T21:01:37.7333333+00:00

    Use StgOpenStorage

    Tested with 3 CLSIDs; add others as needed (they can be found with a web search):

    // Opens a compound file read-only and classifies it by the CLSID
    // recorded on its root storage, printing the result to the console.
    IStorage pStorage = null;
    string sFile = "E:\\Sources\\CSharp_StgOpenStorage\\test.doc";
    // STGM_SHARE_EXCLUSIVE: the open fails with 0x80030020 (STG_E_SHAREVIOLATION)
    // when the file is already open elsewhere.
    HRESULT hr = StgOpenStorage(sFile, null, STGM_READ | STGM_SHARE_EXCLUSIVE, IntPtr.Zero, 0, out pStorage);
    if (hr != HRESULT.S_OK)
    {
        // Report the failure instead of silently doing nothing.
        Console.WriteLine("StgOpenStorage failed: 0x" + ((int)hr).ToString("X8"));
    }
    else
    {
        try
        {
            System.Runtime.InteropServices.ComTypes.STATSTG statstg;
            // Only the CLSID is needed, so ask Stat to omit the element name.
            hr = pStorage.Stat(out statstg, STATFLAG.STATFLAG_NONAME);
            if (hr != HRESULT.S_OK)
            {
                Console.WriteLine("IStorage.Stat failed: 0x" + ((int)hr).ToString("X8"));
            }
            else if (statstg.clsid == CLSID_MailMessage)
            {
                Console.WriteLine("Mail Message");
            }
            else if (statstg.clsid == CLSID_TemplateMessage)
            {
                Console.WriteLine("Template Message");
            }
            else if (statstg.clsid == CLSID_WordDocument)
            {
                Console.WriteLine("Word Document");
            }
            else
            {
                // A compound file whose root CLSID is not one of the known ones.
                Console.WriteLine("Unrecognized compound file, CLSID = " + statstg.clsid.ToString());
            }
        }
        finally
        {
            // Release the storage (and its lock on the file) even if Stat throws.
            Marshal.ReleaseComObject(pStorage);
        }
    }
    
    

    with :

            /// <summary>
            /// Small subset of COM HRESULT codes used as the return type of the
            /// interop declarations in this file. Success codes come first,
            /// followed by failure codes in ascending numeric order.
            /// </summary>
            public enum HRESULT : int
            {
                // Success codes
                S_OK = 0x00000000,
                S_FALSE = 0x00000001,

                // Failure codes (high bit set, hence the unchecked casts)
                E_NOTIMPL = unchecked((int)0x80004001),
                E_NOINTERFACE = unchecked((int)0x80004002),
                E_FAIL = unchecked((int)0x80004005)
            }
    
            // STGM_* flags combined with bitwise OR into the grfMode argument
            // of StgOpenStorage and the IStorage methods.

            // Storage mode (direct / transacted / simple).
            public const int STGM_DIRECT = 0x0;
            public const int STGM_TRANSACTED = 0x10000;
            public const int STGM_SIMPLE = 0x8000000;

            // Access mode.
            public const int STGM_READ = 0x0;
            public const int STGM_WRITE = 0x1;
            public const int STGM_READWRITE = 0x2;

            // Sharing mode, listed in ascending numeric order.
            public const int STGM_SHARE_EXCLUSIVE = 0x10;
            public const int STGM_SHARE_DENY_WRITE = 0x20;
            public const int STGM_SHARE_DENY_READ = 0x30;
            public const int STGM_SHARE_DENY_NONE = 0x40;
    
            /// <summary>
            /// P/Invoke declaration for StgOpenStorage in Ole32.dll, which opens an
            /// existing compound file and returns its root <see cref="IStorage"/>.
            /// </summary>
            /// <param name="pwcsName">Path of the file to open (passed as a wide string).</param>
            /// <param name="pstgPriority">Previously opened storage, or null.</param>
            /// <param name="grfMode">Combination of the STGM_* flags.</param>
            /// <param name="snbExclude">Pass <see cref="IntPtr.Zero"/>.</param>
            /// <param name="reserved">Pass 0.</param>
            /// <param name="ppstgOpen">Receives the opened root storage on success.</param>
            /// <returns>The HRESULT of the call; HRESULT.S_OK on success.</returns>
            /// <remarks>
            /// The function reports errors through its HRESULT return value, so
            /// SetLastError is intentionally not requested. ExactSpelling stops the
            /// runtime from probing for a suffixed entry-point name.
            /// </remarks>
            [DllImport("Ole32.dll", CharSet = CharSet.Unicode, ExactSpelling = true)]
            public static extern HRESULT StgOpenStorage(
                [MarshalAs(UnmanagedType.LPWStr)] string pwcsName,
                IStorage pstgPriority,
                int grfMode,
                IntPtr snbExclude,
                int reserved,
                out IStorage ppstgOpen);
    
            /// <summary>
            /// Managed declaration of the COM IStorage interface
            /// (IID 0000000b-0000-0000-c000-000000000046).
            /// </summary>
            /// <remarks>
            /// Every method carries [PreserveSig] so that the native HRESULT is handed
            /// back as the method's return value. Without it the runtime treats the
            /// managed return type as an extra [out, retval] parameter and converts
            /// failure HRESULTs into exceptions, which does not match the native
            /// signatures and makes the callers' HRESULT checks meaningless.
            /// Element names are marshalled explicitly as LPWStr (the interop default
            /// for strings on COM interfaces is BSTR).
            /// The members must stay in this order: it is the order of the native
            /// interface's methods.
            /// </remarks>
            [ComImport, ComConversionLoss, InterfaceType(ComInterfaceType.InterfaceIsIUnknown), Guid("0000000b-0000-0000-c000-000000000046")]
            public interface IStorage
            {
                [PreserveSig]
                HRESULT CreateStream([MarshalAs(UnmanagedType.LPWStr)] string pwcsName, int grfMode, int reserved1, int reserved2, out System.Runtime.InteropServices.ComTypes.IStream ppstm);

                [PreserveSig]
                HRESULT OpenStream([MarshalAs(UnmanagedType.LPWStr)] string pwcsName, IntPtr reserved1, int grfMode, int reserved2, out System.Runtime.InteropServices.ComTypes.IStream ppstm);

                [PreserveSig]
                HRESULT CreateStorage([MarshalAs(UnmanagedType.LPWStr)] string pwcsName, int grfMode, int reserved1, int reserved2, out IStorage ppstg);

                [PreserveSig]
                HRESULT OpenStorage([MarshalAs(UnmanagedType.LPWStr)] string pwcsName, IStorage pstgPriority, int grfMode, IntPtr snbExclude, int reserved, out IStorage ppstg);

                [PreserveSig]
                HRESULT CopyTo(int ciidExclude, [In, MarshalAs(UnmanagedType.LPArray)] Guid[] rgiidExclude, IntPtr snbExclude, [In, MarshalAs(UnmanagedType.Interface)] IStorage stgDest);

                [PreserveSig]
                HRESULT MoveElementTo([MarshalAs(UnmanagedType.LPWStr)] string pwcsName, IStorage pstgDest, [MarshalAs(UnmanagedType.LPWStr)] string pwcsNewName, int grfFlags);

                [PreserveSig]
                HRESULT Commit(int grfCommitFlags);

                [PreserveSig]
                HRESULT Revert();

                [PreserveSig]
                HRESULT EnumElements(int reserved1, IntPtr reserved2, int reserved3, out IEnumSTATSTG ppenum);

                [PreserveSig]
                HRESULT DestroyElement([MarshalAs(UnmanagedType.LPWStr)] string pwcsName);

                [PreserveSig]
                HRESULT RenameElement([MarshalAs(UnmanagedType.LPWStr)] string pwcsOldName, [MarshalAs(UnmanagedType.LPWStr)] string pwcsNewName);

                [PreserveSig]
                HRESULT SetElementTimes([MarshalAs(UnmanagedType.LPWStr)] string pwcsName, ref System.Runtime.InteropServices.ComTypes.FILETIME pctime, ref System.Runtime.InteropServices.ComTypes.FILETIME patime, ref System.Runtime.InteropServices.ComTypes.FILETIME pmtime);

                [PreserveSig]
                HRESULT SetClass(ref Guid clsid);

                [PreserveSig]
                HRESULT SetStateBits(int grfStateBits, int grfMask);

                /// <summary>Fills <paramref name="pstatstg"/> with information about this storage, including its CLSID.</summary>
                [PreserveSig]
                HRESULT Stat(out System.Runtime.InteropServices.ComTypes.STATSTG pstatstg, STATFLAG grfStatFlag);
            }
    
            /// <summary>
            /// Values accepted by the grfStatFlag parameter of
            /// <c>IStorage.Stat</c>.
            /// </summary>
            public enum STATFLAG : int
            {
                STATFLAG_DEFAULT = 0x0,
                STATFLAG_NONAME = 0x1,
                STATFLAG_NOOPEN = 0x2
            }
      
            /// <summary>
            /// Storage element type codes (storage, stream, lock-bytes, property).
            /// </summary>
            public enum STGTY : int
            {
                STGTY_STORAGE = 0x1,
                STGTY_STREAM = 0x2,
                STGTY_LOCKBYTES = 0x3,
                STGTY_PROPERTY = 0x4
            }
    
            /// <summary>
            /// Managed declaration of the COM IEnumSTATSTG enumerator
            /// (IID 0000000d-0000-0000-C000-000000000046), as returned by
            /// <c>IStorage.EnumElements</c>.
            /// </summary>
            /// <remarks>
            /// All methods are marked [PreserveSig] so the native HRESULT is returned
            /// directly; otherwise the runtime would treat the HRESULT return type as
            /// an extra [out, retval] parameter and throw on failure codes.
            /// The members must stay in this order: it is the order of the native
            /// interface's methods.
            /// </remarks>
            [ComImport]
            [Guid("0000000d-0000-0000-C000-000000000046")]
            [InterfaceType(ComInterfaceType.InterfaceIsIUnknown)]
            public interface IEnumSTATSTG
            {
                /// <summary>Fetches up to <paramref name="celt"/> entries into <paramref name="rgelt"/>.</summary>
                [PreserveSig]
                HRESULT Next(uint celt, [MarshalAs(UnmanagedType.LPArray), Out] System.Runtime.InteropServices.ComTypes.STATSTG[] rgelt, out uint pceltFetched);

                [PreserveSig]
                HRESULT Skip(uint celt);

                [PreserveSig]
                HRESULT Reset();

                [PreserveSig]
                HRESULT Clone(out IEnumSTATSTG ppenum);
            }
    
            // Root-storage CLSIDs compared against STATSTG.clsid to identify the file type.
            // Declared static readonly so they are created once, cannot be reassigned,
            // and can be used from both static and instance members.

            // Outlook mail message
            private static readonly Guid CLSID_MailMessage = new Guid("00020d0b-0000-0000-c000-000000000046");
            // Outlook template message
            private static readonly Guid CLSID_TemplateMessage = new Guid("0006f046-0000-0000-c000-000000000046");
            // Word.Document.8
            private static readonly Guid CLSID_WordDocument = new Guid("00020906-0000-0000-c000-000000000046");
    
    
    1 person found this answer helpful.
    0 comments No comments

0 additional answers

Sort by: Most helpful

Your answer

Answers can be marked as 'Accepted' by the question author and 'Recommended' by moderators, which helps users know the answer solved the author's problem.