Email Addresses from Stream_Autocomplete##### File in C#

Ansh 25 Reputation points
2024-10-04T21:28:04.4966667+00:00

I'm trying to parse the Stream_Autocomplete##### file in C# to extract email addresses, but I'm encountering some issues. I've come across a PowerShell script that works for extracting email addresses, but when I attempt to replicate it in C#, I only retrieve the top 12 email addresses. After that, the PropertyCountInt value unexpectedly jumps into the billions, which is not what I expected based on previous experience where it typically ranged between 20 to 30 before the 12th row.
Path to AutoComplete stream should be here:
C:\Users######\AppData\Local\Microsoft\Outlook\RoamCache

I can't just use Regex for email address to extract as it is not reliable here in this file.

Has anyone successfully implemented a C# solution to extract all email addresses from this file?

using System;
using System.Collections.Generic;
using System.IO;
using System.Linq;
using System.Text;
using System.Text.RegularExpressions;
class AutoCompleteObj
{
    public byte[] AutoCompleteFile { get; set; }
    public byte[] Contents { get; set; }
    public List<byte> NewContents { get; set; } = new List<byte>();
    public List<Row> RowList { get; set; } = new List<Row>();
    public byte[] MetaDataBegin { get; set; }
    public byte[] MetaDataEnd { get; set; }
    public byte[] NumberOfRows { get; set; }
    public int NumberOfRowsInt { get; set; }
    public byte[] PropertyTag { get; set; }
    public Exception Exception { get; set; }
    public DateTime ObjTimestamp { get; set; }
    public List<string> GetEmails()
    {
        var emailList = new List<string>();
        try
        {
            foreach (var row in RowList)
            {
                if (!string.IsNullOrEmpty(row.Email))
                {
                    emailList.Add(row.Email);
                }
            }
        }
        catch (Exception ex)
        {
            Console.WriteLine(ex.Message);
            this.Exception = ex;
        }
        return emailList;
    }
    public void ReadFile(FileInfo file)
    {
        try
        {
            this.AutoCompleteFile = File.ReadAllBytes(file.FullName);
            this.Contents = this.AutoCompleteFile;
        }
        catch (Exception ex)
        {
            Console.WriteLine(ex.Message);
            this.Exception = ex;
        }
    }
    public void ParseFile()
    {
        try
        {
            RowList.Clear();
            MetaDataBegin = Contents.Take(12).ToArray();
            NumberOfRows = Contents.Skip(12).Take(4).ToArray();
            NumberOfRowsInt = BitConverter.ToInt32(NumberOfRows, 0);
            int byteCounter = 16;
            Console.WriteLine($"NUMBER OF ROWS: {NumberOfRowsInt}");
            for (int i = 1; i <= NumberOfRowsInt; i++)
            {
                Console.WriteLine($"PARSING ROW: {i}");
                var row = new Row
                {
                    StartIndex = byteCounter,
                    PropertyList = new List<Property>()
                };
                row.PropertyCount = Contents.Skip(byteCounter).Take(4).ToArray();
                byteCounter += 4;
                row.PropertyCountInt = BitConverter.ToInt32(row.PropertyCount, 0);
                Console.WriteLine(row.PropertyCountInt);
                if (row.PropertyCountInt < 0 || row.PropertyCountInt > 100) 
                {
                    Console.WriteLine($"Warning: Unexpected PropertyCountInt {row.PropertyCountInt} at Row {i}. Skipping.");
                    continue;
                }
                for (int p = 1; p <= row.PropertyCountInt; p++)
                {
                    var property = new Property
                    {
                        Tag = Contents.Skip(byteCounter).Take(4).ToArray()
                    };
                    byteCounter += 4;
                    property.TagBinary = ConvertByte2Hex(property.Tag);
                    property.Reserved = Contents.Skip(byteCounter).Take(4).ToArray();
                    byteCounter += 4;
                    property.Union = Contents.Skip(byteCounter).Take(8).ToArray();
                    byteCounter += 8;
                    if (property.TagBinary.StartsWith("1F") || property.TagBinary.StartsWith("02"))
                    {
                        property.Bytes = Contents.Skip(byteCounter).Take(4).ToArray();
                        byteCounter += 4;
                        property.BytesInt = BitConverter.ToInt32(property.Bytes, 0);
                        property.Value = Contents.Skip(byteCounter).Take(property.BytesInt).ToArray();
                        byteCounter += property.BytesInt;
                        if (property.TagBinary.StartsWith("1F"))
                        {
                            var stringBuilder = new StringBuilder();
                            foreach (var b in property.Value)
                            {
                                if (b.ToString("X2") != "00")
                                {
                                    stringBuilder.Append((char)b);
                                }
                            }
                            property.ValueStr = stringBuilder.ToString();
                            if (property.TagBinary == "1F000160")
                            {
                                row.Email = property.ValueStr;
                            }
                        }
                    }
                    row.PropertyList.Add(property);
                }
                RowList.Add(row);
            }
            MetaDataEnd = Contents.Skip(byteCounter).Take(12).ToArray();
        }
        catch (Exception ex)
        {
            Console.WriteLine(ex.Message);
            this.Exception = ex;
        }
    }
    public static string ConvertByte2Hex(byte[] byteArray)
    {
        StringBuilder hexString = new StringBuilder();
        foreach (byte b in byteArray)
        {
            hexString.Append(b.ToString("X2"));
        }
        return hexString.ToString();
    }
}
class Row
{
    public int StartIndex { get; set; }
    public byte[] PropertyCount { get; set; }
    public int PropertyCountInt { get; set; }
    public string Email { get; set; }
    public List<Property> PropertyList { get; set; }
    public DateTime ObjTimestamp { get; set; }
}
class Property
{
    public int Number { get; set; }
    public byte[] Tag { get; set; }
    public string TagBinary { get; set; }
    public byte[] Reserved { get; set; }
    public byte[] Union { get; set; }
    public byte[] Bytes { get; set; }
    public int BytesInt { get; set; }
    public byte[] Value { get; set; }
    public string ValueStr { get; set; }
    public DateTime ObjTimestamp { get; set; }
}
class Program
{
    static void Main(string[] args)
    {
        var autoCompleteObj = new AutoCompleteObj();
        var fileInfo = new FileInfo(@"path-to-autocomplete");
        autoCompleteObj.ReadFile(fileInfo);
        autoCompleteObj.ParseFile();
        var emails = autoCompleteObj.GetEmails();
        var outputFilePath = @"C:\Users\###\Documents\ParsedEmails.txt";
        File.WriteAllLines(outputFilePath, emails);
        Console.WriteLine($"Emails saved to {outputFilePath}");
    }
}

Outlook
Outlook
A family of Microsoft email and calendar products.
4,004 questions
C#
C#
An object-oriented and type-safe programming language that has its roots in the C family of languages and includes support for component-oriented programming.
11,008 questions
PowerShell
PowerShell
A family of Microsoft task automation and configuration management frameworks consisting of a command-line shell and associated scripting language.
2,593 questions
{count} votes

Accepted answer
  1. Jiale Xue - MSFT 46,456 Reputation points Microsoft Vendor
    2024-10-07T09:07:00.3033333+00:00

    Hi @Ansh , Welcome to Microsoft Q&A,

    Updated:


    I implemented two API calls to the Microsoft Graph API:

    public async Task<HashSet<NameAndEmail>> Search(string searchQuery)
    {
        try
        {
            await ConfigureClient();
            var uniqueResults = new HashSet<NameAndEmail>(new NameAndEmailComparer());
    
            // First task: Search for people
            var task1 = Task.Run(async () =>
            {
                string url1 = "https://graph.microsoft.com/beta/search/query";
                string jsonData = $@"
                    {{
                        ""requests"": [
                            {{
                                ""entityTypes"": [""person""],
                                ""query"": {{
                                    ""queryString"": ""{searchQuery}""
                                }},
                                ""From"": 0,
                                ""Size"": 25,
                                ""Fields"": [""DisplayName"", ""EmailAddresses""]
                            }}
                        ]
                    }}";
    
                HttpContent content1 = new StringContent(jsonData, Encoding.UTF8, "application/json");
                var response1 = await _httpClient.PostAsync(url1, content1);
                var responseString1 = await response1.Content.ReadAsStringAsync();
                AddUniqueEmails(ExtractEmailsAndDisplayNames(responseString1), uniqueResults);
            });
    
            // Second task: Search users by given name
            var task2 = Task.Run(async () =>
            {
                string url2 = $"https://graph.microsoft.com/v1.0/users?$filter=startswith(givenName,'{searchQuery}')";
                var response2 = await _httpClient.GetAsync(url2);
                var responseString2 = await response2.Content.ReadAsStringAsync();
                AddUniqueEmails(ExtractEmail(responseString2), uniqueResults);
            });
    
            await Task.WhenAll(task1, task2);
            return uniqueResults;
        }
        catch (Exception ex)
        {
            throw new Exception(ex.Message);
        }
    }
    
    

    In this approach, I run two asynchronous tasks: one to search for people using the endpoint and another to filter users with . I then merge the results, ensuring there are no duplicates, using a ./beta/search/querystartswith(givenName, '{searchQuery}')HashSet

    Best Regards,

    Jiale


    If the answer is the right solution, please click "Accept Answer" and kindly upvote it. If you have extra questions about this answer, please click "Comment". 

    Note: Please follow the steps in our documentation to enable e-mail notifications if you want to receive the related email notification for this thread.

    0 comments No comments

0 additional answers

Sort by: Most helpful

Your answer

Answers can be marked as Accepted Answers by the question author, which helps users to know the answer solved the author's problem.