C# How to read tabular data from selection and show in DataGridView Winform

T.Zacks 3,996 Reputation points
2023-10-18T14:49:23.6766667+00:00

I have a WebBrowser control in which a web site is loaded. The web page contains many tables; the user selects some of that tabular data, and I need to parse the selected data and show it in a DataGridView.

This is the way I am extracting the selected text from the WebBrowser control.

/// <summary>
/// Returns the plain text of the current selection in <c>webBrowser1</c>.
/// </summary>
/// <returns>
/// The selected text, or an empty string when no page is loaded or the
/// selection range reports no text.
/// </returns>
private string GetSelectedText()
{
	// Document is null until a page has been loaded into the control.
	if (webBrowser1.Document == null)
	{
		return string.Empty;
	}

	// The DOM is accessed late-bound so no mshtml interop reference is needed.
	dynamic document = webBrowser1.Document.DomDocument;
	dynamic selection = document.selection;
	dynamic range = selection.createRange();
	return (string)range.text ?? string.Empty;
}

However, it is very hard to extract the data properly from the selected plain text. So my question is: is it possible to get the HTML of the selected text instead?

These are the pages from which I need to parse tabular data. The user will select multiple sets of tabular data with the mouse.

https://www.sec.gov/Archives/edgar/data/1108134/000110813423000018/bhlb-20230630.htm https://www.sec.gov/Archives/edgar/data/66740/000006674023000058/mmm-20230630.htm

This is the routine I am currently using to parse the selected data, but it is not working well.

/// <summary>
/// Text selected in the browser; it is parsed into <c>dgv</c> when the form loads.
/// </summary>
public string SelectedText { get; set; }

/// <summary>
/// Parses <see cref="SelectedText"/> line by line and fills <c>dgv</c>.
/// The first selected line is treated as the header: a parenthesised,
/// digit-free label (for example "(Dollars in millions)") becomes the left
/// column and the remaining space-separated tokens determine how many grid
/// columns are created. Row parsing starts at the first line containing
/// "Dollars in millions" and continues to the end of the selection.
/// </summary>
/// <param name="sender">Event source (unused).</param>
/// <param name="e">Event data (unused).</param>
private void Form2_Load(object sender, EventArgs e)
{
    string selectedText = SelectedText;
    if (string.IsNullOrEmpty(selectedText))
    {
        // Nothing was selected, so there is nothing to parse.
        return;
    }

    // Characters stripped from a line to leave only the line-item label.
    char[] labelNoise = new char[] { '%', '€', ';', ',', '.', '$', '£', '(', ')' };
    char[] blank = new char[] { ' ' };

    string[] lines = selectedText.Split(new string[] { "\r\n" }, StringSplitOptions.None);

    #region Build grid columns from the first selected line
    // The header only depends on the first line, so it is parsed once here
    // instead of once per row.
    string headerLine = lines[0];
    string leftColumnName = "";
    var labelMatch = Regex.Match(headerLine, "\\(\\D*\\)", RegexOptions.IgnoreCase);
    if (labelMatch.Success)
    {
        leftColumnName = labelMatch.Groups[0].Value;
    }

    string headerRest = headerLine.Trim();
    if (leftColumnName.Trim().Length > 0)
    {
        // string.Replace throws ArgumentException for an empty search string,
        // so the label is only removed when one was actually found.
        headerRest = headerRest.Replace(leftColumnName.Trim(), "");
    }

    List<string> columns = headerRest.Trim().Split(blank).ToList();
    columns.Insert(0, leftColumnName);

    if (dgv.Columns.Count < columns.Count)
    {
        for (int colCounter = 1; colCounter <= columns.Count; colCounter++)
        {
            string columnName = "col_" + colCounter;
            dgv.Columns.Add(columnName, "");
            dgv.Columns[columnName].SortMode = DataGridViewColumnSortMode.NotSortable;
        }
    }
    #endregion

    #region Extract data for each row
    bool startParse = false;
    foreach (string line in lines)
    {
        if (line == "" || !(line.Contains("Dollars in millions") || startParse))
        {
            continue;
        }

        // From the first "Dollars in millions" line onwards every non-empty line is parsed.
        startParse = true;

        // Line-item label: the line with digits, '-' and the noise characters removed.
        // NOTE(review): the class [\d-1] appears to match digits and a literal '-'; confirm intent.
        string lineItem = Regex.Replace(line.Trim(), @"[\d-1]", string.Empty);
        lineItem = ReplaceMultipleChar(lineItem, labelNoise, string.Empty).Trim();
        if (lineItem == "")
        {
            continue;
        }

        string values = "";
        if (line.Length > lineItem.Length)
        {
            // Keep only the numeric part; "(123)" is turned into "-123".
            values = GetNumericData(line).Trim();
            values = values.Replace("(", "-").Replace(")", " ").Replace(",", "").Trim();
        }

        // Replacing ")" with a space can leave runs of spaces; dropping the
        // resulting empty tokens keeps each value in its own column.
        List<string> cells = new List<string>();
        cells.Add(lineItem);
        cells.AddRange(values.Split(blank, StringSplitOptions.RemoveEmptyEntries));
        dgv.Rows.Add(cells.ToArray());
    }
    #endregion
}
/// <summary>
/// Filters <paramref name="input"/> down to the characters that can belong to
/// numeric values: digits, '.', '-' and spaces are always kept; '(' is kept
/// only when the next character is a digit and ')' only when the previous
/// character is a digit. All other characters are dropped.
/// </summary>
/// <param name="input">The line to filter; null is treated as empty.</param>
/// <returns>The kept characters, in their original order.</returns>
private string GetNumericData(string input)
{
    if (string.IsNullOrEmpty(input))
    {
        return "";
    }

    var output = new System.Text.StringBuilder(input.Length);
    for (int i = 0; i < input.Length; i++)
    {
        char current = input[i];
        bool keep;
        switch (current)
        {
            case '(':
                // Bounds are checked before indexing so a trailing '(' cannot throw.
                keep = i + 1 < input.Length && Char.IsDigit(input[i + 1]);
                break;
            case ')':
                // Bounds are checked before indexing so a leading ')' cannot throw.
                keep = i > 0 && Char.IsDigit(input[i - 1]);
                break;
            case '.':
            case '-':
            case ' ':
                keep = true;
                break;
            default:
                keep = Char.IsDigit(current);
                break;
        }

        if (keep)
        {
            output.Append(current);
        }
    }
    return output.ToString();
}
/// <summary>
/// Splits <paramref name="s"/> on any of <paramref name="separators"/>,
/// discards empty pieces, and joins the remaining pieces with
/// <paramref name="newVal"/>.
/// </summary>
/// <param name="s">The text to process.</param>
/// <param name="separators">Characters to split on.</param>
/// <param name="newVal">Text placed between the surviving pieces.</param>
/// <returns>The re-joined text.</returns>
public string ReplaceMultipleChar(string s, char[] separators, string newVal)
{
    string[] pieces = s.Split(separators, StringSplitOptions.RemoveEmptyEntries);
    string joined = String.Join(newVal, pieces);
    return joined;
}

/// <summary>
/// Replaces every match of the regular expression <paramref name="separators"/>
/// in <paramref name="s"/> with <paramref name="newVal"/>.
/// </summary>
/// <param name="s">The text to search.</param>
/// <param name="separators">Regular-expression pattern to match.</param>
/// <param name="newVal">Replacement text passed to <c>Regex.Replace</c>.</param>
/// <returns>The text with all matches replaced.</returns>
public string ReplaceAll(string s, string separators, string newVal)
{
    string replaced = Regex.Replace(input: s, pattern: separators, replacement: newVal);
    return replaced;
}

/// <summary>
/// Replaces every whole-word occurrence of <paramref name="wordToFind"/> in
/// <paramref name="original"/> with <paramref name="replacement"/>.
/// </summary>
/// <param name="original">The text to search.</param>
/// <param name="wordToFind">
/// The literal word to find. It is escaped before being placed in the
/// pattern, so characters such as '.', '(' or '$' are matched literally.
/// </param>
/// <param name="replacement">Replacement text passed to <c>Regex.Replace</c>.</param>
/// <param name="regexOptions">Options for the match, e.g. <c>RegexOptions.IgnoreCase</c>.</param>
/// <returns>The text with all whole-word matches replaced.</returns>
public string ReplaceWholeWord(string original, string wordToFind, string replacement, RegexOptions regexOptions = RegexOptions.None)
{
    if (string.IsNullOrEmpty(wordToFind))
    {
        // There is no word to look for, so the text is returned unchanged.
        return original;
    }

    // Escape the word so regex metacharacters in it cannot alter or break the pattern.
    string pattern = @"\b" + Regex.Escape(wordToFind) + @"\b";
    return Regex.Replace(original, pattern, replacement, regexOptions);
}

Could someone please help me parse multiple sets of tabular data and show them in a DataGridView? Thanks.

C#
C#
An object-oriented and type-safe programming language that has its roots in the C family of languages and includes support for component-oriented programming.
11,122 questions
0 comments No comments
{count} votes

1 answer

Sort by: Most helpful
  1. gekka 10,236 Reputation points MVP
    2023-10-18T18:38:46.3+00:00

    It is possible to get an IHTMLTxtRange from the selection and find the table containing that range.

    using System;
    using System.Collections.Generic;
    using System.Data;
    using System.Linq;
    using System.Text;
    using System.Windows.Forms;
    
    namespace WindowsFormsApp1
    {
        /// <summary>
        /// Demo form: hosts a WebBrowser and a "Test" button. Clicking the button
        /// dumps the table around the current selection, and then every table in
        /// the page, to the debug output.
        /// </summary>
        public partial class Form1 : Form
        {
            private WebBrowser webBrowser1;

            /// <summary>Builds the button and the browser panel in code and starts loading the page.</summary>
            public Form1()
            {
                Button btn = new Button();
                btn.Text = "Test";
                btn.Click += button1_Click;
                this.Controls.Add(btn);

                var panel = new Panel();
                panel.Top = btn.Height + 2;
                // The panel starts below the button, so the whole offset must be subtracted.
                panel.Height = this.ClientSize.Height - (btn.Height + 2);
                panel.Width = this.ClientSize.Width;
                panel.Anchor = AnchorStyles.Left | AnchorStyles.Right | AnchorStyles.Top | AnchorStyles.Bottom;

                webBrowser1 = new WebBrowser();
                webBrowser1.Dock = DockStyle.Fill;
                webBrowser1.Url = new Uri("https://www.sec.gov/Archives/edgar/data/1108134/000110813423000018/bhlb-20230630.htm");

                panel.Controls.Add(webBrowser1);
                this.Controls.Add(panel);

            }

            /// <summary>Runs both demos: the selected table first, then all tables.</summary>
            private void button1_Click(object sender, EventArgs e)
            {
                TestSelection();
                TestAllTable();
            }

            /// <summary>
            /// Finds the table that contains the current text selection and writes its
            /// contents to the debug output. When the selection as a whole is not inside
            /// one table, the tables at the selection's start and end points are tried.
            /// </summary>
            private void TestSelection()
            {
                // Document is null until a page has been loaded.
                var domdoc = this.webBrowser1.Document?.DomDocument as mshtml.IHTMLDocument2;
                if (domdoc == null)
                {
                    return;
                }

                var sel = domdoc.selection;
                var range = sel.createRange();
                var trange = range as mshtml.IHTMLTxtRange;
                if (trange == null)
                {
                    // The selection is not a text range, so it cannot be located in a table.
                    MessageBox.Show("Selection is not in Table");
                    return;
                }

                var table = GetParentTable(trange.parentElement());
                if (table == null)
                {
                    // Collapse a copy of the range onto its start point.
                    var startPointRange = trange.duplicate();
                    startPointRange.setEndPoint("EndToStart", trange);

                    // Collapse another copy onto its end point. This must act on
                    // endPointRange, otherwise the end point is never examined.
                    var endPointRange = trange.duplicate();
                    endPointRange.setEndPoint("StartToEnd", trange);

                    table = GetParentTable(startPointRange.parentElement())
                        ?? GetParentTable(endPointRange.parentElement());
                    if (table == null)
                    {
                        MessageBox.Show("Selection is not in Table");
                        return;
                    }
                }

                var tableData = TableData.GetTableData(table);

                System.Diagnostics.Debug.WriteLine(tableData.ToString());
            }

            /// <summary>
            /// Walks up from <paramref name="element"/> through its ancestors and
            /// returns the first one that is a table, or null when there is none.
            /// </summary>
            private mshtml.IHTMLTable GetParentTable(mshtml.IHTMLElement element)
            {
                var parent = element;
                while (parent != null)
                {
                    if (parent is mshtml.IHTMLTable table)
                    {
                        return table;
                    }
                    parent = parent.parentElement;
                }
                return null;
            }

            /// <summary>Writes every table of the loaded page to the debug output, separated by a line of '='.</summary>
            private void TestAllTable()
            {
                // Document is null until a page has been loaded.
                var domdoc = this.webBrowser1.Document?.DomDocument as mshtml.HTMLDocument;
                if (domdoc == null)
                {
                    return;
                }

                foreach (var table in domdoc.getElementsByTagName("table").OfType<mshtml.IHTMLTable>())
                {
                    var tableData = TableData.GetTableData(table);

                    System.Diagnostics.Debug.WriteLine(tableData.ToString());
                    System.Diagnostics.Debug.WriteLine(new string('=', 20));
                }
            }

        }
    
        /// <summary>
        /// Snapshot of an HTML table as a list of rows, each row holding the text
        /// and row/column span of its cells.
        /// </summary>
        class TableData
        {
            /// <summary>The rows in the order they were read from the table.</summary>
            public List<RowData> Rows { get; } = new List<RowData>();

            /// <summary>
            /// Copies the inner text and the row/column span of every cell of
            /// <paramref name="table"/> into a new <see cref="TableData"/>.
            /// </summary>
            public static TableData GetTableData(mshtml.IHTMLTable table)
            {
                var result = new TableData();

                foreach (var htmlRow in table.rows.OfType<mshtml.IHTMLTableRow>())
                {
                    var row = new RowData();
                    row.AddRange(htmlRow.cells.OfType<mshtml.HTMLTableCell>().Select(htmlCell => new CellData
                    {
                        Text = htmlCell.innerText,
                        RowSpan = htmlCell.rowSpan,
                        ColSpan = htmlCell.colSpan
                    }));
                    result.Rows.Add(row);
                }

                return result;
            }

            /// <summary>Returns one line of text per row.</summary>
            public override string ToString()
            {
                var text = new StringBuilder();
                this.Rows.ForEach(row => text.AppendLine(row.ToString()));
                return text.ToString();
            }
        }
    
        /// <summary>One table row: the ordered list of its cells.</summary>
        class RowData : List<CellData>
        {
            /// <summary>
            /// Returns the cell texts separated by tabs. A cell spanning N columns is
            /// followed by N - 1 extra tabs, so it takes up the same number of tab
            /// stops as the N single-column cells it stands in for.
            /// </summary>
            public override string ToString()
            {
                // Math.Max guards against a ColSpan of 0 (the default for an unset cell).
                return string.Join("\t", this.Select(cell => cell.Text + new string('\t', Math.Max(0, cell.ColSpan - 1))));
            }
        }

        /// <summary>Text and span information of a single table cell.</summary>
        class CellData
        {
            /// <summary>The cell's text; may be null when nothing was assigned.</summary>
            public string Text { get; set; }

            /// <summary>Number of columns the cell spans.</summary>
            public int ColSpan { get; set; }

            /// <summary>Number of rows the cell spans.</summary>
            public int RowSpan { get; set; }

            /// <summary>Returns the cell text, or an empty string when it is null.</summary>
            public override string ToString() => Text ?? string.Empty;
        }
    
    }
    

    Edit: modified to find the table at the start point or end point of the selection, even if the selection extends outside the table.


Your answer

Answers can be marked as Accepted Answers by the question author, which helps users to know the answer solved the author's problem.