I am jostling for few day to inject missing TR & TD tags in my html programmatically. from this link https://stackoverflow.com/a/18397864/14171304 i found that HtmlAgility pack does this job but i saw HtmlAgility pack adding missing closing tag but it can not add missing opening tag.
I have a html where high chance that any number of TR & TD could be missing into html data. i am looking for a routine which would inject Table, TR & TD tag if any missing.
my objective is to add/inject missing any opening & closing Table, TR & TD tag in html.
after sanitize html each row (TR) must have same number of TD tag. so want to could max number of TD and if any row(TR) has lesser number of TD tag then add empty tag in the beginning of that TR tag.
This way i tried to inject missing TR & TD tag but no luck. any can help me.
private void button1_Click(object sender, EventArgs e)
{
string htmldata = @"<table>
<tr>
<th>Company</th>
<th>Contact</th>
<th>Country</th>
</tr>
Alfreds Futterkiste</td>
<td>Maria Anders</td>
<td>Germany
</tr>
<tr>
<td>Centro comercial Moctezuma</td>
<td>Francisco Chang</td>
<td>Mexico
</tr>
</table>";
string abc = AddMissingHtmlTags(htmldata);
}
public static string AddMissingHtmlTags(string htmlData)
{
// Define regular expressions for table, tr, and td tags
Regex tableRegex = new Regex(@"<table[^>]*>.*?</table>", RegexOptions.IgnoreCase | RegexOptions.Singleline);
Regex trRegex = new Regex(@"<tr[^>]*>.*?</tr>", RegexOptions.IgnoreCase | RegexOptions.Singleline);
Regex tdRegex = new Regex(@"<td[^>]*>.*?</td>", RegexOptions.IgnoreCase | RegexOptions.Singleline);
// Match all table elements in the HTML
MatchCollection tableMatches = tableRegex.Matches(htmlData);
foreach (Match tableMatch in tableMatches)
{
string tableHtml = tableMatch.Value;
// Match all tr elements within the table
MatchCollection trMatches = trRegex.Matches(tableHtml);
foreach (Match trMatch in trMatches)
{
string trHtml = trMatch.Value;
// If there are no td elements within the tr, add one
if (!tdRegex.IsMatch(trHtml))
{
string newTd = "<td></td>";
string modifiedTr = trHtml.Insert(trHtml.Length - 5, newTd); // Insert new td before the closing </tr> tag
htmlData = htmlData.Replace(trHtml, modifiedTr);
}
}
// If there are no tr elements within the table, add one
if (!trRegex.IsMatch(tableHtml))
{
string newTr = "<tr><td></td></tr>";
string modifiedTable = tableHtml.Insert(tableHtml.Length - 8, newTr); // Insert new tr before the closing </table> tag
htmlData = htmlData.Replace(tableHtml, modifiedTable);
}
}
return htmlData;
}
Thanks