Hi @Maurizio Porro, welcome to Microsoft Q&A.
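The error occurs because `row.SelectNodes("tr|td")` is returning null, meaning there are no `tr` or `td` elements directly inside the `row` node. HtmlAgilityPack's `SelectNodes` returns null (not an empty collection) when the XPath matches nothing, so iterating over the result without a null check throws a NullReferenceException.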
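The XPath in `row.SelectNodes("tr|td")` is incorrect: a `tr` row contains `th` and `td` cells, never another `tr`. Instead, you likely want `row.SelectNodes("th|td")` or `row.SelectNodes("td")`, and you should check the result for null before looping over it.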
using HtmlAgilityPack;
using System;
using System.IO;
/// <summary>
/// Console sample that downloads a web page with HtmlAgilityPack and appends every
/// table row to a text file, one line per row, with the cell texts joined by a delimiter.
/// </summary>
class Program
{
    /// <summary>
    /// Loads the page at <c>url</c>, visits every <c>table</c> and every <c>tr</c> inside it,
    /// and appends the trimmed text of each row's <c>th</c>/<c>td</c> cells to <c>pathOut</c>.
    /// Each row is also echoed to the console.
    /// </summary>
    static void Main()
    {
        string pathOut = "";    // TODO: set the path of the output file (rows are appended to it)
        string delimiter = "|";
        string url = "";        // TODO: set the address of the page to scrape

        // Both placeholders must be filled in: an empty path makes StreamWriter throw,
        // and an empty URL cannot be loaded.
        if (string.IsNullOrWhiteSpace(url) || string.IsNullOrWhiteSpace(pathOut))
        {
            Console.WriteLine("Set 'url' and 'pathOut' before running.");
            return;
        }

        Console.WriteLine("Scraping tables from: " + url);
        HtmlWeb web = new HtmlWeb();
        var htmlDoc = web.Load(url);

        // SelectNodes returns null (not an empty collection) when nothing matches,
        // so every result is checked before it is enumerated.
        var tables = htmlDoc.DocumentNode.SelectNodes("//table");
        if (tables == null)
        {
            Console.WriteLine("No tables found.");
            return;
        }

        using (var sw = new StreamWriter(pathOut, true))
        {
            foreach (HtmlNode table in tables)
            {
                Console.WriteLine("\nFound: " + table.Name);

                var rows = table.SelectNodes(".//tr");
                if (rows == null)
                {
                    continue;
                }

                foreach (HtmlNode row in rows)
                {
                    var cells = row.SelectNodes(".//th|.//td");
                    if (cells == null)
                    {
                        continue;
                    }

                    var texts = new string[cells.Count];
                    for (int i = 0; i < cells.Count; i++)
                    {
                        texts[i] = cells[i].InnerText.Trim();
                    }

                    // string.Join puts the delimiter only between cells, so there is
                    // no trailing delimiter to strip afterwards.
                    string strLineOut = string.Join(delimiter, texts);
                    Console.WriteLine(strLineOut);
                    sw.WriteLine(strLineOut);
                }
            }
        }

        Console.WriteLine("Scraping completed.");
    }
}
Best Regards,
Jiale
If the answer is the right solution, please click "Accept Answer" and kindly upvote it. If you have extra questions about this answer, please click "Comment".
Note: Please follow the steps in our documentation to enable e-mail notifications if you want to receive the related email notification for this thread.