// Download the page over TLS 1.2 and parse it with HtmlAgilityPack.
HtmlWeb web = new HtmlWeb();
ServicePointManager.SecurityProtocol = SecurityProtocolType.Tls12;
HtmlDocument doc = web.Load("https://www.fcagroup.com/en-US/group/regions/Pages/northamerica.aspx");

// SelectNodes returns null (not an empty collection) when the XPath matches nothing,
// so fall back to an empty collection to keep the query below from throwing.
var divs = doc.DocumentNode.SelectNodes("//div[contains(@class,'span12')]")
           ?? new HtmlNodeCollection(doc.DocumentNode);

// Every anchor that has an href and non-blank text, under ANY of the matched divs.
var linksOnPage = from lnks in divs.Descendants()
                  where lnks.Name == "a" &&
                        lnks.Attributes["href"] != null &&
                        lnks.InnerText.Trim().Length > 0
                  select new
                  {
                      Url = lnks.Attributes["href"].Value,
                      Text = lnks.InnerText,
                  };
I tried the code above, but it extracts the links from every div with the class span12, whereas I only want the links inside the div that contains the "Automotive Brands" p tag. How can I achieve that?
Below the line
// Selects every <div> whose class attribute contains "span12".
var divs = doc.DocumentNode.SelectNodes("//div[contains(@class,'span12')]");
you can add the following code:
// Step 1: keep only the divs that have a direct child whose text mentions "Automotive Brands".
var autoNodes = new List<HtmlNode>();
if (divs != null) // SelectNodes yields null, not an empty collection, when nothing matched
{
    foreach (var div in divs)
    {
        if (div.ChildNodes.Any(c => c.InnerText.Contains("Automotive Brands")))
        {
            autoNodes.Add(div);
        }
    }
}

// Step 2: from those divs, collect (href, text) pairs for every brand link.
var links = new List<KeyValuePair<string, string>>();
foreach (var node in autoNodes)
{
    // GetAttributeValue supplies "" for anchors without an href, so such anchors are
    // simply filtered out instead of causing a NullReferenceException.
    var nodeLinks = node.Descendants().Where(c => c.Name.Equals("a")
        && c.GetAttributeValue("href", string.Empty).Contains("brands")
        && !string.IsNullOrEmpty(c.InnerText.Trim()));

    // The filter above guarantees an href attribute is present here.
    links.AddRange(nodeLinks.Select(nl =>
        new KeyValuePair<string, string>(nl.Attributes["href"].Value, nl.InnerText)));
}
This is the more readable, step-by-step version.
You can also condense the loop that builds autoNodes into a single LINQ expression:
// Compact LINQ form: the divs having a direct child whose text contains "Automotive Brands".
var autoNodes = divs
    .Where(candidate => candidate.ChildNodes
        .Any(child => child.InnerText.Contains("Automotive Brands")))
    .ToList();
if you like.