For loading HTML from a URL, I am using the method below
// Downloads the page at `url` and parses it into an HtmlDocument.
// On failure, records the first error on the task, marks the task as
// failed, flags completion, and returns null.
public HtmlDocument DownloadSource(string url)
{
    try
    {
        var document = new HtmlAgilityPack.HtmlDocument();
        document.LoadHtml(DownloadString(url));
        return document;
    }
    catch (Exception ex)
    {
        // Only the first exception is kept; later failures must not overwrite it.
        if (Task.Error == null)
            Task.Error = ex;
        Task.Status = TaskStatuses.Error;
        Done = true;
        return null;
    }
}
but suddenly today the code above stopped working. I discovered another method and it works correctly.
// HtmlWeb.Load performs its own HTTP request and sends default request
// headers (including a User-Agent), unlike the bare DownloadString helper.
// NOTE(review): url's declared type is not visible here — the ToString()
// call suggests it may be a Uri rather than a string; confirm at the caller.
HtmlWeb web = new HtmlWeb();
HtmlAgilityPack.HtmlDocument doc = web.Load(url.ToString());
Now I would just like to know the difference between the two methods.
It seems that a User-Agent
header is now mandatory for your site.
Everything is fine with HtmlAgilityPack,
but you should change your DownloadString(url)
method. If you inspect the request with Fiddler, you will see that the server responds with 403 Forbidden
:
The solution is to add a User-Agent
header to the request:
using HtmlAgilityPack;
using System;
using System.Net;

class Program
{
    static void Main()
    {
        var doc = DownloadSource("https://videohive.net/item/inspired-slideshow/21544630");
        Console.ReadKey();
    }

    /// <summary>
    /// Downloads the HTML at <paramref name="url"/> and parses it into an
    /// <see cref="HtmlDocument"/>.
    /// </summary>
    /// <param name="url">Absolute URL of the page to fetch.</param>
    /// <returns>The parsed document, or null if the download or parse failed.</returns>
    public static HtmlDocument DownloadSource(string url)
    {
        try
        {
            HtmlDocument doc = new HtmlDocument();
            doc.LoadHtml(DownloadString(url));
            return doc;
        }
        catch (Exception e)
        {
            // Don't swallow the failure silently — surface it so the caller
            // can tell why a null document came back.
            Console.Error.WriteLine("Failed to download " + url + ": " + e.Message);
        }
        return null;
    }

    /// <summary>
    /// Fetches the raw page source. A User-Agent header is set explicitly
    /// because the target site returns 403 Forbidden to requests without one.
    /// </summary>
    static string DownloadString(string url)
    {
        // WebClient is IDisposable — dispose it so the underlying connection
        // is released instead of leaked.
        using (WebClient client = new WebClient())
        {
            client.Headers.Add("User-Agent", "Mozilla/5.0 (Windows; U; Windows NT 5.1; en-US; rv:x.x.x) Gecko/20041107 Firefox/x.x");
            return client.DownloadString(url);
        }
    }
}