Search code examples
c#cookieshttpwebrequestwebclienthtml-agility-pack

How to pass cookies to HtmlAgilityPack or WebClient?


I use this code to login:

// First request: capture whatever cookies the server sets in its response.
CookieCollection cookies = new CookieCollection();
HttpWebRequest request = (HttpWebRequest)WebRequest.Create("example.com");
request.CookieContainer = new CookieContainer();
request.CookieContainer.Add(cookies);
// Dispose the response as soon as the cookies are read; an undisposed
// response keeps its underlying connection occupied.
using (HttpWebResponse response = (HttpWebResponse)request.GetResponse())
{
    cookies = response.Cookies;
}

// Second request: POST the form data, sending the cookies captured above.
string getUrl = "example.com";
string postData = String.Format("my parameters");
HttpWebRequest getRequest = (HttpWebRequest)WebRequest.Create(getUrl);
getRequest.CookieContainer = new CookieContainer();
getRequest.CookieContainer.Add(cookies);
getRequest.Method = WebRequestMethods.Http.Post;
getRequest.UserAgent = "Mozilla/5.0 (Windows NT 6.2; WOW64; rv:19.0) Gecko/20100101 Firefox/19.0";
getRequest.AllowWriteStreamBuffering = true;
getRequest.ProtocolVersion = HttpVersion.Version11;
getRequest.AllowAutoRedirect = true;
getRequest.ContentType = "application/x-www-form-urlencoded";

// Write the request body; the using block closes the stream even if Write throws.
byte[] byteArray = Encoding.ASCII.GetBytes(postData);
getRequest.ContentLength = byteArray.Length;
using (Stream newStream = getRequest.GetRequestStream())
{
    newStream.Write(byteArray, 0, byteArray.Length);
}

// Read the response body as windows-1251 text, parse it, and show it in the browser control.
using (HttpWebResponse getResponse = (HttpWebResponse)getRequest.GetResponse())
using (StreamReader sr = new StreamReader(getResponse.GetResponseStream(), Encoding.GetEncoding("windows-1251")))
{
    doc.LoadHtml(sr.ReadToEnd());
    webBrowser1.DocumentText = doc.DocumentNode.OuterHtml;
}

Then I want to use HtmlWeb (HtmlAgilityPack) or WebClient to load the HTML into an HtmlDocument (HtmlAgilityPack).

My problem is that when I use:

// WebClient implements IDisposable, so release it once the page text has been downloaded.
using (WebClient wc = new WebClient())
{
    webBrowser1.DocumentText = wc.DownloadString(site);
}

or

// Load the page through the `web` loader, then show the parsed document's markup.
doc = web.Load(site);
string pageMarkup = doc.DocumentNode.OuterHtml;
webBrowser1.DocumentText = pageMarkup;

The login disappears, so I think I must somehow pass the cookies along with these requests. Any suggestions?


Solution

  • Check HtmlAgilityPack.HtmlDocument Cookies

    Here is an example of what you're looking for (syntax not 100% tested, I just modified some class I usually use):

    /// <summary>
    /// Minimal web client that sends every request with the same
    /// <see cref="CookieContainer"/>, so cookies received on one request
    /// are sent again on the following ones.
    /// </summary>
    public class MyWebClient
    {
        //The cookies will be here.
        private CookieContainer _cookies = new CookieContainer();

        /// <summary>
        /// Discards all stored cookies by replacing the container with an empty one.
        /// </summary>
        public void ClearCookies() {
            _cookies = new CookieContainer();
        }

        /// <summary>
        /// Downloads <paramref name="url"/> with a GET request and parses the body.
        /// </summary>
        /// <param name="url">Address of the page to download.</param>
        /// <returns>The downloaded HTML loaded into a new <see cref="HtmlDocument"/>.</returns>
        public HtmlDocument GetPage(string url) {
            HttpWebRequest request = (HttpWebRequest)WebRequest.Create(url);
            request.Method = "GET";

            //Set more parameters here...
            //...

            //This is the important part.
            request.CookieContainer = _cookies;

            //When you get the response from the website, the cookies will be stored
            //automatically in "_cookies".

            //Dispose the response, its stream and the reader in every case (including
            //exceptions); an undisposed response keeps its connection occupied.
            using (HttpWebResponse response = (HttpWebResponse)request.GetResponse())
            using (Stream stream = response.GetResponseStream())
            using (StreamReader reader = new StreamReader(stream)) {
                string html = reader.ReadToEnd();
                var doc = new HtmlDocument();
                doc.LoadHtml(html);
                return doc;
            }
        }
    }
    

    Here is how you use it:

    // The first call goes out with an empty cookie store.
    MyWebClient client = new MyWebClient();
    HtmlDocument doc = client.GetPage("http://somepage.com");

    // The same client instance is reused, so this request is sent with the
    // cookies obtained from the first page.
    doc = client.GetPage("http://somepage.com/another-page");
    

    Note: If you also want to use the POST method, create a method similar to GetPage that contains the POST logic and sets request.CookieContainer to the same _cookies field, then refactor the shared code as needed.