Search code examples
php, cookies, http-headers, file-get-contents

Invalid cookies received from cURL request and file_get_contents


I am receiving an invalid cookie string when trying to capture the cookie using file_get_contents and cURL. The cookie received while browsing directly from the browser is valid/active, but the cookie captured via file_get_contents or cURL seems to be invalid.

I am trying to capture from file_get_contents like this

 // Request headers sent with the GET. Note that inside a single-quoted PHP
 // string "\/" is NOT an escape sequence, so the wildcard media type must be
 // written as a plain "*/*" (otherwise a literal backslash is sent).
 $context = array(
     'http' => array(
         'method' => 'GET',
         'header' => array(
             'Accept: text/html,application/xhtml+xml,application/xml;q=0.9,*/*;q=0.8',
             'User-Agent:Mozilla/5.0 (X11; Linux x86_64) AppleWebKit/537.36 (KHTML, like Gecko) Ubuntu Chromium/63.0.3239.84 Chrome/63.0.3239.84 Safari/537.36',
         ),
     ),
 );

 $cxContext = stream_context_create($context);

 // Only the response headers matter here; the body is discarded.
 file_get_contents($url, false, $cxContext);

 // $http_response_header is filled in by the HTTP stream wrapper. It stays
 // undefined when the request fails before any header is received, so fall
 // back to an empty list instead of iterating an undefined variable.
 // NOTE(review): the http wrapper follows redirects by default, so this list
 // may mix headers from several responses - confirm which response's cookie
 // is actually wanted.
 $headers = isset($http_response_header) ? $http_response_header : array();

 $cookies = array();
 foreach ($headers as $hdr) {
     // Header names are case-insensitive, hence the "i" modifier. Capture
     // only the "name=value" pair, i.e. everything up to the first ";".
     if (preg_match('/^Set-Cookie:\s*([^;]+)/i', $hdr, $matches)) {
         // Append: a plain assignment would replace the array with a single
         // string and keep only the last cookie.
         $cookies[] = $matches[1];
     }
 }

 return $cookies;

I tried playing around with this by setting different headers, but the cookie returned is always either expired or simply invalid.

But, through a browser the cookie I get is always valid.

Has anyone faced a similar problem? I don't know how to tackle this issue.


Solution

  • There are several unanswered questions from my above comment, but I'll share this bit of code for example purposes. It's what I've used in the past as a base class for browser emulation using cURL:

    <?php
    // Fail fast at load time: every request made by CurlBrowser goes through
    // the curl_* functions, so there is no point continuing without them.
    if (function_exists("curl_init") === false) {
        throw new Exception("CurlBrowser requires the cURL extension, which is not enabled!");
    }
    /**
     * Small browser-emulation helper built on cURL.
     *
     * Sends GET/POST requests with a browser-like User-Agent, an optional
     * persistent cookie file (read before and written after each request),
     * an optional proxy, optional custom headers and an optional CA bundle.
     */
    class CurlBrowser
    {
        /**
         * User-Agent header sent with every request.
         *
         * Other values that have been used:
         *   Mozilla/5.0 (Windows NT 6.1; WOW64; rv:9.0.1) Gecko/20100101 Firefox/9.0.1
         *   Mozilla/5.0 (Windows NT 6.1; WOW64; rv:25.0) Gecko/20100101 Firefox/25.0
         *
         * @var string
         */
        public $userAgent = "Mozilla/5.0 (compatible; MSIE 9.0; Windows NT 6.1; WOW64; Trident/5.0)";

        /** @var string|null Cookie file used as both cookie source and cookie jar; null disables cookies. */
        public $cookiesFile = null;

        /** @var string|null Proxy passed to CURLOPT_PROXY; null means no proxy. */
        public $proxyURL = null;

        /** @var string When non-empty, path of a file that receives the last response body. */
        public $saveLastOutput = "";

        /** @var string Path to a CA bundle; only applied when the file exists. */
        public $caBundle = "cacert.pem";

        /** @var array Extra request headers ("Name: value" strings) for CURLOPT_HTTPHEADER. */
        public $httpHeaders = array();

        /**
         * @param bool|string $UseCookies true  => use "cookies.txt" next to this class file,
         *                                non-empty string => use that path as the cookie file,
         *                                anything else    => cookies disabled.
         */
        public function __construct($UseCookies = true)
        {
            if($UseCookies === true)
            {
                $this->cookiesFile = dirname(__FILE__)."/cookies.txt";
            }
            elseif(is_string($UseCookies) && ($UseCookies != ""))
            {
                $this->cookiesFile = $UseCookies;
            }
        }

        /**
         * Replace the list of custom request headers.
         *
         * @param array $arrHeaders Header lines such as "Accept: text/html".
         */
        public function SetCustomHTTPHeaders($arrHeaders)
        {
            $this->httpHeaders = $arrHeaders;
        }

        /**
         * Route subsequent requests through a proxy.
         *
         * @param string|null $proxy Value for CURLOPT_PROXY, or null to disable.
         */
        public function SetProxy($proxy)
        {
            $this->proxyURL = $proxy;
        }

        /**
         * Perform a GET request.
         *
         * @param string $url
         * @return string Response body.
         * @throws Exception When cURL reports a failure.
         */
        public function Get($url)
        {
            return $this->_request($url);
        }

        /**
         * Perform a POST request.
         *
         * @param string       $url
         * @param array|string $data Value handed to CURLOPT_POSTFIELDS.
         * @return string Response body.
         * @throws Exception When cURL reports a failure.
         */
        public function Post($url,$data = array())
        {
            // _request() treats null as "no body => GET"; normalise it so a call
            // to Post() can never silently turn into a GET.
            if($data === null)
            {
                $data = array();
            }
            return $this->_request($url,$data);
        }

        /**
         * Build, configure and execute a single cURL transfer.
         *
         * @param string            $form_url Target URL.
         * @param array|string|null $data     POST payload, or null for a GET.
         * @return string Response body (headers are not included).
         * @throws Exception With cURL's error message and error number on failure.
         */
        private function _request($form_url,$data = null)
        {
            $ch = curl_init($form_url);

            // CA bundle: only applied when it points at an existing file.
            // realpath() yields an absolute path for any relative form
            // (bare file name or with directories) and false when missing.
            $caBundle = $this->caBundle;
            if(is_string($caBundle) && $caBundle !== "")
            {
                $caBundlePath = realpath($caBundle);
                if($caBundlePath !== false && is_file($caBundlePath))
                {
                    curl_setopt($ch, CURLOPT_CAINFO, $caBundlePath);
                }
            }

            // Cookies: the same file is read before the request and written after it.
            if($this->cookiesFile !== null)
            {
                curl_setopt($ch, CURLOPT_COOKIEFILE, $this->cookiesFile);
                curl_setopt($ch, CURLOPT_COOKIEJAR, $this->cookiesFile);
            }

            // User Agent
            curl_setopt($ch, CURLOPT_USERAGENT, $this->userAgent);

            // Misc: body only, no automatic redirects, return the body as a
            // string, and let cURL negotiate and decode compressed responses.
            curl_setopt($ch, CURLOPT_HEADER, 0);
            curl_setopt($ch, CURLOPT_FOLLOWLOCATION, 0);
            curl_setopt($ch, CURLOPT_RETURNTRANSFER, 1);
            curl_setopt($ch, CURLOPT_ENCODING, "gzip, deflate");

            // Optional proxy
            if($this->proxyURL !== null)
            {
                curl_setopt($ch, CURLOPT_PROXY, $this->proxyURL);
            }

            // Custom HTTP headers. The is_array() check keeps count() from
            // failing if something other than an array was stored.
            if(is_array($this->httpHeaders) && count($this->httpHeaders) > 0)
            {
                curl_setopt($ch, CURLOPT_HTTPHEADER, $this->httpHeaders);
            }

            // POST data
            if($data !== null)
            {
                curl_setopt($ch, CURLOPT_POST, 1);
                curl_setopt($ch, CURLOPT_POSTFIELDS, $data);
            }

            // Run operation
            $result = curl_exec($ch);

            if($result === false)
            {
                // Carry the cURL error number as the exception code so callers
                // can tell failure kinds apart without parsing the message.
                throw new Exception(curl_error($ch), curl_errno($ch));
            }

            // Optionally keep a copy of the response on disk (best effort).
            if(!empty($this->saveLastOutput))
            {
                file_put_contents($this->saveLastOutput,$result);
            }

            return $result;
        }
    }
    ?>
    

    You'd use it like so:

    <?php
    // With no argument the constructor enables cookies and stores them in
    // "cookies.txt" in the same directory as the CurlBrowser class file.
    $browser = new CurlBrowser();
    // Get() returns the response body as a string and throws an Exception
    // when cURL reports a failure.
    $html = $browser->Get("https://....");
    ...etc...
    

    My gut guess is that you're simply missing a cookie jar in your original code, but that's mostly based on gut feeling, since we don't have all your problem code at this time.