Search code examples
php multithreading curl libcurl curl-multi

Problem with CURL (Multi)


I'm having a problem with curl_multi_*. I want to create a class or function that receives, let's say, 1000 URLs and processes them 5 at a time, so that when a URL finishes downloading, the now-available slot is allocated to a new URL that hasn't been processed yet.

I've seen some implementations of curl_multi, but none of them lets me do what I want. I believe the solution lies somewhere in the usage of curl_multi_select, but the documentation isn't very clear and the user notes don't help much.

Can anyone please provide an example of how I can implement such a feature?


Solution

  • Here's one way to do it. This script fetches up to a configurable number of URLs at a time and adds a new one as each finishes (so it's always fetching $maxConcurrent pages while unprocessed URLs remain).

    // Example usage: fetch the listed sites, keeping at most $concurrent
    // transfers in flight at any moment.
    $sites = array('http://example.com', 'http://google.com', 'http://stackoverflow.com');
    $concurrent = 2;   // Any number.

    // Open the <pre> that is closed below so the emitted HTML stays balanced.
    echo '<pre>';

    $mc = new MultiCurl($sites, $concurrent);
    $mc->process();

    echo '</pre>';
    
    /**
     * Downloads a list of URLs through cURL's multi interface while keeping at
     * most a fixed number of transfers in flight. As soon as one transfer
     * finishes, its slot is handed to the next URL that has not been started.
     */
    class MultiCurl
    {
        /** URLs to fetch, re-indexed as a 0-based list. */
        private $allToDo;
        /** The cURL multi handle that drives all transfers. */
        private $multiHandle;
        /** Upper bound on simultaneously running transfers (always >= 1). */
        private $maxConcurrent = 2;
        /** Offset in $allToDo of the next URL to start. */
        private $currentIndex  = 0;
        /** Per-transfer bookkeeping, keyed by the integer from _handleKey(). */
        private $info          = array();
        /** cURL options applied to every transfer. */
        private $options       = array(CURLOPT_RETURNTRANSFER => true,
                                       CURLOPT_FOLLOWLOCATION => true,
                                       CURLOPT_MAXREDIRS      => 3,
                                       CURLOPT_TIMEOUT        => 3);

        /**
         * @param array $todo       URLs to download; keys are ignored.
         * @param int   $concurrent Maximum number of parallel transfers.
         *                          Values below 1 are treated as 1.
         */
        public function __construct($todo, $concurrent)
        {
            // URLs are consumed by numeric offset, so normalise associative or
            // sparse arrays into a plain list first.
            $this->allToDo = array_values((array) $todo);
            // With a non-positive limit no handle would ever be added and
            // process() would loop forever while URLs remain.
            $this->maxConcurrent = max(1, (int) $concurrent);
            $this->multiHandle = curl_multi_init();
        }

        /**
         * Releases the multi handle when the object goes away.
         */
        public function __destruct()
        {
            if ($this->multiHandle) {
                curl_multi_close($this->multiHandle);
                $this->multiHandle = null;
            }
        }

        /**
         * Runs all transfers to completion, printing one line per finished URL.
         *
         * @return $this
         */
        public function process()
        {
            $running = 0;
            do {
                // Fill every free slot, but never start more than what is left.
                $this->_addHandles(min($this->maxConcurrent - $running, $this->_moreToDo()));

                // Older libcurl versions ask to be called again immediately.
                do {
                    $status = curl_multi_exec($this->multiHandle, $running);
                } while ($status === CURLM_CALL_MULTI_PERFORM);

                // Report finished transfers before waiting, so their slots can
                // be refilled without first blocking in curl_multi_select().
                $finished = 0;
                while ($multiInfo = curl_multi_info_read($this->multiHandle)) {
                    $this->_showData($multiInfo);
                    curl_multi_remove_handle($this->multiHandle, $multiInfo['handle']);
                    curl_close($multiInfo['handle']);
                    $finished++;
                }

                // Only wait when transfers are pending and nothing completed in
                // this round. A -1 result means select could not wait at all;
                // sleep briefly so the loop does not spin at full CPU.
                if ($running > 0 && $finished === 0
                    && curl_multi_select($this->multiHandle) === -1) {
                    usleep(100);
                }
            } while ($running || $this->_moreToDo());
            return $this;
        }

        /**
         * Starts up to $num new transfers, taking URLs in list order.
         *
         * @param int $num Number of transfers to start; values <= 0 do nothing.
         */
        private function _addHandles($num)
        {
            while ($num-- > 0) {
                $url = $this->allToDo[$this->currentIndex];
                $this->currentIndex++;

                $handle = curl_init($url);
                curl_setopt_array($handle, $this->options);
                curl_multi_add_handle($this->multiHandle, $handle);

                // Start from a fresh record in case the key was used before.
                $this->info[$this->_handleKey($handle)] = array('url' => $url);
            }
        }

        /**
         * @return int Number of URLs that have not been started yet.
         */
        private function _moreToDo()
        {
            return count($this->allToDo) - $this->currentIndex;
        }

        /**
         * Records the outcome of a finished transfer and prints its URL and
         * the size of the downloaded body.
         *
         * @param array $multiInfo Message returned by curl_multi_info_read().
         */
        private function _showData($multiInfo)
        {
            $handle = $multiInfo['handle'];
            $key    = $this->_handleKey($handle);

            // Do not keep the handle itself in the record: holding on to it
            // would keep every finished transfer (and its body) in memory.
            unset($multiInfo['handle']);
            $this->info[$key]['multi'] = $multiInfo;
            $this->info[$key]['curl']  = curl_getinfo($handle);

            $content = (string) curl_multi_getcontent($handle);
            echo $this->info[$key]['url'] . ' - ' . strlen($content) . ' bytes<br />';
        }

        /**
         * Maps a cURL easy handle to an integer usable as an array key.
         *
         * Handles are objects on PHP 8+ and resources before that; neither can
         * be used directly as an array offset.
         *
         * @param mixed $handle cURL easy handle.
         * @return int
         */
        private function _handleKey($handle)
        {
            return is_object($handle) ? spl_object_id($handle) : (int) $handle;
        }
    }