Search code examples
php, curl, curl-multi

Link-Checking with Multi-Curl


I'm building a link checker function that checks whether each link returns status code 200, 301, or 302.

I want to check about 1000 links, so I used the multi-cURL functionality, and I do get all the headers, the status codes, and the URL to which each URL redirected.

The problem is that multi-cURL executes in parallel: all the URLs are added with curl_multi_add_handle, and it returns the results it gets and ignores the rest.

I know from the header which result I got back, but I don't know which URL produced it. Is there an identifier that ties the URL I requested to a specific executed handle (maybe something to do with curl_multi_info_read)?

Here my Code:

// Builds one cURL easy handle per URL, attaches them all to a single multi
// handle, and collects the raw output of each finished transfer in $finalresult.
// $listofurls and $node_count are not defined in this snippet; they must be
// provided by the surrounding code.
$curls = $listofurls;
$curl_arr = array();
          $master = curl_multi_init();

          for($i = 0; $i < $node_count; $i++) {

            // The URL is read from offset 0 of each list entry.
            $curl_arr[$i] = curl_init($curls[$i][0]);
            curl_setopt($curl_arr[$i],CURLOPT_FRESH_CONNECT,true);
            curl_setopt($curl_arr[$i],CURLOPT_CONNECTTIMEOUT,10);
            // HEADER + RETURNTRANSFER + NOBODY: the response headers are returned
            // as a string and no body is requested.
            curl_setopt($curl_arr[$i],CURLOPT_HEADER,true);
            curl_setopt($curl_arr[$i],CURLOPT_CUSTOMREQUEST,'HEAD');
            curl_setopt($curl_arr[$i],CURLOPT_RETURNTRANSFER,true);
            curl_setopt($curl_arr[$i],CURLOPT_NOBODY,true);
            curl_setopt($curl_arr[$i],CURLOPT_AUTOREFERER, 1);
            curl_setopt($curl_arr[$i],CURLOPT_TIMEOUT,30);

            curl_multi_add_handle($master, $curl_arr[$i]);
          }


          // Raw responses, appended in the order the transfers complete.
          $finalresult = array();

          do{

            // Drive all transfers; $running receives the number still active.
            curl_multi_exec($master, $running);
            // Only one completion message is read per pass of the loop.
            // NOTE(review): messages still queued when $running reaches 0 look
            // like they are never read, which would drop results - confirm.
            $info = curl_multi_info_read($master);

            // NOTE(review): per the PHP manual curl_multi_info_read() returns
            // false when no message is queued, so this offset access is then
            // performed on a boolean - confirm.
            if($info['handle']) {
              // Nothing here records which handle (and therefore which URL)
              // this response belongs to.
              $finalresult[] = curl_multi_getcontent($info['handle']);
              curl_multi_remove_handle($master, $info['handle']);
            }

          // Assigned but not read anywhere in this snippet.
          $previousActive = $running;
          }
          while($running > 0);

           curl_multi_close($master);

I appreciate the help. Thanks.


Solution

  • I got it solved. The key is to record the order in which the handles return and combine that order with the results. For anyone who may be looking for the answer:

          // Checks many links in parallel with the cURL multi API and maps each
          // response back to the index of the URL that produced it.
          //
          // Inputs (defined by the surrounding code):
          //   $listofurls - list whose entries hold the URL at offset 0
          //   $node_count - number of entries of $listofurls to check
          // Outputs:
          //   $finalresult   - raw header output, in completion order
          //   $returnedOrder - index into $listofurls for each $finalresult entry
          //   $res           - array(url index => raw header output)
          $curls = $listofurls;
          $curl_arr = array();
          $master = curl_multi_init();

          // Options shared by every request: header-only HEAD request whose
          // output is returned as a string instead of being printed.
          $requestOptions = array(
            CURLOPT_FRESH_CONNECT  => true,
            CURLOPT_CONNECTTIMEOUT => 10,
            CURLOPT_HEADER         => true,
            CURLOPT_CUSTOMREQUEST  => 'HEAD',
            CURLOPT_RETURNTRANSFER => true,
            CURLOPT_NOBODY         => true,
            CURLOPT_AUTOREFERER    => 1,
            CURLOPT_TIMEOUT        => 30,
          );

          for ($i = 0; $i < $node_count; $i++) {
            $curl_arr[$i] = curl_init($curls[$i][0]);
            curl_setopt_array($curl_arr[$i], $requestOptions);
            curl_multi_add_handle($master, $curl_arr[$i]);
          }

          $finalresult = array();
          $returnedOrder = array();

          do {
            $status = curl_multi_exec($master, $running);

            // Drain EVERY queued completion message. Reading only one per pass
            // loses results: several transfers can finish during one exec call,
            // and the loop ends as soon as $running reaches 0.
            while (($info = curl_multi_info_read($master)) !== false) {
              $handle = $info['handle'];

              // Strict search identifies which request this handle belongs to.
              $returnedOrder[] = array_search($handle, $curl_arr, true);
              $finalresult[] = curl_multi_getcontent($handle);

              curl_multi_remove_handle($master, $handle);
              curl_close($handle);
            }

            // Stop on a hard multi-stack error instead of looping forever.
            if ($status !== CURLM_OK && $status !== CURLM_CALL_MULTI_PERFORM) {
              break;
            }

            // Block until there is socket activity rather than busy-waiting.
            // curl_multi_select() can return -1 without waiting, so sleep
            // briefly in that case to avoid spinning the CPU.
            if ($running > 0 && curl_multi_select($master, 1.0) === -1) {
              usleep(1000);
            }
          } while ($running > 0);

          // Key each response by the index of the URL that produced it.
          $res = array_combine($returnedOrder, $finalresult);
          curl_multi_close($master);