php · curl · optimization

PHP: ideas to speed up a script making 18,000+ API calls


I have a script that makes a series of API calls. The first call returns an array of keys and hash codes. The script then loops over those pairs and calls a second API for each one, which returns a list of items; a nested foreach works through that data and posts it to a MySQL database.

The script takes 11 hours to run, and I would appreciate ideas on how to speed it up. I have looked into multi-processing, but everything I have read says that making web requests that way is a bad idea. Any ideas or links are welcome.

ini_set('max_execution_time', 60000);
date_default_timezone_set('UTC');
$time = date('Y-m-d');
echo $time;
$time_start = microtime(true);

$ch = curl_init();
curl_setopt($ch, CURLOPT_SSL_VERIFYPEER, false);
curl_setopt($ch, CURLOPT_RETURNTRANSFER, true);
curl_setopt($ch, CURLOPT_URL, 'https://zkillboard.com/api/history/20170816/');
$result = curl_exec($ch);
curl_close($ch);
$obj = json_decode($result, true);
$holder = [];
$i = 0;
$urls = [];
foreach ($obj as $key => $item) {
    $urls[] = ['url' => "https://esi.evetech.net/dev/killmails/{$key}/{$item}/"];
}

$x = 0;
$limit = 15;
$urls = array_slice($urls, 0, 5); // only the first 5 URLs while testing
//foreach (array_chunk($urls, 5, true) as $urlchunk) {
foreach ($urls as $urlchunk) {

        $x ++;
        $aURLs = $urlchunk; // array of URLs
        $mh = curl_multi_init(); // init the curl Multi

        $aCurlHandles = array(); // create an array for the individual curl handles

        foreach ($aURLs as $id => $url) { //add the handles for each url

//            $ch = curl_setup($url);
            $ch = curl_init(); // init curl, and then setup your options
            curl_setopt($ch, CURLOPT_URL, $url);
            curl_setopt($ch, CURLOPT_RETURNTRANSFER, 1); // returns the result - very important
            curl_setopt($ch, CURLOPT_HEADER, 0); // no headers in the output

            $aCurlHandles[$url] = $ch;
            curl_multi_add_handle($mh, $ch);
        }

        $active = null;
        //execute the handles
        do {
            $mrc = curl_multi_exec($mh, $active);

        } while ($mrc == CURLM_CALL_MULTI_PERFORM);

        while ($active && $mrc == CURLM_OK) {
            $mrc = curl_multi_exec($mh, $active);
            if (curl_multi_select($mh) != -1) {
                do {
                    $mrc = curl_multi_exec($mh, $active);
                } while ($mrc == CURLM_CALL_MULTI_PERFORM);
            }
        }

        /* This is the relevant bit */
        // iterate through the handles and get your content
        foreach ($aCurlHandles as $url => $ch) {
            $html = curl_multi_getcontent($ch); // get the content
            $obj1 = json_decode($html, true);
            foreach ($obj1['victim']['items'] as $value) {
                if (!empty($value['quantity_destroyed'])) {
                    $holder[] = [
                        'item' => $value['item_type_id'],
                        'qty' => $value['quantity_destroyed'],
                        'date' => $time
                    ];
                }
            }
            curl_multi_remove_handle($mh, $ch); // remove the handle once you are done with it
        }
        /* End of the relevant bit */

        curl_multi_close($mh); // close the curl multi handle for this chunk

}
$servername = "localhost";
$username = "username";
$password = "password";
$conn = new mysqli($servername, $username, $password, 'eve');
if ($conn->connect_error) {
    die("Connection failed: " . $conn->connect_error);
}
echo "Connected successfully<br>";
$i = 0;
foreach ($holder as $item) {

    $i++;
    $itemCheck = mysqli_query($conn, "SELECT * FROM item_temp WHERE itemid ={$item['item']} AND dates='{$item['date']}'");
    $row = mysqli_fetch_array($itemCheck, MYSQLI_ASSOC);
    if (!empty($row)) {
        $qty = $row['qty'] + $item['qty'];
        $kills = $row['kills'] + 1;
        $sql = "UPDATE item_temp SET qty='{$qty}' ,kills='{$kills}'  WHERE itemid={$item['item']} AND dates='{$item['date']}'";
        if ($conn->query($sql) === TRUE) {
        } else {
            echo "Error updating record: " . $conn->error;
        }
    } else {
        $sql = "INSERT INTO item_temp (itemid, qty, kills, dates)VALUES ('{$item['item']}', '{$item['qty']}', '1', '$time')";
        if ($conn->query($sql) === TRUE) {

        } else {
            echo "Error: " . $sql . "<br>" . $conn->error;
        }

    }
}
$time_end = microtime(true);
$execution_time = ($time_end - $time_start) / 60;
echo '<b>Total Execution Time:</b> ' . $execution_time . ' Mins';
echo '<br><b>Chunks processed:</b> ' . $x;

Update: the code above now uses curl_multi, but it runs at about the same speed. I'm not sure I'm using it the right way.


Solution

  • Currently each new request is sent only after the previous one has finished. You can send several requests in parallel with the curl_multi_* functions. Here is an example based on your code:

    $time_start = microtime(true);
    
    $ch = curl_init();
    curl_setopt($ch, CURLOPT_SSL_VERIFYPEER, false);
    curl_setopt($ch, CURLOPT_RETURNTRANSFER, true);
    curl_setopt($ch, CURLOPT_URL, 'https://zkillboard.com/api/history/20170816/');
    $result = curl_exec($ch);
    curl_close($ch);
    $obj = json_decode($result, true);
    $holder = [];
    $i = 0;
    $urls = [];
    foreach ($obj as $key => $item) {
        $urls[] = "https://esi.evetech.net/dev/killmails/{$key}/{$item}/";
    }
    
    $urls = array_slice($urls, 0, 20); // only first 20 for testing purposes
    
    $mh = curl_multi_init(); // init the curl multi handle
    $aCurlHandles = array(); // create an array for the individual curl handles
    
    foreach ($urls as $urlchunk) {
            $ch = curl_init(); // init curl, and then setup your options
            curl_setopt($ch, CURLOPT_URL, $urlchunk);
            curl_setopt($ch, CURLOPT_RETURNTRANSFER, 1); // returns the result - very important
            curl_setopt($ch, CURLOPT_HEADER, 0); // no headers in the output
    
            $aCurlHandles[] = $ch;
            curl_multi_add_handle($mh, $ch);
    }
    $active = null;
    //execute the handles
    do {
        $mrc = curl_multi_exec($mh, $active);
    
    } while ($mrc == CURLM_CALL_MULTI_PERFORM);
    
    while ($active && $mrc == CURLM_OK) {
        $mrc = curl_multi_exec($mh, $active);
        if (curl_multi_select($mh) != -1) {
            do {
                $mrc = curl_multi_exec($mh, $active);
            } while ($mrc == CURLM_CALL_MULTI_PERFORM);
        }
    }
    
    
    /* This is the relevant bit */
    // iterate through the handles and get your content
    foreach ($aCurlHandles as $ch) {
        $html = curl_multi_getcontent($ch); // get the content
        $obj1 = json_decode($html, true);
        $holder[]= $obj1['killmail_time'];
        curl_multi_remove_handle($mh, $ch); // remove the handle (assuming you are done with it)
    }
    curl_multi_close($mh); // close the curl multi handler
    echo "multi_exec approach \n";
    var_dump(sizeof($holder));
    echo "\n";
    var_dump(array_pop($holder));
    echo "\nIt took: " . (microtime(true) - $time_start);
    
    $time_start = microtime(true);
    $holder = [];
    foreach ($urls as $urlchunk) {
        $ch1 = curl_init();
        curl_setopt($ch1, CURLOPT_SSL_VERIFYPEER, false);
        curl_setopt($ch1, CURLOPT_RETURNTRANSFER, true);
        curl_setopt($ch1, CURLOPT_URL, $urlchunk);
        $result1 = curl_exec($ch1);
        curl_close($ch1);
        $holder[] = json_decode($result1, true)['killmail_time'];
    }
    echo "\n\nsequential approach \n";
    var_dump(sizeof($holder));
    echo "\n";
    var_dump(array_pop($holder));
    echo "\nIt took: " . (microtime(true) - $time_start);
    

    Be careful and don't try to send all the requests (14,480 for the link you mentioned) in parallel: you'd better not DoS the API, agreed?
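
    One way to keep the parallelism bounded is to process the URL list in fixed-size chunks with array_chunk, which the commented-out line in the question already hints at. The sketch below is only an illustration of that idea: it reuses $urls (the plain URL strings built at the top of this answer, without the testing slice) and $time from the question, and the chunk size of 25 and the fetchChunk() helper are assumptions, not part of the original code.

        // Fetch a batch of URLs in parallel and return the decoded JSON bodies.
        // Minimal sketch: failed or non-JSON responses are simply skipped.
        function fetchChunk(array $urls): array
        {
            $mh = curl_multi_init();
            $handles = [];

            foreach ($urls as $url) {
                $ch = curl_init($url);
                curl_setopt($ch, CURLOPT_RETURNTRANSFER, true);
                curl_setopt($ch, CURLOPT_HEADER, false);
                $handles[$url] = $ch;
                curl_multi_add_handle($mh, $ch);
            }

            // Drive all handles in this chunk until they are done,
            // waiting for socket activity instead of busy-looping.
            do {
                $status = curl_multi_exec($mh, $active);
                if ($active) {
                    curl_multi_select($mh);
                }
            } while ($active && $status == CURLM_OK);

            $results = [];
            foreach ($handles as $url => $ch) {
                $decoded = json_decode(curl_multi_getcontent($ch), true);
                if (is_array($decoded)) {
                    $results[$url] = $decoded;
                }
                curl_multi_remove_handle($mh, $ch);
                curl_close($ch);
            }
            curl_multi_close($mh);

            return $results;
        }

        // A chunk size of 25 is a guess; tune it so you stay inside the API's limits.
        $holder = [];
        foreach (array_chunk($urls, 25) as $chunk) {
            foreach (fetchChunk($chunk) as $killmail) {
                foreach ($killmail['victim']['items'] ?? [] as $value) {
                    if (!empty($value['quantity_destroyed'])) {
                        $holder[] = [
                            'item' => $value['item_type_id'],
                            'qty'  => $value['quantity_destroyed'],
                            'date' => $time
                        ];
                    }
                }
            }
        }

    Each chunk is fully finished before the next one starts, so no more than 25 requests are in flight at any time.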