Search code examples
phpmemory-leaksspl

memory leak while processing large CSV


I have a script that downloads a large product CSV file, processes the information therein (downloading images and resizing and preparing other data for database insertion), then creates another txt file of all the processed items. The problem is that it seems to be hemmoraging memory somewhere. I get an error 500 returned, but the log shows too much memory usage. I've unset as much as I can, and I'm using SPL iterators which are supposed to be less memory intensive, but i still can get to script to complete execution and enter all of the information. Can anyone point out something in the script that would help prevent the memory leakage?

<?php

define('IN_PHPBB', true);
define('IN_SHOP', true);
$phpbb_root_path = './../forum/';
$root_path = './../';
$phpEx = substr(strrchr(__FILE__, '.'), 1);
include($phpbb_root_path.'common.'.$phpEx);

// Start session management
$user->session_begin();
$auth->acl($user->data);
$user->setup();

set_time_limit(172800);

define('THUMBNAIL_IMAGE_MAX_WIDTH', 150);
define('THUMBNAIL_IMAGE_MAX_HEIGHT', 150);

function generate_thumb($source_image_path, $thumbnail_image_path)
{
    list($source_image_width, $source_image_height, $source_image_type) = getimagesize($source_image_path);
    switch ($source_image_type) {
        case IMAGETYPE_GIF:
            $source_gd_image = imagecreatefromgif($source_image_path);
            break;
        case IMAGETYPE_JPEG:
            $source_gd_image = imagecreatefromjpeg($source_image_path);
            break;
        case IMAGETYPE_PNG:
            $source_gd_image = imagecreatefrompng($source_image_path);
            break;
    }
    if ($source_gd_image === false) {
        return false;
    }
    $source_aspect_ratio = $source_image_width / $source_image_height;
    $thumbnail_aspect_ratio = THUMBNAIL_IMAGE_MAX_WIDTH / THUMBNAIL_IMAGE_MAX_HEIGHT;
    if ($source_image_width <= THUMBNAIL_IMAGE_MAX_WIDTH && $source_image_height <= THUMBNAIL_IMAGE_MAX_HEIGHT) {
        $thumbnail_image_width = $source_image_width;
        $thumbnail_image_height = $source_image_height;
    } elseif ($thumbnail_aspect_ratio > $source_aspect_ratio) {
        $thumbnail_image_width = (int) (THUMBNAIL_IMAGE_MAX_HEIGHT * $source_aspect_ratio);
        $thumbnail_image_height = THUMBNAIL_IMAGE_MAX_HEIGHT;
    } else {
        $thumbnail_image_width = THUMBNAIL_IMAGE_MAX_WIDTH;
        $thumbnail_image_height = (int) (THUMBNAIL_IMAGE_MAX_WIDTH / $source_aspect_ratio);
    }
    $thumbnail_gd_image = imagecreatetruecolor($thumbnail_image_width, $thumbnail_image_height);
    imagecopyresampled($thumbnail_gd_image, $source_gd_image, 0, 0, 0, 0, $thumbnail_image_width, $thumbnail_image_height, $source_image_width, $source_image_height);
    imagejpeg($thumbnail_gd_image, $thumbnail_image_path, 90);
    imagedestroy($source_gd_image);
    imagedestroy($thumbnail_gd_image);
    unset($source_image_width, $source_image_height, $source_image_type, $source_gd_image, $source_aspect_ratio, $thumbnail_aspect_ratio, $thumbnail_image_width, $thumbnail_image_height, $thumbnail_gd_image);
    return true;
}


$regex = <<<'END'
/
  (
    (?: [\x00-\x7F]               # single-byte sequences   0xxxxxxx
    |   [\xC0-\xDF][\x80-\xBF]    # double-byte sequences   110xxxxx 10xxxxxx
    |   [\xE0-\xEF][\x80-\xBF]{2} # triple-byte sequences   1110xxxx 10xxxxxx * 2
    |   [\xF0-\xF7][\x80-\xBF]{3} # quadruple-byte sequence 11110xxx 10xxxxxx * 3 
    )+                            # ...one or more times
  )
| ( [\x80-\xBF] )                 # invalid byte in range 10000000 - 10111111
| ( [\xC0-\xFF] )                 # invalid byte in range 11000000 - 11111111
/x
END;
function utf8replacer($captures) {
  if ($captures[1] != "") {
    // Valid byte sequence. Return unmodified.
    return $captures[1];
  }
  elseif ($captures[2] != "") {
    // Invalid byte of the form 10xxxxxx.
    // Encode as 11000010 10xxxxxx.
    return "\xC2".$captures[2];
  }
  else {
    // Invalid byte of the form 11xxxxxx.
    // Encode as 11000011 10xxxxxx.
    return "\xC3".chr(ord($captures[3])-64);
  }
}

/* download file from source */
function getDataCSV(){
    $thefile = 'http://feeds.cnv.com/xxxxxxxxxxxxxx/Bronze/ELD-B01.csv';
    $file = 'ELD-B01.csv';

    $fh = fopen($file, "w");
    $rows = file($thefile);
    foreach($rows as $num => $row){
        if($num != 0){
            fwrite($fh, $row);
        }
    }
    fclose($fh);

    include("DataSource.php");

    $csv = new File_CSV_DataSource;
    if ($csv->load($file)) {
        $items = $csv->getHeaders();
        $csv->getColumn($items[2]);
        if ($csv->isSymmetric()) {
            $items = $csv->connect();
        } else {
            $items = $csv->getAsymmetricRows();
        }
        $items = $csv->getrawArray();
    }
    unset($csv);
    return $items;
}

$iter = new ArrayIterator(getDataCSV());

$google_list = array();
$google_list[] = array('id', 'title', 'description', 'google_product_category', 'product_type', 'link', 'image_link', 'condition', 'availability', 'price', 'brand', 'mpn');

$sql = "TRUNCATE TABLE ".SHOP_ITEMS;
$db->sql_query($sql);

foreach($iter as $item){
    if($item[12] != ""){
        $catName = str_replace(" ", "-", str_replace("and ", "", str_replace(",", "", str_replace("&amp;", "and", str_replace("-", "", $item[12])))));
    }else{
        $catName = str_replace(" ", "-", str_replace("and ", "", str_replace(",", "", str_replace("&amp;", "and", str_replace("-", "", $item[11])))));
    }

    $sql = 'SELECT cat_id FROM '.SHOP_CATS.' WHERE cat_clean = "'.$catName.'"';
    $result = $db->sql_query($sql);
    $row = $db->sql_fetchrow($result);
    $db->sql_freeresult($result);
    $catId = $row['cat_id'];

    $img = $item[9];
    $ext = substr($img, strrpos($img, '.') + 1);
    $image = 'images/products/'.$item[0].'.'.$ext;
    file_put_contents($root_path.$image, file_get_contents($img));
    $thumb = "images/products/thumbs/".$item[0]."_thumb.".$ext;

    generate_thumb($root_path.$image, $thumb);

    $itmRow = array(
        'item_name' => str_replace("/", "", preg_replace_callback($regex, "utf8replacer", html_entity_decode(html_entity_decode($item[1], ENT_QUOTES)))),
        'item_price' => $item[2],
        'item_description' => preg_replace_callback($regex, "utf8replacer", html_entity_decode(html_entity_decode($item[4], ENT_QUOTES))),
        'item_model' => $item[0],
        'item_manufacturer' => ($item[6] == '') ? 'No Info' : $item[6],
        'item_image' => $image,
        'item_cat' => ($catId) ? $catId : 0,
        'item_number' => $item[0],
        'item_vendor_code' => "ELD",
        'item_stock' => (strtolower($item[5]) == 'in stock') ? 1 : 0,
        'item_added' => strtotime($item[8]),
        'item_upc' => ($item[13] == '') ? 'No Info' : $item[13],
        'item_url' => '',
        'item_weight' => ($item[14] == '') ? 'No Info' : $item[14],
    );

    $sql = 'INSERT INTO '.SHOP_ITEMS.' '.$db->sql_build_array('INSERT', $itmRow);
    $db->sql_query($sql);
    $itmId = $db->sql_nextid();

    if(strstr($itmRow['item_name'], "-") == FALSE){
        $seo = urlencode(str_replace(" ", "-", $itmRow['item_name'])).".html";
    }else{
        $seo = urlencode(str_replace(" ", "_", $itmRow['item_name'])).".html";
    }
    if($item[5] == "oos"){
        $stock = "Out of Stock";
    }else{
        $stock = "In Stock";
    }
    $u_product = "https://therealmsofwickedry.com/product/".$seo;

    $google_list[] = array($itmId, $itmRow['item_name'], $itmRow['item_description'], 'Mature > Erotic', $catName, $u_product, "https://therealmsofwickedry.com/".$itmRow['item_image'], "new", $stock, $itmRow['item_price'], $itmRow['item_manufacturer'], $itmRow['item_model']);
    unset($catName, $catId, $img, $ext, $image, $thumb, $itmRow, $itmId, $seo, $stock, $u_product);
}

$line = '';

foreach($google_list as $list){
    $line .= implode("\t", $list);
    $line .= "\n";
}

$google = 'google_products.txt';
$h = fopen($google, "w");
fwrite($h, $line);
fclose($h);

?>

Solution

  • Turns out that the utf8replacer() function was the cause of the issue. Thanks for the input, though :)