Search code examples
php · sql · preg-match

preg_match pattern to scan these prices


I'm trying to scrape prices from THIS page. I want to use preg_match to extract the prices from elements like this span: <span class="price"><b>519,00&nbsp;€</b></span>. What is the correct preg_match pattern?

This is my extractor script:

<?php
// Download the raw HTML of a Google Shopping results page into $content.
echo  "funziona!"; // progress marker ("funziona" is Italian for "it works")

    // NOTE(review): everything after '#' in this URL is a fragment; fragments are normally
    // not sent to the server, so the tbs/tbm/q parameters probably never reach Google - verify.
    if(!$fp = fopen("https://www.google.it/webhp?sourceid=chrome-instant&ion=1&espv=2&es_th=1&ie=UTF-8#tbs=vw:l,mr:1&tbm=shop&q=samsung+galaxy+note+4&tbas=0" ,"r" )) {
        return false; // the URL could not be opened: stop executing this file
    } //the stream opened successfully, so carry on
    $content = "";

    while(!feof($fp)) { //until the end of the stream, append the next chunk to $content
        $content .= fgets($fp, 1024); // reads up to a line end or 1023 bytes, whichever comes first
    }
    fclose($fp); //everything is in $content now, so the stream is no longer needed
?>

<?php
//assumes $content already holds the page HTML loaded by the fopen/fgets code - TODO confirm

//collect every <span class="price">...</span> occurrence; the /s flag lets "." span line breaks
preg_match_all('/<span class=\"price">(.*?)<\/span>/s',$content, $prices); //THIS IS PREG_MATCH 
    //$prices[0] holds the full matches (tags included); print the first one
    //NOTE(review): $prices[0][0] does not exist when nothing matched
    echo $prices[0][0]."<br />";
?>

I have never used preg_match before; I'm trying to adapt this script.
Thank you.


Solution

  • Have a look at this:

    <?php
    /**
     * Fetch a URL with cURL and return the response body.
     *
     * A set of default options (follow up to 10 redirects, 10 second connect
     * and transfer timeouts, a desktop Firefox user agent, return the body as
     * a string, record the outgoing request headers) is applied first. The
     * entries of $Options are applied afterwards, so they override defaults.
     *
     * @param string $Url     URL to request.
     * @param array  $Options Extra cURL options as option-constant => value pairs.
     * @param array  $optOut  Receives the request headers that were sent, under
     *                        the 'CURLINFO_HEADER_OUT' key.
     *
     * @return string|bool The response body. False when the transfer failed or
     *                     produced an empty body and cURL reported no error text.
     *
     * @throws Exception When the cURL handle cannot be created (code 100) or
     *                   when the transfer fails with a cURL error message.
     */
    function getUrl($Url,$Options = array(),&$optOut = array())
    {

        $CURL_DEFAULT_SETTINGS  = array
        (
            CURLOPT_FOLLOWLOCATION => true,
            CURLOPT_AUTOREFERER => true,
            CURLOPT_RETURNTRANSFER => true,
            CURLOPT_CONNECTTIMEOUT => 10,
            CURLOPT_MAXREDIRS => 10,
            CURLOPT_TIMEOUT => 10,
            CURLOPT_USERAGENT => 'Mozilla/5.0 (Windows; U; Windows NT 6.1; en-US; rv:1.9.2.8) Gecko/20100722 Firefox/3.6.8',
            // Request-header tracking must be switched on explicitly; without it
            // the curl_getinfo(..., CURLINFO_HEADER_OUT) call below yields false.
            CURLINFO_HEADER_OUT => true
        );

        if (!($ch = curl_init($Url)))
            throw new Exception("Couldn't initialize cURL library",100);

        curl_setopt_array($ch,$CURL_DEFAULT_SETTINGS);

        // Caller-supplied options go last so that they win over the defaults.
        if (is_array($Options))
        {
            foreach ($Options as $k => $v)
            {
                curl_setopt($ch,$k,$v);
            }
        }

        $Data = curl_exec($ch);
        $Error = curl_error($ch);

        $optOut['CURLINFO_HEADER_OUT'] = curl_getinfo($ch, CURLINFO_HEADER_OUT );

        curl_close($ch);

        // Compare explicitly instead of relying on truthiness: a body consisting
        // of the single character "0" is a valid response, not a failure.
        if ($Data === false || $Data === '')
        {
            if ($Error)
                throw new Exception($Error);

            return false;
        }

        return $Data;
    }
    
    /**
     * Look up Google Shopping (google.it) offers for a search query.
     *
     * The response is fetched through getUrl(), converted into a JSON array and
     * the HTML fragment found in element 3, key 'd', is scanned for price and
     * seller pairs.
     *
     * @param string $query Free-text product search; it is URL-encoded here.
     *
     * @return array List of array('price' => string, 'from' => string) entries
     *               in page order. Empty when the response is missing, cannot
     *               be decoded, or contains no offers.
     *
     * @throws Exception Propagated from getUrl() on cURL failures.
     */
    function getPriceFor($query) {
        $data = getUrl('https://www.google.it/search?tbs=vw:l,mr:1&tbm=shop&q='.rawurlencode($query).'&tbas=0&bav=on.2,or.&cad=b&fp=6a24b60e09fe0b18&biw=1196&bih=703&dpr=2&ion=1&espv=2&tch=1&ech=1&psi=byWgVee9A4TNeIXRgLAK.1436558704099.3');

        // getUrl() hands back false when there is no body; nothing to parse then.
        if (!is_string($data) || $data === '')
            return array();

        // The body is treated as JSON chunks separated by a /*""*/ marker: strip
        // the trailing marker, turn the remaining ones into commas and wrap the
        // result in brackets so that it decodes as a single JSON array.
        $data = '['.preg_replace('/\/\*""\*\//msi',',',preg_replace('/\/\*""\*\/[\s]*$/msi','',$data)).']';
        $data = json_decode($data,true);

        // Bail out quietly when decoding failed or the expected HTML chunk is
        // absent (presumably because the response layout changed - verify).
        if (!is_array($data) || !isset($data[3]['d']) || !is_string($data[3]['d']))
            return array();

        // Group 1 captures the price text, group 2 the seller text.
        if (!preg_match_all('/<div[\s]+class="_OA"><div><b>([^<]+)[\s]*<\/b><\/div><div>([^<]+)<\/div><\/div>/msi',$data[3]['d'],$res))
            return array();

        $re = array();

        foreach ($res[1] as $k=>$r)
            $re[] = array('price'=>$r,'from'=>$res[2][$k]);

        return $re;
    }
    
    // Demo: fetch the offers for a sample query and dump the resulting array.
    print_r(getPriceFor('samsung galaxy note 4'));
    

    That should display something like this:

    Array
    (
        [0] => Array
            (
                [price] => 515,00 €
                [from] => phoneshopping.it
            )
    
        [1] => Array
            (
                [price] => 519,00 €
                [from] => Smartyrama
            )
    
        [2] => Array
            (
                [price] => 519,00 €
                [from] => Smartyrama
            )
    
        [3] => Array
            (
                [price] => 519,00 €
                [from] => Smartyrama
            )
    
        [4] => Array
            (
                [price] => 690,45 €
                [from] => Amazon.it - Seller
            )
    
        [5] => Array
            (
                [price] => 673,99 €
                [from] => da 2 negozi
            )
    
        [6] => Array
            (
                [price] => 345,00 €
                [from] => da 2 negozi
            )
    
        [7] => Array
            (
                [price] => 342,00 €
                [from] => Amazon.it - Seller
            )
    
        [8] => Array
            (
                [price] => 699,99 €
                [from] => ePRICE.it
            )
    
        [9] => Array
            (
                [price] => 730,00 €
                [from] => in oltre 5 negozi
            )
    
        [10] => Array
            (
                [price] => 20,00 €
                [from] => Amazon.it - Seller
            )
    
        [11] => Array
            (
                [price] => 208,99 €
                [from] => eGlobal Central Italia
            )
    
        [12] => Array
            (
                [price] => 711,00 €
                [from] => in oltre 5 negozi
            )
    
        [13] => Array
            (
                [price] => 322,99 €
                [from] => eGlobal Central Italia
            )
    
        [14] => Array
            (
                [price] => 40,09 €
                [from] => da 4 negozi
            )
    
        [15] => Array
            (
                [price] => 15,99 €
                [from] => acadattatore.com
            )
    
        [16] => Array
            (
                [price] => 339,99 €
                [from] => ePRICE.it
            )
    
        [17] => Array
            (
                [price] => 412,90 €
                [from] => da 3 negozi
            )
    
        [18] => Array
            (
                [price] => 343,33 €
                [from] => Amazon.it - Seller
            )
    
        [19] => Array
            (
                [price] => 629,00 €
                [from] => BestPriceStore
            )
    
    )