Search code examples
phppreg-matchfopen

allow_url_fopen is on, but doesn't work


I have a problem fetching a page from my server.

I'm trying to get all content from a url, and save it in a variable.

But the script stops with the message from the `die` call attached to fopen.

This is my code:

// Read the URL passed through the query string. A missing or non-string
// parameter becomes an empty string so it is rejected by the check below
// instead of raising a notice. The trimmed value is the one that gets opened.
$url = (isset($_GET["url"]) && is_string($_GET["url"])) ? trim($_GET["url"]) : '';

// Accept only URLs on the www.filmaffinity.com host. The dots are escaped and
// the host must be followed by an optional port and then "/", "?", "#" or the
// end of the string, so look-alike hosts such as
// "www.filmaffinity.com.evil.example" or "www.filmaffinity.com@evil.example"
// are rejected.
if (!preg_match('~^http://www\.filmaffinity\.com(?::\d+)?(?:[/?#].*)?\z~s', $url)) {
    $data = array('msg' => 'bad url');
    echo json_encode($data);
    return false;
}

// Fetch the page and accumulate its content in $cadena.
$cadena = '';
$fo = fopen($url, "r") or die ("No se encuentra la pagina.");
// Loop on the fgets() result rather than feof(): if a read fails (for example
// on a socket timeout) feof() may never become true and the loop would spin
// forever.
while (($linea = fgets($fo, 4096)) !== false) {
    $cadena .= $linea;
}
fclose($fo);

This is part of my code. When I execute it, it returns 'No se encuentra la pagina.'

allow_url_fopen is ON in my server.

WHM PHP Config

PHP Info

How can I fix this problem? Can anyone help me?

Thanks.


Solution

  • You are just looking for a certain name in the url?
    This will work better.

    // stripos($haystack, $needle): the URL being checked is the haystack.
    // A match at the very start returns 0, which is falsy, so compare with === 0.
    if (stripos($url, 'http://www.filmaffinity.com') === 0) {}
    

    you may want to try:

    file_get_contents($url)
    

    You may want to try encoding the url:

    $url = urlencode($url)
    

    or

    $url = rawurlencode($url)
    

    curl will get you more answers to your problem:
    With curl you can control the timeout, see the request headers (enable CURLINFO_HEADER_OUT and read them back with curl_getinfo) and the response headers (CURLOPT_HEADER), check the HTTP status (['http_code']), follow 30x redirects, etc.

    There is an issue:

    $data = file_get_contents('http://www.filmaffinity.com');
    

    Returned an error:

     file_get_contents(http://www.filmaffinity.com) [<a href='function.file-get-contents'>function.file-get-contents</a>]: failed to open stream: HTTP request failed! HTTP/1.0 500 Internal Server Error
    

    With curl:

    HTTP CODE: aarray (
      'url' => 'http://www.filmaffinity.com/',
      'content_type' => 'text/html',
      'http_code' => 302,
      'header_size' => 223,
      'request_size' => 170,
      'filetime' => -1,
      'ssl_verify_result' => 0,
      'redirect_count' => 0,
      'total_time' => 0.321491,
      'namelookup_time' => 0.040338,
      'connect_time' => 0.180309,
      'pretransfer_time' => 0.180365,
      'size_upload' => 0,
      'size_download' => 20,
      'speed_download' => 62,
      'speed_upload' => 0,
      'download_content_length' => 20,
      'upload_content_length' => -1,
      'starttransfer_time' => 0.321415,
      'redirect_time' => 0,
      'certinfo' => 
      array (
      ),
      'redirect_url' => 'http://www.filmaffinity.com/en/main.html',
      'request_header' => 'GET / HTTP/1.1
    User-Agent: Mozilla/5.0 (Windows NT 5.1; rv:32.0) Gecko/20100101 Firefox/32.0
    Host: www.filmaffinity.com
    Accept: */*
    Accept-Encoding: deflate, gzip
    

    With curl it works, but you must change this:

    curl_setopt($ch, CURLOPT_FOLLOWLOCATION, false);
    

    To true:

    curl_setopt($ch, CURLOPT_FOLLOWLOCATION, true);
    

    Tested Working Code:

    <?php
    // Fetch the filmaffinity front page with cURL (following redirects) and
    // print every link found in it as an absolute URL, one per line.
    header('Content-Type: text/plain; charset=utf-8');
    echo "start\n";
    $base = 'http://www.filmaffinity.com';
    $url = $base;
    $ch = curl_init($url);
    curl_setopt($ch, CURLOPT_ENCODING, "");          // accept any encoding curl supports
    curl_setopt($ch, CURLOPT_RETURNTRANSFER, true);  // return the response instead of printing it
    curl_setopt($ch, CURLOPT_HEADER, true);          // response headers are prepended to the body
    curl_setopt($ch, CURLINFO_HEADER_OUT, true);     // keep the request headers for curl_getinfo()
    curl_setopt($ch, CURLOPT_FOLLOWLOCATION, true);  // the site answers "/" with a 302
    curl_setopt($ch, CURLOPT_FILETIME, true);
    curl_setopt($ch, CURLOPT_USERAGENT, "Mozilla/5.0 (Windows NT 5.1; rv:32.0) Gecko/20100101 Firefox/32.0");
    curl_setopt($ch, CURLOPT_CONNECTTIMEOUT, 100);
    curl_setopt($ch, CURLOPT_VERBOSE, true);
    curl_setopt($ch, CURLOPT_AUTOREFERER, true);
    curl_setopt($ch, CURLOPT_TIMEOUT, 100);
    curl_setopt($ch, CURLOPT_FAILONERROR, true);
    $data = curl_exec($ch);

    // Read the transfer info once and keep it as an array. It must be defined
    // on both the success and the failure path, and must not be replaced by a
    // var_export() string, because the status check below indexes into it.
    $info = curl_getinfo($ch);
    if (curl_errno($ch)) {
        $data = 'Retreive Base Page Error: ' . curl_error($ch);
    } else {
        // header_size covers the headers of every response in the redirect
        // chain, so this strips all of them and leaves only the final body.
        $skip = (int) $info['header_size'];
        $responseHeader = substr($data, 0, $skip);
        $data = substr($data, $skip);
    }
    if ((int) $info['http_code'] !== 200) {
        echo 'HTTP CODE: ' . $info['http_code'];
    }


    preg_match_all('#<a href="([^"]*)#m', $data, $matches);
    foreach ($matches[1] as $val) {
        if (preg_match('#^[a-z][a-z0-9+.-]*:#i', $val)) {
            // Already absolute (has a scheme): do not prepend the site root.
            $url = $val;
        } elseif (strncmp($val, '//', 2) === 0) {
            // Protocol-relative link: reuse the scheme of the base URL.
            $url = 'http:' . $val;
        } else {
            // Site-relative link: join with exactly one slash.
            $url = $base . '/' . ltrim($val, '/');
        }
        echo "$url\n";
    }
    ?>
    

    This is the Result

     http://www.filmaffinity.com/en/main.html
        http://www.filmaffinity.com/en/advsearch.php
        http://www.filmaffinity.com/en/login.php
        http://www.filmaffinity.com/en/register.php
        http://www.filmaffinity.com/en/main.html
        http://www.filmaffinity.com/en/awards.php?award_id=berlin&year=2015
        http://www.filmaffinity.com/en/awards.php?award_id=academy_awards&year=2015
        http://www.filmaffinity.com/en/cat_new_th_us.html
        http://www.filmaffinity.com/en/boxoffice.php
        http://www.filmaffinity.com/en/imlost.php
        http://www.filmaffinity.com/en/all_awards.php
        http://www.filmaffinity.com/en/best_2014.php
        http://www.filmaffinity.com/en/oscar_data.php
        http://www.filmaffinity.com/en/topgen.php?nodoc=1
        http://www.filmaffinity.comhttp://www.filmaffinity.com/es/main.html
        http://www.filmaffinity.com/en/cookies_info.php
        http://www.filmaffinity.com/en/tours.php
        http://www.filmaffinity.com/en/tour.php?idtour=55
        http://www.filmaffinity.com/en/tour.php?idtour=54
        http://www.filmaffinity.com/en/tour.php?idtour=29
        http://www.filmaffinity.com/en/tour.php?idtour=9
        http://www.filmaffinity.com/en/tour.php?idtour=24
        http://www.filmaffinity.com/en/tours.php
        http://www.filmaffinity.com/en/trailers.php
        http://www.filmaffinity.com/en/bestrated.php
        http://www.filmaffinity.com/en/film489970.html
        http://www.filmaffinity.com/en/film221477.html
        http://www.filmaffinity.com/en/film730528.html
        http://www.filmaffinity.com/en/film809297.html
        http://www.filmaffinity.com/en/film399474.html
        http://www.filmaffinity.com/en/film795770.html
        http://www.filmaffinity.com/en/film536488.html
        http://www.filmaffinity.com/en/film695552.html
        http://www.filmaffinity.com/en/bestrated.php
        http://www.filmaffinity.com/en/mostrated.php
        http://www.filmaffinity.com/en/film575568.html
        http://www.filmaffinity.com/en/film670216.html
        http://www.filmaffinity.com/en/film124904.html
        http://www.filmaffinity.com/en/film636539.html
        http://www.filmaffinity.com/en/film206955.html
        http://www.filmaffinity.com/en/film779937.html
        http://www.filmaffinity.com/en/film617730.html
        http://www.filmaffinity.com/en/film423489.html
        http://www.filmaffinity.com/en/mostrated.php
        http://www.filmaffinity.com/en/worstrated.php
        http://www.filmaffinity.com/en/film189979.html
        http://www.filmaffinity.com/en/film612348.html
        http://www.filmaffinity.com/en/film968394.html
        http://www.filmaffinity.com/en/film651247.html
        http://www.filmaffinity.com/en/film281762.html
        http://www.filmaffinity.com/en/film886013.html
        http://www.filmaffinity.com/en/film591128.html
        http://www.filmaffinity.com/en/film381051.html
        http://www.filmaffinity.com/en/worstrated.php
        http://www.filmaffinity.com/en/mostvisited.php
        http://www.filmaffinity.com/en/film124904.html
        http://www.filmaffinity.com/en/film994565.html
        http://www.filmaffinity.com/en/film941942.html
        http://www.filmaffinity.com/en/film575568.html
        http://www.filmaffinity.com/en/film670216.html
        http://www.filmaffinity.com/en/film423489.html
        http://www.filmaffinity.com/en/film809035.html
        http://www.filmaffinity.com/en/film391687.html
        http://www.filmaffinity.com/en/film934433.html
        http://www.filmaffinity.com/en/mostvisited.php
        http://www.filmaffinity.com/en/cat_new_th_us.html
        http://www.filmaffinity.com/en/film124904.html
        http://www.filmaffinity.com/en/film376816.html
        http://www.filmaffinity.com/en/film898006.html
        http://www.filmaffinity.com/en/film562434.html
        http://www.filmaffinity.com/en/film510733.html
        http://www.filmaffinity.com/en/film272576.html
        http://www.filmaffinity.com/en/film493854.html
        http://www.filmaffinity.com/en/film792317.html
        http://www.filmaffinity.com/en/film784978.html
        http://www.filmaffinity.com/en/cat_new_th_us.html
        http://www.filmaffinity.com/en/cat_upc_th_us.html
        http://www.filmaffinity.com/en/film526524.html
        http://www.filmaffinity.com/en/film405261.html
        http://www.filmaffinity.com/en/film543207.html
        http://www.filmaffinity.com/en/film809035.html
        http://www.filmaffinity.com/en/film402986.html
        http://www.filmaffinity.com/en/film956269.html
        http://www.filmaffinity.com/en/film759419.html
        http://www.filmaffinity.com/en/film699453.html
        http://www.filmaffinity.com/en/film701069.html
        http://www.filmaffinity.com/en/cat_upc_th_us.html
        http://www.filmaffinity.comhttp://www.facebook.com/FilmAffinity
        http://www.filmaffinity.comhttp://twitter.com/Filmaffinity
        http://www.filmaffinity.com/en/faq.php
        http://www.filmaffinity.com/en/private.php
    

    Then Add (replace the foreach loop above):

    // Download every film page linked in $matches[1] and save each one to
    // html<N>.txt, where <N> is the link's index in $matches[1].
    // array_unique() keeps the first occurrence of each link together with its
    // key, so a page that is linked several times is fetched only once.
    foreach (array_unique($matches[1]) as $key => $val) {
        // strpos() returns 0 for a match at the start of the string, so the
        // result has to be compared against false. Links that are already
        // absolute are skipped because the site root is prepended below.
        if (strpos($val, '/film') === false || strpos($val, '://') !== false) {
            continue;
        }
        $url = 'http://www.filmaffinity.com' . $val;
        $ch = curl_init($url);
        curl_setopt($ch, CURLOPT_ENCODING, "");
        curl_setopt($ch, CURLOPT_RETURNTRANSFER, true);
        curl_setopt($ch, CURLOPT_HEADER, true);
        curl_setopt($ch, CURLINFO_HEADER_OUT, true);
        curl_setopt($ch, CURLOPT_FOLLOWLOCATION, true);
        curl_setopt($ch, CURLOPT_FILETIME, true);
        curl_setopt($ch, CURLOPT_USERAGENT, "Mozilla/5.0 (Windows NT 5.1; rv:32.0) Gecko/20100101 Firefox/32.0");
        curl_setopt($ch, CURLOPT_CONNECTTIMEOUT, 100);
        curl_setopt($ch, CURLOPT_VERBOSE, true);
        curl_setopt($ch, CURLOPT_AUTOREFERER, true);
        curl_setopt($ch, CURLOPT_TIMEOUT, 100);
        curl_setopt($ch, CURLOPT_FAILONERROR, true);
        $data = curl_exec($ch);

        // Keep the transfer info as an array on every path; the status check
        // below indexes into it.
        $info = curl_getinfo($ch);
        if (curl_errno($ch)) {
            // Report the failure and move on instead of saving the error text
            // to disk as if it were the page's HTML.
            echo 'Retreive Base Page Error: ' . curl_error($ch) . "\n";
            continue;
        }

        // header_size covers the headers of every response in the redirect
        // chain, so this leaves only the final body in $data.
        $skip = (int) $info['header_size'];
        $responseHeader = substr($data, 0, $skip);
        $data = substr($data, $skip);
        if ((int) $info['http_code'] !== 200) {
            echo 'HTTP CODE: ' . $info['http_code'];
        }

        // file_put_contents() opens, writes and closes in one call and returns
        // false on failure, so an unwritable target is reported instead of
        // passing a failed handle to fwrite()/fclose().
        if (file_put_contents("html$key.txt", $data) === false) {
            echo "Could not write html$key.txt\n";
        }
    }