Search code examples
simple-html-dom

undefined function load_file() simple html dom library


I want to scrape content from a website using the Simple HTML DOM library. The first page I scrape contains titles and links to the article content. The logic I want to apply is: get the links from the first page, then fetch the content of the page each link points to. I am getting an "undefined function load_file()" error at the second load_file call.

   <?php
// Stop immediately with a message unless the BASEPATH constant is defined
// (presumably set by the framework's front controller -- confirm).
defined('BASEPATH') OR exit('No direct script access allowed');

/**
 * Controller that loads a listing page, walks every <article> element on it,
 * and for each one tries to load the linked page and echo its <p> elements.
 *
 * NOTE(review): the inner load_file() call in index() is the line that
 * triggers the reported "undefined function load_file()" error; see the
 * comment on that line.
 */
class Crawler extends Frontend_controller {

    /**
     * Runs the parent constructor and pulls in the Simple HTML DOM library
     * from the application's libraries directory.
     */
    public function __construct(){

        parent::__construct();
        require_once(APPPATH . 'libraries/simple_html_dom.php');

    }

    /**
     * Loads the listing page at $target_url and, for every <article> found,
     * reads the first "h1 a" element as the article title/link, then attempts
     * to load the linked page and echo each of its <p> elements.
     */
    public function index(){

        $target_url = "https://example.com/";
        $html = new simple_html_dom();
        // Method call on the parser object: this is the form that works.
        $html->load_file($target_url);
        foreach($html->find('article') as $post){
            // Index 0 asks for the first "h1 a" match only.
            // NOTE(review): presumably find() yields null when nothing
            // matches, which would make the property access fail -- confirm.
            $title = $post->find('h1 a', 0)->innertext;
            $link = $post->find('h1 a', 0);
            //echo $title."<br />";
            //echo $link->href."<br />";
            $sample_html = new simple_html_dom();
            // BUG: load_file() is invoked here as a global function, not as a
            // method on $sample_html, and its result would also overwrite the
            // object created on the previous line. The working call above
            // uses the method form: $object->load_file($url).
            $sample_html = load_file($link->href);
            foreach ($sample_html->find('p') as $content) {

                // Echoes the element object itself, relying on its string
                // conversion to produce the output.
                echo $content;
            }


        }

    }//end method

}//end class

Solution

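  • load_file() is a method of the simple_html_dom object, not a global function, so it must be called on the object you created. Change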

      $sample_html = load_file($link->href);
    

    to

       $sample_html->load_file($link->href);