I want to scrape content from a website using the Simple HTML DOM library. The first page I am scraping contains titles and links to the article content, so the logic I want to apply is: get the links from the first page, then fetch the content from each page those links point to. However, I am getting an "undefined function load_file()" error on the second load_file call.
<?php
defined('BASEPATH') OR exit('No direct script access allowed');

class Crawler extends Frontend_controller {

    public function __construct() {
        parent::__construct();
        require_once(APPPATH . 'libraries/simple_html_dom.php');
    }

    public function index() {
        $target_url = "https://example.com/";
        $html = new simple_html_dom();
        $html->load_file($target_url);

        foreach ($html->find('article') as $post) {
            $title = $post->find('h1 a', 0)->innertext;
            $link  = $post->find('h1 a', 0);
            //echo $title."<br />";
            //echo $link->href."<br />";

            $sample_html = new simple_html_dom();
            $sample_html = load_file($link->href); // <-- fatal error: undefined function load_file()
            foreach ($sample_html->find('p') as $content) {
                echo $content;
            }
        }
    } //end method
} //end class
Change

$sample_html = load_file($link->href);

to

$sample_html->load_file($link->href);

load_file() is a method of the simple_html_dom object, not a global function, so it has to be called on the instance you just created.
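For reference, here is a minimal sketch of the corrected index() method, keeping the question's placeholder https://example.com/ URL and Frontend_controller base class. It also uses the library's file_get_html() helper, which constructs the object and loads the URL in one call, and calls clear() on each inner document, since Simple HTML DOM is known to hold on to memory across many documents:

    public function index() {
        $target_url = "https://example.com/"; // placeholder URL from the question
        $html = new simple_html_dom();
        $html->load_file($target_url);

        foreach ($html->find('article') as $post) {
            $link = $post->find('h1 a', 0);
            if (!$link) {
                continue; // skip articles without an h1 link
            }

            // file_get_html() is Simple HTML DOM's shortcut for
            // "new simple_html_dom()" followed by "load_file()"
            $sample_html = file_get_html($link->href);
            if (!$sample_html) {
                continue; // the request failed; skip this article
            }

            foreach ($sample_html->find('p') as $content) {
                echo $content;
            }

            // free the inner DOM tree before moving to the next link
            $sample_html->clear();
            unset($sample_html);
        }
    }

The null checks matter in practice: find() returns null when a selector matches nothing, and calling ->href or ->find() on null would raise a new fatal error on any article that lacks an h1 link.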