I'm having some trouble with file_get_contents, DOMDocument, and XPath.
I am trying to do some scraping, so I made an array of website links:
array(9) {
[0]=>
string(34) "https://lions-mansion.jp/MF081014/"
[1]=>
string(34) "https://lions-mansion.jp/MF161026/"
[2]=>
string(34) "https://lions-mansion.jp/MF171045/"
[3]=>
string(34) "https://lions-mansion.jp/MF161016/"
[4]=>
string(34) "https://lions-mansion.jp/MF171010/"
}
I am trying to visit each of these links in a foreach loop and scrape the href of each page's link rel="canonical" element:
// Download every page; each entry appended to $html is whatever
// file_get_contents() returned (the @ operator silences any errors).
foreach ($siteUrls as $sites){
@$html [] = file_get_contents($sites);
}
// Try to run an XPath query against each downloaded page.
foreach ($html as $geturl)
{
// This is the line that raises the error quoted below: $geturl is the
// downloaded string, but DOMXPath's constructor requires a DOMDocument.
$grabber = new \DOMXPath($geturl);
// Select the href attribute of every <link rel="canonical"> element.
$mainLink [] = $grabber->query("//link[@rel='canonical']/@href");
}
var_dump($mainLink);
But in the end I get this error:
Argument 1 passed to DOMXPath::__construct() must be an instance of DOMDocument, string given
Any idea how to solve this problem? How can I get that link rel URL
from the head tag?
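DOMXPath's constructor needs a DOMDocument, not the raw HTML string, so load each page into a DOMDocument with loadHTML() first. Real-world HTML often triggers parser warnings, which you can silence with libxml_use_internal_errors
: Disables libxml errors and allows the user to fetch error information as needed: http://php.net/manual/en/function.libxml-use-internal-errors.php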
<?php

declare(strict_types=1);

// Pages whose <link rel="canonical"> href should be collected.
$siteUrls = [
    "https://lions-mansion.jp/MF081014/",
    "https://lions-mansion.jp/MF161026/",
    "https://lions-mansion.jp/MF171045/",
    "https://lions-mansion.jp/MF161016/",
    "https://lions-mansion.jp/MF171010/",
];

// Buffer libxml parse errors instead of emitting a PHP warning for each one.
// Remember the previous setting so it can be restored once parsing is done.
$previousLibxmlSetting = libxml_use_internal_errors(true);

// Initialise the result so var_dump() prints an empty array, rather than
// hitting an undefined variable, when no page yields a canonical link.
$mainLink = [];

foreach ($siteUrls as $siteUrl) {
    // file_get_contents() returns false on failure (and emits a warning that
    // names the URL). Skip failed or empty downloads: DOMDocument::loadHTML()
    // rejects an empty string.
    $pageHtml = file_get_contents($siteUrl);
    if ($pageHtml === false || $pageHtml === '') {
        continue;
    }

    // DOMXPath operates on a DOMDocument, so the HTML string is parsed first.
    $dom = new DOMDocument();
    if ($dom->loadHTML($pageHtml) === false) {
        continue;
    }

    $grabber = new DOMXPath($dom);

    // Select the href attribute node of every <link rel="canonical"> element.
    $names = $grabber->query("//link[@rel='canonical']/@href");
    if ($names === false) {
        continue;
    }

    foreach ($names as $contextNode) {
        $mainLink[] = $contextNode->value;
    }
}

// Discard the buffered parse errors and restore the caller's libxml setting.
libxml_clear_errors();
libxml_use_internal_errors($previousLibxmlSetting);

var_dump($mainLink);
array (size=2)
0 => string 'https://lions-mansion.jp/MF161026/' (length=34)
1 => string 'https://lions-mansion.jp/MF171045/' (length=34)