I'm trying to parse an HTML page, but I'm having trouble grabbing the nodeValue of the <dt> and <dd> tags.
// Fetches each listing page, parses it with DOMDocument, and copies selected
// <dt>/<dd> label/value pairs into a flat record ($changeForMyDB).
// NOTE(review): $outline is assigned here, but the loop below iterates
// $outlineUrl, which is not defined anywhere in the visible snippet.
$outline ="http://www.sumitomo-rd-mansion.jp/kansai/higashi_umeda/detail.cgi";
foreach ($outlineUrl as $results) {
// Only URLs that start with this host prefix are fetched and parsed.
if (strpos($results, 'http://www.sumitomo-rd-mansion.jp') === 0) {
$html = file_get_contents($results);
$DOMParser = new \DOMDocument();
$DOMParser->loadHTML($html);
// Default record; entries are overwritten below when a matching label is found.
$changeForMyDB = [
'region' => '関西',
'link' => json_encode($results),
'building_name' => '',
'price' => '不明',
'old_price' => '',
'extend' => '不明',
'address' => '',
'total_house' => '',
'rooms' => '',
'cons_finish' => '',
'entry' => '不明',
'balcony' => '不明',
'company_name' => '',
'list_from' => ''
];
foreach ($DOMParser->getElementsByTagName('dl') as $tr) {
// Only the FIRST <dt> and the FIRST <dd> of each <dl> are read ([0]); any
// further pairs inside the same <dl> are never looked at.
$property = trim($tr->getElementsByTagName('dt')[0]->nodeValue);
$value = trim($tr->getElementsByTagName('dd')[0]->nodeValue);
// Map the page's label text onto the corresponding record key.
switch ($property) {
case '物件名':
$changeForMyDB['building_name'] = $value;
break;
case '販売価格':
$changeForMyDB['price'] = $value;
break;
case '専有面積':
$changeForMyDB['extend'] = $value;
break;
case '所在地':
$changeForMyDB['address'] = $value;
break;
case '総戸数':
$changeForMyDB['total_house'] = $value;
break;
case '間取り':
$changeForMyDB['rooms'] = $value;
break;
case '竣工時期':
$changeForMyDB['cons_finish'] = $value;
break;
case '管理会社':
$changeForMyDB['company_name'] = $value;
break;
case '入居時期':
$changeForMyDB['entry'] = $value;
break;
case 'バルコニー面積':
$changeForMyDB['balcony'] = $value;
break;
default:
// Labels that are not listed above are ignored.
break;
}
}
}
// This dump sits outside the prefix check above, so it runs for every URL,
// including ones that were skipped (in which case $changeForMyDB still holds
// the previous iteration's record, or is undefined on the first iteration).
var_dump($changeForMyDB);
}
With this I can't grab the nodeValue of every <dt> and <dd> inside each <dl>; I'm only getting two. Are my foreach loops wrong or something? Thanks for helping me out!
There are quite a few problems with the code; I've rewritten it below, with comments to explain the changes...
// Collects every <dt>/<dd> pair found inside any <dl> of each page into
// $details as ['property' => ..., 'value' => ...] entries, then dumps the list.

// Variable for list of details
$details = [];
// outlineUrl is an array of URLs (foreach() cannot iterate over a single string)
$outlineUrl = ["http://www.sumitomo-rd-mansion.jp/kansai/higashi_umeda/detail.cgi"];

// Turn off some error reporting on import: libxml buffers its parse warnings
// instead of emitting them. This is a global switch, so set it once up front.
libxml_use_internal_errors(true);

foreach ($outlineUrl as $results) {
    $html = file_get_contents($results);
    if ($html === false || $html === '') {
        // The fetch failed or returned nothing; loadHTML() rejects empty input,
        // so skip this URL instead of passing it on.
        continue;
    }

    $DOMParser = new \DOMDocument();
    $DOMParser->loadHTML($html); // note the terminating ';' (it was missing)
    // Drop the buffered parse errors so they do not accumulate across pages.
    libxml_clear_errors();

    foreach ($DOMParser->getElementsByTagName('dl') as $tr) {
        // Build up a list of details (you were overwriting them all the time)
        $dd = $tr->getElementsByTagName('dd');
        foreach ($tr->getElementsByTagName('dt') as $key => $ent) {
            // Pair the n-th <dt> with the n-th <dd>; item() returns null when
            // the <dl> has fewer <dd> than <dt> elements.
            $valueNode = $dd->item($key);
            $details[] = [
                'property' => trim($ent->nodeValue),
                'value' => $valueNode === null ? '' : trim($valueNode->nodeValue),
            ];
        }
    }
}
// Output list of details
var_dump($details);
This will loop over all the pairs of <dt> and <dd> values in each <dl...> tag.
Updated code...
// Builds one record per <dl> on each page: every <dt> label that appears in
// $labelToKey is stored under its record key, with the text of the <dd> at the
// same position as the value. The list of records is dumped at the end.

// Page label (text of the <dt>) => key used in the resulting record.
$labelToKey = [
    '物件名' => 'building_name',
    '販売価格' => 'price',
    '専有面積' => 'extend',
    '所在地' => 'address',
    '総戸数' => 'total_house',
    '間取り' => 'rooms',
    '竣工時期' => 'cons_finish',
    '管理会社' => 'company_name',
    '入居時期' => 'entry',
    'バルコニー面積' => 'balcony',
];

$details = [];
$outlineUrl = ["http://www.sumitomo-rd-mansion.jp/kansai/higashi_umeda/detail.cgi"];

// Buffer libxml parse warnings instead of emitting them. This is a global
// switch, so it is set once rather than on every iteration.
libxml_use_internal_errors(true);

foreach ($outlineUrl as $results) {
    $html = file_get_contents($results);
    if ($html === false || $html === '') {
        // The fetch failed or returned nothing; loadHTML() rejects empty input,
        // so skip this URL instead of passing it on.
        continue;
    }

    $DOMParser = new \DOMDocument();
    $DOMParser->loadHTML($html);
    // Drop the buffered parse errors so they do not accumulate across pages.
    libxml_clear_errors();

    foreach ($DOMParser->getElementsByTagName('dl') as $tr) {
        $dd = $tr->getElementsByTagName('dd');
        $newDetails = [];
        foreach ($tr->getElementsByTagName('dt') as $key => $ent) {
            // Trim the label before the lookup so surrounding whitespace in
            // the markup does not prevent a match.
            $property = trim($ent->nodeValue);
            // item() returns null when there is no <dd> at this position.
            $valueNode = $dd->item($key);
            if (!isset($labelToKey[$property]) || $valueNode === null) {
                continue;
            }
            // Every field, including 'rooms', goes into the per-<dl> record.
            $newDetails[$labelToKey[$property]] = trim($valueNode->nodeValue);
        }
        // One record per <dl>, appended even when no known label was found.
        $details[] = $newDetails;
    }
}
var_dump($details);