Search code examples
php, xpath, domdocument, domxpath

Get all itemprop and itemprop depths


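I have this code and it more or less works. The problem is that some of the values come back empty and end up in the wrong place in the array: the offers element contains 3 other itemprops, but they are returned at the same level as offers instead of nested inside it.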

I don't want to hard-code anything, because I'm going to use it on multiple websites.

/**
 * Fetches a page and collects the value of every element carrying an
 * `itemprop` attribute.
 *
 * The value of an element is its whitespace-collapsed text content. When the
 * element has no text (e.g. `<meta itemprop="price" content="13.99">` or
 * `<link itemprop="availability" href="...">`), the value is taken from the
 * first present attribute among content, href, src, data, value, datetime.
 *
 * If the same itemprop name occurs more than once, the last occurrence in
 * document order wins.
 *
 * @param string $url URL or file path readable by file_get_contents().
 *
 * @return array Map of itemprop name => string value; empty when the
 *               document could not be fetched or parsed.
 */
function get_product_itemprop($url){
    $html = file_get_contents($url);
    // loadHTML() rejects an empty source, so bail out early on a failed or empty fetch.
    if ($html === false || trim($html) === '') {
        return array();
    }

    // Real-world markup is rarely valid; collect libxml errors instead of emitting warnings.
    $previous = libxml_use_internal_errors(true);
    $d = new DOMDocument();
    $loaded = $d->loadHTML($html);
    libxml_clear_errors();
    libxml_use_internal_errors($previous);
    if (!$loaded) {
        return array();
    }

    $xpath = new DOMXPath($d);
    $nodes = $xpath->query('//*[@itemprop]');

    // Attributes that carry the property value on elements without text content.
    $value_attributes = array('content', 'href', 'src', 'data', 'value', 'datetime');

    $new_data = array();
    foreach ($nodes as $node) {
        $value = trim(preg_replace('/\s+/', ' ', $node->nodeValue));
        if ($value === '') {
            foreach ($value_attributes as $attribute) {
                if ($node->hasAttribute($attribute)) {
                    $value = trim($node->getAttribute($attribute));
                    break;
                }
            }
        }
        $new_data[$node->getAttribute("itemprop")] = $value;
    }
    return $new_data;
}

Result of the function

    array(8) {
  ["breadcrumb"]=>
  string(38) "Home Atomizers & Coils Amor Mini coils"
  ["name"]=>
  string(15) "Amor Mini coils"
  ["sku"]=>
  string(5) "CO815"
  ["offers"]=>
  string(8) "$ 13.99"
  ["price"]=>
  string(0) ""
  ["priceCurrency"]=>
  string(0) ""
  ["availability"]=>
  string(0) ""
  ["url"]=>
  string(0) ""
  }

On http://search.google.com/structured-data/testing-tool I get all the itemprops, and I want a structure similar to the one they produce, but as an array:

http://imgur.com/KbNRvnG


Solution

  • You can iterate the attributes property:

    // Group every attribute value of each matched element under the attribute's name.
    foreach ($nodes as $element) {
        foreach ($element->attributes as $attribute) {
            $new_data[$attribute->nodeName][] = $attribute->nodeValue;
        }
    }
    

    Example

    // Sample markup: two elements with an itemprop attribute plus a few other attributes.
    $html = <<<'HTML'
    <html>
        <body>
            <div itemprop="10" a="20" b="30"></div>
            <div itemprop="40" a="50" z="60"></div>
        </body>
    </html>
    HTML;

    $document = new DOMDocument;
    $document->loadHTML($html);
    $finder = new DOMXpath($document);

    // Collect the values of all attributes, keyed by attribute name, in document order.
    $new_data = [];
    foreach ($finder->query('//*[@itemprop]') as $element) {
        foreach ($element->attributes as $attribute) {
            $new_data[$attribute->nodeName][] = $attribute->nodeValue;
        }
    }
    var_dump($new_data);
    

    Output

    array(4) {
      ["itemprop"]=>
      array(2) {
        [0]=>
        string(2) "10"
        [1]=>
        string(2) "40"
      }
      ["a"]=>
      array(2) {
        [0]=>
        string(2) "20"
        [1]=>
        string(2) "50"
      }
      ["b"]=>
      array(1) {
        [0]=>
        string(2) "30"
      }
      ["z"]=>
      array(1) {
        [0]=>
        string(2) "60"
      }
    }
    

    The sample code fetches all elements in the document that have an itemprop attribute. If you want to fetch all elements that have at least one attribute of any kind, use @*, e.g. //*[@*].