Search code examples
php, xpath, domdocument, domxpath

Xpath Query Won't Return Results


I'm trying to return some results from an XPath query, but it won't select the elements correctly. I'm using the following code:

/**
 * Download the TrustPilot review page, pick out reviews whose rating is 4 or
 * higher, and return them through Collection::take($amount).
 *
 * Each collected review is an object with the keys title, author, date,
 * rating and body ('title' is the fixed string 'Test').
 *
 * @param mixed $amount Value handed to Collection::take(); presumably an int limit — TODO confirm.
 * @return mixed Result of $reviews->take($amount).
 */
public function getTrustPilotReviews($amount)
{
    $trustPilotUrl = 'https://www.trustpilot.co.uk/review/purplegriffon.com';
    // HTML5 is not defined in this snippet; presumably a third-party HTML5 parser — TODO confirm.
    $html5 = new HTML5;
    $document = $html5->loadHtml(file_get_contents($trustPilotUrl));
    // NOTE(review): this flag is set after the document has already been parsed,
    // so it probably has no effect on the load above — confirm.
    $document->validateOnParse = true;
    $xpath = new DOMXpath($document);
    // Select every review <div> nested anywhere below the reviews container.
    $reviewsDomNodeList = $xpath->query('//div[@id="reviews-container"]//div[@itemprop="review"]');
    $reviews = new Collection;

    foreach ($reviewsDomNodeList as $key => $reviewDomElement)
    {
        // A fresh DOMXpath over the same document the review node belongs to.
        $xpath = new DOMXpath($reviewDomElement->ownerDocument);

        // Every query below starts with `//` and is given no context node, so it
        // searches the whole document rather than the current review. The field
        // is then picked with item($key), i.e. the $key-th match document-wide.
        // NOTE(review): this only lines up if each review contributes exactly
        // one match per query, in order — confirm.
        if ((int) $xpath->query('//*[@itemprop="ratingValue"]')->item($key)->getAttribute('content') >= 4)
        {
            $review = [
                'title'     => 'Test',
                'author'    => $xpath->query('//*[@itemprop="author"]')->item($key)->nodeValue,
                'date'      => $xpath->query('//*[@class="ndate"]')->item($key)->nodeValue,
                'rating'    => $xpath->query('//*[@itemprop="ratingValue"]')->item($key)->nodeValue,
                'body'      => $xpath->query('//*[@itemprop="reviewBody"]')->item($key)->nodeValue,
            ];

            // Stored as an object rather than an array.
            $reviews->add((object) $review);
        }
    }

    return $reviews->take($amount);
}

This code won't return anything:

//div[@id="reviews-container"]//div[@itemprop="review"]

But if I change it to:

//*[@id="reviews-container"]//*[@itemprop="review"]

It partially works but does not return the correct results.


Solution

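  • Thanks to Viper-7, biberu and salathe in the ##php IRC channel, I have this working now. The key change is that each review node is passed as the context node to relative queries (`.//…` and `descendant::…`), instead of running document-wide `//…` queries and indexing the results by the loop key. The page is also parsed with `DOMDocument::loadHTML()` rather than the `HTML5` class: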

    /**
     * Download the TrustPilot review page and collect the reviews rated 4 or
     * higher.
     *
     * Each review is evaluated relative to its own DOM node, so the fields of
     * one review can never be mixed up with those of another. Reviews without
     * a rating element are skipped. If the page cannot be downloaded or
     * parsed, an empty result is returned instead of failing.
     *
     * @param int $amount Limit handed to Collection::take().
     * @return Collection Objects with the keys title, author, date, rating and body.
     */
    public function getTrustPilotReviews($amount)
    {
        $reviews = new Collection;

        // NOTE(review): 'verify_peer' => false turns off TLS certificate
        // verification for this request. It is kept to preserve the existing
        // behaviour, but it should be removed once the host has a working CA
        // bundle.
        $context = stream_context_create(['ssl' => ['verify_peer' => false]]);

        $url = 'https://www.trustpilot.co.uk/review/purplegriffon.com';
        $data = file_get_contents($url, false, $context);

        // file_get_contents() returns false on failure; loadHTML() rejects an
        // empty string, so bail out early in both cases.
        if ($data === false || $data === '')
        {
            return $reviews->take($amount);
        }

        // Real-world HTML is rarely valid: buffer libxml's parse errors instead
        // of emitting warnings, then discard them and restore the caller's
        // previous setting so this method has no lasting side effect.
        $previousErrorSetting = libxml_use_internal_errors(true);

        $doc = new \DOMDocument();
        $loaded = $doc->loadHTML($data);

        libxml_clear_errors();
        libxml_use_internal_errors($previousErrorSetting);

        if ($loaded === false)
        {
            return $reviews->take($amount);
        }

        // One DOMXpath instance serves every query; it is bound to the
        // document, not to a particular node.
        $xpath = new DOMXpath($doc);

        foreach ($xpath->query('//div[@id="reviews-container"]/div[@itemprop="review"]') as $node)
        {
            // Relative query (leading '.') evaluated with $node as context.
            $ratingNode = $xpath->query('.//*[@itemprop="ratingValue"]', $node)->item(0);

            if ($ratingNode === null)
            {
                // No rating markup inside this review; nothing to compare.
                continue;
            }

            // The attribute is a string; cast so the comparison is numeric.
            $rating = (float) $ratingNode->getAttribute('content');

            if ($rating >= 4)
            {
                $review = [
                    'title'     => $xpath->evaluate('normalize-space(descendant::*[@itemprop="headline"]/a)', $node),
                    'author'    => $xpath->evaluate('normalize-space(descendant::*[@itemprop="author"])', $node),
                    'date'      => $xpath->evaluate('normalize-space(descendant::*[@class="ndate"])', $node),
                    'rating'    => $xpath->evaluate('number(descendant::*[@itemprop="ratingValue"]/@content)', $node),
                    'body'      => $xpath->evaluate('normalize-space(descendant::*[@itemprop="reviewBody"])', $node),
                ];

                $reviews->add((object) $review);
            }
        }

        return $reviews->take($amount);
    }