Search code examples
phploopsweb-scrapingforeach

Combine 2 separate foreach loops


So far I've managed to scrape two elements from an external site onto my test page: http://mt-cloud.co.uk/nhs/

(Please do a test search on the page to view results)

// Fetch the NHS GP search results page; the response body is returned as a
// string (CURLOPT_RETURNTRANSFER) and redirects are followed.
$ch = curl_init('http://www.nhs.uk/service-search/GP/m410ux/Results/4/-2.35167407989502/53.4519462585449/4/0?distance=25');
curl_setopt($ch, CURLOPT_RETURNTRANSFER, true);
curl_setopt($ch, CURLOPT_FOLLOWLOCATION, 1); 
$output = curl_exec($ch);
curl_close($ch);

// Parse the fetched HTML into a DOM tree. libxml errors are collected
// internally so malformed markup does not emit warnings.
$document = new DOMDocument;
libxml_use_internal_errors(true);
$document->loadHTML($output);
$document->encoding = 'utf-8';
// Select the header cells and the detail cells by their class attributes;
// per the question these hold the practice names and addresses.
$xpath = new DOMXPath($document);
$pnames = $xpath->query("//th[@class='fctitle']");
$addresses = $xpath->query("//td[@class='fcdetails fc-first']");

I have 2 foreach loops:

// Serialise each matched name node to HTML. $result1 is reassigned on every
// pass, so after the loop it holds only the last node's markup.
foreach ($pnames as $pname) {
$result1 = $document->saveHTML($pname);
}


// Same for the address nodes: $result2 is reassigned on every pass.
foreach ($addresses as $address) {
$result2 = $document->saveHTML($address);
}

$result1 = name of a GP practice; $result2 = address of that GP practice.

As you can see on the test page, my results 1 and 2 are separated. How do I get them together, so that each practice name is shown with its practice address?

UPDATE (@Tri)

// Walk both node lists by index so the name and address at the same position
// are printed together.
// NOTE(review): $pnames is a DOMNodeList; on PHP versions where DOMNodeList is
// not Countable, count() presumably returns 1, which would explain getting a
// single result. $pnames->length and $pnames->item($i) avoid that -- confirm
// against the PHP version in use.
for($i = 0; $i < count($pnames); $i++){
$name= $document->saveHTML($pnames[$i]);
// Rewrite site-relative links so they point at www.nhs.uk.
$name=str_replace ('<a href="/Services/', '<a href="http://www.nhs.uk/Services/', $name);
$address = $document->saveHTML($addresses[$i]);

echo $name.'<br>'.$address;
}

This only returns one result rather than all of them.

Here is my full php code: http://mt-cloud.co.uk/nhs/content/code

Image of the data I'm trying to scrape: http://mt-cloud.co.uk/nhs/content/results.png


Solution

  • Your first code was OK; you just need to store your names and addresses in a two-dimensional array and then loop through that array.

    This part of the code is exactly the same as yours:

    // Fetch the results page; the body is returned as a string and redirects
    // are followed.
    $ch = curl_init('http://www.nhs.uk/service-search/GP/m410ux/Results/4/-2.35167407989502/53.4519462585449/4/0?distance=25');
    curl_setopt($ch, CURLOPT_RETURNTRANSFER, true);
    curl_setopt($ch, CURLOPT_FOLLOWLOCATION, 1); 
    $output = curl_exec($ch);
    curl_close($ch);
    
    // Parse the HTML, collecting libxml errors internally instead of as warnings.
    $document = new DOMDocument;
    libxml_use_internal_errors(true);
    $document->loadHTML($output);
    $document->encoding = 'utf-8';
    // Query the name cells and the address cells by class attribute.
    $xpath = new DOMXPath($document);
    $pnames = $xpath->query("//th[@class='fctitle']");
    $addresses = $xpath->query("//td[@class='fcdetails fc-first']");
    

    Now we will create an array:

    // Each element will be an array with 'name' and 'address' keys.
    $results = array();
    

    Then use your loops and store the names and addresses as pairs at the same indexes of the array:

    // Store each name's HTML under its position in the node list.
    foreach ($pnames as $position => $nameNode) {
        $results[$position]['name'] = $document->saveHTML($nameNode);
    }

    // Store each address's HTML under the same position, so entry N of
    // $results carries the N-th name together with the N-th address.
    foreach ($addresses as $position => $addressNode) {
        $results[$position]['address'] = $document->saveHTML($addressNode);
    }
    

    Now we have an array with pairs of names and addresses and if we loop through it, we can see them together:

    // Print every collected practice: its name first, then its address.
    foreach ($results as $practice) {
        echo 'Name: ' . $practice['name'] . '<br>';
        echo 'Address: ' . $practice['address'] . '<br>';
    }
    

    That's all. The complete code will look like this:

    <?php
    // Scrape GP practice names and addresses from an NHS service-search results
    // page and print each practice's name together with its address.

    // Fetch the page; the body is returned as a string and redirects are followed.
    $ch = curl_init('http://www.nhs.uk/service-search/GP/m410ux/Results/4/-2.35167407989502/53.4519462585449/4/0?distance=25');
    curl_setopt($ch, CURLOPT_RETURNTRANSFER, true);
    curl_setopt($ch, CURLOPT_FOLLOWLOCATION, 1);
    $output = curl_exec($ch);
    // Read the error text before the handle is closed.
    $curlError = curl_error($ch);
    curl_close($ch);

    // curl_exec() returns false on failure, and an empty body cannot be parsed,
    // so stop here instead of handing a bad value to loadHTML().
    if ($output === false || $output === '') {
        exit('Could not fetch the results page: ' . htmlspecialchars($curlError, ENT_QUOTES, 'UTF-8'));
    }

    // Parse the HTML; libxml errors from malformed markup are collected
    // internally and then discarded rather than emitted as warnings.
    $document = new DOMDocument;
    libxml_use_internal_errors(true);
    $document->loadHTML($output);
    libxml_clear_errors();
    $document->encoding = 'utf-8';
    $xpath = new DOMXPath($document);
    $pnames = $xpath->query("//th[@class='fctitle']");
    $addresses = $xpath->query("//td[@class='fcdetails fc-first']");

    // $results[N] holds the N-th name and the N-th address side by side.
    $results = array();

    $iCnt = 0;
    foreach ($pnames as $pname) {
        $results[$iCnt]['name'] = $document->saveHTML($pname);
        $iCnt++;
    }

    $iCnt = 0;
    foreach ($addresses as $address) {
        $results[$iCnt]['address'] = $document->saveHTML($address);
        $iCnt++;
    }

    // The two node lists may differ in length, so an entry can lack one of the
    // keys; fall back to an empty string instead of reading an undefined index.
    foreach ($results as $result) {
        $name = isset($result['name']) ? $result['name'] : '';
        $address = isset($result['address']) ? $result['address'] : '';
        echo 'Name: ' . $name . '<br>';
        echo 'Address: ' . $address . '<br>';
    }
    ?>