I'm building a web scraper that loops through multiple pages, each of which lists 20 items.
Can I return this or should I use echo? I use echo right now but want to switch to return to have more control over the output per file (using this function in multiple pages).
/**
 * Scrape result pages 1-10 and echo every item found as an HTML link.
 *
 * For each page a GET request is sent to "/zoeken" (resolved against $url)
 * with the page number as the "page" query parameter. When that request
 * answers with HTTP 200, the document at "$url?<pageNr>" is loaded with
 * file_get_html() and the title links of #resultitem-1 .. #resultitem-20
 * are printed, one per line.
 *
 * @param string|null $url  Base URL of the site to scrape.
 * @param string|null $url2 Prefix placed in front of every item's href in the output.
 *
 * @return void
 */
function function1($url = null, $url2 = null){
    // Variables from the calling scope are not visible inside a function,
    // so both URLs have to be handed in as arguments.
    $link = $url;
    $url_items = $url2;

    // The client does not depend on the page number: build it once.
    $client = new \GuzzleHttp\Client(['base_uri' => $link]);

    for ($pageNr = 1; $pageNr < 11; $pageNr++) {
        $response = $client->request('GET', '/zoeken', [
            'query' => [
                'page' => $pageNr
            ]
        ]);

        if ($response->getStatusCode() === 200) {
            echo "Pagina: " . $pageNr . "<br/>";

            // Derive the page URL from the untouched base URL. Appending to
            // the same variable on every pass made it grow ("?1?1?1...").
            // All 20 items live in the same document, so load it only once.
            $html = file_get_html($link . "?" . $pageNr);

            if (!empty($html)) {
                for ($item = 1; $item <= 20; $item++) {
                    // Walk the selector chain step by step: find() yields
                    // nothing when a selector does not match, and calling
                    // find() on that would be a fatal error (e.g. on a page
                    // with fewer than 20 items).
                    $anchor = $html->find("#resultitem-$item", 0);
                    foreach (['.item-description2', '.title', 'a'] as $selector) {
                        if (!$anchor) {
                            break;
                        }
                        $anchor = $anchor->find($selector, 0);
                    }
                    if (!$anchor) {
                        continue;
                    }

                    $content = $anchor->plaintext;
                    $content_url = $anchor->getAttribute('href');
                    if (!empty($content)) {
                        echo $item . ". <a href='" . $url_items . $content_url . "'>" . $content ."</a><br/>";
                    }
                }

                // Release the parsed DOM before the next page is loaded;
                // the values needed from it have already been printed.
                $html->clear();
                unset($html);
            }
        }

        // Pause between pages so the remote server is not hammered.
        sleep(2);
    }
}
Right now I'm echoing a complete link, but I want just the values so that I can control the output on my other pages.
I want to return $content and $content_url. How can I change this function to return them instead of echoing?
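Collect the values in an array inside the loop and return that array once the loop has finished.
In your case, for example (each element holds the page number, the link text and the link URL):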
/**
 * Scrape result pages 1-10 and return every item found.
 *
 * For each page a GET request is sent to "/zoeken" (resolved against $url)
 * with the page number as the "page" query parameter. When that request
 * answers with HTTP 200, the document at "$url?<pageNr>" is loaded with
 * file_get_html() and the title links of #resultitem-1 .. #resultitem-20
 * are collected.
 *
 * @param string|null $url Base URL of the site to scrape.
 *
 * @return array List of items; each item is an array with the keys
 *               'pageNo' (page number), 'content' (link text) and
 *               'content_url' (the link's href exactly as found in the page).
 */
function function1($url = null){
    // Variables from the calling scope are not visible inside a function,
    // so the base URL has to be handed in as an argument.
    $link = $url;
    $dataArr = [];

    // The client does not depend on the page number: build it once.
    $client = new \GuzzleHttp\Client(['base_uri' => $link]);

    for ($pageNr = 1; $pageNr < 11; $pageNr++) {
        $response = $client->request('GET', '/zoeken', [
            'query' => [
                'page' => $pageNr
            ]
        ]);

        if ($response->getStatusCode() === 200) {
            // Derive the page URL from the untouched base URL. Appending to
            // the same variable on every pass made it grow ("?1?1?1...").
            // All 20 items live in the same document, so load it only once.
            $html = file_get_html($link . "?" . $pageNr);

            if (!empty($html)) {
                for ($item = 1; $item <= 20; $item++) {
                    // Walk the selector chain step by step: find() yields
                    // nothing when a selector does not match, and calling
                    // find() on that would be a fatal error (e.g. on a page
                    // with fewer than 20 items).
                    $anchor = $html->find("#resultitem-$item", 0);
                    foreach (['.item-description2', '.title', 'a'] as $selector) {
                        if (!$anchor) {
                            break;
                        }
                        $anchor = $anchor->find($selector, 0);
                    }
                    if (!$anchor) {
                        continue;
                    }

                    $content = $anchor->plaintext;
                    $content_url = $anchor->getAttribute('href');
                    if (!empty($content)) {
                        // Copy the values out as plain data so they stay
                        // usable after the DOM has been released.
                        $dataArr[] = [
                            'pageNo' => $pageNr,
                            'content' => $content,
                            'content_url' => $content_url,
                        ];
                    }
                }

                // Release the parsed DOM before the next page is loaded.
                $html->clear();
                unset($html);
            }
        }

        // Pause between pages so the remote server is not hammered.
        sleep(2);
    }

    return $dataArr;
}