I am trying to recursively parse a unprotected web directory (index of/), and list down all the files in it up to predefined depth. I have used the Simple HTML DOM for this.
below is the directory structure
--index of/
---dir1/
----dir1.1
-----file1
-----file2
----dir1.2
----dir1.3
----dir1.4
---dir2/
---dir3/
---dir4/
it shows
/dir1/dir1.1/file1
/dir1/dir1.1/file2
but in next iteration, it shows
/dir1/dir1.1/dir1.2/
This is the code
<?php
include 'simple_html_dom.php';
$baseurl = 'http://gawisp.com/perry/';
echo 'depth 0'.'</br>';
echo $baseurl.'</br>';
findLinks($baseurl, 1,10);
function findLinks($url, $depth, $maxDepth){
$html = file_get_html($url);
foreach($html->find('a') as $element)
echo ' '.$element->href.'</br>';;
flush();
ob_flush();
if ($depth <= $maxDepth){
foreach($html->find('a') as $element){
if(substr($element->href,-1,1) == '/'){
if($element->href != '../'){
$url = $url.$element->href;
echo '</br>depth'.$depth.'</br>';
echo $url.'</br>';
flush();
ob_flush();
$result = findLinks($url, $depth + 1, $maxDepth);
echo '......................... '.$result;
}
}
else{
echo '----------->'.$url.$element->href.'</br>';
}
}
unset($element);
}else
exit;
return 0;
}
?>
Please guide me to the right direction.
Try to change you code in such a way that the current folder $url
is not changed inside the loop:
// $url = $url.$element->href;
echo '</br>depth'.$depth.'</br>';
echo $url.$element->href.'</br>';
flush();
ob_flush();
$result = findLinks($url.$element->href, $depth + 1, $maxDepth);
echo '......................... '.$result;