Search code examples
phpsimple-html-dom

How can I loop through all children and sub-children until there are no more children?


I want to loop through all children and sub-children, however deeply they are nested. The input is an HTML string, and some elements contain up to 7 or 8 layers of children. What is a smart way to do this without writing one nested loop per level?

<code>
include_once('simple_html_dom.php');

// Builds $style_array from the HTML string in $str: one entry per element
// matched by the '*' selector, each entry being a single-item array that wraps
// a stdClass with the element's tag, style, src, href and innertext.
//
// Nesting is hard-coded to three levels (parent -> child -> grandchild);
// anything deeper is kept only as the raw innertext string of the third level.
//
// NOTE(review): in Simple HTML DOM, find('*') appears to match elements at
// every depth rather than direct children only, so nested elements are
// probably reported more than once in the output — confirm against the
// library documentation.
$style_array = array();
foreach(str_get_html($str)->find('*') as $element) {
    $PARENT_NODE = new stdClass();
    $PARENT_NODE->tag = $element->tag;
    $PARENT_NODE->style = $element->style;
    $PARENT_NODE->src = $element->src;
    $PARENT_NODE->href = $element->href;
    $PARENT_NODE->innertext = array();
    if($element->hasChildNodes()) {
        // Second level: re-parse the parent's inner HTML and describe each match.
        foreach(str_get_html($element->innertext)->find('*') as $element2) {
            $CHILD_NODE_1 = new stdClass();
            $CHILD_NODE_1->tag = $element2->tag;
            $CHILD_NODE_1->style = $element2->style;
            $CHILD_NODE_1->src = $element2->src;
            $CHILD_NODE_1->href = $element2->href;
            $CHILD_NODE_1->innertext = array();
            if($element2->hasChildNodes()) {
                // Third level: descend into $element2's own inner HTML.
                // (Previously this re-parsed $element->innertext, which
                // repeated the second-level elements instead of going deeper.)
                foreach(str_get_html($element2->innertext)->find('*') as $element3) {
                    $CHILD_NODE_2 = new stdClass();
                    $CHILD_NODE_2->tag = $element3->tag;
                    $CHILD_NODE_2->style = $element3->style;
                    $CHILD_NODE_2->src = $element3->src;
                    $CHILD_NODE_2->href = $element3->href;
                    // Deepest supported level: keep the inner HTML as a plain string.
                    $CHILD_NODE_2->innertext = $element3->innertext;
                    array_push($CHILD_NODE_1->innertext, $CHILD_NODE_2);
                }
            }else{
                // Leaf at the second level: store its inner HTML as a string.
                $CHILD_NODE_1->innertext = $element2->innertext;
            }
            array_push($PARENT_NODE->innertext, $CHILD_NODE_1);
        }
    }else{
        // Leaf at the first level: store its inner HTML as a string.
        $PARENT_NODE->innertext = $element->innertext;
    }
    array_push($style_array,array($PARENT_NODE));
}
echo var_export($style_array, true);
</code>

Solution

  • This is the best I can offer: an example that recurses over a SimpleXML tree to build a plain array/object structure, with one object per element and a nested `children` array for its child elements.

    // Download the page. file_get_contents() returns false on failure, which
    // must not be passed on to the parser.
    $source = file_get_contents('https://en.wikipedia.org/wiki/HTML5');
    if( $source === false )
    {
      throw new RuntimeException('Unable to download the HTML document.');
    }

    // simplexml_load_string() is an XML parser and returns false for markup
    // that is not well-formed XML, which is typical of real-world HTML. Parse
    // with the lenient DOM HTML parser instead and convert the resulting tree
    // to SimpleXML. Parser warnings are collected internally rather than
    // emitted, and the previous libxml error mode is restored afterwards.
    $previousErrorMode = libxml_use_internal_errors( true );
    $dom = new DOMDocument();
    $loaded = $dom->loadHTML( $source );
    libxml_clear_errors();
    libxml_use_internal_errors( $previousErrorMode );
    if( !$loaded )
    {
      throw new RuntimeException('Unable to parse the HTML document.');
    }

    $html = simplexml_import_dom( $dom );
    if( !( $html instanceof SimpleXMLElement ) )
    {
      throw new RuntimeException('Unable to convert the DOM tree to SimpleXML.');
    }

    // process_html() walks the children of the root element recursively.
    $output = process_html( $html );

    print_r($output);
    
    /**
     * Recursively converts the elements yielded by iterating $nodes into a
     * list of plain stdClass objects.
     *
     * Each object carries:
     *  - tag:        the element name,
     *  - text:       the element's string value, trimmed,
     *  - attributes: (only when the element has attributes) a stdClass mapping
     *                attribute name to its string value,
     *  - children:   (only when the element has child elements) the result of
     *                processing those children the same way.
     *
     * @param SimpleXMLElement $nodes Element (or child collection) to iterate.
     * @return array List of stdClass objects, one per iterated element.
     */
    function process_html( SimpleXMLElement $nodes )
    {
      $result = array();

      foreach( $nodes as $element )
      {
        $entry = new stdClass();
        $entry->tag = $element->getName();
        $entry->text = trim( (string) $element );

        // Copy attributes only when at least one is present, so elements
        // without attributes get no "attributes" property at all.
        $attributeSet = $element->attributes();
        if( $attributeSet )
        {
          $entry->attributes = new stdClass();
          foreach( $attributeSet as $name => $value )
          {
            $entry->attributes->{$name} = (string) $value;
          }
        }

        // Descend into child elements; leaves get no "children" property.
        $childSet = $element->children();
        if( count( $childSet ) )
        {
          $entry->children = process_html( $childSet );
        }

        $result[] = $entry;
      }

      return $result;
    }