Search code examples
phpxmldomdomdocument

Iterating over elements from DOMDocument::getElementsByTagName() doesn't work


I have this tiny class that will help me to replace custom tags with valid HTML tags. My issue with it is, it replaces only the first custom tag for whatever reason. My guess is that I'm breaking the reference somewhere, but I can't figure out where... Scroll down to the bottom of this post to see the actual result and the expected output.

<?php
class DomParser {

    protected $tags = [];
    protected $document;

    public function __construct($html) {
        $this->document = new DOMDocument();
        $this->document->loadXML($html);
    }

    public function addTag(string $name, callable $callable) {
        $this->tags[$name] = $callable;
    }

    public function replace() {
        foreach ($this->tags as $name => $callable) {
            $elements = $this->document->getElementsByTagName($name);

            foreach ($elements as $element) {
                $callable($element, $this->document);
            }
        }

        return $this->document->saveHTML();
    }
}

Example code to run the class:

<?php
require_once 'DomParser.php';
//require_once 'RenameTag.php';
//require_once 'Container.php';

$html = '<html>
    <container>
        <col>
            <p>
                <test attribute="test" attribute2="this">test<br />test2</test>
            </p>
        </col>
        <col>
            test col
        </col>
    </container>
    <container fluid="test"><test>dsdshsh</test></container>
</html>';

$parser = new DomParser($html);

//$parser->addTag('test', RenameTag::create('othertag'));
//$parser->addTag('container', Container::create());

$parser->addTag('col', function($oldTag) {
    $document = $oldTag->ownerDocument;

    $newTag = $document->createElement('div');
    $oldTag->parentNode->replaceChild($newTag, $oldTag);

    foreach (iterator_to_array($oldTag->childNodes) as $child) {
        $newTag->appendChild($oldTag->removeChild($child));
    }

    $newTag->setAttribute('class', 'col');
});

echo $parser->replace();

I'm getting this result:

<html>
        <container>
                <div class="col">
                        <p>
                                <test attribute="test" attribute2="this">test<br>test2</test>
                        </p>
                </div>
                <col>
        </container>
        <container fluid="true"><test>dsdshsh</test></container>
</html>

The expected output should be:

<html>
        <container>
                <div class="col">
                        <p>
                                <test attribute="test" attribute2="this">test<br>test2</test>
                        </p>
                </div>
                <div class="col">
                    test col
                </div>
        </container>
        <container fluid="test"><test>dsdshsh</test></container>
</html>

Solution

  • The problem seems to be that you are changing the document structure while trying to iterate over it.

    The alternative is to use XPath, which will take it's own copy of the nodes for you to loop over, the changes are fairly small, but will give the output your after...

    public function replace() {
        $xp = new DOMXPath($this->document);
    
        foreach ($this->tags as $name => $callable) {
            $elements = $xp->query("//".$name);
            foreach ($elements as $element) {
                $callable($element, $this->document);
            }
        }
    
        return $this->document->saveHTML();
    }