Search code examples
phpdomappendchildmicrodata

Extract <p> tag from the page and warp it around <div> using php


I have a php based dynamic website with FAQ page. I want to auto inject microdata into the text for SEO purposes. I have witten the code which get the html and auto inject the microdata into the html, and output the html with microdata but somehow, not getting the expected results.

I have tried the below php code:

<?php
$faq = "
<h3>What does the story's title mean?</h3>
<p>The beating of a heart which causes the narrator to go insane that he confesses to his crimes. So the title means he's telling (Tell) the story of the old man's beating heart (Tale Heart) that drove him insane.</p>
<h3>The narrator claims he is not mad. What evidence do we have that he is?</h3>
<p>The narrator's desire to eradicate the man's eye motivates his murder, but the narrator does not acknowledge that this act will end the man's life. By dismembering his victim, the narrator further deprives the old man of his humanity.</p>";

$dom = new DOMDocument();
$dom->loadHTML($faq);
$title = $dom->getElementsByTagName('h3');
$paras = $dom->getElementsByTagName('p');
foreach($title as $h){
    $h->setAttribute('class', 'faqy');
    $h->setAttribute('itemprop', 'name');
  }

foreach($paras as $p){
    $acceptedAnswer = $dom->createElement('div');
    $acceptedAnswer->setAttribute('itemscope','');
    $acceptedAnswer->setAttribute('itemprop','acceptedAnswer');
    $acceptedAnswer->setAttribute('itemtype','https://schema.org/Answer');
    foreach ($p->childNodes as $child) {
        $p->removeChild($child);
        $acceptedAnswer->appendChild($child);
    }
    $p->appendChild($acceptedAnswer);        
    $p->setAttribute('class', 'faqp');
    $p->setAttribute('itemprop', 'text');
    $p->setAttribute('style', 'display: none;');
 }
$html=$dom->saveHTML();

function wrapcallback( $buffer ){
        global $use_output_buffer;
        $delimiter='#';
        $tags=array('h1','h2','h3','h4','h5','h6');
        $query=implode('|', explode( $delimiter, sprintf( '//%s', implode( sprintf( '%s//', $delimiter ), $tags ) ) ) );
        $keepers=array();
        $parents=array();
        libxml_use_internal_errors( true );
        $dom=new DOMDocument;
        $dom->validateOnParse=false;
        $dom->recover=true;
        $dom->strictErrorChecking=false;
        $dom->preserveWhiteSpace=true;
        $dom->loadHTML( $buffer );
        $errors = libxml_get_errors();
        libxml_clear_errors();
        $xp=new DOMXPath( $dom );
        $col=$xp->query( $query );
        if( $col->length > 0 ){
            foreach( $col as $node ){
                $parents[]=$node->parentNode;
                $nodes=array( $node );
                while( $node = $node->nextSibling ){
                    if( in_array( $node->nodeName, $tags ) )break;
                    if( $node->nodeType==XML_ELEMENT_NODE  )$nodes[]=$node;
                }
                $keepers[]=$nodes;
            }
        }
        foreach( $keepers as $index => $obj ){
            $mainEntity=$dom->createElement('li');
            $mainEntity->setAttribute('class', 'faqq');
            $mainEntity->setAttribute('itemscope', '');
            $mainEntity->setAttribute('itemprop', 'mainEntity');
            $mainEntity->setAttribute('itemtype', 'https://schema.org/Question');
            $parents[ $index ]->appendChild( $mainEntity );
            foreach( $obj as $child )$mainEntity->appendChild( $child );
        }
        $keepers = $parents = $xp = $mainEntity = null;
        echo $dom->saveHTML();
    };
    $wraptext=$html;
?>
<div class="dfaq cfix" itemscope itemtype="https://schema.org/FAQPage">
<h2 class="faqt">FAQs</h2>
<ul class="faqs">
<?php wrapcallback($wraptext); ?>
</ul>
</div><!-- /.dfaq -->

The html output of the above php is

<div class="dfaq cfix" itemscope="" itemtype="https://schema.org/FAQPage">
    <h2 class="faqt">FAQs</h2>
    <ul class="faqs">
        <li class="faqq" itemscope="" itemprop="mainEntity" itemtype="https://schema.org/Question">
            <h3 class="faqy" itemprop="name">What does the story's title mean?</h3>
            <p class="faqp" itemprop="text" style="display: none;"></p>
            <div itemscope="" itemprop="acceptedAnswer" itemtype="https://schema.org/Answer">
                The beating of a heart which causes the narrator to go insane that he confesses to his crimes. So the title means he's telling (Tell) the story of the old man's beating heart (Tale Heart) that drove him insane.
            </div>
        </li>
        <li class="faqq" itemscope="" itemprop="mainEntity" itemtype="https://schema.org/Question">
            <h3 class="faqy" itemprop="name">The narrator claims he is not mad. What evidence do we have that he is?</h3>
            <p class="faqp" itemprop="text" style="display: none;"></p>
            <div itemscope="" itemprop="acceptedAnswer" itemtype="https://schema.org/Answer">
                The narrator's desire to eradicate the man's eye motivates his murder, but the narrator does not acknowledge that this act will end the man's life. By dismembering his victim, the narrator further deprives the old man of his
                humanity.
            </div>
        </li>
    </ul>
</div>

Expected html output / I need this html output:

<div class="dfaq cfix" itemscope="" itemtype="https://schema.org/FAQPage">
    <h2 class="faqt">FAQs</h2>
    <ul class="faqs">
        <li class="faqq" itemscope="" itemprop="mainEntity" itemtype="https://schema.org/Question">
            <h3 class="faqy" itemprop="name">What does the story's title mean?</h3>
            
            <div itemscope="" itemprop="acceptedAnswer" itemtype="https://schema.org/Answer">
                <p class="faqp" itemprop="text" style="display: none;">The beating of a heart which causes the narrator to go insane that he confesses to his crimes. So the title means he's telling (Tell) the story of the old man's beating heart (Tale Heart) that drove him insane.</p>
            </div>
        </li>
        <li class="faqq" itemscope="" itemprop="mainEntity" itemtype="https://schema.org/Question">
            <h3 class="faqy" itemprop="name">The narrator claims he is not mad. What evidence do we have that he is?</h3>
            <p class="faqp" itemprop="text" style="display: none;"></p>
            <div itemscope="" itemprop="acceptedAnswer" itemtype="https://schema.org/Answer">
                <p class="faqp" itemprop="text" style="display: none;">The narrator's desire to eradicate the man's eye motivates his murder, but the narrator does not acknowledge that this act will end the man's life. By dismembering his victim, the narrator further deprives the old man of his humanity.</p>
            </div>
        </li>
    </ul>
</div>

Solution

  • Create a div before the element. When you reappend a node it will be moved.

    $html = '<p>text</p>';
    
    $doc = new DOMDocument();
    $doc->loadHTML($html);
    
    $div = $doc->createElement('div');
    $div->setAttribute('class', 'wrapper');
    
    // Get the element
    $p = $doc->getElementsByTagName('p')->item(0);
    
    // Insert the new one before
    $divNode = $p->parentNode->insertBefore($div, $p);
    
    // Move the p into div
    $divNode->appendChild($p);
    
    echo $doc->saveHTML();
    

    Output

    <!DOCTYPE html PUBLIC "-//W3C//DTD HTML 4.0 Transitional//EN" "http://www.w3.org/TR/REC-html40/loose.dtd">
    <html><body><div class="wrapper"><p>text</p></div></body></html>