Search code examples
phpsqlxmldatabaserss

I am trying to retrieve the image URL from media:content tag RSS FEED and storing it in the mysql, but I am tired of trying


Here is a part from RSS feed

<item>
    <title>
        <![CDATA[
        Japan backs coronavirus drug; Moscow keeps lockdown: Live updates
        ]]>
    </title>
    <description>
        <![CDATA[
        <div><img src="https://www.aljazeera.com/mritems/Images/2020/5/9/b738bc043efb400296eff230bcfd444f_18.jpg" style="width: 100%;"><div>Over 275,000 have died worldwide while more than 1.3 million have recovered as countries weigh easing of lockdown rules.</div></div>
        ]]>
    </description>
    <link>
        https://www.aljazeera.com/news/2020/05/japan-backs-coronavirus-drug-moscow-lockdown-live-updates-200508232257831.html
    </link>
    <guid isPermaLink="false">592c6747e057269acbb31f383c9c110d</guid>
    <dc:creator>
        <![CDATA[ AJENews ]]>
    </dc:creator>
    <pubDate>Sat, 09 May 2020 01:18:50 GMT</pubDate>
    <media:content medium="image" url="https://www.aljazeera.com/mritems/Images/2020/5/9/b738bc043efb400296eff230bcfd444f_18.jpg"/>
</item>

Here loaded rss feed content by DOMDocument() and define array for save content and use getElementsByTagName

    <?php
       $rss = new DOMDocument();
        $rss->load('https://rss.app/feeds/7e40Pu6EiBz9ourE.xml');
        $feed = array();
        foreach ($rss->getElementsByTagName('item') as $node) {
            $item = array ( 
                'title' => $node->getElementsByTagName('title')->item(0)->nodeValue,
                'desc' => $node->getElementsByTagName('description')->item(0)->nodeValue,
                'link' => $node->getElementsByTagName('link')->item(0)->nodeValue,
                'date' => $node->getElementsByTagName('pubDate')->item(0)->nodeValue,
                //'dcUri' => $rss->lookupNamespaceUri('dc'),
                //'authors' => $node->getElementsByTagNameNS($item[dcUri], 'creator'),
                //'cre' => $item[authors]->item(0)->nodeValue,
                );
            array_push($feed, $item);
        } 

code for store array content in mysql database and i am successfully saved (title,des,date)

    $mysqli = new mysqli("localhost","root","","hmsdb");

    if (mysqli_connect_errno()) {
        printf("Connect failed: %s\n", mysqli_connect_error());
        exit();
    }
    $stmt = $mysqli->prepare("INSERT INTO `rssitems` (`title`, `description`, `link`, `pubDate`) VALUES (?, ?, ?, ?)");

    $stmt->bind_param('ssss', $title, $description, $link, $pubDate);

    foreach( $feed as $RssItem){
        $title = $RssItem["title"];
        $description = $RssItem["desc"];
        $link = $RssItem["link"];
        $pubDate = $RssItem["date"];
        //$creator= $RssItem["cre"];

        $stmt->execute();
    }

Solution

  • Here is a minimum reproducible example how to extract the image URL.

    • Iterate over all nodes
    • Nest a loop for all child Nodes
    • Grab the properties you need on the specific type
    $rss = new DOMDocument();
    $rss->load('https://rss.app/feeds/7e40Pu6EiBz9ourE.xml');
    $feed = array();
    foreach ($rss->getElementsByTagName('item') as $node) {
        foreach($node->childNodes as $childNode) {
            if($childNode->tagName == 'media:content') {
                var_dump($childNode->getAttribute('url'));
            }
        }
    }
    

    string(90) "https://www.aljazeera.com/mritems/Images/2020/5/10/1024c76c23214805817eff50f9eb45c0_18.jpg"
    string(90) "https://www.aljazeera.com/mritems/Images/2020/4/24/d8cba9945a604b88a1dbb413197d0db0_18.jpg"
    string(90) "https://www.aljazeera.com/mritems/Images/2020/5/10/bb5f767a4baf4d4098cf766f71a7b347_18.jpg"
    string(90) "https://www.aljazeera.com/mritems/Images/2020/5/10/6adcf99eaeda40bea68a519d95ad9fa5_18.jpg"
    string(90) "https://www.aljazeera.com/mritems/Images/2020/3/23/4cb97ca92a4543799b3b0c83e8d6df3f_18.jpg"