Search code examples
phpsleepexecution

PHP web-app cripples entire website


I have a web-app that fetches the content of various sources to test for the existence of a certain keyword in that text.

The problem is that when that program runs, the entire website becomes unresponsive.

I have tried adding sleep(1); everywhere so it gets some break time, but the only thing that does is make the execution longer and thus keep my website unresponsive for a longer period.

Is there another solution to prevent one program from halting, or otherwise affecting, the entire website?

Here's the code example (please don't mind the use of mysql instead of mysqli; the website was handed to me as is, and for the moment it's too much work to revamp the website to mysqli):

/**
 * Scan all enabled RSS feeds and store every not-yet-known article together
 * with the search keywords found in its page content.
 *
 * For each row of `rsscanner.feeds` whose `url` is not 'disabled', the feed is
 * read through the RSS parser library. Every item whose link is not yet present
 * in `rsscanner.rsscontent` is downloaded with get_data(); the relevant part of
 * the page is extracted (by element id when the feed row has `contentdiv` set,
 * otherwise by the CSS class in `containerclass`) and compared, case-insensitively,
 * against all rows of `zoekwoorden` and `zoekwoord_synoniem`. One row per article
 * is inserted: either with the matched keywords, or flagged 'NO MATCH'.
 *
 * Progress is appended to pagemodules/rssfeedlog.txt (truncated at the start of
 * each run) and "done" is echoed when the run finishes.
 *
 * All values are passed to the database through CodeIgniter query bindings, so
 * no manual escaping is needed and the method no longer depends on ext/mysql.
 *
 * @return void
 */
function updatethestuff()
{
    error_reporting(E_ALL);
    ini_set('display_errors', '1');

    // NOTE(review): if this request runs with an open PHP session, the session
    // lock is presumably what makes the rest of the site hang for the same
    // visitor while the scan runs. Nothing below touches the session, so it is
    // released up front. Confirm against the site's session configuration.
    if (session_id() !== '') {
        session_write_close();
    }

    $logFile = FCPATH . '/pagemodules/rssfeedlog.txt';
    $this->write_file($logFile, 'Begin met het verwerken van alle rssfeeds' . "\n", 'w');

    // The keyword list does not depend on the feed or the article, so it is
    // fetched once instead of once per downloaded article.
    $keywords = $this->db
        ->query("SELECT * FROM `rsscanner`.`zoekwoorden` union select * from `rsscanner`.`zoekwoord_synoniem`")
        ->result();

    $feeds = $this->db->query("SELECT * FROM rsscanner.feeds where `url` not like 'disabled' order by `id` asc");
    foreach ($feeds->result() as $feed) {
        // The parser library is loaded on first use and re-initialised for every later feed.
        if (!isset($this->rssparser)) {
            $this->load->library('RSSParser', array('url' => $feed->url, 'life' => 2));
        } else {
            $this->rssparser->reinit(array('url' => $feed->url, 'life' => 2));
        }
        $this->write_file($logFile, 'Begin met parsen van resultaten van ' . $feed->naam . "\n");

        $data = $this->rssparser->getFeed(20);
        if (empty($data)) {
            // Nothing usable came back for this feed; move on to the next one.
            continue;
        }

        foreach ($data as $item) {
            $link = (string) $item['link'];

            $query = $this->db->query(
                "SELECT feed_id from rsscanner.rsscontent where url = ? limit 0,1",
                array($link)
            );
            $this->write_file($logFile, $feed->naam . ': Aantal keren dat ' . $link . ' al voorkomt is ' . $query->num_rows() . "\n");
            if ($query->num_rows() > 0) {
                // Article already stored (as a match or as a no-match); skip it.
                continue;
            }

            $dom = new DOMDocument();
            $dom->validateOnParse = false;
            $html = get_data($item['link']);
            if (is_string($html) && $html !== '') {
                // Real-world pages are rarely valid HTML; collect parser
                // complaints internally instead of emitting a warning for each one.
                $previousLibxmlSetting = libxml_use_internal_errors(true);
                $dom->loadHTML($html);
                libxml_clear_errors();
                libxml_use_internal_errors($previousLibxmlSetting);
            }
            $this->write_file($logFile, $feed->naam . ': HTML data geladen' . "\n");

            $innerHTML = '';
            $tmp_dom = new DOMDocument();
            if (isset($feed->contentdiv)) {
                $contentNode = $dom->getElementById($feed->contentdiv);
                // getElementById() yields null when the id is absent from the page;
                // in that case the content simply stays empty.
                if ($contentNode !== null) {
                    $tmp_dom->appendChild($tmp_dom->importNode($contentNode, true));
                }
                $innerHTML .= trim($tmp_dom->saveHTML());
                $this->write_file($logFile, $feed->naam . ': Informatie uit de contentdiv("' . $feed->contentdiv . '") geladen' . "\n");
            } else {
                // Select every element whose class attribute contains the
                // configured class name as a whole word.
                $finder = new DOMXPath($dom);
                $nodes = $finder->query("//*[contains(concat(' ', normalize-space(@class), ' '), ' {$feed->containerclass} ')]");
                foreach ($nodes as $node) {
                    $tmp_dom->appendChild($tmp_dom->importNode($node, true));
                }
                $innerHTML .= trim($tmp_dom->saveHTML());
                $this->write_file($logFile, $feed->naam . ': Informatie uit de content class("' . $feed->containerclass . '") geladen' . "\n");
            }

            // stripslashes() is kept so the stored content has the same form as
            // rows written by earlier runs; escaping itself is left to the query bindings.
            $innerHTML = stripslashes($innerHTML);
            $this->write_file($logFile, $feed->naam . ': html veilig gemaakt voor database' . "\n");

            $found = false;
            $keywordsstring = '';
            $lowerhtml = strtolower($innerHTML);
            $this->write_file($logFile, $feed->naam . ': Begin met zoekwoord vergelijking' . "\n");
            foreach ($keywords as $r) {
                $needle = strtolower((string) $r->zoekwoord);
                if ($needle === '') {
                    // An empty keyword cannot be searched for meaningfully.
                    continue;
                }
                if (strpos($lowerhtml, $needle) !== false) {
                    $found = true;
                    $keywordsstring .= ' ' . $r->zoekwoord;
                    $this->write_file($logFile, $feed->naam . ': - Match met zoekwoord: ' . $r->zoekwoord . "\n");
                }
            }

            if ($found) {
                $published = date('Y-m-d', strtotime($item['pubDate']));
                $keywordsFound = $keywordsstring;
                $logMessage = ': Opslaan in database als geldig zoekresultaat';
            } else {
                // Non-matching articles are stored with a fixed date and the
                // 'NO MATCH' marker. NOTE(review): the date looks like a sentinel
                // value; confirm how readers of this table rely on it.
                $published = '1983-05-10 09:10:53';
                $keywordsFound = 'NO MATCH';
                $logMessage = ': markeren in database als een no match';
            }

            $this->db->query(
                "INSERT INTO rsscanner.rsscontent (feed_id,title,description,published,url,content,keywords_found)
                 VALUES (?,?,?,?,?,?,?)",
                array(
                    $feed->id,
                    (string) $item['title'],
                    strip_tags((string) $item['description']),
                    $published,
                    $link,
                    $innerHTML,
                    $keywordsFound,
                )
            );
            $this->write_file($logFile, $feed->naam . $logMessage . "\n");
        }
    }

    echo "done";
}

Solution

  • Would it be an option to run this task as a background process?

    For example use Gearman

    http://www.php.net/manual/en/intro.gearman.php