Search code examples
php domdocument file-get-contents

php for packaging any URL with embedded javascript and css?


We need a cronjob that creates a static version of our anonymous pages.

Each URL should be saved as a single HTML document, with all external <script src=""> tags replaced with their javascript and all <link href=""> tags replaced with their CSS. (css doesn't need to be inlined as attributes).

Before I re-invent the wheel, are there any simple packaging scripts in PHP to do this?


Solution

  • my simple solution...

    // Download the page that is going to be packaged into a single static file.
    // $url is expected to be set before this point; the raw markup ends up in $html.
    $ch = curl_init();
      curl_setopt($ch, CURLOPT_HEADER, false);
      curl_setopt($ch, CURLOPT_FOLLOWLOCATION, true);
      curl_setopt($ch, CURLOPT_URL, $url);
      curl_setopt($ch, CURLOPT_REFERER, $url);
      curl_setopt($ch, CURLOPT_RETURNTRANSFER, true);
      // Treat HTTP status >= 400 as a transfer error so that an error page is
      // never packaged and written out as the static snapshot.
      curl_setopt($ch, CURLOPT_FAILONERROR, true);
      // TLS peer/host verification is deliberately left at its (enabled) default.
      curl_setopt($ch, CURLOPT_VERBOSE, 1);
      $html = curl_exec($ch);
      $errno = curl_errno($ch);
      if ($html === false || $errno) {
          $error_message = curl_strerror($errno) . ' -- ' . $url;
          // Release the handle before aborting; die() would otherwise skip it.
          curl_close($ch);
          die ("cURL error ({$errno}):\n {$error_message}");
      }
      curl_close($ch);
    
      // Parse the downloaded markup ($html) into a DOM tree ($doc) so that the
      // <script> and <link> elements can be rewritten further down.
      $doc = new DOMDocument();
      $doc->strictErrorChecking = false;
      $doc->recover = true;
      // Collect libxml parse complaints internally instead of emitting a PHP
      // warning for each one; the previous setting is restored afterwards.
      $internalErrors = libxml_use_internal_errors(true);
      // An empty body cannot be parsed (loadHTML rejects it), so it counts as a failure.
      $loaded = is_string($html) && $html !== '' && $doc->loadHTML($html);
      // Drop the buffered parse errors; nothing reads them and they would
      // otherwise stay in memory for the rest of the run.
      libxml_clear_errors();
      libxml_use_internal_errors($internalErrors);
      if (!$loaded) {
          die ("Could not parse HTML -- " . $url);
      }
    
      // Replace every external <script src="..."> with an inline <script> that
      // carries the referenced file's contents.
      //
      // The loop walks a snapshot of the node list: getElementsByTagName()
      // returns a live list, and this loop swaps nodes in the very tree that
      // list reflects.
      foreach (iterator_to_array($doc->getElementsByTagName('script'), false) as $script) {
          if (!$script->hasAttribute('src')) {
              continue;
          }

          $src = $script->getAttribute('src');
          $path = $src;
          if (strpos($path, '//') === 0) {
              // Protocol-relative URL: borrow the scheme of the page being packaged.
              $scheme = parse_url($url, PHP_URL_SCHEME);
              $path = ($scheme ? $scheme : 'http') . ':' . $path;
          } elseif (strpos($path, 'http') !== 0) {
              // Site-relative path: strip any cache-busting query string and
              // read the file from below ROOT_CD (defined elsewhere; presumably
              // the site's root directory on disk -- confirm).
              $queryPos = strpos($path, '?');
              if ($queryPos !== false) {
                  $path = substr($path, 0, $queryPos);
              }
              $path = ROOT_CD . $path;
          }

          $source = file_get_contents($path);
          if ($source === false) {
              // Could not read the file: keep the original external reference
              // rather than replacing it with an empty inline script.
              continue;
          }

          $new = $doc->createElement('script');
          $new->setAttribute('type', 'text/javascript');
          // Remember where the inlined code came from.
          $new->setAttribute('sourcePath', $src);
          $new->setAttribute('language', 'javascript');
          $new->appendChild($doc->createTextNode($source));

          $script->parentNode->replaceChild($new, $script);
      }
    
      // Replace every <link rel="stylesheet" href="..."> with an inline <style>
      // element that carries the referenced file's contents.
      //
      // The loop walks a snapshot of the node list: getElementsByTagName()
      // returns a live list, and replacing a <link> with a <style> removes it
      // from that list, which makes a plain foreach skip the <link> that follows.
      //
      // NOTE(review): relative url(...) references inside the inlined CSS are
      // not rewritten here.
      foreach (iterator_to_array($doc->getElementsByTagName('link'), false) as $link) {
          if (!$link->hasAttribute('href') || !$link->hasAttribute('rel')) {
              continue;
          }
          // rel values are case-insensitive in HTML; only plain stylesheets are inlined.
          if (strtolower(trim($link->getAttribute('rel'))) !== 'stylesheet') {
              continue;
          }

          $href = $link->getAttribute('href');
          $path = $href;
          if (strpos($path, '//') === 0) {
              // Protocol-relative URL: borrow the scheme of the page being packaged.
              $scheme = parse_url($url, PHP_URL_SCHEME);
              $path = ($scheme ? $scheme : 'http') . ':' . $path;
          } elseif (strpos($path, 'http') !== 0) {
              // Site-relative path: strip any cache-busting query string and
              // read the file from below ROOT_CD (defined elsewhere; presumably
              // the site's root directory on disk -- confirm).
              $queryPos = strpos($path, '?');
              if ($queryPos !== false) {
                  $path = substr($path, 0, $queryPos);
              }
              $path = ROOT_CD . $path;
          }

          $source = file_get_contents($path);
          if ($source === false) {
              // Could not read the file: keep the original <link> rather than
              // replacing it with an empty <style>.
              continue;
          }

          $new = $doc->createElement('style');
          $new->setAttribute('type', 'text/css');
          // Remember where the inlined CSS came from (a <link> has href, not src).
          $new->setAttribute('sourcePath', $href);
          if ($link->hasAttribute('media')) {
              // Keep the media query so e.g. a print stylesheet stays print-only.
              $new->setAttribute('media', $link->getAttribute('media'));
          }
          $new->appendChild($doc->createTextNode($source));

          $link->parentNode->replaceChild($new, $link);
      }
    
    
      // Serialise the rewritten DOM, minify it, store the snapshot on disk and
      // echo the same markup.
      $html = $doc->saveHTML();
      if ($html === false) {
          // Abort before an existing snapshot is overwritten with nothing.
          die ("Could not serialise HTML -- " . $url);
      }
      // compressHTML() is provided by the project's minifier include.
      require_once '../preprocessing/minifier.php';
      $html = compressHTML($html);
      // $platform is expected to be set before this point; it selects the output file name.
      $target = "static-login-" . $platform . ".html";
      if (file_put_contents($target, $html) === false) {
          die ("Could not write " . $target);
      }
      echo $html;