Search code examples
javascriptget

Rewriting GET calls made by a JS script from another JS script


I'm trying to convert some (old) dynamic sites to static pages. So far it works fine, but some of the included JS scripts build URLs to load other JS files, and the URL building is complicated (so simply searching for and replacing URLs doesn't work, and I want a generic method).

I want to "rewrite" the GET calls performed from the JS in order to map them to my own (local) URL.

I tried using this script (added at the beginning of the HTML source):

// Marker showing that this script itself was executed.
console.log("Script loaded");
// Override the open method of XMLHttpRequest to intercept all requests
// NOTE(review): this hook only sees requests that are opened through
// XMLHttpRequest. Loads started another way (for example by inserting a
// <script src> element) presumably never call open() and would bypass it.
// Keep the native implementation so the wrapper can delegate to it.
var originalOpen = XMLHttpRequest.prototype.open;
XMLHttpRequest.prototype.open = function(method, url) {
  // Log the method and URL before the request is opened.
  console.log("Intercepted HTTP request: " + method + " " + url);
  // Forward the call unchanged, with the same receiver and argument list.
  originalOpen.apply(this, arguments);
};
// Marker showing that the override above was installed without an exception.
console.log("Script end");

in a script that is loaded first in the HTML source. In the console (from the web developer tools in Firefox) I can see "Script loaded" at the beginning of the logs (and "Script end"), but the "GET <url>" requests that show up later in the network log, initiated by the JS script, are not intercepted.

I also tried

// Marker showing that this script itself was executed.
console.log("Script loaded");
// Keep the native fetch so the wrapper can delegate to it.
const nativeFetch = window.fetch
// Replace window.fetch with a wrapper that logs every call before forwarding it.
// NOTE(review): only code that calls fetch() reaches this wrapper; requests
// started by other mechanisms (e.g. a <script src> element) presumably do not.
window.fetch = async (input, init) => {
  // Build a Request only to read the normalized method and URL for the log;
  // the original input/init pair is what gets forwarded below.
  const request = new Request(input, init);
  console.log("request happened!", request.method, request.url);
  return nativeFetch(input, init)
}
// Marker showing that the override above was installed without an exception.
console.log("Script end");

with the same lack of result. Note: both scripts come from Stack Overflow posts.

In the network console these GET look like:

(method) GET | (domain) <hostname> | (file) <URL path> | (initiator) <the infamous JS script> 

Neither script prints the console.log() messages placed inside its interceptor, so it seems that nothing is intercepted.

Did I miss something? What is the right way to do this (if it is possible at all)?

What I expect: I want my JS function to be called each time a script starts a GET request, so that I can intercept the call and redirect it to my own URL (which I already know).

To be more precise about the process: I download an HTML page from a site (to make it static). Then I download all the images, CSS and JS, rewriting the URLs (src, href…) to point to the local versions. But when I load the page in Firefox with the network console open, I can still see GET requests for JS files from the original site's URL, and the initiator of these requests is a JS script. Rewriting the JS code is difficult and hazardous, so I'm trying to catch these GET requests (GET, here, is from the browser's perspective) in order to re-route them to my local files. I hope this is clearer.


Solution

  • I can't really determine what is preventing your approach from working.

    But I would recommend not loading your interceptor from a separate script file, and instead hardcoding it directly in the HTML document.

    Also, it would be good to attach your interceptor to top, self, window and globalThis.

    Here's an example:

    You only need the code inside the <script> tag in the HTML document.

    console.log('Script loaded!');
    {
      // Demo driver: requests the same resource through each channel in turn
      // (a script element, generated links, fetch and XMLHttpRequest).
      const demoUrl = 'https://code.jquery.com/jquery-3.7.1.min.js';

      // 1) A dynamically created <script> element pointing at the resource.
      const injectedScript = document.createElement('script');
      injectedScript.src = demoUrl;
      document.head.appendChild(injectedScript);

      // 2) Every two seconds, a new <a> whose href carries a random query string.
      const appendRandomLink = () => {
        const anchor = document.createElement('a');
        anchor.href = `${demoUrl}?r=${Math.random() * Number.MAX_SAFE_INTEGER}`;
        generatedUrls.appendChild(anchor);
      };
      setInterval(appendRandomLink, 2000);

      // 3) A fetch() call; both outcomes are deliberately ignored by the demo.
      fetch(demoUrl)
        .then(() => {
          // Success hook, e.g. console.log('Fetching Success:', demoUrl);
        })
        .catch(() => {
          // Failure hook, e.g. log the rejection's message here.
        });

      // 4) An XMLHttpRequest that is opened (the script never calls send()).
      const demoRequest = new XMLHttpRequest();

      demoRequest.addEventListener('readystatechange', ({ target }) => {
        // State-change hook, e.g. log target.readyState here.
        const finishedOk = target.readyState === 4 && target.status === 200;

        if (finishedOk) {
          // Success hook, e.g. console.log('XMLHttpRequest open() was successful.');
        }
      });

      demoRequest.addEventListener('error', () => {
        // Error hook, e.g. console.log('XMLHttpRequest open() produced an error.');
      });

      demoRequest.open('GET', demoUrl, true);
    }
    console.log('Script ended!');
    /* Observed-URL entries are listed without bullet markers. */
    #observedUrls li
    {
      list-style-type: none;
    }
    /* Generated demo links are rendered without an underline. */
    #generatedUrls a
    {
      text-decoration: none;
    }
    /* Keep preformatted fragments on the line. `avoid` is the defined keyword
       for discouraging a break; `never` is not a valid value for these
       properties and such a declaration would be ignored. */
    #generatedUrls pre
    {
      display: inline-block;
      break-before: avoid;
      break-after: avoid;
    }
    <!DOCTYPE html>
    <html>
      
      <head>
        
        <!-- This should be your first <script> tag in the document -->
        
        <script>
          
          ///// MAIN OBSERVER FUNCTION /////
          
          /**
           * Records one URL that an element, an XMLHttpRequest or a fetch() call uses.
           *
           * The code of this function needs to be changed completely
           * to achieve your purposes because it was made this way
           * for testing with the stacksnippets.net.
           *
           * The URL is written to the console and appended as an <li> to the
           * element with id "observedUrls": immediately when the document is
           * already "interactive" or "complete", otherwise once it becomes "complete".
           *
           * @param {string|URL|Request} theUrl - URL being used. Objects exposing a
           *   string `url` (Request) or `href` (URL) are reduced to that string.
           *   Empty or unsupported values are ignored.
           * @param {object} theSource - The user of the URL: shown by tag name when
           *   it has one, otherwise by its constructor name.
           */
          function observeUrl ( theUrl, theSource )
          {
            // fetch() and XMLHttpRequest.open() also accept URL / Request objects,
            // which have no `length`; convert them instead of skipping them silently.
            let urlText = '';
            
            if ( typeof theUrl === 'string' || theUrl instanceof String ) urlText = String(theUrl);
            else if ( typeof theUrl?.url === 'string' ) urlText = theUrl.url;
            else if ( typeof theUrl?.href === 'string' ) urlText = theUrl.href;
            
            if ( ! urlText.length ) return;
            
            const tagLabel = theSource.tagName ? theSource.tagName.toLowerCase() : null;
            const plainLabel = tagLabel ? ('<' + tagLabel + '>') : theSource.constructor.name;
            const htmlLabel = tagLabel ? ('&lt;' + tagLabel + '&gt;') : theSource.constructor.name;
            
            console.log(plainLabel + ': ' + urlText);
            
            // Use the captured native createElement so that creating the log entry
            // does not go through the observing createElement wrapper.
            const newMessage = oldCreate.call(document, 'li');
            
            newMessage.innerHTML =
            (
              '<strong>' + htmlLabel + ':</strong> ' + urlText
              .replaceAll('&', '&amp;')
              .replaceAll('<', '&lt;')
              .replaceAll('>', '&gt;')
            );
            
            if ( ['interactive', 'complete'].includes(document.readyState) )
            {
              observedUrls.appendChild(newMessage);
              return;
            }
            
            // Document still loading: wait for "complete", then detach the listener
            // so that deferred entries do not leave listeners behind.
            const eventListener = () =>
            {
              if ( document.readyState === 'complete' )
              {
                document.removeEventListener('readystatechange', eventListener);
                observedUrls.appendChild(newMessage);
              }
            };
            
            document.addEventListener('readystatechange', eventListener);
          }
          
          ///// OTHER OBSERVER FUNCTIONS /////
          
          /**
           * Starts observing an element when its tag is listed in targetSubjects.elements.
           *
           * For a targeted element, the current values of the configured attributes
           * are reported through observeElement(). Then, for every configured
           * attribute that the element's direct prototype defines, an own accessor
           * is installed on the element that reports each assigned value to
           * observeUrl() before delegating to the prototype's setter.
           *
           * @param {Element} newElement - Element to inspect.
           * @returns {Element} The same element, whether or not it was targeted.
           */
          function observeNew ( newElement )
          {
            const isTargeted = targetSubjects.elements.includes(newElement.tagName.toLowerCase());
            
            if ( ! isTargeted ) return newElement;
            
            observeElement(newElement);
            
            const elementPrototype = Reflect.getPrototypeOf(newElement);
            
            targetSubjects.attributes.forEach
            (
              attributeName =>
              {
                const nativeAccessor = Reflect.getOwnPropertyDescriptor(elementPrototype, attributeName);
                
                // Only attributes declared directly on the prototype are wrapped.
                if ( ! nativeAccessor ) return;
                
                Reflect.defineProperty
                (
                  newElement, attributeName,
                  {
                    get: () => nativeAccessor.get.call(newElement),
                    set ( assignedValue )
                    {
                      observeUrl(assignedValue, newElement);
                      return nativeAccessor.set.call(newElement, assignedValue);
                    },
                  }
                );
              }
            );
            
            return newElement;
          }
          
          /**
           * Reports the current value of every configured attribute of an element.
           *
           * Attributes whose value is undefined or the empty string are skipped;
           * every other value is passed to observeUrl() together with the element.
           *
           * @param {Element} theElement - Element whose attributes are read.
           */
          function observeElement ( theElement )
          {
            for ( const attributeName of targetSubjects.attributes )
            {
              const currentValue = theElement[attributeName];
              
              if ( currentValue === undefined || currentValue === '' ) continue;
              
              observeUrl(currentValue, theElement);
            }
          }
          
          ///// OBSERVER CONFIGURATION /////
          
          // Start from an empty console so only the observer's output is visible.
          console.clear();
          
          // What the observer watches:
          //  - attributes: property names read from, and wrapped on, observed elements;
          //  - elements:   lower-case tag names of the elements that get observed.
          const targetSubjects =
          {
            attributes:
            [
              'action', 'data', 'formaction',
              'href', 'location', 'src', 'srcset'
            ],
            elements:
            [
              'a', 'area', 'audio', 'base', 'button',
              'canvas', 'embed', 'fencedframe', 'figure',
              'form', 'frame', 'frameset', 'iframe',
              'img', 'input', 'link', 'object', 'picture',
              'portal', 'script', 'source', 'track', 'video'
            ],
          };
          
          // References to the native implementations, captured before the
          // overrides further down replace them, so the wrappers can delegate.
          const oldCreate = document.createElement;
          const oldElement = Element;
          const oldFetch = fetch;
          const oldOpen = XMLHttpRequest.prototype.open;
          
          ///// OBSERVER AUTOLOAD /////
          
          // Observe the targeted elements that already exist in the document
          // when this script runs, one configured tag name at a time.
          for ( const targetTag of targetSubjects.elements )
          {
            document.querySelectorAll(targetTag).forEach
            (
              existingElement => { observeNew(existingElement); }
            );
          }
          
          ///// OBSERVER INJECTIONS /////
          
          /**
           * Assigns a value under the given name on each global context:
           * top, parent, self, window and globalThis.
           *
           * Contexts that are null or undefined are skipped. A context that
           * rejects the assignment is reported with console.log and does not
           * stop the remaining contexts from being processed.
           *
           * @param {string} theName - Property name to assign on each context.
           * @param {*} theValue - Value to install.
           */
          function installGlobal ( theName, theValue )
          {
            const labelledContexts = Object.entries({top, parent, self, window, globalThis});
            
            labelledContexts.forEach
            (
              ([contextLabel, contextObject]) =>
              {
                if ( contextObject == null ) return;
                
                try
                {
                  contextObject[theName] = theValue;
                }
                catch
                {
                  console.log('Global installation failed:', contextLabel + '.' + theName);
                }
              }
            );
          }
          
          {
            /**
             * Replacement for the global `Element` constructor: builds the
             * element with the native constructor and starts observing it
             * when its tag is one of the configured targets.
             *
             * NOTE(review): browsers are expected to reject direct construction
             * of Element ("Illegal constructor"), in which case the native call
             * below throws exactly as it did before — confirm.
             */
            const elementWrapper = function newElement ( ...constructorArguments )
            {
              const theElement = new oldElement(...constructorArguments);
              
              if ( targetSubjects.elements.includes(theElement.tagName.toLowerCase()) )
              {
                observeNew(theElement);
              }
              
              return theElement;
            };
            
            // Share the native prototype object. Without this, the replaced global
            // would carry a fresh empty prototype, so `x instanceof Element` would
            // be false and `Element.prototype.<member>` lookups would find nothing.
            elementWrapper.prototype = oldElement.prototype;
            
            installGlobal('Element', elementWrapper);
          }
          
          {
            /**
             * Observing replacement for createElement: delegates to the captured
             * native implementation with the same receiver and arguments, then
             * starts observing the new element when its tag is a configured target.
             */
            const observingCreate = function newCreate ( tagName, ...Arguments )
            {
              const createdElement = Reflect.apply(oldCreate, this, arguments);
              const createdTag = createdElement.tagName.toLowerCase();
              
              if ( targetSubjects.elements.includes(createdTag) )
              {
                observeNew(createdElement);
              }
              
              return createdElement;
            };
            
            // Install on the document instance and on the shared prototype.
            document.createElement = observingCreate;
            Document.prototype.createElement = observingCreate;
          }
          
          /**
           * Observing replacement for XMLHttpRequest.prototype.open: reports the
           * requested URL together with the request object, then delegates to the
           * captured native open() with the same receiver and arguments.
           */
          XMLHttpRequest.prototype.open = function newOpen ( theMethod, theUrl )
          {
            const theRequest = this;
            
            observeUrl(theUrl, theRequest);
            
            return Reflect.apply(oldOpen, theRequest, arguments);
          }
          
          // Install an observing fetch on every global context: it reports the
          // first argument together with the call's receiver, then delegates to
          // the captured native fetch with the same receiver and arguments.
          installGlobal
          (
            'fetch',
            function newFetch ( theUrl, ...Arguments )
            {
              const theReceiver = this;
              
              observeUrl(theUrl, theReceiver);
              
              return Reflect.apply(oldFetch, theReceiver, arguments);
            }
          );
          
          // Re-link the constructor objects after the global `Element` was replaced.
          // NOTE(review): `Element` here presumably resolves to the wrapper installed
          // by installGlobal('Element', ...), so the first call makes the wrapper
          // inherit from the native constructor and the second makes HTMLElement
          // inherit from the wrapper. These calls change the constructors' own
          // prototype links only, not their `.prototype` objects — confirm intent.
          Reflect.setPrototypeOf(Element, oldElement);
          Reflect.setPrototypeOf(HTMLElement, Element);
          
          // When the document becomes "interactive", pass the root <html> element
          // to observeNew().
          // NOTE(review): 'html' is not in the tag list configured in this script,
          // so observeNew() appears to return the element without observing
          // anything — verify whether a scan of the parsed document was intended.
          document.addEventListener
          (
            'readystatechange', readyEvent =>
            {
              if ( document.readyState == 'interactive' )
              {
                observeNew(document.querySelector('html'));
              }
            }
          );
          
        </script>
        
        <!-- Any other <script> tag goes after this -->
        
      </head>
      
      <body>
        
        <code>
          <strong>Observed URLs:</strong>
          <menu id="observedUrls"></menu>
          
          <div id="generatedUrls"></div>
        </code>
      </body>
      
    </html>

    EDIT: I just implemented the <script> tag interception. As this is just an example of how to do it, there are other tags that should be observed as well (like <a>, <link>, <img> and many others that can have a src or href attribute). You just need to change the code slightly to achieve that.