Search code examples
javascript html css google-chrome-extension firefox-addon

Dynamically Create Span Around Each Word using JavaScript


I am looking to create a function in JavaScript which will create a <span> element around each word of any (realistically most) website. This is part of the content script of a browser extension, so I am not able to make changes to the HTML by editing it in a text editor, as it should work with whatever website the user loads into the browser. The goal is to be able to label each span with an index using its id attribute. In other words, I want to be able to write code that just goes in and says "I want to change the CSS of word number 54 on the current site", and it can go in and select the span element with id=54 and run some code to change it (or something along those lines).

Below is the recursive function I currently have. (the yellow highlighting via CSS is just so I can see where the span elements are, and will be removed in the future). I also intend to add functionality where it will make the id differently if the integer id it's trying to use is already in use on the site. This mostly works, except it causes some undesired visual changes on some sites. On many sites it seems to make all the text slightly "larger" and more spread out, and on some sites it messes with the CSS in an undesirable way. For example, on the results of a DuckDuckGo search with dark mode on, it will remove the dark background and replace it with a white one, but will not change the color of the text, making the text very hard to see.

// Running word counter: its current value becomes the id of the next <span>
// created by run_element, and it is incremented once per span.
let initidx = 0;

/**
 * Recursively walks the DOM starting at `element` and replaces every text
 * node with one <span> per space-separated word.
 *
 * Each span receives the current value of the outer `initidx` counter as its
 * id (the counter is then incremented) and a yellow background for debugging.
 *
 * Behaviour worth knowing when reading the output:
 *  - words are produced by `split(" ")`, so only the plain space character
 *    separates words; empty pieces and a lone "\n" are discarded;
 *  - every span is preceded by a freshly inserted non-breaking space, and the
 *    original whitespace of the text node is not kept;
 *  - no element type is excluded, so text inside every element that is
 *    reached (whatever its tag) is rewritten as well.
 *
 * @param {Node} element - Node to process (element or text node).
 * @param {Node|null} parent - Parent of `element`; only used when `element`
 *   is a text node.
 */
function run_element(element, parent)
{
    if (element.nodeType === Node.ELEMENT_NODE)
    {
        // childNodes is live and changes while text nodes are replaced, so
        // recurse over a snapshot taken up front.
        [...element.childNodes].forEach((kid) => run_element(kid, element));
        return;
    }

    if (element.nodeType !== Node.TEXT_NODE)
    {
        return;
    }

    const words = element.nodeValue
        .split(" ")
        .filter((piece) => piece !== "" && piece !== "\n");

    for (const text of words)
    {
        const wrapper = document.createElement('span');
        wrapper.appendChild(document.createTextNode(text));
        wrapper.id = String(initidx);
        wrapper.style = "background:yellow;";
        initidx += 1;

        // Separator first, then the word, both placed ahead of the original node.
        parent.insertBefore(document.createTextNode('\xa0'), element);
        parent.insertBefore(wrapper, element);
    }

    // The original text node has been fully replaced by the spans above.
    parent.removeChild(element);
}

// Start the traversal at <body>. `parent` is only consulted when the node
// being processed is a text node, so null is passed for the root element.
run_element(document.body, null);

Is there any way I could improve this method of "indexing" the words (where a word is just defined by using split(" ")) in the site? I am not very experienced with web development, and I have never made a browser extension before. Thanks!


Solution

  • Ok. After much work, I think I have found an answer. The extent to which it "damages" the original site depends on the regex called "valid" in the code below. I spent a while tweaking it. The goal is to avoid making spans around strange characters which seem to be used for formatting on more complex sites. This still causes a tiny bit of damage to some websites' formatting, but this is much better than anything else I have come up with. This regex may need to be expanded to support special characters and characters not typically included in the English language. I also don't have a ton of experience with regex, so feedback is appreciated.

    /**
     * Wraps every whitespace-delimited word beneath `element` in its own
     * <span> so that individual words can later be addressed by id.
     *
     * Each span gets the current value of the outer counter `initIdx` as its
     * id (the counter is then incremented) and a yellow debug background.
     * `initIdx` is not declared here; it must exist as a mutable numeric
     * variable in an enclosing scope before this function is called.
     *
     * Text handling:
     *  - a text node is split into alternating runs of whitespace and
     *    non-whitespace;
     *  - a non-whitespace run that matches `valid` becomes a span;
     *  - whitespace and runs that do not match `valid` are re-inserted
     *    unchanged as plain text, so no characters of the page are lost;
     *  - a text node without any wrappable word is left untouched.
     *
     * The contents of <style> and <script> elements are never modified.
     *
     * @param {Node} element - Root of the subtree to process.
     */
    function run_element(element) {
      // Any run of whitespace or any run of non-whitespace, in document order.
      const runRGX = /\s+|\S+/g;
      // A run is wrapped only if it contains at least one of these characters.
      const valid = /\w|[\=\.,\(\);\:\?/\<\>\'\"`\-\+_\\\|\{\}\[\]\!@#\$%\^&\*]/;

      // childNodes is live and is modified below, so iterate over a snapshot.
      const children = Array.from(element.childNodes);

      for (const node of children) {
        if (node.nodeType === Node.TEXT_NODE) {
          const runs = node.nodeValue.match(runRGX) || [];

          // Nothing to wrap (empty, whitespace-only or symbol-only text):
          // keep the original node instead of replacing it with a copy.
          if (!runs.some((run) => valid.test(run))) {
            continue;
          }

          // Text that stays unwrapped is buffered so that adjacent pieces
          // end up in a single text node.
          let plain = "";
          for (const run of runs) {
            if (valid.test(run)) {
              if (plain !== "") {
                element.insertBefore(document.createTextNode(plain), node);
                plain = "";
              }
              const newSpan = document.createElement('span');
              newSpan.appendChild(document.createTextNode(run));
              newSpan.id = initIdx.toString();
              newSpan.style = "background:yellow;";
              initIdx++;
              element.insertBefore(newSpan, node);
            } else {
              plain += run;
            }
          }
          if (plain !== "") {
            element.insertBefore(document.createTextNode(plain), node);
          }

          // Every character of the original node has been re-inserted above.
          element.removeChild(node);
        } else if (node.nodeType === Node.ELEMENT_NODE) {
          // Decide based on the child about to be entered, not on its parent.
          // nodeName is upper-cased because e.g. an SVG <style> reports "style".
          const tag = node.nodeName.toUpperCase();
          if (tag !== "STYLE" && tag !== "SCRIPT") {
            run_element(node);
          }
        }
      }
    }