Search code examples
javascriptregexsearchmatchregex-group

RegEx to match phrases with spaces


I have an array of keywords in strings:

var keywords = ["Hello World", "or"];

and I have a line of text, e.g.:

var text = "Hello World, Hello World";

I am using a RegEx to find the keywords in the text and highlight them, so that the resulting HTML would be:

<span class="highlight">Hello World</span>, <span class="highlight">Hello World</span>

However, my RegEx result is returning me this:

[
 0: "or"                          ----------> shouldn't it be "Hello World"?
 index: 7
 input: "Hello World, Hello World"
]

This is my code:

/**
 * Finds the first occurrence of any keyword in the given text.
 *
 * The keywords are joined with "|" into one alternation pattern, so each
 * keyword is interpreted as regular-expression source rather than as
 * literal text. The pattern has no global flag, so only the first match
 * is reported.
 *
 * @param {string} text - The text to search.
 * @param {string[]} keywords - The alternatives to look for.
 * @returns {Array|null} The RegExp#exec match array (with `index` and
 *     `input` properties) for the first match, or null if nothing matches.
 */
function searchFn(text, keywords) {
  // Declared with `var` so the pattern stays local instead of leaking
  // as an implicit global (which throws in strict mode).
  var regex = new RegExp(keywords.join("|"));
  return regex.exec(text);
}

// Sample input: the line of text to search.
var text = "Hello World, Hello World";
// Keywords to look for; the first one is a phrase containing a space.
var keywords = ["Hello World", "or"];

// Run the search; the returned match is neither stored nor printed here.
searchFn(text, keywords);

Is my RegEx wrong?


Solution

  • You need to sort the keywords by length in descending order, use unambiguous word boundaries, add the global modifier to match all occurrences, and use the resulting regex in a String#replace call like this:

    /**
     * Wraps every match of `rx` in a highlight span.
     *
     * `rx` is expected to have two capture groups: group 1 is re-emitted
     * unchanged in front of the span, and group 2 becomes the span's content.
     *
     * @param {string} text - The text to process.
     * @param {RegExp} rx - The pattern whose matches are replaced.
     * @returns {string} The text with the replacement applied.
     */
    function searchFn(text, rx) {
      var highlightTemplate = '$1<span class="highlight">$2</span>';
      return text.replace(rx, highlightTemplate);
    }
    
    // Sample input and the phrases to highlight; one keyword contains
    // regex metacharacters (the parentheses).
    var text = "Hello World, Hello World,Hello (World)!";
    var keywords = ["Hello World", "or", "Hello (World)"];

    // Escape regex metacharacters so that each keyword is matched literally.
    var escapedKeywords = keywords.map(function (keyword) {
      return keyword.replace(/[-\/\\^$*+?.()|[\]{}]/g, '\\$&');
    });

    // Longest alternatives first. `map` returned a new array, so sorting it
    // in place leaves `keywords` in its original order.
    escapedKeywords.sort(function (left, right) {
      return right.length - left.length;
    });

    // Group 1 captures the start of the string or a preceding non-word
    // character; the lookahead rejects a match followed by a word character.
    var regex = new RegExp("(^|\\W)(" + escapedKeywords.join("|") + ")(?!\\w)", "g");
    console.log(searchFn(text, regex));

    The regex will look like (^|\W)(Hello \(World\)|Hello World|or)(?!\w) and will match Hello (World), Hello World, or or as whole words. Since the keywords may contain special characters, you need to escape those characters and use unambiguous word boundaries, (^|\W) and (?!\w); a plain \b would fail next to a keyword that starts or ends with a non-word character such as a parenthesis. This requires a specific replacement pattern, namely $1<span...>$2</span>, because the non-word character captured into Group 1 (if one matched) must be put back rather than dropped. Sorting is necessary in case you have both Hello World and a shorter keyword such as Hello or World, and you want the longer keywords to be tried first.