I have an array of keyword strings:
var keywords = ["Hello World", "or"];
and I have a line of text, e.g.:
var text = "Hello World, Hello World";
I am using a RegEx to find the keywords in the text and highlight them, so that the resulting HTML would be:
<span class="highlight">Hello World</span>, <span class="highlight">Hello World</span>
However, my RegEx call returns this:
[
0: "or" ----------> shouldn't it be "Hello World"?
index: 7
input: "Hello World, Hello World"
]
This is my code:
/**
 * Find the first occurrence of any of the given keywords in a text.
 *
 * The keywords are joined with "|" and passed to the RegExp constructor
 * unescaped, so any regex metacharacters they contain are interpreted as
 * regex syntax rather than literal characters.
 *
 * @param {string} text - The text to search.
 * @param {string[]} keywords - Alternatives to look for.
 * @returns {Array|null} The RegExp#exec match array (with `index` and
 *   `input` properties) for the first match, or null if nothing matches.
 */
function searchFn(text, keywords) {
  // Declared locally: assigning to an undeclared `regex` would leak a
  // global (and throw a ReferenceError in strict mode / ES modules).
  var regex = new RegExp(keywords.join("|"));
  return regex.exec(text);
}
// Sample input from the question: the keywords to look for and the text to scan.
var keywords = ["Hello World", "or"];
var text = "Hello World, Hello World";

// Run the search; the returned match is not stored here.
searchFn(text, keywords);
Is my RegEx wrong?
You need to sort the keywords by length in descending order, use unambiguous word boundaries, add the global modifier to match all occurrences, and use the regex in a String#replace
call like this:
/**
 * Wrap the matches of `rx` found in `text` in a highlight span.
 *
 * The replacement re-emits capture group 1 unchanged in front of the span
 * and places capture group 2 inside it, so `rx` is expected to capture the
 * leading boundary in group 1 and the keyword in group 2.
 *
 * @param {string} text - The text to process.
 * @param {RegExp} rx - The pattern whose matches are replaced.
 * @returns {string} The text with matches replaced by the span markup.
 */
function searchFn(text, rx) {
  // $1 = whatever preceded the keyword inside the match, $2 = the keyword.
  var highlightTemplate = '$1<span class="highlight">$2</span>';
  return text.replace(rx, highlightTemplate);
}
// Keywords to highlight and the sample text to run them against.
var keywords = ["Hello World", "or", "Hello (World)"];
var text = "Hello World, Hello World,Hello (World)!";

// One global regex of the form (^|\W)(kw1|kw2|...)(?!\w).
var regex = new RegExp(
  "(^|\\W)(" +
    keywords
      .map(function (keyword) {
        // Backslash-escape regex metacharacters so the keyword matches literally.
        return keyword.replace(/[-\/\\^$*+?.()|[\]{}]/g, '\\$&');
      })
      .sort(function (left, right) {
        // Longest alternative first, so longer keywords are tried before shorter ones.
        return right.length - left.length;
      })
      .join("|") +
    ")(?!\\w)",
  "g"
);

console.log(searchFn(text, regex));
The regex will look like `(^|\W)(Hello \(World\)|Hello World|or)(?!\w)` and will match `Hello (World)`, `Hello World` or `or` only as whole words. Since the keywords can contain special characters, you need to escape those characters in the keywords and use unambiguous word boundaries, `(^|\W)` and `(?!\w)`. The leading boundary requires a specific replacement pattern, namely `$1<span...>$2</span>`, since we do not want to drop the non-word char captured into Group 1 if it matched. Sorting is necessary in case you have both `Hello World` and `Hello` / `world` and you want to handle longer keywords first.