Search code examples
jqueryhighlightingdiacritics

Highlight accented characters


I'm using the jquery.highlight plugin: http://code.google.com/p/gce-empire/source/browse/trunk/jquery.highlight.js?r=2

When I search an accented word, e.g. "café", it won't highlight the "café" matches... With "cafe" works fine, though.. So, I want to be able to highlight accented words... Any idea on how to do this with this plugin?

Thanks!

EDIT Ok, this is working, but I had to add a space between the first "(" and the last ")" on the 'pattern' and 'patternNoAccents' vars, because if there was an accented character at the beggining or end of the word, it wouldn't work (e.g. 'café')

The problem with this is that it highlights the word AND a space before and after. Is there any other solution?

jQuery.extend({
            highlight: function (node, re, nodeName, className) {
                if (node.nodeType === 3) {
                    var match = node.data.match(re);
                    if (match) {
                        var highlight = document.createElement(nodeName || 'span');
                        highlight.className = className || 'highlight';
                        var wordNode = node.splitText(match.index);
                        wordNode.splitText(match[0].length);
                        var wordClone = wordNode.cloneNode(true);
                        highlight.appendChild(wordClone);
                        wordNode.parentNode.replaceChild(highlight, wordNode);
                        return 1; //skip added node in parent
                    }
                } else if ((node.nodeType === 1 && node.childNodes) && // only element nodes that have children
                        !/(script|style)/i.test(node.tagName) && // ignore script and style nodes
                        !(node.tagName === nodeName.toUpperCase() && node.className === className)) { // skip if already highlighted
                    for (var i = 0; i < node.childNodes.length; i++) {
                        i += jQuery.highlight(node.childNodes[i], re, nodeName, className);
                    }
                }
                return 0;
            }
        });

        jQuery.fn.unhighlight = function (options) {
            var settings = { className: 'highlight', element: 'span' };
            jQuery.extend(settings, options);

            return this.find(settings.element + "." + settings.className).each(function () {
                var parent = this.parentNode;
                parent.replaceChild(this.firstChild, this);
                parent.normalize();
            }).end();
        };

        function stripAccents(str) { 
                var rExps=[ 
                {re:/[\xC0-\xC6]/g, ch:'A'}, 
                {re:/[\xE0-\xE6]/g, ch:'a'}, 
                {re:/[\xC8-\xCB]/g, ch:'E'}, 
                {re:/[\xE8-\xEB]/g, ch:'e'}, 
                {re:/[\xCC-\xCF]/g, ch:'I'}, 
                {re:/[\xEC-\xEF]/g, ch:'i'}, 
                {re:/[\xD2-\xD6]/g, ch:'O'}, 
                {re:/[\xF2-\xF6]/g, ch:'o'}, 
                {re:/[\xD9-\xDC]/g, ch:'U'}, 
                {re:/[\xF9-\xFC]/g, ch:'u'}, 
                {re:/[\xD1]/g, ch:'N'}, 
                {re:/[\xF1]/g, ch:'n'} ]; 
                for(var i=0, len=rExps.length; i<len; i++) 
                        str=str.replace(rExps[i].re, rExps[i].ch); 
                return str; 
        };

        jQuery.fn.highlight = function (words, options) {
            var settings = { className: 'highlight', element: 'span', caseSensitive: false, wordsOnly: false };
            jQuery.extend(settings, options);

            if (words.constructor === String) {
                words = [words];
            }
            words = jQuery.grep(words, function(word, i){
              return word != '';
            });
            words = jQuery.map(words, function(word, i) {
              return word.replace(/[-[\]{}()*+?.,\\^$|#\s]/g, "\\$&");
            });
            wordsNoAccents = jQuery.map(words, function(word, i) {
              return stripAccents(word);
            });

            if (words.length == 0) { return this; };

            var flag = settings.caseSensitive ? "" : "i";
            var pattern = "( " + words.join("|") + " )";

            if (settings.wordsOnly) {
                pattern = "\\b" + pattern + "\\b";
            }

            var patternNoAccents = "( " + wordsNoAccents.join("|") + " )";

            if (settings.wordsOnly) {
                patternNoAccents = "\\b" + patternNoAccents + "\\b";
            }

            var re = new RegExp(pattern, flag);
            var reNA = new RegExp(patternNoAccents, flag);

            console.log(re);
                console.log(reNA);
            return this.each(function () {
                jQuery.highlight(this, re, settings.element, settings.className);
                jQuery.highlight(this, reNA, settings.element, settings.className);
            });
        };

Solution

  • Check out this example. http://jsfiddle.net/bNPjQ/ It will highlight both Cafe and Café if you pass in Café. Not sure if this is the desired function, but I can help you out if you want it to only highlight Café.

    I started with the original plugin, added your stripAccents function, changed one line, and added a call to stripAccents in the highlight function. I marked everything I changed with a comment.

    When you run it, it will do a highlight without wordsOnly, then 2 seconds later unhighlight and do another highlight with wordsOnly.