Search code examples
javascript, regex, html-entities

Replacing &nbsp; from a JavaScript DOM text node


I am processing xhtml using javascript. I am getting the text content for a div node by concatenating the nodeValue of all child nodes where nodeType == Node.TEXT_NODE.

The resulting string sometimes contains a non-breaking space entity. How do I replace this with a regular space character?

My div looks like this...

<div><b>Expires On</b> Sep 30, 2009 06:30&nbsp;AM</div>

The following suggestions found on the web did not work:

// Attempt 1 (quoted as not working). The pattern is anchored with ^ and $, so
// it only matches when the ENTIRE string is: optional leading \xa0 characters,
// a run containing no \xa0, then optional trailing \xa0 characters. A string
// with a \xa0 in the middle (e.g. "06:30\xa0AM") therefore does not match and
// is returned unchanged. When the pattern does match, the whole string is
// replaced by "" because the captured group is never used in the replacement.
var cleanText = text.replace(/^\xa0*([^\xa0]*)\xa0*$/g,"");


// Attempt 2 (quoted as not working). NOTE(review): this calls
// `replaceHtmlEntities`, but the helper defined below is spelled
// `replaceHtmlEntites` (missing the second "i"), so the name does not resolve.
// If the statements run in the order shown, the call also precedes the
// `var` assignment of the helper.
var cleanText = replaceHtmlEntities(text);

/**
 * Replaces the literal entity strings &nbsp;, &amp;, &quot;, &lt; and &gt;
 * in a string with the characters they stand for.
 *
 * The string is scanned once, so text produced by a replacement is not
 * decoded again (e.g. "&amp;lt;" becomes "&lt;", not "<").
 *
 * @param {string} s - Text that may contain the literal entity strings.
 * @returns {string} A copy of `s` with the recognised entities replaced.
 */
var replaceHtmlEntites = (function () {
  // Entity name (without the surrounding "&" and ";") -> replacement character.
  var charByEntity = {
    nbsp: " ",
    amp: "&",
    quot: "\"",
    lt: "<",
    gt: ">"
  };

  // Captures the entity name so it can be looked up in the table above.
  var entityPattern = /&(nbsp|amp|quot|lt|gt);/g;

  // Replacement callback: every name the pattern can capture has a table entry.
  function decodeEntity(wholeMatch, name) {
    return charByEntity[name];
  }

  return function (s) {
    var decoded = s.replace(entityPattern, decodeEntity);
    return decoded;
  };
})();

Any suggestions?


Solution

  • This is much easier than you're making it. The text node will not contain the literal string "&nbsp;"; it will contain the corresponding character, which has character code 160 (U+00A0).

    /**
     * Returns a copy of `str` in which every non-breaking space character
     * (U+00A0, character code 160) is replaced by a regular space.
     *
     * @param {string} str - Text to clean, e.g. a text node's nodeValue.
     * @returns {string} The text with all U+00A0 characters turned into " ".
     */
    function replaceNbsps(str) {
      // \u00a0 is the character with code 160; the g flag replaces every occurrence.
      var nbspPattern = /\u00a0/g;
      return str.replace(nbspPattern, " ");
    }
    
    // Usage: write the cleaned string back into the node.
    // (textNode is presumably the DOM text node being processed; it is obtained elsewhere.)
    textNode.nodeValue = replaceNbsps(textNode.nodeValue);
    

    UPDATE

    Even easier:

    // Inline form without a helper: /\u00a0/g matches every U+00A0 character
    // (code 160) in the node's text, and each match is replaced by a regular space.
    textNode.nodeValue = textNode.nodeValue.replace(/\u00a0/g, " ");