Search code examples
Tags: xml, regex, apache-flex, actionscript, string-parsing

Actionscript/flex3: Escape XML special characters like <, >, " and ' inside/within tag contents


Is there a way to escape XML special characters inside XML tag contents by processing the XML only as a string? Can it be done with a regular expression (regexp)? I get a malformed-XML runtime error when trying to create a new XML() from the string, because it contains a "<" character inside the contents of some tag.


Solution

  • You can use something like this:

     /**
      * Escapes the XML special characters (&, <, >, " and ') that occur in the
      * text BETWEEN tags of an XML-like string, leaving the tags themselves
      * (including their attributes) untouched.
      *
      * A "tag" is whatever the pattern below matches: a "<" immediately
      * followed by "?", "/", an ASCII letter or "_", then any run of characters
      * other than "<" up to the first ">". Consequences of that definition:
      *   - a stray "<" in text (e.g. "1 < 2") is not a tag and becomes "&lt;";
      *   - constructs starting with "<!" (comments, CDATA, DOCTYPE) are NOT
      *     recognised as tags and are therefore escaped like ordinary text;
      *   - every "&" outside a tag is escaped, so an entity that is already
      *     present in the input (e.g. "&amp;") becomes "&amp;amp;".
      *
      * @param a_string The raw XML-like string to sanitise.
      * @return The string with special characters outside tags replaced by
      *         their predefined entities. A null or empty input is returned
      *         unchanged.
      */
     public static function escapeXMLTagContents(a_string:String):String{
         // Nothing to scan; also avoids a null dereference below.
         if (a_string == null || a_string.length == 0) {
             return a_string;
         }

         var l_tagsMatch:RegExp = /<(\?|[a-zA-Z_]{1}|\/{1})[^<]*?>/g,
             l_pieces:Array = [],
             l_cursor:int = 0,
             l_match:Object = null,
             l_tagText:String = "";

         // Single left-to-right pass: exec() with the g flag resumes after the
         // previous match, and a match is never empty (at least "<x>"), so the
         // loop always advances.
         while ((l_match = l_tagsMatch.exec(a_string)) != null) {
             l_tagText = String(l_match[0]);
             // Text between the previous tag and this one gets escaped...
             l_pieces.push(escapeXMLSpecialChars(a_string.substring(l_cursor, l_match.index)));
             // ...while the tag itself is copied verbatim.
             l_pieces.push(l_tagText);
             l_cursor = l_match.index + l_tagText.length;
         }

         // Trailing text after the last tag (or the whole string if no tag matched).
         l_pieces.push(escapeXMLSpecialChars(a_string.substring(l_cursor)));

         return l_pieces.join("");
     }

     /**
      * Replaces the five XML special characters in a plain-text fragment with
      * their predefined entities. "&" is handled first so that the ampersands
      * introduced by the other replacements are not escaped again.
      */
     private static function escapeXMLSpecialChars(a_text:String):String{
         return a_text
             .replace(/&/g, "&amp;")
             .replace(/</g, "&lt;")
             .replace(/>/g, "&gt;")
             .replace(/"/g, "&quot;")
             .replace(/'/g, "&apos;");
     }