I am using GetSafeHtmlFragment in my website and I found that all of tags except <p>
and <a>
is removed.
I researched around and I found that there is no resolution for it from Microsoft.
Is there any superseded for it or is there any solution?
Thanks.
An alternative solution would be to use the Html Agility Pack in conjunction with your own tags white list :
using System;
using System.IO;
using System.Text;
using System.Linq;
using System.Collections.Generic;
using HtmlAgilityPack;
class Program
{
static void Main(string[] args)
{
var whiteList = new[]
{
"#comment", "html", "head",
"title", "body", "img", "p",
"a"
};
var html = File.ReadAllText("input.html");
var doc = new HtmlDocument();
doc.LoadHtml(html);
var nodesToRemove = new List<HtmlAgilityPack.HtmlNode>();
var e = doc
.CreateNavigator()
.SelectDescendants(System.Xml.XPath.XPathNodeType.All, false)
.GetEnumerator();
while (e.MoveNext())
{
var node =
((HtmlAgilityPack.HtmlNodeNavigator)e.Current)
.CurrentNode;
if (!whiteList.Contains(node.Name))
{
nodesToRemove.Add(node);
}
}
nodesToRemove.ForEach(node => node.Remove());
var sb = new StringBuilder();
using (var w = new StringWriter(sb))
{
doc.Save(w);
}
Console.WriteLine(sb.ToString());
}
}