I'd like to compare two strings containing HTML in which the inline style properties appear in a different order.
Example:
string str1="""<p><strong style="font-size: 36px; color: rgb(153, 51, 255);">Hello </strong><em><u>World</u></em></p>""";
string str2="""<p><strong style="color: rgb(153, 51, 255); font-size: 36px;">Hello </strong><em><u>World</u></em></p>""";
I care about the text as well as the style, so I want the comparison to report the two strings as identical, because they produce the same text with the same rendered HTML. However, an ordinary string comparison reports that the two strings are different.
How can I compare these two strings using C#, not JavaScript?
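To compare the two strings you need a few steps: parse both HTML fragments, rewrite each one into a canonical form (attributes sorted by name, `style` declarations sorted by property name, whitespace collapsed), and then compare the canonical strings.
To do that, first add the NuGet package HtmlAgilityPack (the code also needs using directives for `System`, `System.Linq`, `System.Text`, `System.Text.RegularExpressions` and `HtmlAgilityPack`). Here is sample code: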
/// <summary>
/// Console sample that decides whether two HTML fragments are equivalent even when
/// their attributes or inline <c>style</c> declarations are written in a different order.
/// </summary>
/// <remarks>
/// Requires the HtmlAgilityPack NuGet package and using directives for <c>System</c>,
/// <c>System.Linq</c>, <c>System.Text</c>, <c>System.Text.RegularExpressions</c> and
/// <c>HtmlAgilityPack</c>.
/// </remarks>
class Program
{
    // Built once and reused; whitespace normalization runs for every text node and attribute.
    private static readonly Regex s_whitespaceRun = new Regex(@"\s+", RegexOptions.Compiled);

    private static void Main()
    {
        string str1 = "<p><strong style=\"font-size: 36px; color: rgb(153, 51, 255);\">Hello </strong><em><u>World</u></em></p>";
        string str2 = "<p><strong style=\"color: rgb(153, 51, 255); font-size: 36px;\">Hello </strong><em><u>World</u></em></p>";
        bool areIdentical = AreHtmlStringsIdentical(str1, str2);
        Console.WriteLine($"The HTML strings are identical: {areIdentical}");
    }

    /// <summary>
    /// Parses both fragments, converts each to a canonical string and compares the results.
    /// </summary>
    /// <param name="html1">First HTML fragment.</param>
    /// <param name="html2">Second HTML fragment.</param>
    /// <returns><c>true</c> when both fragments have the same canonical form.</returns>
    private static bool AreHtmlStringsIdentical(string html1, string html2)
    {
        var doc1 = new HtmlDocument();
        doc1.LoadHtml(html1);
        var doc2 = new HtmlDocument();
        doc2.LoadHtml(html2);

        string canonicalHtml1 = GetCanonicalHtml(doc1.DocumentNode);
        string canonicalHtml2 = GetCanonicalHtml(doc2.DocumentNode);
        return string.Equals(canonicalHtml1, canonicalHtml2, StringComparison.Ordinal);
    }

    /// <summary>
    /// Recursively serializes <paramref name="node"/> into a canonical form: attributes are
    /// sorted by name, <c>style</c> declarations are sorted by property name, and whitespace
    /// in text and attribute values is collapsed.
    /// </summary>
    /// <param name="node">Node to serialize.</param>
    /// <returns>The canonical string for the node and all of its descendants.</returns>
    private static string GetCanonicalHtml(HtmlNode node)
    {
        if (node.NodeType == HtmlNodeType.Comment)
        {
            // Comments are not rendered, so they must not influence the comparison.
            return string.Empty;
        }

        if (node.NodeType == HtmlNodeType.Text)
        {
            return NormalizeWhitespace(node.InnerText);
        }

        var builder = new StringBuilder();
        builder.Append('<').Append(node.Name);

        // Ordinal ordering keeps the canonical form independent of the current culture.
        var sortedAttributes = node.Attributes.OrderBy(a => a.Name, StringComparer.Ordinal);
        foreach (var attribute in sortedAttributes)
        {
            string value = attribute.Value;
            if (string.Equals(attribute.Name, "style", StringComparison.OrdinalIgnoreCase))
            {
                value = NormalizeStyleAttribute(value);
            }

            builder.Append(' ')
                .Append(attribute.Name)
                .Append("=\"")
                .Append(NormalizeWhitespace(value))
                .Append('"');
        }

        builder.Append('>');
        foreach (var child in node.ChildNodes)
        {
            builder.Append(GetCanonicalHtml(child));
        }

        builder.Append("</").Append(node.Name).Append('>');
        return builder.ToString();
    }

    /// <summary>
    /// Trims <paramref name="input"/> and collapses every run of whitespace into one space.
    /// </summary>
    /// <param name="input">Text to normalize; <c>null</c> is treated as empty.</param>
    /// <returns>The normalized text.</returns>
    private static string NormalizeWhitespace(string input)
    {
        if (string.IsNullOrEmpty(input))
        {
            return string.Empty;
        }

        return s_whitespaceRun.Replace(input.Trim(), " ");
    }

    /// <summary>
    /// Rewrites an inline style value as <c>name: value; name: value;</c> with the
    /// declarations sorted by property name.
    /// </summary>
    /// <param name="style">Raw value of a <c>style</c> attribute; <c>null</c> is treated as empty.</param>
    /// <returns>The normalized declaration list, or an empty string when there are no declarations.</returns>
    /// <remarks>
    /// Declarations are separated on every <c>;</c>, so a semicolon inside a quoted string or
    /// <c>url(...)</c> is still treated as a separator.
    /// </remarks>
    private static string NormalizeStyleAttribute(string style)
    {
        if (string.IsNullOrWhiteSpace(style))
        {
            return string.Empty;
        }

        var declarations = style.Split(';')
            .Select(s => s.Trim())
            .Where(s => s.Length > 0)
            .Select(s =>
            {
                // Split on the first colon only, so values such as url(http://...) stay intact.
                var parts = s.Split(new[] { ':' }, 2);
                var name = parts[0].Trim();

                // Property names are case-insensitive, except custom properties (--name).
                if (!name.StartsWith("--", StringComparison.Ordinal))
                {
                    name = name.ToLowerInvariant();
                }

                return new
                {
                    Name = name,
                    Value = parts.Length > 1 ? NormalizeWhitespace(parts[1]) : ""
                };
            })
            // OrderBy is stable, so repeated declarations of one property keep their order.
            .OrderBy(d => d.Name, StringComparer.Ordinal)
            .Select(d => $"{d.Name}: {d.Value}")
            .ToList(); // materialize once instead of re-running the query

        return declarations.Count == 0
            ? string.Empty
            : string.Join("; ", declarations) + ";";
    }
}