I'm working on a C# app that saves the HTML of the current IE tab, which I locate with EnumWindows. I already have the HTMLDocument and can write it to an HTML file from its outerHTML ({HTMLDocument}.documentElement.outerHTML) using HtmlAgilityPack, but the output file has no doctype.
I see that HTMLDocument has a doctype property. How can I convert it to a string that matches the doctype declaration in the original HTML file?
I solved it by casting htmlDocument.doctype
to a dynamic object. In addition, you can get any other nodes that sit outside the <html>
tag by looping over the (dynamic)htmlDocument.childNodes
list.
/// <summary>
/// Rebuilds the <c>&lt;!DOCTYPE ...&gt;</c> declaration from the browser document's
/// <c>doctype</c> node and inserts it into the HtmlAgilityPack document in front of the
/// root <c>html</c> element. Does nothing when the browser document has no usable doctype.
/// </summary>
/// <param name="htmlDocument">Browser document whose <c>doctype</c> property is read.</param>
/// <param name="document">HtmlAgilityPack document that receives the declaration.</param>
private static void InsertDocType(HTMLDocument htmlDocument, HtmlDocument document)
{
    // A document without a doctype declaration has nothing to insert; check the raw
    // reference before going late-bound so a missing doctype does not fail at bind time.
    object rawDoctype = htmlDocument.doctype;
    if (rawDoctype == null)
    {
        return;
    }

    // The doctype members are read late-bound through dynamic.
    dynamic doctype = rawDoctype;

    // NOTE(review): absent ids may come back from COM as non-string values (e.g. DBNull)
    // rather than null; "as string" turns anything that is not a string into null.
    // Confirm against the actual interop type.
    string name = doctype.name as string;
    string publicId = doctype.publicId as string;
    string systemId = doctype.systemId as string;
    if (string.IsNullOrEmpty(name))
    {
        return;
    }

    // The declaration is stored as a comment node holding the raw "<!DOCTYPE ...>" text.
    HtmlCommentNode doctypeNode =
        document.CreateComment(BuildDocTypeDeclaration(name, publicId, systemId));

    // Place the declaration directly before <html>; when there is no <html> element,
    // put it at the very start of the document instead.
    HtmlNode htmlNode = document.DocumentNode.SelectSingleNode("/html");
    if (htmlNode != null)
    {
        document.DocumentNode.InsertBefore(doctypeNode, htmlNode);
    }
    else
    {
        document.DocumentNode.PrependChild(doctypeNode);
    }
}

// Formats "<!DOCTYPE name [PUBLIC "publicId"] [SYSTEM] ["systemId"]>" followed by a newline.
private static string BuildDocTypeDeclaration(string name, string publicId, string systemId)
{
    StringBuilder declaration = new StringBuilder("<!DOCTYPE ");
    declaration.Append(name);

    bool hasPublicId = !string.IsNullOrEmpty(publicId);
    if (hasPublicId)
    {
        declaration.Append(" PUBLIC \"").Append(publicId).Append("\"");
    }

    if (!string.IsNullOrEmpty(systemId))
    {
        // A system id with no public id must be introduced by the SYSTEM keyword;
        // a bare quoted string after the name is not a valid declaration.
        if (!hasPublicId)
        {
            declaration.Append(" SYSTEM");
        }

        declaration.Append(" \"").Append(systemId).Append("\"");
    }

    declaration.Append(">");
    declaration.Append(Environment.NewLine);
    return declaration.ToString();
}