Search code examples
c#xmlxmldocumentsgml

How can I stop empty XML elements self-closing using XmlDocument in C#?


Before I get jumped on by people saying the XML parser shouldn’t care if the elements are empty or self-closed, there is a reason why I can’t allow self-closed XML elements. The reason is that I’m actually working with SGML not XML and the SGML DTD I’m working with is very strict and doesn't allow it.

What I have is several thousand SGML files which I’ve needed to run XSLT on. I’ve therefore had to convert the SGML to XML temporarily in order to apply the XSLT. I’ve then written a method that converts them back to SGML (essentially just replacing the XML declaration with the SGML declaration and writing back any other entity declarations such as graphic entities).

My problem is that after this conversion back to SGML, when I open the files in my SGML editor, the files don’t parse as the empty elements have been self-closed.

Does anybody know how I can stop empty elements from being self-closed when using XmlDocument?

The methods that convert the SGML to XML and back again are shown below

// Converts the SGML file to XML – I think it’s during this conversion that
// the empty elements get self-closed.

/// <summary>
/// Reads SGML text through an <c>Sgml.SgmlReader</c> and loads the result into
/// a new <see cref="XmlDocument"/>.
/// </summary>
/// <param name="reader">Text source handed to the SGML reader as its input stream.
/// This method does not dispose it explicitly.</param>
/// <returns>The document populated from the SGML reader.</returns>
private XmlDocument convertToXML(TextReader reader)
{
    // Configure the SGML reader: all whitespace is reported and case folding is
    // set to lower case. DocType is left unset.
    var sgmlSource = new Sgml.SgmlReader
    {
        WhitespaceHandling = WhitespaceHandling.All,
        CaseFolding = Sgml.CaseFolding.ToLower,
        InputStream = reader
    };

    // The target document keeps whitespace nodes and is given no XmlResolver.
    var xml = new XmlDocument
    {
        PreserveWhitespace = true,
        XmlResolver = null
    };

    xml.Load(sgmlSource);
    return xml;
}

// method to apply the XSLT stylesheet to the XML document

/// <summary>
/// Runs the required XSLT stylesheet over every ".sgm" file in FilesToConvert:
/// each file is converted to XML, saved to a temporary file, transformed, and
/// then converted back to SGML via convertToSGM. Failures for an individual
/// file are shown in a message box and processing continues with the next file.
/// </summary>
/// <param name="applicFilter">Filter name; used only in the completion message.</param>
private void filterApplic(string applicFilter)
{
    string stylesheet = getRequiredStylesheet(); // do this just once

    if (stylesheet == "")
    {
        MessageBox.Show("The stylesheet was retured empty. Cannot perform Applicability filter.");
        return;
    }

    // Compiled on first use inside the try block (so a load failure is still
    // reported per file, as before) and then reused for every remaining file
    // instead of recompiling the same stylesheet each iteration.
    XslCompiledTransform myXslTrans = null;

    foreach (string file in FilesToConvert)
    {
        fileName = Path.GetFileName(file); // gets just the file name from the path
        fileNameNoExt = Path.GetFileNameWithoutExtension(file);
        string ext = Path.GetExtension(file);

        if (ext != ".sgm")
        {
            continue;
        }

        try
        {
            publicIdentifier = getDoctype(file); // gets the sgml declaration
            entitiesList = getEntitites(file); // gets the list of entities

            // Dispose the reader as soon as the document is loaded so the
            // source file handle is released before any output is written.
            using (TextReader tr = new StreamReader(file))
            {
                myDoc = convertToXML(tr);
            }

            // Built with Path.Combine, like the other output paths.
            string tempXmlPath = Path.Combine(outputFolder, "temp.xml");
            string transformedPath = Path.Combine(outputFolder, fileNameNoExt + ".xml");

            myDoc.Save(tempXmlPath);

            if (myXslTrans == null)
            {
                var compiled = new XslCompiledTransform();
                compiled.Load(stylesheet);
                myXslTrans = compiled; // only cache a successfully loaded transform
            }

            myXslTrans.Transform(tempXmlPath, transformedPath);

            XmlDocument convertedDoc = new XmlDocument();
            convertedDoc.Load(transformedPath);

            convertToSGM(convertedDoc);

            filesTransformed++;
        }
        catch (Exception e)
        {
            MessageBox.Show(e.ToString());
        }
    }

    MessageBox.Show("Complete! " + filesTransformed.ToString() + " files filtered for " + applicFilter);
}


//convert files back to SGML
/// <summary>
/// Serializes <paramref name="myDoc"/> to text, replaces the XML declaration
/// with a "DMODULE" DOCTYPE built from publicIdentifier, appends the entries of
/// entitiesList (when there are any) as an internal subset, and writes the
/// result to "&lt;fileNameNoExt&gt;.sgm" in outputFolder.
/// </summary>
/// <param name="myDoc">The transformed XML document to write out as SGML.</param>
private void convertToSGM(XmlDocument myDoc)
{
    using (var buffer = new StringWriter())
    using (var writer = XmlWriter.Create(buffer, settings))
    {
        myDoc.WriteTo(writer);
        writer.Flush();

        // Swap whichever XML declaration was emitted (utf-16 or utf-8) for the DOCTYPE.
        string doctype = "<!DOCTYPE DMODULE " + publicIdentifier + ">";
        string sgml = buffer.ToString()
            .Replace("<?xml version=\"1.0\" encoding=\"utf-16\"?>", doctype)
            .Replace("<?xml version=\"1.0\" encoding=\"utf-8\"?>", doctype);

        if (entitiesList.Count != 0)
        {
            // One entity per line, each preceded by CRLF.
            string entityBlock = "";
            foreach (string entity in entitiesList)
            {
                entityBlock += "\r\n" + entity;
            }

            // Re-open the declaration after the public identifier and insert the entities.
            sgml = sgml.Replace("//EN\">", "//EN\" [" + entityBlock + "]>");
        }

        string target = Path.Combine(outputFolder, fileNameNoExt + ".sgm");
        File.WriteAllText(target, sgml);
    }
}

Solution

  • One way to do it would be to subclass an appropriate XmlWriter and override WriteEndElement() to call WriteFullEndElement().

    For instance, here's a subclassed version of XmlTextWriter that does the job:

    /// <summary>
    /// An <see cref="XmlTextWriter"/> whose <see cref="WriteEndElement"/> always
    /// writes a full end tag, so empty elements come out as
    /// <c>&lt;a&gt;&lt;/a&gt;</c> rather than <c>&lt;a /&gt;</c>.
    /// </summary>
    public class FullElementXmlTextWriter : XmlTextWriter
    {
        /// <summary>Creates a writer that writes to the given <see cref="TextWriter"/>.</summary>
        public FullElementXmlTextWriter(TextWriter w)
            : base(w)
        {
        }

        /// <summary>Creates a writer that writes to the given stream using the given encoding.</summary>
        public FullElementXmlTextWriter(Stream w, Encoding encoding)
            : base(w, encoding)
        {
        }

        /// <summary>Creates a writer that writes to the named file using the given encoding.</summary>
        public FullElementXmlTextWriter(string filename, Encoding encoding)
            : base(filename, encoding)
        {
        }

        /// <summary>Closes the current element with an explicit end tag.</summary>
        public override void WriteEndElement()
        {
            // Redirect to the base class's full-end-tag implementation.
            base.WriteFullEndElement();
        }
    }
    

    Then use it like:

    // Serialize myDoc through the subclassed writer so that every element,
    // including empty ones, is closed with an explicit end tag.
    string xmltext;
    using (var buffer = new StringWriter())
    {
        using (var fullEndWriter = new FullElementXmlTextWriter(buffer))
        {
            myDoc.WriteTo(fullEndWriter);
        }

        // Read the text only after the XML writer has been disposed.
        xmltext = buffer.ToString();
    }
    

    Or, if you need the control afforded by XmlWriterSettings, you can use the decorator pattern to encapsulate any XmlWriter in a decorator that automatically remaps calls from WriteEndElement() to WriteFullEndElement():

    /// <summary>
    /// Decorator that wraps any <see cref="XmlWriter"/> and remaps
    /// <see cref="WriteEndElement"/> to the full-end-tag form, so empty elements
    /// are never written self-closed.
    /// </summary>
    public class FullElementXmlWriterDecorator : XmlWriterDecorator
    {
        /// <summary>Wraps <paramref name="baseWriter"/>, which receives all output.</summary>
        public FullElementXmlWriterDecorator(XmlWriter baseWriter)
            : base(baseWriter)
        {
        }

        /// <summary>Closes the current element with an explicit end tag.</summary>
        public override void WriteEndElement()
        {
            // Route through the decorator's WriteFullEndElement instead of the
            // ordinary end-element call.
            base.WriteFullEndElement();
        }
    }
    
    /// <summary>
    /// Base class for <see cref="XmlWriter"/> decorators: every overridden member
    /// forwards to the wrapped writer. Content-writing members are skipped while
    /// <see cref="IsSuspended"/> returns true; <see cref="Close"/>,
    /// <see cref="Flush"/>, <see cref="LookupPrefix"/>, the
    /// <c>WriteStartDocument</c> overloads and the state properties always forward.
    /// </summary>
    public class XmlWriterDecorator : XmlWriter
    {
        readonly XmlWriter baseWriter;

        /// <summary>Wraps <paramref name="baseWriter"/>.</summary>
        /// <param name="baseWriter">The writer that receives all forwarded calls.</param>
        /// <exception cref="ArgumentNullException"><paramref name="baseWriter"/> is null.</exception>
        public XmlWriterDecorator(XmlWriter baseWriter)
        {
            if (baseWriter == null)
                throw new ArgumentNullException("baseWriter"); // name the offending argument
            this.baseWriter = baseWriter;
        }

        /// <summary>
        /// When true, content-writing calls are dropped instead of forwarded.
        /// Always false here; subclasses may override it.
        /// </summary>
        protected virtual bool IsSuspended { get { return false; } }

        /// <summary>Closes the wrapped writer.</summary>
        public override void Close()
        {
            baseWriter.Close();
        }

        /// <summary>Flushes the wrapped writer.</summary>
        public override void Flush()
        {
            baseWriter.Flush();
        }

        /// <summary>Returns the wrapped writer's prefix for namespace <paramref name="ns"/>.</summary>
        public override string LookupPrefix(string ns)
        {
            return baseWriter.LookupPrefix(ns);
        }

        public override void WriteBase64(byte[] buffer, int index, int count)
        {
            if (IsSuspended)
                return;
            baseWriter.WriteBase64(buffer, index, count);
        }

        public override void WriteCData(string text)
        {
            if (IsSuspended)
                return;
            baseWriter.WriteCData(text);
        }

        public override void WriteCharEntity(char ch)
        {
            if (IsSuspended)
                return;
            baseWriter.WriteCharEntity(ch);
        }

        public override void WriteChars(char[] buffer, int index, int count)
        {
            if (IsSuspended)
                return;
            baseWriter.WriteChars(buffer, index, count);
        }

        public override void WriteComment(string text)
        {
            if (IsSuspended)
                return;
            baseWriter.WriteComment(text);
        }

        public override void WriteDocType(string name, string pubid, string sysid, string subset)
        {
            if (IsSuspended)
                return;
            baseWriter.WriteDocType(name, pubid, sysid, subset);
        }

        public override void WriteEndAttribute()
        {
            if (IsSuspended)
                return;
            baseWriter.WriteEndAttribute();
        }

        public override void WriteEndDocument()
        {
            if (IsSuspended)
                return;
            baseWriter.WriteEndDocument();
        }

        public override void WriteEndElement()
        {
            if (IsSuspended)
                return;
            baseWriter.WriteEndElement();
        }

        public override void WriteEntityRef(string name)
        {
            if (IsSuspended)
                return;
            baseWriter.WriteEntityRef(name);
        }

        public override void WriteFullEndElement()
        {
            if (IsSuspended)
                return;
            baseWriter.WriteFullEndElement();
        }

        public override void WriteProcessingInstruction(string name, string text)
        {
            if (IsSuspended)
                return;
            baseWriter.WriteProcessingInstruction(name, text);
        }

        public override void WriteRaw(string data)
        {
            if (IsSuspended)
                return;
            baseWriter.WriteRaw(data);
        }

        public override void WriteRaw(char[] buffer, int index, int count)
        {
            if (IsSuspended)
                return;
            baseWriter.WriteRaw(buffer, index, count);
        }

        public override void WriteStartAttribute(string prefix, string localName, string ns)
        {
            if (IsSuspended)
                return;
            baseWriter.WriteStartAttribute(prefix, localName, ns);
        }

        // The two WriteStartDocument overloads forward unconditionally (no IsSuspended check).
        public override void WriteStartDocument(bool standalone)
        {
            baseWriter.WriteStartDocument(standalone);
        }

        public override void WriteStartDocument()
        {
            baseWriter.WriteStartDocument();
        }

        public override void WriteStartElement(string prefix, string localName, string ns)
        {
            if (IsSuspended)
                return;
            baseWriter.WriteStartElement(prefix, localName, ns);
        }

        /// <summary>The wrapped writer's current state.</summary>
        public override WriteState WriteState
        {
            get { return baseWriter.WriteState; }
        }

        /// <summary>
        /// The wrapped writer's current xml:lang scope. Forwarded so callers see
        /// the real state rather than the XmlWriter base-class default.
        /// </summary>
        public override string XmlLang
        {
            get { return baseWriter.XmlLang; }
        }

        /// <summary>
        /// The wrapped writer's current xml:space scope. Forwarded so callers see
        /// the real state rather than the XmlWriter base-class default.
        /// </summary>
        public override XmlSpace XmlSpace
        {
            get { return baseWriter.XmlSpace; }
        }

        public override void WriteString(string text)
        {
            if (IsSuspended)
                return;
            baseWriter.WriteString(text);
        }

        public override void WriteSurrogateCharEntity(char lowChar, char highChar)
        {
            if (IsSuspended)
                return;
            baseWriter.WriteSurrogateCharEntity(lowChar, highChar);
        }

        public override void WriteWhitespace(string ws)
        {
            if (IsSuspended)
                return;
            baseWriter.WriteWhitespace(ws);
        }
    }
    

    If you were doing async writing, I believe (but have not tested) that you'd want to decorate the async methods as well.

    Then use it like:

    // Serialize myDoc through a settings-configured writer wrapped in the
    // decorator, so every element is closed with an explicit end tag.
    string xmltext;
    using (var buffer = new StringWriter())
    {
        using (var configuredWriter = XmlWriter.Create(buffer, settings))
        using (var fullEndWriter = new FullElementXmlWriterDecorator(configuredWriter))
        {
            myDoc.WriteTo(fullEndWriter);
        }

        // Read the text only after both XML writers have been disposed.
        xmltext = buffer.ToString();
    }
    

    fiddle.