Search code examples
c# · .net · xml · xmltextwriter

Check output size using .NET XmlTextWriter


I need to generate an XML file and I need to fit as much data into it as possible, BUT there is a file size limit. So I need to keep inserting data until something says no more. How do I figure out the XML file size without repeatedly writing it to a file?


Solution

  • I agree with John Saunders. Here's some code that does basically what he's describing: it behaves like an XmlSerializer, but instead of writing to a single FileStream it uses a MemoryStream as intermediate storage and then splits the result across several files. It may be more efficient to extend Stream, though.

    /// <summary>
    /// Serializes an object to XML and splits the resulting UTF-8 bytes across
    /// numbered part files, none of which is larger than a fixed number of bytes.
    /// </summary>
    /// <typeparam name="TObj">Type of the object to serialize.</typeparam>
    public class PartitionedXmlSerializer<TObj>
    {
        private readonly int _fileSizeLimit;

        /// <summary>
        /// Creates a serializer that caps every part file at
        /// <paramref name="fileSizeLimit"/> bytes.
        /// </summary>
        /// <param name="fileSizeLimit">Maximum size of one part file, in bytes. Must be positive.</param>
        /// <exception cref="ArgumentOutOfRangeException">
        /// <paramref name="fileSizeLimit"/> is zero or negative.
        /// </exception>
        public PartitionedXmlSerializer(int fileSizeLimit)
        {
            // A non-positive limit would otherwise surface later as a division by
            // zero or an invalid buffer size, far away from the actual mistake.
            if (fileSizeLimit <= 0)
            {
                throw new ArgumentOutOfRangeException(
                    "fileSizeLimit",
                    fileSizeLimit,
                    "The file size limit must be greater than zero.");
            }

            _fileSizeLimit = fileSizeLimit;
        }

        /// <summary>
        /// Serializes <paramref name="obj"/> to XML in memory and writes it out as
        /// consecutive part files. Each file name is <paramref name="filenameBase"/>
        /// with its extension replaced by <c>xml.partN</c>, where N is the zero-based
        /// part index, zero-padded so that all part names have the same length.
        /// Concatenating the parts in index order yields the complete XML document.
        /// </summary>
        /// <param name="filenameBase">Path used as the base name for the part files.</param>
        /// <param name="obj">Object to serialize.</param>
        public void Serialize(string filenameBase, TObj obj)
        {
            using (var memoryStream = new MemoryStream())
            {
                // Write UTF-8 without a byte order mark so the parts contain nothing
                // but the XML text itself.
                var settings = new XmlWriterSettings
                {
                    Encoding = new System.Text.UTF8Encoding(false)
                };

                // serialize the object in the memory stream
                using (var xmlWriter = XmlWriter.Create(memoryStream, settings))
                {
                    new XmlSerializer(typeof(TObj)).Serialize(xmlWriter, obj);
                }

                memoryStream.Seek(0, SeekOrigin.Begin);

                var extensionFormat = GetExtensionFormat(memoryStream.Length);

                // Never allocate more than the document actually needs.
                var buffer = new byte[(int)Math.Min(_fileSizeLimit, memoryStream.Length)];

                // Split the raw bytes (not decoded characters) into files, so the
                // limit holds on disk even when the XML contains multi-byte characters.
                var i = 0;
                int readLength;
                while ((readLength = memoryStream.Read(buffer, 0, buffer.Length)) > 0)
                {
                    var extension = string.Format(
                        System.Globalization.CultureInfo.InvariantCulture,
                        extensionFormat,
                        i++);
                    var filename = Path.ChangeExtension(filenameBase, extension);

                    using (var fileStream = new FileStream(filename, FileMode.Create, FileAccess.Write))
                    {
                        fileStream.Write(buffer, 0, readLength);
                    }
                }
            }
        }

        /// <summary>
        /// Builds the composite format string for part-file extensions, with enough
        /// zero padding for the highest part index.
        /// </summary>
        /// <param name="fileLength">Total length of the serialized document, in bytes.</param>
        /// <returns>
        /// A format string such as <c>xml.part{0:000}</c> that takes the part index
        /// as its only argument.
        /// </returns>
        private string GetExtensionFormat(long fileLength)
        {
            // Round up: a trailing partial chunk still needs its own file.
            var numFiles = (fileLength + _fileSizeLimit - 1) / _fileSizeLimit;

            // Indices are zero-based, so the widest number to print is numFiles - 1.
            var maxIndex = Math.Max(numFiles - 1, 0L);
            var digits = maxIndex
                .ToString(System.Globalization.CultureInfo.InvariantCulture)
                .Length;

            return string.Format("xml.part{{0:{0}}}", new string('0', digits));
        }
    }
    

    To use it, initialize it with the maximum size of each part file, then call Serialize with the base file path and the object to serialize.

    /// <summary>
    /// Sample payload type used to demonstrate partitioned XML serialization.
    /// </summary>
    public class MyType
    {
        /// <summary>Sample integer value.</summary>
        public int MyInt { get; set; }

        /// <summary>Sample string value.</summary>
        public string MyString { get; set; }
    }
    
    /// <summary>
    /// Demonstrates the serializer: builds a sample <c>MyType</c> and writes it
    /// out in parts based on the name "myFilename", using a limit of 2.
    /// </summary>
    public void Test()
    {
        var sample = new MyType();
        sample.MyInt = 42;
        sample.MyString = "hello there this is my string";

        var serializer = new PartitionedXmlSerializer<MyType>(2);
        serializer.Serialize("myFilename", sample);
    }
    

    This particular example will generate an XML file partitioned into the following parts:

    myFilename.xml.part000
    myFilename.xml.part001
    myFilename.xml.part002
    myFilename.xml.part003
    ...
    myFilename.xml.part110