Search code examples
c# .net xml xmlserializer xml-deserialization

Why can't an XML string be deserialized due to prefixes in root elements?


I have the XML below:

<y:input xmlns:y='http://www.blahblah.com/engine/42'>
    <y:datas>
        <y:instance yclass='ReportPeriod' yid="report">
            <language yid='en'/>
            <threshold>0.6</threshold>
            <typePeriod>predefinedPeriod</typePeriod>
            <interval>month</interval>
            <valuePeriod>April</valuePeriod>
            <fund yclass="Fund">
                <name>K</name>
                <indexName>CAC40</indexName>
            </fund>
        </y:instance>
    </y:datas>
</y:input>

I am trying to deserialize it into the following classes:

/// <summary>
/// Serialization model for the <c>fund</c> element: <c>name</c> and <c>indexName</c>
/// child elements plus a <c>yclass</c> attribute. No XML namespace is specified here.
/// </summary>
[XmlRoot(ElementName="fund")]
public class Fund 
{
    [XmlElement(ElementName="name")]
    public string Name { get; set; }

    [XmlElement(ElementName="indexName")]
    public string IndexName { get; set; }

    [XmlAttribute(AttributeName="yclass")]
    public string Yclass { get; set; }
}

/// <summary>
/// Serialization model for the <c>instance</c> element, declared as a root in the
/// <c>http://www.blahblah.com/engine/42</c> namespace. Its child elements and its
/// <c>yclass</c>/<c>yid</c> attributes are mapped by name only, with no explicit
/// namespace or form on any member.
/// </summary>
[XmlRoot(ElementName="instance", Namespace="http://www.blahblah.com/engine/42")]
public class Instance 
{
    // The Language type is referenced here but is not declared in the posted code.
    [XmlElement(ElementName="language")]
    public Language Language { get; set; }

    [XmlElement(ElementName="threshold")]
    public string Threshold { get; set; }

    [XmlElement(ElementName="typePeriod")]
    public string TypePeriod { get; set; }

    [XmlElement(ElementName="interval")]
    public string Interval { get; set; }

    [XmlElement(ElementName="valuePeriod")]
    public string ValuePeriod { get; set; }

    [XmlElement(ElementName="fund")]
    public Fund Fund { get; set; }

    [XmlAttribute(AttributeName="yclass")]
    public string Yclass { get; set; }

    [XmlAttribute(AttributeName="yid")]
    public string Yid { get; set; }
}

/// <summary>
/// Serialization model for the <c>datas</c> element in the
/// <c>http://www.blahblah.com/engine/42</c> namespace; holds a single
/// <c>instance</c> child element in that same namespace.
/// </summary>
[XmlRoot(ElementName="datas", Namespace="http://www.blahblah.com/engine/42")]
public class Datas
{
    [XmlElement(ElementName="instance", Namespace="http://www.blahblah.com/engine/42")]
    public Instance Instance { get; set; }
}

/// <summary>
/// Serialization model for the document root, the <c>input</c> element in the
/// <c>http://www.blahblah.com/engine/42</c> namespace; holds a single
/// <c>datas</c> child element in that same namespace.
/// </summary>
[XmlRoot(ElementName="input", Namespace="http://www.blahblah.com/engine/42")]
public class Input
{
    [XmlElement(ElementName="datas", Namespace="http://www.blahblah.com/engine/42")]
    public Datas Datas { get; set; }

    // Mapped as a namespace-qualified attribute named "y" in the engine namespace.
    [XmlAttribute(AttributeName="y", Namespace="http://www.blahblah.com/engine/42", Form = XmlSchemaForm.Qualified)]
    public string Y { get; set; }
}

However, when deserializing the XML above:

/// <summary>
/// Console entry point that attempts to deserialize <c>file.xml</c> into an <c>Input</c>.
/// </summary>
public static class Program
{
    /// <summary>
    /// Reads the whole file into a string, wraps it in a <c>StringReader</c>, creates an
    /// <c>XmlReader</c> over that reader, and then calls <c>XmlSerializer.Deserialize</c>.
    /// </summary>
    /// <param name="args">Command-line arguments; not used.</param>
    public static void Main(params string[] args)
    {
        var serializer = new XmlSerializer(typeof(Input));
        using (var stringReader = new StringReader(File.ReadAllText("file.xml")))
        {
            using(var xmlReader = XmlReader.Create(stringReader))
            {
                // Note: the text reader (stringReader) is what gets passed here;
                // xmlReader, created just above over the same reader, is not used.
                var instance = (Input)serializer.Deserialize(stringReader);
            }
        }
    }
}

I get an error due to the y prefix...

There is an error in XML document (1, 1). ---> System.Xml.XmlException: Data at the root level is invalid. Line 1, position 1.

After reading posts such as this one: https://stackoverflow.com/a/36163079/4636721, it seems there may be a bug in XmlSerializer.


Solution

  • The cause of the exception is that you are passing stringReader rather than xmlReader to serializer.Deserialize(). You should be passing the XML reader instead:

    // Build the serializer once, then read file.xml through an XmlReader layered
    // on a StreamReader. Both readers are disposed when the using block exits.
    var serializer = new XmlSerializer(typeof(Input));
    Input instance = null;
    using (var fileReader = new StreamReader("file.xml"))
    using (var xmlReader = XmlReader.Create(fileReader))
    {
        // Hand the XmlReader (not the underlying text reader) to the serializer.
        instance = (Input)serializer.Deserialize(xmlReader);
    }
    

    (Apparently XmlReader.Create(stringReader) reads ahead from the text reader, so if you later attempt to read from stringReader directly, it has already been advanced past the start of the root element, which is why the error is reported at line 1, position 1.)

    You also have some errors in your data model. It should look like:

    /// <summary>
    /// Maps the unqualified <c>fund</c> element: a <c>yclass</c> attribute plus
    /// <c>name</c> and <c>indexName</c> child elements.
    /// </summary>
    [XmlRoot("fund")]
    public class Fund
    {
        [XmlAttribute("yclass")]
        public string Yclass { get; set; }

        [XmlElement("name")]
        public string Name { get; set; }

        [XmlElement("indexName")]
        public string IndexName { get; set; }
    }
    
    /// <summary>
    /// Maps the <c>instance</c> element. The empty type namespace makes its child
    /// elements bind as unqualified names instead of taking the container's namespace.
    /// </summary>
    [XmlRoot("instance")]
    [XmlType(Namespace = "")] // Add this
    public class Instance
    {
        [XmlAttribute("yclass")]
        public string Yclass { get; set; }

        [XmlAttribute("yid")]
        public string Yid { get; set; }

        [XmlElement("language")]
        public Language Language { get; set; }

        [XmlElement("threshold")]
        public string Threshold { get; set; }

        [XmlElement("typePeriod")]
        public string TypePeriod { get; set; }

        [XmlElement("interval")]
        public string Interval { get; set; }

        [XmlElement("valuePeriod")]
        public string ValuePeriod { get; set; }

        [XmlElement("fund")]
        public Fund Fund { get; set; }
    }
    
    /// <summary>
    /// Maps the namespace-qualified <c>datas</c> element, which wraps a single
    /// <c>instance</c> element in the same namespace.
    /// </summary>
    [XmlRoot("datas", Namespace = "http://www.blahblah.com/engine/42")]
    public class Datas
    {
        [XmlElement("instance", Namespace = "http://www.blahblah.com/engine/42")]
        public Instance Instance { get; set; }
    }
    
    /// <summary>
    /// Maps the document root, the namespace-qualified <c>input</c> element, which
    /// wraps a single <c>datas</c> element in the same namespace.
    /// </summary>
    [XmlRoot("input", Namespace = "http://www.blahblah.com/engine/42")]
    public class Input
    {
        [XmlElement("datas", Namespace = "http://www.blahblah.com/engine/42")]
        public Datas Datas { get; set; }

        // Remove this member (xmlns:y is a namespace declaration, not an attribute to map):
        //[XmlAttribute(AttributeName="y", Namespace="http://www.blahblah.com/engine/42", Form = XmlSchemaForm.Qualified)]
        //public string Y { get; set; }
    }
    
    // Add this
    /// <summary>
    /// Maps the unqualified <c>language</c> element, which carries only a
    /// <c>yid</c> attribute.
    /// </summary>
    [XmlRoot("language")]
    public class Language
    {
        [XmlAttribute("yid")]
        public string Yid { get; set; }
    }
    

    Notes:

    • xmlns:y='http://www.blahblah.com/engine/42' is an XML namespace declaration and thus should not be mapped to a member in the data model.

    • The child elements of <y:instance ...> are not in any namespace. Unless the namespace of the child elements is specified by attributes somehow, XmlSerializer will assume that they should be in the same namespace as the containing element, here "http://www.blahblah.com/engine/42".

      Thus it is necessary to add [XmlType(Namespace = "")] to Instance to indicate the correct namespace for all child elements created from Instance. (Another option would be to add [XmlElement(Form = XmlSchemaForm.Unqualified)] to each member, but I think it is easier to set a single attribute on the type.)

    • A definition for Language is not included in your question, so I included one.

    • It will be more efficient to deserialize directly from your file using a StreamReader than to read first into a string, then deserialize from the string using a StringReader.

    Working sample fiddle here.