Search code examples
pythonpython-3.xlxmlxsd-validationxml-validation

How to retrieve all validation differences when comparing XML file with XSD file


I have been trying to validate an XML file against an XSD file, but unfortunately I only get the first difference when checking the files. I am using assertValid(), which reports only the first difference, i.e. the "pane" element in the file. It should find all the differences, i.e. both "pane" and "subpanel". I want all the validation differences written to the log file. Could you please help me find all the differences?

Python Code

from lxml import etree
import traceback

def validate_xml_and_xsd_files(xml_path: str, xsd_path: str) -> bool:
    """Validate an XML file against an XSD schema and print every recorded error.

    Args:
        xml_path: Path of the XML document to check.
        xsd_path: Path of the XSD schema to check it against.

    Returns:
        True when the document is valid; False when it is invalid or when
        either file could not be loaded/parsed.
    """
    try:
        schema = etree.XMLSchema(etree.parse(xsd_path))
        xml_doc = etree.parse(xml_path)
    except Exception as ex:
        # Best-effort boundary: a missing or malformed file is reported and
        # treated as a failed validation rather than crashing the caller.
        # Positional arguments are used because the ``etype`` keyword is not
        # accepted by traceback.format_exception on Python 3.10+.
        print("Exception raised-->", ''.join(traceback.format_exception(type(ex), ex, ex.__traceback__)))
        return False

    if schema.validate(xml_doc):
        return True

    # assertValid() raises with only the first violation; error_log keeps
    # every violation the validator recorded during validate().
    # NOTE(review): the underlying validator may still stop reporting further
    # problems inside a content model after its first unexpected element.
    for error in schema.error_log:
        print(f"{error.filename}:{error.line}:{error.column}: {error.message}")
    return False


 if __name__ == "__main__":
     # Build both input paths from one base directory, run the validation
     # once, and report the outcome with a single print.
     base_dir = "C:\\Users\\"
     xml_file = base_dir + 'outputmetadata.xml'
     xsd_file = base_dir + 'XMLValidator.xsd'

     is_valid = validate_xml_and_xsd_files(xml_file, xsd_file)
     outcome = (
         'Validation Successful w.r.t XML and XSD files.'
         if is_valid
         else 'Validation Unsuccessful w.r.t XML and XSD files'
     )
     print(outcome)

XML File

<?xml version="1.0" encoding="UTF-8"?>
<dataFeedDeliveryMetaData>
    <source>
            <setting name="countryCode" serializeAs="String">
                    <value>IND</value>
            </setting>
            <setting name="startDateOfData" serializeAs="String">
                    <value>2012/12/31</value>
            </setting>
            <setting name="endDateOfData" serializeAs="String">
                    <value>2013/03/24</value>
            </setting>
            <setting name="currency" serializeAs="String">
                    <value>INR</value>
            </setting>
    </source>
    <delivery>
            <panels>
                    <panel>
                            <setting type="PersonName">Vikas</setting>
                            <setting type="Gender">Male</setting>
                    </panel>
                    <panel>
                            <setting type="PersonName">Akash</setting>
                            <setting type="Gender">Male</setting>
                    </panel>
                    <pane>
                            <setting type="PersonName">Divya</setting>
                            <setting type="Gender">Female</setting>
                    </pane>
                    <subpanel>
                            <setting type="PersonName">Manikanta</setting>
                            <setting type="Gender">Male</setting>
                    </subpanel>

            </panels>
            <setting name="JobType" serializeAs="String">
                    <value>FullTime</value>
            </setting>
    </delivery>
</dataFeedDeliveryMetaData>

XSD file

<xs:schema attributeFormDefault="unqualified" elementFormDefault="qualified" xmlns:xs="http://www.w3.org/2001/XMLSchema">
<xs:element name="dataFeedDeliveryMetaData">
<xs:complexType>
  <xs:sequence>
    <xs:element name="source">
      <xs:complexType>
        <xs:sequence>
          <xs:element name="setting" maxOccurs="4" minOccurs="4">
            <xs:complexType>
              <xs:sequence>
                <xs:element type="xs:string" name="value"/>
              </xs:sequence>
              <xs:attribute name="name" use="required">
                <xs:simpleType>
                    <xs:restriction base="xs:string">
                         <xs:enumeration value="countryCode"/>
                         <xs:enumeration value="startDateOfData"/>
                         <xs:enumeration value="endDateOfData"/>
                         <xs:enumeration value="currency"/>
                    </xs:restriction>
                </xs:simpleType>
              </xs:attribute>                      
              <xs:attribute type="xs:string" name="serializeAs" use="optional"/>
            </xs:complexType>
          </xs:element>
        </xs:sequence>
      </xs:complexType>
      <xs:unique name="settingNameAttribute">
        <xs:selector xpath="setting"/>
        <xs:field xpath="@name"/>
      </xs:unique>
    </xs:element>
    <xs:element name="delivery">
      <xs:complexType>
        <xs:sequence>
          <xs:element name="panels">
            <xs:complexType>
              <xs:sequence>
                <xs:element name="panel" minOccurs="3" maxOccurs="unbounded">
                  <xs:complexType>
                    <xs:sequence>
                      <xs:element name="setting" maxOccurs="2" minOccurs="2">
                        <xs:complexType>
                          <xs:simpleContent>
                            <xs:extension base="xs:string">
                              <xs:attribute name="type" use="required">
                                <xs:simpleType>
                                    <xs:restriction base="xs:string">
                                         <xs:enumeration value="PersonName"/>
                                         <xs:enumeration value="Gender"/>
                                    </xs:restriction>
                                </xs:simpleType>
                              </xs:attribute>
                            </xs:extension>
                          </xs:simpleContent>
                        </xs:complexType>
                      </xs:element>
                    </xs:sequence>
                  </xs:complexType>
                  <xs:unique name="panelTypeAttribute">
                    <xs:selector xpath="setting"/>
                    <xs:field xpath="@type"/>
                  </xs:unique>
                </xs:element>
              </xs:sequence>
            </xs:complexType>
          </xs:element>
          <xs:element name="setting" maxOccurs="2" minOccurs="1">
            <xs:complexType>
              <xs:sequence>
                <xs:element type="xs:string" name="value"/>
              </xs:sequence>
              <xs:attribute name="name" use="required">
                <xs:simpleType>
                    <xs:restriction base="xs:string">
                         <xs:enumeration value="JobType"/>
                    </xs:restriction>
                </xs:simpleType>
              </xs:attribute>
              <xs:attribute type="xs:string" name="serializeAs" use="optional"/>
            </xs:complexType>
          </xs:element>
        </xs:sequence>
      </xs:complexType>
    </xs:element>
  </xs:sequence>
</xs:complexType>
</xs:element>
</xs:schema>

Output

Unable to get all the differences (both "pane" and "subpanel") from the XML file; only the first one is reported.


Solution

  • import xmlschema
    
    
    def get_validation_errors(xml_file, xsd_file):
        """Print and return every validation error of an XML file against an XSD.

        Args:
            xml_file: The XML document to validate, passed to ``iter_errors``.
            xsd_file: The XSD schema source, passed to ``xmlschema.XMLSchema``.

        Returns:
            A list with the string form of each validation error, in the
            order the schema reported them; empty when there are none.
        """
        schema = xmlschema.XMLSchema(xsd_file)
        errors = []
        # iter_errors() keeps going after a failure, so every violation is
        # visited instead of only the first one.
        for validation_error in schema.iter_errors(xml_file):
            message = str(validation_error)
            errors.append(message)
            print(message)
        return errors
    
    # Run the check on the sample files listed below; each error is printed
    # by the function and the full list is kept in ``errors``.
    errors = get_validation_errors('stackover.xml', 'stackover.xsd')
    
    

    XML - stackover.xml

    <?xml version="1.0" encoding="UTF-8"?>
    <dataFeedDeliveryMetaData>
        <source>
            <setting name="countryCode" serializeAs="String">
                <value>IND</value>
            </setting>
            <setting name="startDateOfData" serializeAs="String">
                <value>2012/12/31</value>
            </setting>
            <setting name="endDateOfData" serializeAs="String">
                <value>2013/03/24</value>
            </setting>
            <setting name="currency" serializeAs="String">
                <value>INR</value>
            </setting>
        </source>
        <delivery>
            <panels>
                <panel>
                    <setting type="PersonName">Vikas</setting>
                    <setting type="Gender">Male</setting>
                </panel>
                <panel>
                    <setting type="PersonName">Akash</setting>
                    <setting type="Gender">Male</setting>
                </panel>
                <pane>
                    <setting type="PersonName">Divya</setting>
                    <setting type="Gender">Female</setting>
                </pane>
                <subpanel>
                    <setting type="PersonName">Manikanta</setting>
                    <setting type="Gender">Male</setting>
                </subpanel>
    
            </panels>
            <setting name="JobType" serializeAs="String">
                <value>FullTime</value>
            </setting>
        </delivery>
    </dataFeedDeliveryMetaData>
    

    XSD - stackover.xsd

    <xs:schema attributeFormDefault="unqualified" elementFormDefault="qualified"
               xmlns:xs="http://www.w3.org/2001/XMLSchema">
        <xs:element name="dataFeedDeliveryMetaData">
            <xs:complexType>
                <xs:sequence>
                    <xs:element name="source">
                        <xs:complexType>
                            <xs:sequence>
                                <xs:element name="setting" maxOccurs="4" minOccurs="4">
                                    <xs:complexType>
                                        <xs:sequence>
                                            <xs:element type="xs:string" name="value"/>
                                        </xs:sequence>
                                        <xs:attribute name="name" use="required">
                                            <xs:simpleType>
                                                <xs:restriction base="xs:string">
                                                    <xs:enumeration value="countryCode"/>
                                                    <xs:enumeration value="startDateOfData"/>
                                                    <xs:enumeration value="endDateOfData"/>
                                                    <xs:enumeration value="currency"/>
                                                </xs:restriction>
                                            </xs:simpleType>
                                        </xs:attribute>
                                        <xs:attribute type="xs:string" name="serializeAs" use="optional"/>
                                    </xs:complexType>
                                </xs:element>
                            </xs:sequence>
                        </xs:complexType>
                        <xs:unique name="settingNameAttribute">
                            <xs:selector xpath="setting"/>
                            <xs:field xpath="@name"/>
                        </xs:unique>
                    </xs:element>
                    <xs:element name="delivery">
                        <xs:complexType>
                            <xs:sequence>
                                <xs:element name="panels">
                                    <xs:complexType>
                                        <xs:sequence>
                                            <xs:element name="panel" minOccurs="3" maxOccurs="unbounded">
                                                <xs:complexType>
                                                    <xs:sequence>
                                                        <xs:element name="setting" maxOccurs="2" minOccurs="2">
                                                            <xs:complexType>
                                                                <xs:simpleContent>
                                                                    <xs:extension base="xs:string">
                                                                        <xs:attribute name="type" use="required">
                                                                            <xs:simpleType>
                                                                                <xs:restriction base="xs:string">
                                                                                    <xs:enumeration value="PersonName"/>
                                                                                    <xs:enumeration value="Gender"/>
                                                                                </xs:restriction>
                                                                            </xs:simpleType>
                                                                        </xs:attribute>
                                                                    </xs:extension>
                                                                </xs:simpleContent>
                                                            </xs:complexType>
                                                        </xs:element>
                                                    </xs:sequence>
                                                </xs:complexType>
                                                <xs:unique name="panelTypeAttribute">
                                                    <xs:selector xpath="setting"/>
                                                    <xs:field xpath="@type"/>
                                                </xs:unique>
                                            </xs:element>
                                        </xs:sequence>
                                    </xs:complexType>
                                </xs:element>
                                <xs:element name="setting" maxOccurs="2" minOccurs="1">
                                    <xs:complexType>
                                        <xs:sequence>
                                            <xs:element type="xs:string" name="value"/>
                                        </xs:sequence>
                                        <xs:attribute name="name" use="required">
                                            <xs:simpleType>
                                                <xs:restriction base="xs:string">
                                                    <xs:enumeration value="JobType"/>
                                                </xs:restriction>
                                            </xs:simpleType>
                                        </xs:attribute>
                                        <xs:attribute type="xs:string" name="serializeAs" use="optional"/>
                                    </xs:complexType>
                                </xs:element>
                            </xs:sequence>
                        </xs:complexType>
                    </xs:element>
                </xs:sequence>
            </xs:complexType>
        </xs:element>
    </xs:schema>