Search code examples
xmlperlxsdlibxml2

Perl XML LibXML to replace a node with the other on xsd schema


I have an XML Schema, and I would like to replace the top-level RootNode element with the element declared inside the RootNode complex type.

For example, below expected result is to have the <xs:element name="RootNode" ...> replaced by <xs:element name="real_node" type="RealNode"/> and remove the whole node of <xs:complexType name="RootNode">.

XML Schema:

<xs:schema xmlns="test" xmlns:xs="http://www.w3.org/2001/XMLSchema" targetNamespace="test" elementFormDefault="qualified">
    <xs:element name="RootNode" type="RootNode"/>

    <xs:complexType name="RootNode">
            <xs:sequence>
                    <xs:element name="real_node" type="RealNode"/>
            </xs:sequence>
    </xs:complexType>

    <xs:complexType name="RealNode">
            <xs:annotation>
                    <xs:documentation source="Name" xml:lang="EN">TestName</xs:documentation>
                    <xs:documentation source="Type" xml:lang="EN">TestType</xs:documentation>
            </xs:annotation>
            <xs:sequence>
                    <xs:element name="Elem2" type="Type2" minOccurs="1">
                            <xs:annotation>
                                    <xs:documentation source="Name3" xml:lang="EN">TestName3</xs:documentation>
                                    <xs:documentation source="Type3" xml:lang="EN">TestType3</xs:documentation>
                            </xs:annotation>
                    </xs:element>
                    <xs:element name="Elem4" type="Type4" maxOccurs="99" />
            </xs:sequence>
    </xs:complexType>

    <xs:simpleType name="Type2">
            <xs:restriction base="xs:string">
                    <xs:pattern value="[A-Z]{9,9}"/>
            </xs:restriction>
    </xs:simpleType>

    <xs:simpleType name="Type4">
            <xs:annotation>
                    <xs:documentation source="Name5" xml:lang="EN">TestName5</xs:documentation>
                    <xs:documentation source="Type5" xml:lang="EN">TestType5</xs:documentation>
            </xs:annotation>
            <xs:restriction base="xs:string">
                    <xs:pattern value="[A-Z]{7,9}"/>
            </xs:restriction>
    </xs:simpleType>

</xs:schema>

Expected result:

<?xml version="1.0" encoding="UTF-8"?>
<xs:schema xmlns="test" xmlns:xs="http://www.w3.org/2001/XMLSchema" targetNamespace="test" elementFormDefault="qualified">
    <xs:element name="real_node" type="RealNode"/>

    <xs:complexType name="RealNode">
            <xs:annotation>
                    <xs:documentation source="Name" xml:lang="EN">TestName</xs:documentation>
                    <xs:documentation source="Type" xml:lang="EN">TestType</xs:documentation>
            </xs:annotation>
            <xs:sequence>
                    <xs:element name="Elem2" type="Type2" minOccurs="1">
                            <xs:annotation>
                                    <xs:documentation source="Name3" xml:lang="EN">TestName3</xs:documentation>
                                    <xs:documentation source="Type3" xml:lang="EN">TestType3</xs:documentation>
                            </xs:annotation>
                    </xs:element>
                    <xs:element name="Elem4" type="Type4" maxOccurs="99" />
            </xs:sequence>
    </xs:complexType>

    <xs:simpleType name="Type2">
            <xs:restriction base="xs:string">
                    <xs:pattern value="[A-Z]{9,9}"/>
            </xs:restriction>
    </xs:simpleType>

    <xs:simpleType name="Type4">
            <xs:annotation>
                    <xs:documentation source="Name5" xml:lang="EN">TestName5</xs:documentation>
                    <xs:documentation source="Type5" xml:lang="EN">TestType5</xs:documentation>
            </xs:annotation>
            <xs:restriction base="xs:string">
                    <xs:pattern value="[A-Z]{7,9}"/>
            </xs:restriction>
    </xs:simpleType>

</xs:schema>

My script:

#!/opt/perl/bin/perl -w
use strict;
use warnings;

use XML::LibXML               qw( );
use XML::LibXML::XPathContext qw( );
use File::Copy;
use File::Basename;

# Directory to scan, taken from the first command-line argument.
my $in_qfn  = $ARGV[0];
# Working variables are declared once at file scope and reused for every
# file processed by the loop below.
my ($parser, $doc, $root, $out_qfn);
my ($name, $path, $suffix);
# NOTE(review): $documentroot is declared here but never used in this script.
my ($documentroot, $complexdoc, $copyelem, $test, $fnd_type, $parent);

# Every *.xsd file directly inside the given directory.
my @files = glob "$in_qfn/*.xsd";

foreach my $file (@files) {
   print "###LI### 1 $file\n";
   # NOTE(review): fileparse() is called without suffix patterns, so per the
   # File::Basename docs $suffix should come back empty and $name keeps the
   # ".xsd" extension -- confirm the resulting output name is what is wanted.
   ($name, $path, $suffix) = fileparse($file);
   $out_qfn = "${name}NoRoot.${suffix}";

   $parser = XML::LibXML->new();
   $doc    = $parser->parse_file($file);
   $root   = $doc->documentElement();

   # Bind the 'xsd' prefix so the XPath expressions below can address
   # XML Schema elements regardless of the prefix used in the file.
   my $xpc = XML::LibXML::XPathContext->new($doc);
   $xpc->registerNs('xsd', 'http://www.w3.org/2001/XMLSchema');

   # Get the RootNode element node and delete it
   foreach $test ($xpc->findnodes("//xsd:element", $root)) {
      # Elements without a type attribute are skipped.
      $fnd_type = $test->getAttribute('type') or next;

      # NOTE(review): './@name[.="RootNode"]' selects the name *attribute*
      # node, not the xs:element itself, so $fnd_node below is not the
      # element; getAttribute() and the ->[0] indexing are applied to that
      # attribute node -- this looks unintended.
      if ( $xpc->findnodes('./@name[.="RootNode"]', $test) ) {
         foreach my $fnd_node ($xpc->findnodes('./@name[.="RootNode"]', $test)) {
            $fnd_type = $fnd_node->getAttribute('type') or next;
            $parent = $fnd_node->[0]->parentNode;
            $parent->removeChild($fnd_node->[0]);
         }
      }
   }

   # Get the RootNode Complex Type node
   foreach $test ($xpc->findnodes("//xsd:complexType", $root)) {
      # NOTE(review): the context node here is $root, not the loop variable
      # $test, so the complexType being visited is never actually inspected.
      if ($xpc->findnodes('./@name[.="RootNode"]', $root)) {
         # NOTE(review): both assignments call findnodes() in scalar context,
         # so $complexdoc and $copyelem presumably hold node-list objects
         # rather than single nodes; cloneNode() is then invoked on a list.
         $complexdoc = $xpc->findnodes('./@name[.="RootNode"]', $root);
         $copyelem = $xpc->findnodes("//xsd:element", $complexdoc);
         # Copy the element node within RootNode node to the top level
         # (appendChild places the copy at the end of the schema).
         $root->appendChild($copyelem->cloneNode(1));
      }
   }

   # Write the modified document under the derived output name.
   $doc->toFile($out_qfn);
}

However, the script throws error:

Can't locate object method "getAttribute" via package "XML::LibXML::NodeList"

Any help is appreciated.


Solution

  • A call to findnodes in list context will return a list of matching nodes. That is how your first loop

    foreach $test ($xpc->findnodes("//xsd:element", $root)) { ... }
    

    works, because for applies list context to the expression in parentheses

    However, your second loop

    foreach my $fnd_node ($documentroot = $xpc->findnodes('./@name[.="RootNode"]', $test)) { ... }
    

    puts findnodes into scalar context because you first assign the result to $documentroot. That causes findnodes to return an XML::LibXML::NodeList object instead of a list of nodes, and the loop will iterate just once, setting $fnd_node to that object and causing the errors you see

    I'm unclear why you have added the assignment to $documentroot, as it will just set both $documentroot and $fnd_node to the same value, which has no advantage. Your errors would go away if you removed that assignment and replaced $documentroot with $fnd_node in the body of the loop

    By the way, there is no need for the test

    if ( $xpc->findnodes('./@name[.="RootNode"]', $test) ) { ... }
    

    before looping over the same call to findnodes. If the call finds no matching nodes then the for loop won't execute at all



    Update

    There were a few other problems with your program. For instance you didn't delete the original <xs:complexType name="RootNode"> element, and you inserted the contained <xs:element name="real_node" type="RealNode"/> at the end of the file

    I've fixed your code like this

    #!/opt/perl/bin/perl
    use strict;
    use warnings 'all';
    
    use XML::LibXML ();
    use XML::LibXML::XPathContext ();
    use File::Copy;
    use File::Basename;
    
    # Directory to scan for *.xsd files. When no argument is given, fall
    # back to a single 'test.xsd' in the current directory.
    my ($in_qfn) = @ARGV;

    my @files = $in_qfn ? glob "$in_qfn/*.xsd" : 'test.xsd';

    # The parser carries no per-file state, so build it once for all files.
    my $parser = XML::LibXML->new();

    for my $file ( @files ) {

        print "###LI### 1 $file\n";

        # Split off the extension so "NoRoot" can be inserted in front of it.
        # $path is not reused, so the result is written to the current
        # working directory rather than next to the input file.
        my ( $name, $path, $suffix ) = fileparse( $file, qr/\.[^.]*/ );
        my $out_qfn = "${name}NoRoot${suffix}";

        print "###LO### 1 $out_qfn\n";

        my $doc  = $parser->parse_file( $file );
        my $root = $doc->documentElement();

        # Bind the 'xsd' prefix to the XML Schema namespace so the XPath
        # expressions work whatever prefix the input file uses.
        my $xpc = XML::LibXML::XPathContext->new( $doc );
        $xpc->registerNs( 'xsd', 'http://www.w3.org/2001/XMLSchema' );

        # Get the RootNode element node and delete it
        for my $element_root ( $xpc->findnodes( '//xsd:element[@name="RootNode"]', $root ) ) {
            my $parent = $element_root->parentNode;
            $parent->removeChild( $element_root );
        }

        # Get the RootNode Complex Type node
        for my $complex_root ( $xpc->findnodes( '//xsd:complexType[@name="RootNode"]', $root ) ) {

            # List assignment keeps findnodes in list context and picks the
            # first nested element declaration.
            my ($copyelem) = $xpc->findnodes( ".//xsd:element", $complex_root );

            # A RootNode type without any nested element has nothing to
            # promote; report it and leave that type in place instead of
            # dying on an undefined value and aborting the remaining files.
            if ( !$copyelem ) {
                warn "$file: complexType RootNode contains no xs:element; left unchanged\n";
                next;
            }

            # Copy the element node within RootNode node to the top level
            $root->insertBefore(
                $copyelem->cloneNode( 1 ),
                $root->firstChild
            );

            # Put a newline before the cloned copy
            $root->insertBefore(
                XML::LibXML::Text->new( "\n\n    " ),
                $root->firstChild
            );

            # Remove the <xs:complexType name="RootNode">
            $complex_root->parentNode->removeChild($complex_root);
        }

        # toFile returns a false value when the document could not be saved.
        $doc->toFile( $out_qfn )
            or die "Could not write $out_qfn\n";
    }
    

    output

    <?xml version="1.0"?>
    <xs:schema xmlns="test" xmlns:xs="http://www.w3.org/2001/XMLSchema" targetNamespace="test" elementFormDefault="qualified">
    
        <xs:element name="real_node" type="RealNode"/>
    
    
    
    
        <xs:complexType name="RealNode">
                <xs:annotation>
                        <xs:documentation source="Name" xml:lang="EN">TestName</xs:documentation>
                        <xs:documentation source="Type" xml:lang="EN">TestType</xs:documentation>
                </xs:annotation>
                <xs:sequence>
                        <xs:element name="Elem2" type="Type2" minOccurs="1">
                                <xs:annotation>
                                        <xs:documentation source="Name3" xml:lang="EN">TestName3</xs:documentation>
                                        <xs:documentation source="Type3" xml:lang="EN">TestType3</xs:documentation>
                                </xs:annotation>
                        </xs:element>
                        <xs:element name="Elem4" type="Type4" maxOccurs="99"/>
                </xs:sequence>
        </xs:complexType>
    
        <xs:simpleType name="Type2">
                <xs:restriction base="xs:string">
                        <xs:pattern value="[A-Z]{9,9}"/>
                </xs:restriction>
        </xs:simpleType>
    
        <xs:simpleType name="Type4">
                <xs:annotation>
                        <xs:documentation source="Name5" xml:lang="EN">TestName5</xs:documentation>
                        <xs:documentation source="Type5" xml:lang="EN">TestType5</xs:documentation>
                </xs:annotation>
                <xs:restriction base="xs:string">
                        <xs:pattern value="[A-Z]{7,9}"/>
                </xs:restriction>
        </xs:simpleType>
    
    </xs:schema>