Search code examples
xmlperlxml-libxml

Concatenate elements from multiple XML files


I need to open and parse several XML files, keeping the data in memory, and then sort the result.

The script is not finished because I don't know how to keep the information in a hash or a variable; that's why I need your help.

Perl code

#!/usr/local/bin/perl

# Scans $DIR for XML data files, parses each one with XML::LibXML and keeps
# the serialised first Product / Series / Episode / VodItem element found
# directly under /ScheduleProvider. The variables are overwritten on every
# iteration, so after the loop they hold the values from the last file only.

use strict;
use warnings 'all';

use POSIX;
use XML::LibXML;

print strftime('%Y-%m-%d %H:%M:%S', localtime), "\n";

my $DIR = "/totest/";

# Looking for XML files.
# The dots in the pattern are escaped so that they only match a literal ".",
# and the list is sorted because readdir returns entries in no defined order.
opendir(my $dh, $DIR) or die "Cannot open directory $DIR: $!";
my @SEARCH = sort { $a cmp $b }
             grep { /^XML-[0-9]{8}_[0-9]{6}\.FR.*\.FORTESTING\.xml$/ }
             readdir($dh);
closedir($dh);

my ($product, $series, $voditem, $episode) = ("", "", "", "");

# Returns the serialised first node matching $xpath in $doc, or an empty
# string when nothing matches (instead of dying on an undefined node).
sub first_node_xml {
    my ($doc, $xpath) = @_;

    my ($node) = $doc->getDocumentElement->findnodes($xpath);

    return defined $node ? $node->toString : "";
}

# Only proceed if at least one data file was found
if ( scalar(@SEARCH) ) {

    foreach my $name ( @SEARCH ) {

        my $filename = $DIR . $name;
        my $doc      = XML::LibXML->load_xml(location => $filename);

        $product = first_node_xml($doc, "/ScheduleProvider/Product");
        $series  = first_node_xml($doc, "/ScheduleProvider/Series");
        $episode = first_node_xml($doc, "/ScheduleProvider/Episode");
        $voditem = first_node_xml($doc, "/ScheduleProvider/VodItem");
    }

    print "$series\n";
}

Desired output

<?xml version="1.0" encoding="utf-8" standalone="yes"?>
<ScheduleProvider id="FRT">
    <Product action="override" type="single">
        <Price currency="EUR">0.00</Price>
        <EpgDescription locale="fr_FR">
            <EpgElement key="Title">NO TITLE</EpgElement>
        </EpgDescription>
    </Product>
    <Product action="override" type="single">
        <Price currency="EUR">0.00</Price>
        <EpgDescription locale="fr_FR">
            <EpgElement key="Title">NO TITLE</EpgElement>
        </EpgDescription>
    </Product>
    <Series id="TS30200026214" action="override">
            <EpgElement key="IsRecordable">0</EpgElement>
    </Series>
    <Series id="TS20200026214" action="override">
            <EpgElement key="IsRecordable">0</EpgElement>
    </Series>
    <Episode action="override" id="TS303687645464" seriesRef="TS30200026214">
        <Media id="TS300687645464"/>
    </Episode>
    <Episode action="override" id="TS203182282260" seriesRef="TS20200026214">
        <Media id="TS200182282260"/>
    </Episode>
    <VodItem action="override" contentRef="TS303687645464" id="TS304687645464">
        <Period year="2017"/>
    </VodItem>
    <VodItem action="override" contentRef="TS203182282260" id="TS204182282260">
        <Period year="2018"/>
    </VodItem>
</ScheduleProvider>

XML-20170504_060444.FR.FORTESTING.xml

<?xml version="1.0" encoding="utf-8" standalone="yes"?>
<ScheduleProvider id="FRT">
    <Product action="override" type="single">
        <Price currency="EUR">0.00</Price>
        <EpgDescription locale="fr_FR">
            <EpgElement key="Title">NO TITLE</EpgElement>
        </EpgDescription>
    </Product>
    <Series id="TS30200026214" action="override">
            <EpgElement key="IsRecordable">0</EpgElement>
    </Series>
    <Episode action="override" id="TS303687645464" seriesRef="TS30200026214">
        <Media id="TS300687645464"/>
    </Episode>
    <VodItem action="override" contentRef="TS303687645464" id="TS304687645464">
        <Period year="2017"/>
    </VodItem>
</ScheduleProvider>

XML-20170505_073915.FR.FORTESTING.xml

<?xml version="1.0" encoding="utf-8" standalone="yes"?>
<ScheduleProvider id="FRT">
    <Product action="override" type="single">
        <Price currency="EUR">0.00</Price>
        <EpgDescription locale="fr_FR">
            <EpgElement key="Title">NO TITLE</EpgElement>
        </EpgDescription>
    </Product>
    <Series id="TS20200026214" action="override">
            <EpgElement key="IsRecordable">0</EpgElement>
    </Series>
    <Episode action="override" id="TS203182282260" seriesRef="TS20200026214">
        <Media id="TS200182282260"/>
    </Episode>
    <VodItem action="override" contentRef="TS203182282260" id="TS204182282260">
        <Period year="2018"/>
    </VodItem>
</ScheduleProvider>

Solution

  • Inside your if (scalar(@SEARCH)) block, change your code to something like this. It creates a new XML document, and adds the nodes you want from the opened XML docs. I also cleaned up some of the unnecessary verbosity in the XML methods.

    use strict;
    use warnings;
    use XML::LibXML;

    # Create the new XML document with a <ScheduleProvider id="FRT"> root.
    my $new_doc  = XML::LibXML::Document->new('1.0', 'utf-8');
    my $new_root = $new_doc->createElement('ScheduleProvider');
    $new_root->setAttribute('id', 'FRT');

    # For example purposes, let's say the file paths are in the command line
    # arguments. Every file is parsed up front so that the nodes can be
    # grouped by element type across all files below.
    my @docs;
    for my $fn (@ARGV) {
        die "file $fn not found" unless -e $fn;

        push @docs, XML::LibXML->load_xml(location => $fn);
    }

    # The element type is the outer loop and the file the inner one, so the
    # result lists every Product first, then every Series, Episode and
    # VodItem, as in the desired output. Looping over the files first would
    # interleave the four element types file by file.
    for my $name (qw(Product Series Episode VodItem)) {
        for my $doc (@docs) {
            # Absolute path: only direct children of the root element are
            # taken, not same-named elements nested deeper in the tree.
            $new_root->appendChild($_)
                for $doc->findnodes("/ScheduleProvider/$name");
        }
    }

    $new_doc->setDocumentElement($new_root);

    # Format level 1 asks libxml2 to add indentation whitespace on output.
    print $new_doc->toString(1);