Search code examples
perlxml-twig

Unable to get highest version of the TRADE in Perl


I have just started learning Perl and got stuck in precarious scenario. Input Source XML FILE is:

<STATEMENT>
     <TRADE origin = "BANK", ref="1",version="1">
      <EVENT type="PRO">
       <EVENTNAR key = "USE" val = "MY"/>
       <EVENTNAR key = "USEE" val = "MYY"/>
      </EVENT>
     </TRADE>
     <TRADE origin = "BANK", ref="1",version="2">
      <EVENT type="PRO">
       <EVENTNAR key = "USE" val = "MYY"/>
       <EVENTNAR key = "USEE" val = "MYY"/>
      </EVENT>
     </TRADE>
     <TRADE origin = "BANK", ref="2",version="1">
      <EVENT type="PRO">
       <EVENTNAR key = "USE" val = "MY"/>
       <EVENTNAR key = "USEE" val = "MYY"/>
      </EVENT>
         <TRADE origin = "BANK" ref="1",version="1">
           <EVENT type="PRO">
              <EVENTNAR key = "USE" val = "MY"/>
              <EVENTNAR key = "USEE" val = "MYY"/>
           </EVENT>
         </TRADE>
       </TRADE>
    <STATEMENT>

Now I need to filter Trades with following 'AND' conditions:

  1. Only trades with origin = "BANK"

  2. TRADE should have "type" attribute of <EVENT> = 'PRO'

  3. TRADE should have "key" attribute of <EVENTNAR> = "USE"

  4. TRADE should have "value" attribute of <EVENTNAR> = "MY"

  5. Multiple <EVENTNAR> can be there under <EVENT> of a <TRADE>. At least one <EVENTNAR> should be legitimate.

  6. All sub-trades i.e TRADE inside TRADE should be deleted :

  7. The most important of all - Only highest versionno for given ref can be taken (this is not working)

Expected output:

 <STATEMENT>
      <TRADE origin = "BANK", ref="1",version="2">(higher version)
        <EVENT type="PRO">
           <EVENTNAR key = "USE" val = "MYY"/>
           <EVENTNAR key = "USEE" val = "MYY"/>
        </EVENT>
      </TRADE>
      <TRADE origin = "BANK", ref="2",version="1">
        <EVENT type="PRO">
          <EVENTNAR key = "USE" val = "MY"/>
          <EVENTNAR key = "USEE" val = "MYY"/>
        </EVENT>
      </TRADE>
    <STATEMENT>

Following is my code :

use strict;
  use warnings;
  use XML::Twig;
  use Tie::File;


    my $SOURCEFILE=$ARGV[0];
    my $FILELOCATIONIN=$ARGV[1];


    open( my $out, '>:utf8', 'out.xml') or die "cannot create output file out.xml: $!";


    my $twig = XML::Twig->new(  pretty_print => 'indented',
      twig_handlers => { 'TRADE'=>\&TRADE_HANDLER,
                            'TRADE/TRADE' => \&DEL_TRADE},
                         att_accessors => [ qw/ ref version / ],

     );

    my %max_version;

    $twig->parsefile($FILELOCATIONIN.'/'.$SOURCEFILE.'.xml');


    for my $trade ($twig->root->children('TRADE')) {
      my ($ref, $version) = ($trade->ref, $trade->version);
    if ($version eq $max_version{$ref})
    {
     $trade->flush($out);
    }

    }

    sub DEL_TRADE{
    my ( $twig, $TRADE ) = @_;
    $TRADE->delete($TRADE);
    #$twig->purge();
    }


    sub TRADE_HANDLER {
        my ( $twig, $trade ) = @_;

        my $org   = $trade->att('origin');


     if ($org eq "BANK"  &&  grep {grep {$_->att('key') eq 'USE' and $_->att('value') eq 'MY'}
        $_->children('EVENTNAR')} $trade->children('EVENT[@type="PRO"]') )

    {
        my ($ref, $version) = ($trade->ref, $trade->version);

        unless (exists $max_version{$ref} and $max_version{$ref} >= $version) {
        $max_version{$ref} = $version;}

    }

    else
    {
    $twig->purge();
    }

    return ;
    }

My output is:

<STATEMENT>
      <TRADE origin = "BANK", ref="1",version="1">(this shouldn't come )
         <EVENT type="PRO">
          <EVENTNAR key = "USE" val = "MY"/>
          <EVENTNAR key = "USEE" val = "MYY"/>
         </EVENT>
       </TRADE>
       <TRADE origin = "BANK", ref="1",version="2">
        <EVENT type="PRO">
          <EVENTNAR key = "USE" val = "MYY"/>
          <EVENTNAR key = "USEE" val = "MYY"/>
         </EVENT>
       </TRADE>
    <TRADE origin = "BANK", ref="2",version="1">
        <EVENT type="PRO">
          <EVENTNAR key = "USE" val = "MY"/>
          <EVENTNAR key = "USEE" val = "MYY"/>
        </EVENT>
      </TRADE>
     </STATEMENT>

As can be seen, logic of highest version for given ref is not working.

Any suggestions will be highly appreciated.


Solution

  • Using XML::XSH2, after fixing the input:

    open file.xml ;
    rm //TRADE/TRADE ;
    $l = //TRADE[@origin='BANK'][EVENT[@type='PRO'][EVENTNAR[@key='USE'][@val='MY']]] ;
    $h := hash @ref $l ;
    for my $ref in { keys %$h } {
        $trades = xsh:lookup('h', $ref);
        ls $trades[@version=xsh:max($trades/@version)] ;
    } | cat > output1.xml ;
    

    For very large files, you can try the streaming interface:

    $h = { {} } ;
    stream :f file.xml :F /dev/null select TRADE {
        rm TRADE ;
        if (@origin='BANK'
            and EVENT[@type='PRO'][EVENTNAR[@key='USE'][@val='MY']]
           ) {
            $ref = @ref ;
            $record = xsh:lookup('h', $ref)/@version ;
            perl { $record ||= -1 } ;
            if (@version > $record) {
                $here = . ;
                perl { $h->{$ref} = $here } ;
            }
        }
    } ;
    
    create STATEMENT ;
    for my $trade in { values %$h } mv $trade into STATEMENT ;
    save :f output2.xml ;
    

    On MSWin, you have to use NUL instead of /dev/null. The program can still be memory hungry - it needs to remember the whole output. If it's too much, you have to change it to process the file twice: in the first run, it'll remember the maximal version for each ref, in the second run, it will output.

    $h = { {} } ;
    stream :f file.xml :F /dev/null select TRADE {
        rm TRADE ;
        if (@origin='BANK' 
            and EVENT[@type='PRO'][EVENTNAR[@key='USE'][@val='MY']]
        ) {
            $ref = @ref ;
            $record = xsh:lookup('h', $ref) ;
            perl { $record ||= -1 } ;
            if (@version > $record) {
                $record = @version ;
                perl { $h->{$ref} = $record } ;
            }
        }
    } ;
    
    stream :f file.xml :F output3.xml select TRADE {
        rm TRADE ;
        if not(@origin = 'BANK'
               and EVENT[@type='PRO'][EVENTNAR[@key='USE'][@val='MY']]
               and xsh:lookup('h', @ref) = @version
        ) rm . ;
    } ;
    

    If the version + ref combinations are unique, you can simplify the condition in the last if not.