I am parsing this file with XML-Twig perl module:
<?xml version="1.0" encoding="UTF-8"?>
<!DOCTYPE reference
PUBLIC "-//OASIS//DTD DITA Reference//EN" "reference.dtd">
<reference xmlns:ditaarch="http://dita.oasis-open.org/architecture/2005/" id="dav1395327178563" xml:lang="en-us" ditaarch:DITAArchVersion="1.2">
<title>Display</title>
<shortdesc></shortdesc>
<prolog>
</prolog>
<refbody>
<table>
<title>LPSV register</title>
<desc>Address offset: <codeph>0x100</codeph>.</desc>
<tgroup cols="5">
<colspec colname="col1" colnum="1"/>
<colspec colname="col2" colnum="2"/>
<colspec colname="col3" colnum="3"/>
<colspec colname="col4" colnum="4"/>
<colspec colname="col5" colnum="5"/>
<thead>
<row>
<entry colname="col1">Bits</entry>
<entry colname="col2">Reset value</entry>
<entry colname="col3">Access type</entry>
<entry colname="col4">Name</entry>
<entry colname="col5">Usage</entry>
</row>
</thead>
<tbody>
<row>
<entry colname="col1">4:0</entry>
<entry colname="col2">0</entry>
<entry colname="col3">tt</entry>
<entry colname="col4">xx</entry>
<entry colname="col5">
<p>Video layer input format.</p>
</entry>
</row>
<row>
<entry colname="col1">7:5</entry>
<entry colname="col2">-</entry>
<entry colname="col3">-</entry>
<entry colname="col4">-</entry>
<entry colname="col5">Reserved</entry>
</row>
</tbody>
</tgroup>
</table>
<table>
<title>LV_CONTROL</title>
<desc>Address offset: <codeph>0x104</codeph>.</desc>
<tgroup cols="5">
<colspec colname="col1" colnum="1"/>
<colspec colname="col2" colnum="2"/>
<colspec colname="col3" colnum="3"/>
<colspec colname="col4" colnum="4"/>
<colspec colname="col5" colnum="5"/>
<thead>
<row>
<entry colname="col1">Bits</entry>
<entry colname="col2">Reset value</entry>
<entry colname="col3">Access type</entry>
<entry colname="col4">Name</entry>
<entry colname="col5">Usage</entry>
</row>
</thead>
<tbody>
<row>
<entry colname="col1">0</entry>
<entry colname="col2">0</entry>
<entry colname="col3">RWPU</entry>
<entry colname="col4">EN</entry>
<entry colname="col5">layer enable flag</entry>
</row>
<row>
<entry colname="col1">3:1</entry>
<entry colname="col2">0</entry>
<entry colname="col3">rr</entry>
<entry colname="col4">ss</entry>
<entry colname="col5">layer data flow configuration</entry>
</row>
<row>
<entry colname="col1">4</entry>
<entry colname="col2">0</entry>
<entry colname="col3">rr</entry>
<entry colname="col4">rr</entry>
<entry colname="col5">layer inverse gamma enable flag</entry>
</row>
</tbody>
</tgroup>
</table>
</refbody>
</reference>
But the get_xpath("//table")
only picks, first <table>
element. Can anyone please help me what's wrong in my script:
use strict;
use open ':std', ':encoding(UTF-8)'; # To remove 'Wide character in print' error
#use warnings;
use XML::Twig;
my @input_xml_files = ( 'dav1395327178563.xml' ); #<input_regs/*.xml>;
our @registers;
our @registers_desc;
our @registers_offset;
my $register_count = 0;
foreach my $input_xml_file ( @input_xml_files ) {
my $twig = XML::Twig->new(
twig_handlers => {
'table/title' => \®ister_name,
#'section/dl' => \®ister_description,
'table/desc/codeph' => \®ister_offset
},
);
$twig->parsefile( $input_xml_file );
$input_xml_file =~ s/.xml//;
$input_xml_file =~ s/input_regs\///;
my $regs_file_name = $input_xml_file . ".regs";
#chdir ($curdir);
open( REGS_PPFE, ">$regs_file_name" ) || die( "Can't open the file." );
foreach my $table ( $twig->get_xpath( "//table" ) ) { # get each <table>
# my $header = $table->prev_sibling->text;
my @headers;
my $register_id = $registers[$register_count];
$register_id =~ s/[\(\s+-]/_/g;
$register_id =~ s/,//g; # Remove ,
print $regs_file_name. "\n";
print $table. "\n\n";
#print $register_id."\n";
print REGS_PPFE "name=\"$register_id\" id=\"$register_id\"
\"$registers[$register_count]\"
offset=\"$registers_offset[$register_count]\"
\n\n";
################################## Bit fields ####################################################################
my $bit_num = 1; # bit number
my $bit_width_temp = 0;
my $bit_offset = 0; # bit offset
my %entries;
my $row_count = 0; # row counter
my $reserved_bit_offset;
my $reserved_bit_width;
my $prev_bit_offset = 0;
my $prev_bit_width_temp = 0;
my $prev_colon = 0;
my $prev_single = 0;
foreach my $row ( $table->get_xpath( "//row" ) ) { #foreach my $row ( $table->get_xpath("tgroup/tbody/row") ) # get each <row> of one <table>
# my %entries;
$row_count = $row_count + 1;
# print $row_count, "\n";
my @row_entries = map { $_->text =~ s/\n\s+//rg; } $row->children; # remove 'linefeed and whitespace' (s/\n\s+//gr) /
if ( @headers ) {
# my $bit_width_temp;
# my $bit_offset;
my $bit_width; # bit width
@entries{@headers} = @row_entries;
#foreach my $field (@headers)
#{
$entries{'Bits'} =~ s/[\[\]]//g; # remove [] from this text
#print "$entries{'Bits'}\n";
if ( $entries{'Bits'} =~ /:/ ) { # e.g. 3:2
$prev_bit_width_temp = $bit_width_temp;
$prev_bit_offset = $bit_offset;
# print $reg_name," Prev prev_bit_width_temp: ", $prev_bit_width_temp, " Prev prev_bit_offset: ", $prev_bit_offset;
$prev_colon = 1 - $prev_single;
#$prev_single = 0;
( $bit_width_temp, $bit_offset ) = split( ':', $entries{'Bits'} );
$bit_width = int( $bit_width_temp ) - int( $bit_offset ) + 1;
# print " Now bit_width_temp: ",$bit_width_temp, " Now bit_offset: ",$bit_offset, "\n";
}
else { # e.g. 24
$prev_bit_offset = $bit_offset;
$prev_bit_width_temp = $bit_width_temp;
# print $reg_name, " Prev prev_bit_width_temp: ", $prev_bit_width_temp," Prev prev_bit_offset: ", $prev_bit_offset;
$prev_single = 1 - $prev_colon;
#$prev_colon = 0;
( $bit_width_temp, $bit_offset ) = ( 0, $entries{'Bits'} );
$bit_width = int( $bit_width_temp ) + 1;
# print " Now bit_width_temp: ",$bit_width_temp," Now bit_offset: ",$bit_offset,"\n";
}
my $bit_reset = $entries{'Reset value'};
print "This is binary \n" if -B $bit_reset;
#my $bit_access = $entries{'Access'};
#$bit_access =~ s/RAZ\/WI/ROZ/g;
my $bit_name = $entries{'Name'};
#$bit_name =~ s/[\(\s+-]/_/g;
#$entries{'Usage'} =~ s/[‘’]/'/g;
#print $bit_name;
if ( $bit_name =~ /-/ ) {
print REGS_PPFE "reserved_bit_field position=\"$bit_offset\" width=\"$bit_width\"
reset_value value=\"0x$bit_reset\"
\n";
}
else {
print REGS_PPFE
"<bit_field name=\"$bit_name\" id=\"$bit_name\" position=\"$bit_offset\" width=\"$bit_width\" access=\"RW\">
<brief_description>$entries{'Name'}</brief_description>
<long_description>
<p>$entries{'Usage'}</p>
</long_description>
<reset_value value=\"0x$bit_reset\" override=\"true\"/>
</bit_field>\n";
}
$bit_num = $bit_num + 1;
}
else {
@headers = @row_entries;
}
}
print REGS_PPFE "</register_def>";
$register_count++;
}
close( REGS_PPFE );
}
#print "\n\n\n\n@registers \n";
#print "\n@registers_desc \n\n\n\n\n";
#================================================== Subroutines =================================================
sub register_name {
my ( $twig, $text_elt ) = @_;
push @registers, $text_elt->text;
print $text_elt ->text . "\n";
$twig->purge;
}
sub register_description {
my ( $twig, $text_elt ) = @_;
#print $text_elt -> text;
my $temp_reg_desc = $text_elt->text;
$temp_reg_desc =~ s/\bPurpose/<p>Purpose: /i;
$temp_reg_desc =~ s/\bUsage constraints/<\/p><p>Usage constraints: /i;
$temp_reg_desc =~ s/\bConfigurations/<\/p><p>Configurations: /i;
$temp_reg_desc =~ s/\bAttributes.*//; #Remove all text after Attributes
$temp_reg_desc = $temp_reg_desc . "</p>";
#print $temp_reg_desc;
push @registers_desc, $temp_reg_desc;
$twig->purge;
}
sub register_offset {
my ( $twig, $text_elt ) = @_;
#print $text_elt -> text;
push @registers_offset, $text_elt->text;
$twig->purge;
}
The problem is the calls to purge
at the end of each of your handler subroutines. This deletes from memory all fully-process nodes, so you are removing the whole of the first table
element before you use get_xpath
to search for it. There is no need to purge
or flush
the nodes accessed by the handlers
You are using two different ways of processing your data: via handlers and through get_xpath
on the complete twig. I suggest that you remove the handlers altogether and just access the XML through the DOM