I'm trying to test some probes connected via USB on an Linux device using Perl 5.28 and Linux (Debian 8). When I read out a large file buffer of the probe, often none readable ASCII signs occur like \0
or \x02
. I want to translate these signs into readable tagged text. I've written a small subroutine, but it seems to me a little bit clunky for large translation list to test every entry. Is there a better way to do that?
Example script
#!/usr/bin/env perl -w
# test-escape.pl --- test none readable chars
use strict;
sub escBuf() {
my $buf = shift;
my @numNul = $buf =~ /\0/g;
my @numCR = $buf =~ /\r/g;
$buf =~ s/\r/\n/g;
$buf =~ s/\x00/<NUL>/g;
$buf =~ s/\x01/<SOH>/g;
$buf =~ s/\x02/<STX>/g;
$buf =~ s/\x03/<ETX>/g;
$buf =~ s/\x04/<EOT>/g;
$buf =~ s/\x05/<ENQ>/g;
$buf =~ s/\x06/<ACK>/g;
$buf =~ s/\x07/<BEL>/g;
$buf =~ s/\x08/<BS>/g;
$buf =~ s/\x0B/<VT>/g;
$buf =~ s/\x0C/<FF>/g;
$buf =~ s/\x0E/<SO>/g;
$buf =~ s/\x0F/<SI>/g;
my $numNUL = @numNul;
my $numCR = @numCR;
return ($buf, $numNUL, $numCR);
}
# Buffer example
my $buffer = "\x01\r\x02This is a test with\r\n ".
"sometimes qiurks \0 inside \x0C stuff \0 and regular \x03\r\x04";
# Translate output
my ($out, $numNUL, $numCR) = &escBuf($buffer);
# Not printed correctly due to \0
# print "ORG.TEXT: '$buffer' \n\n";
# Result of the translation
print "ESC.TEXT: '$out' \n\n";
print "NUM.NUL: $numNUL\n";
print "NUM.CR: $numCR\n\n";
Result
/usr/bin/env perl -w "test-escape.pl"
ESC.TEXT: '<SOH>
<STX>This is a test with
sometimes qiurks <NUL> inside <FF> stuff <NUL> and regular <ETX>
<EOT>'
NUM.NUL: 2
NUM.CR: 3
EDIT: Adopted code with proposed solution by ikegami
#!/usr/bin/env perl -w
# test-escape.pl --- test none readable chars
use strict;
# Dictionary of non printable signs
my %NONE_ASC_DICT = (
"\x00" => "NUL", "\x01" => "SOH", "\x02" => "STX", "\x03" => "ETX",
"\x04" => "EOT", "\x05" => "ENQ", "\x06" => "ACK", "\x07" => "BEL",
"\x08" => "BS",
# Essenital for parsing "\x09" => "TAB" "\x0a" => "LF"
"\x0b" => "VT", "\x0c" => "FF", "\x0d" => "CR",
"\x0e" => "SO", "\x0f" => "SI",
"\x10" => "DLE",
"\x11" => "DC1", "\x12" => "DC2", "\x13" => "DC3", "\x14" => "DC4",
"\x15" => "NAK", "\x16" => "SYN", "\x17" => "ETB", "\x18" => "CAN",
"\x19" => "EM", "\x1A" => "SUB", "\x1B" => "ESC", "\x1C" => "FS",
"\x1D" => "GS", "\x1E" => "RS", "\x1F" => "US", "\x7F" => "DEL",
);
# Mapping of the entries and corresponding predefined REGEX
my $NONE_ASC_CLASS = join "", map quotemeta, keys(%NONE_ASC_DICT);
my $NONE_ASC_REGEX = qr/([$NONE_ASC_CLASS])/;
# Translator subroutine
sub escBuffer() {
my ($buf, $dict, $regex, $prefix, $suffix) = @_;
# Set default sprefix suffix strings if not present
$prefix //= '<'; $suffix //= '>';
# Count the real quirks
my @numNUL = $buf =~ /\0/g;
my $numNUL = @numNUL;
# Clean up mixed UNIX / DOS context
$buf =~ s/\r\n/\n/g;
$buf =~ s/\r/\n/g; # translate all remaining \r to \n
# Calc resulting number of lines
my @numLF = $buf =~ /\n/g;
my $numLF = @numLF;
# Translate the remaining non printables
$buf =~ s/$regex/ $prefix.$dict->{$1}.$suffix /eg;
# Result set translated buffer, count quirks, count lines
return ($buf, $numNUL, $numCR);
}
# Buffer example
my $buffer = "\x01\r\x02This is a test with\r\n ".
"sometimes qiurks \0 inside \x0C stuff \0 and regular \x03\r\x04";
# Translate output
my ($out, $numNUL, $numLF) = &escBuffer
($buffer, \%NONE_ASC_DICT, $NONE_ASC_REGEX);
# Result of the translation
print "ESC.TEXT: '$out' \n\n";
print "NUM.NUL: $numNUL\n";
print "NUM.LF: $numLF\n\n";
Use a table.
Setup:
my %map = (
"\x00" => "<NUL>",
...,
);
my $class = join "", map quotemeta, keys(%map);
my $re = qr/([$class])/;
Replacing:
s/$re/$map{$1}/g