Search code examples
Tags: bash, awk, arguments, range, option-type

Pass nonconsecutive values as arguments to bash / awk script


Link to input file for testing is samples.bin
I can pass ranges of values by delimiting two integers with a - in my script:

[root@usreliance Biorad]# ./sample12.sh -s 0-9 -c 0-3
                Ch0     Ch1     Ch2     Ch3
Sample 0:       0x1a03  0x1a03  0x4a03  0x5703
Sample 1:       0x4b03  0x4403  0x1e03  0x0904
Sample 2:       0x1003  0x1903  0x4003  0xae03
Sample 3:       0x1e03  0x2603  0x3303  0xad03
Sample 4:       0x1003  0x8403  0x4303  0x6203
Sample 5:       0xe003  0x1603  0x3403  0xc403
Sample 6:       0xf802  0x3b03  0x5303  0x6103
Sample 7:       0x1003  0x1503  0x4203  0x5803
Sample 8:       0x2303  0x1f03  0x5703  0x6203
Sample 9:       0x1703  0x7303  0x3103  0x3303

[root@usreliance Biorad]# ./sample12.sh -s 0-9 -c 2-3
                Ch2     Ch3
Sample 0:       0x4a03  0x5703
Sample 1:       0x1e03  0x0904
Sample 2:       0x4003  0xae03
Sample 3:       0x3303  0xad03
Sample 4:       0x4303  0x6203
Sample 5:       0x3403  0xc403
Sample 6:       0x5303  0x6103
Sample 7:       0x4203  0x5803
Sample 8:       0x5703  0x6203
Sample 9:       0x3103  0x3303

But I also want to be able to pass nonconsecutive values separated by commas (,):

[root@usreliance Biorad]# ./sample12.sh -s 1,3,5 -c 0-3
                Ch0     Ch1     Ch2     Ch3
Sample 1:       0x4b03  0x4403  0x1e03  0x0904
Sample 3:       0x1e03  0x2603  0x3303  0xad03
Sample 5:       0xe003  0x1603  0x3403  0xc403

[root@usreliance Biorad]# ./sample12.sh -s 0-9 -c 0,3
                Ch0     Ch3
Sample 0:       0x1a03  0x5703
Sample 1:       0x4b03  0x0904
Sample 2:       0x1003  0xae03
Sample 3:       0x1e03  0xad03
Sample 4:       0x1003  0x6203
Sample 5:       0xe003  0xc403
Sample 6:       0xf802  0x6103
Sample 7:       0x1003  0x5803
Sample 8:       0x2303  0x6203
Sample 9:       0x1703  0x3303

Here is my script in its current state:

#!/usr/bin/env bash
#
# Print the values stored in samples.bin as a table: one row per sample
# (one 8-byte line of the hex dump) and one column per channel (two bytes).
#
# Usage: sample12.sh [-s FIRST-LAST] [-c FIRST-LAST] [-t]
#   -s  zero-based sample range; a missing FIRST means 0, a missing LAST
#       means "up to the end of the file"
#   -c  zero-based channel range; a missing FIRST means 0, a missing LAST
#       selects 4 channels starting at FIRST
#   -t  print only the total number of samples in samples.bin
#
# Exit status: 2 when an option is unknown or lacks its argument.

#######################################
# Dump samples.bin as text, 8 bytes per line, each byte as two hex digits.
# Outputs: hex dump on stdout
#######################################
dump_samples() {
  hexdump -v -e '8/1 "%02x " "\n"' samples.bin
}

#######################################
# Format hex-dump lines (read from stdin) as a sample/channel table.
# Arguments: $1 - sample range "FIRST-LAST" (may be empty)
#            $2 - channel range "FIRST-LAST" (may be empty)
# Outputs:   header row plus one row per selected sample on stdout
#######################################
format_samples() {
  awk -v samps="$1" -v chans="$2" '
    BEGIN {
      # arbitrary INT_MAX, used as the open-ended upper sample bound
      int_max = 2^52
      # default 4 channels as per prerequisite example
      chan_default = 4

      # split both range strings on "-"
      ns = split(samps, srange, "-")
      nc = split(chans, crange, "-")

      # A bound counts as missing only when it is absent or empty. Testing
      # the value itself (!x) would also discard a literal 0, so "0-0"
      # would silently expand to the full default range.
      first_samp = (ns >= 1 && srange[1] != "") ? srange[1] + 0 : 0
      last_samp  = (ns >= 2 && srange[2] != "") ? srange[2] + 0 : int_max
      first_chan = (nc >= 1 && crange[1] != "") ? crange[1] + 0 : 0
      last_chan  = (nc >= 2 && crange[2] != "") ? crange[2] + 0 : first_chan + chan_default - 1

      # print channel header row
      printf "\t\t"
      for (c = first_chan; c <= last_chan; c++) {
        printf("Ch%d%s", c, (c == last_chan ? "\n" : "\t"))
      }
    }

    # NR is 1-based while sample numbers are 0-based
    NR >= first_samp + 1 && NR <= last_samp + 1 {
      # each channel occupies two fields: channel k lives in $(2k+1), $(2k+2)
      start = (first_chan + 1) * 2 - 1
      end   = (last_chan + 1) * 2

      # print sample label
      printf("Sample %d:\t", NR - 1)

      # print the selected channels of this sample
      for (i = start; i <= end; i += 2) {
        j = i + 1
        printf("0x%s%s%s", $i, $j, (i == end || j == end ? "\n" : "\t"))
      }
    }
  '
}

#######################################
# Parse the command line and print either the table or the sample count.
# Arguments: the script arguments
# Returns:   0 on success, 2 on a usage error
#######################################
main() {
  local samps="" chans="" total=false
  local opt OPTIND=1

  while getopts ':c:s:t' opt; do
    case "$opt" in
      s) samps="$OPTARG" ;;
      c) chans="$OPTARG" ;;
      t) total=true ;;
      :)
        # in silent mode getopts puts the offending option letter in OPTARG
        printf 'Option "-%s" requires an argument\n' "$OPTARG" >&2
        return 2
        ;;
      *)
        printf 'Unrecognized option "-%s"\n' "$OPTARG" >&2
        return 2
        ;;
    esac
  done
  shift $(( OPTIND - 1 ))

  if [[ $total == true ]]; then
    local count
    count=$(dump_samples | wc -l)
    # keep data out of the format string; %d also copes with the leading
    # blanks some wc implementations emit
    printf 'Total Samples: %d\n' "$count"
  else
    dump_samples | format_samples "$samps" "$chans"
  fi
}

main "$@"

The user should be able to pass as many comma-separated values after -s or -c as they like, e.g.:

[root@usreliance Biorad]# ./sample12.sh -s 0,7,23,44 -c 0,2

EXTRA CREDIT: Pass multiple ranges and nonconsecutive values in one pass, e.g.:

[root@usreliance Biorad]# ./sample12.sh -s 0,7,23,44-99,214-300 -c 0,2-3

Solution

  • Focusing on just the awk portion of OP's code:

    hexdump -v -e '8/1 "%02x " "\n"' samples.bin |
    awk -v samps="${samps}" -v chans="${chans}" '

    # Expand a list such as "0,7,23,44-99" into the indices of arr[].
    #   spec - comma-separated items, each a single number or a "first-last" range
    #   arr  - array to fill; arrays are passed by reference so changes made here are maintained in parent
    # The names after the wide gap in the parameter list are locals, so this
    # function does not overwrite loop counters used by its caller.
    function fill_array(spec, arr,    items, bounds, nitems, nbounds, k, v, first, last) {

        nitems = split(spec, items, ",")                       # split variable on comma
        for (k = 1; k <= nitems; k++) {
            nbounds = split(items[k], bounds, "-")             # further split each item on hyphen
            if (nbounds == 0)                                  # empty item, eg, the gap in "1,,3"
                continue

            # "+ 0" forces numeric values: without it "03" or " 3" would be
            # stored under that exact string and never match the numeric
            # lookups done in the main rule
            first = bounds[1] + 0
            last  = (nbounds == 1 ? first : bounds[2] + 0)     # if no hyphen => single value

            for (v = first; v <= last; v++)
                arr[v]                                         # store value as array index
        }
    }

    BEGIN { OFS="\t"

            fill_array(samps, samps_arr)                       # parse variable "samps" and store as indices of samps_arr[] array
            fill_array(chans, chans_arr)                       # parse variable "chans" and store as indices of chans_arr[] array

            printf "%s", OFS                                   # print header ...
            for (i=0;i<=3;i++)                                 # loop through possible channel numbers and ...
                if (i in chans_arr)                            # if the channel number is an index in the chans_arr[] array then ...
                   printf "%sCh%d", OFS, i                     # print the associated header
            print ""                                           # terminate printf line
          }

          { if ((FNR-1) in samps_arr) {                        # if current line number (minus 1) is an index in the samps_arr[] array then ...
               printf "Sample %d:", (FNR-1)                    # print our "Sample #:" line ...
               for (i=1;i<=NF;i=i+2) {                         # loop through odd-numbered fields and ...
                   if ( (i-1)/2 in chans_arr) {                # if the associated group # is an index in the chans_arr[] array then ...
                      printf "%s0x%s%s", OFS, $(i), $(i+1)     # add to our output line
                   }
               }
               print ""                                        # terminate printf line
            }
          }
    '
    

    NOTE: OP can add more logic to:

    • determine what to do if/when invalid values are supplied
    • determine what to do if one/both input variables are empty (eg, default to all Samples and/or all Channels? abort? something else?)

    For samps='0-9'; chans='0-3' this generates:

                    Ch0     Ch1     Ch2     Ch3
    Sample 0:       0x1a03  0x1a03  0x4a03  0x5703
    Sample 1:       0x4b03  0x4403  0x1e03  0x0904
    Sample 2:       0x1003  0x1903  0x4003  0xae03
    Sample 3:       0x1e03  0x2603  0x3303  0xad03
    Sample 4:       0x1003  0x8403  0x4303  0x6203
    Sample 5:       0xe003  0x1603  0x3403  0xc403
    Sample 6:       0xf802  0x3b03  0x5303  0x6103
    Sample 7:       0x1003  0x1503  0x4203  0x5803
    Sample 8:       0x2303  0x1f03  0x5703  0x6203
    Sample 9:       0x1703  0x7303  0x3103  0x3303
    

    For samps='0-9'; chans='0,3' this generates:

                    Ch0     Ch3
    Sample 0:       0x1a03  0x5703
    Sample 1:       0x4b03  0x0904
    Sample 2:       0x1003  0xae03
    Sample 3:       0x1e03  0xad03
    Sample 4:       0x1003  0x6203
    Sample 5:       0xe003  0xc403
    Sample 6:       0xf802  0x6103
    Sample 7:       0x1003  0x5803
    Sample 8:       0x2303  0x6203
    Sample 9:       0x1703  0x3303
    

    For samps='3,6-8,5-7,2'; chans='2-3,3-6,9' this generates:

                    Ch2     Ch3
    Sample 2:       0x4003  0xae03
    Sample 3:       0x3303  0xad03
    Sample 5:       0x3403  0xc403
    Sample 6:       0x5303  0x6103
    Sample 7:       0x4203  0x5803
    Sample 8:       0x5703  0x6203