Search code examples
awksedtext-processing

Parse text file, change some strings to camel case, add other strings


The parsing rules are:

  1. Replace the string "public static final String" with the string "export const" if that string occurs only once.
  2. Replace the string "public static final String" with the string "export enum" if similar strings (which have underscores) occur more than once. Change the part shared by all the similar strings to the camel-case string Str1. Append Str1 to the string "export enum".
  • Keep only the string Str2 after the last underscore, just before the character '='.
  • If any of the new strings consists only of digits, prefix each Str2 of that enum with the camel-case string Str1 (see ActionEg in the sample output below).
  • Enclose the new strings with "{}" only once.
  • Change ';' to ','.

Here are a sample input (in) and the expected output (out).

in

    public static final String ACCOUNT_TYPE_CD_T01 = "01";
    public static final String ACCOUNT_TYPE_CD_T02 = "02";
    public static final String ACCOUNT_TYPE_CD_T03 = "03";
    public static final String ACCOUNT_TYPE_CD_T04 = "04";

    public static final String TEST_ING       = "TEST";
    
    public static final String ACTION_EG_LD    = "EG_Ld";
    public static final String ACTION_EG_01    = "EG_01";
    public static final String ACTION_EG_02    = "EG_02";

out

export enum AccountTypeCd {
    T01 = "01",
    T02 = "02",
    T03 = "03",
    T04 = "04",
}

export const TEST_ING = "TEST";

export enum ActionEg {
    ActionEgLD = "EG_Ld",
    ActionEg01 = "EG_01",
    ActionEg02 = "EG_02",
}

Thank you in advance!

I tried with my limited awk, sed knowledge:

# Remove every occurrence of "public static final String " from the file "in"
# and save the result as "out1".
sed -e 's#public static final String ##g' in > out1
# GNU awk (FPAT is a gawk extension): fields are runs of upper-case words joined
# by underscores; each record is replaced by its first such field in lower case
# and printed only if the result is not empty. The sed stage (GNU -r and \u)
# then replaces each "_x" with "X" to obtain camel case.
awk -v FPAT='[A-Z]+(_[A-Z]+)+' '$0=tolower($1)' out1 | sed -r 's/_(.)/\u\1/g'

Solution

  • If the order of the keys inside each enum does not matter, with GNU awk (needed for its true multidimensional arrays, i.e. arrays of arrays) you can try:

    $ cat foo.awk
    # cap(s): return s with its first character left as is and all the
    # following characters converted to lower case (e.g. "TYPE" -> "Type").
    function cap(s) {
      return sprintf("%s%s", substr(s, 1, 1), tolower(substr(s, 2)))
    }
    
    # cc(s, a): split the underscore-separated constant name s and store in
    #   a[1] the enum name: every part but the last one, each capitalized with
    #        cap() and concatenated (ACCOUNT_TYPE_CD_T01 -> AccountTypeCd)
    #   a[2] the enum key: the last part, kept exactly as written (T01, LD, 01)
    # b, n and i are local variables. A name without underscore gives a[1] = "".
    function cc(s, a,    b, n, i) {
      n = split(s, b, /_/); a[1] = ""
      for(i = 1; i < n; i++) a[1] = a[1] cap(b[i]) # camel-case
      # Do not re-capitalize the key: the expected output keeps "LD"
      # (ActionEgLD), which cap() would turn into "Ld".
      a[2] = b[n]
    }
    
    # One declaration per line; assumes the declaration starts the line so that
    # the fifth field ($5) is the constant name.
    /public static final String/ {
      # compute enum name (e) and key (k) from the constant name
      cc($5, ek); e = ek[1]; k = ek[2]
      # A name without underscore has no enum part: use the name itself as its
      # group, else all such constants would be merged in one nameless enum.
      if(e == "") e = $5
      # value (v) = everything after the first "=", without the final ";"
      # ($NF would truncate values that contain blanks, e.g. "a b")
      v = $0; sub(/^[^=]*=[[:space:]]*/, "", v); sub(/;[[:space:]]*$/, "", v)
      if(!(e in seen)) { idx[++ne] = e; seen[e] } # to preserve input order
      # assign arrays of enum key-value pairs, const values, const names
      enum[e][k] = cval[e] = v; cname[e] = $5
      if(k ~ /^[0-9]+$/) prefix[e] = e # key prefix if only-digits key
    }
    
    END {
      # Walk the groups in the order in which they were first seen.
      for(g = 1; g <= ne; g++) {
        name = idx[g]
        if(length(enum[name]) != 1) {
          # Several keys: an enum with one "key = value," line per key
          # (the traversal order of the keys is chosen by awk).
          print sep "export enum " name " {"
          for(member in enum[name]) print "\t" prefix[name] member " = " enum[name][member] ","
          print "}"
        } else {
          # Exactly one key: a plain constant that keeps its original name.
          print sep "export const " cname[name] " = " cval[name] ";"
        }
        # Every block after the first one is preceded by an empty line.
        sep = "\n"
      }
    }
    
    $ awk -f foo.awk in > out
    

    If your awk does not support arrays of arrays, things are a bit more complicated, but we can use numeric array indexes and emulate multidimensional arrays with the POSIX arr[i,j] notation. A positive side effect is that the input order is preserved in the output. With any POSIX-compliant awk:

    $ cat foo.awk
    # cap(s): keep the first character of s unchanged and lower-case the
    # remainder of the string (e.g. "ACTION" -> "Action").
    function cap(s) {
      return sprintf("%s%s", substr(s, 1, 1), tolower(substr(s, 2)))
    }
    
    # cc(s, a): split the underscore-separated constant name s and store in
    #   a[1] the enum name: every part but the last one, each capitalized with
    #        cap() and concatenated (ACTION_EG_LD -> ActionEg)
    #   a[2] the enum key: the last part, kept exactly as written (LD, 01, T01)
    # b, n and i are local variables. A name without underscore gives a[1] = "".
    function cc(s, a,    b, n, i) {
      n = split(s, b, /_/); a[1] = ""
      for(i = 1; i < n; i++) a[1] = a[1] cap(b[i]) # camel-case
      # Do not re-capitalize the key: the expected output keeps "LD"
      # (ActionEgLD), which cap() would turn into "Ld".
      a[2] = b[n]
    }
    
    # One declaration per line; assumes the declaration starts the line so that
    # the fifth field ($5) is the constant name.
    /public static final String/ {
      # compute enum name (e) and key (k) from the constant name
      cc($5, ek); e = ek[1]; k = ek[2]
      # A name without underscore has no enum part: use the name itself as its
      # group, else all such constants would be merged in one nameless enum.
      if(e == "") e = $5
      # value (v) = everything after the first "=", without the final ";"
      # ($NF would truncate values that contain blanks, e.g. "a b")
      v = $0; sub(/^[^=]*=[[:space:]]*/, "", v); sub(/;[[:space:]]*$/, "", v)
      # if new enum name: remember its index in seen[] so that later lines find it
      if(!(e in seen)) { ne += 1; seen[e] = ne; ename[ne] = e; cname[ne] = $5 }
      # Index of the group this line belongs to. Using ne here would attach the
      # key to the most recently created group, which is wrong as soon as the
      # declarations of one enum are not contiguous in the input.
      g = seen[e]
      # add key and value
      nk[g] += 1; key[g,nk[g]] = k; val[g,nk[g]] = v
      # key prefix if only-digits key
      if(k ~ /^[0-9]+$/) pfx[g] = e
    }
    
    END {
      # Groups are numbered 1..ne in input order.
      for(grp = 1; grp <= ne; grp++) {
        if(nk[grp] != 1) {
          # Several key-value pairs: emit an enum, keys in input order.
          print sep "export enum " ename[grp] " {"
          pos = 0
          while(++pos <= nk[grp]) print "\t" pfx[grp] key[grp,pos] " = " val[grp,pos] ","
          print "}"
        } else {
          # A single key-value pair: emit a constant with its original name.
          print sep "export const " cname[grp] " = " val[grp,1] ";"
        }
        # Every block after the first one is preceded by an empty line.
        sep = "\n"
      }
    }
    
    $ awk -f foo.awk in > out