Note that this is a follow-up to the question "Parse text file, change some strings to camel case, add other strings". The parsing rules are similar but not identical.
Here are a sample input and the expected output.
input
//Comment
public static final String CUSTOMER_TYPE_CD_T_01 = "01";
public static final String CUSTOMER_TYPE_CD_TB_02 = "02";
public static final String CUSTOMER_TYPE_CD_TCC_03 = "03";
public static final String CUSTOMER_TYPE_CD_TDDD_04 = "04";
public static final String TEST_ING = "TEST";
//----------------------------------------
//Comments
//----------------------------------------
public static final int BEGIN_A_BB_C_D_EE_FFF_01 = 0;
public static final int END_A_BB_C_D_EE_FFF_01 = 2;
output
//Comment
export enum CustomerTypeCd {
T_01 = "01",
TB_02 = "02",
TCC_03 = "03",
TDDD_04 = "04",
}
export const TEST_ING = "TEST";
//----------------------------------------
//Comments
//----------------------------------------
export enum ABbCDEeFff01 {
BEGIN = 0,
END = 2,
}
I modified the answer to "Parse text file, change some strings to camel case, add other strings" as follows. It handles rules 1 and 2, but fails to handle rules 3 and 4:
# cap(s): return s with its first character kept as-is and the rest lower-cased
# (e.g. "TYPE" -> "Type"). The first character is not upper-cased, so the
# result is only capitalized when s already starts with an upper-case letter.
function cap(s) { return substr(s, 1, 1) tolower(substr(s, 2)) } # capitalization
# cc(s, a): split s on "_" and fill the caller's array a:
#   a[1] = all parts except the last, each passed through cap() and concatenated
#   a[2] = the last part passed through cap()
# b, n and i are extra parameters used only as local variables.
# No rule shown here calls cc(); the rules use cc2() and cc3().
function cc(s, a, b, n, i) { # return a[1] = enum name, a[2] = key
n = split(s, b, /_/); a[1] = ""
for(i = 1; i < n; i++) a[1] = a[1] cap(b[i]) # camel-case
a[2] = cap(b[n]) # key
}
# cc2(s, a): split s on "_" and fill the caller's array a:
#   a[1] = parts 1 .. n-2, each passed through cap() and concatenated
#   a[2] = part n-1 unchanged, then "_", then cap() of part n
# e.g. CUSTOMER_TYPE_CD_T_01 -> a[1] = "CustomerTypeCd", a[2] = "T_01".
# With only two parts (TEST_ING) the loop does not run and a[1] stays "".
# b, n and i are extra parameters used only as local variables.
function cc2(s, a, b, n, i) { # return a[1] = enum name, a[2] = key
n = split(s, b, /_/); a[1] = ""
for(i = 1; i < n - 1; i++) a[1] = a[1] cap(b[i]) # camel-case
a[2] = b[n - 1] "_" cap(b[n]) # key
}
# cc3(s, a): split s on "_" and fill the caller's array a. As written:
#   a[1] = the first part of s, unchanged (the camel-cased value built by the
#          loop is overwritten by the final assignment)
#   a[2] = part n-1 unchanged, then "_", then cap() of part n
# e.g. BEGIN_A_BB_C_D_EE_FFF_01 -> a[1] = "BEGIN", a[2] = "FFF_01".
# NOTE(review): enumkey is not in the parameter list, so it is a global variable.
# b, n and i are extra parameters used only as local variables.
function cc3(s, a, b, n, i) { # return a[1] = enum name, a[2] = key
n = split(s, b, /_/);
enumkey = b[1]
a[1] = ""
for(i = 1; i < n - 1; i++) a[1] = a[1] cap(b[i]) # camel-case
a[2] = b[n - 1] "_" cap(b[n]) # key
a[1] = enumkey
}
# Rule for lines containing "public static final String": record the constant
# under the enum name computed by cc2() from field 5 (the identifier in the
# sample input). Arrays filled here and read by the END rule:
#   ename[n] / cname[n]  enum name / original identifier of entry n
#   nk[n]                number of key-value pairs of entry n
#   key[n,j] / val[n,j]  j-th key and value of entry n
#   pfx[n]               optional prefix printed before the keys of entry n
/public static final String/ {
# compute enum name (e), key (k), value without final ";" (v)
cc2($5, ek); e = ek[1]; k = ek[2]; v = $NF; sub(/;[[:space:]]*$/, "", v)
# if new enum name
if(!(e in seen)) { seen[e] = 1; ne += 1; ename[ne] = e; cname[ne] = $5 }
# add key and value
# NOTE(review): ne is the index of the most recently created entry, so a line
# whose enum name was already seen earlier is appended to the latest entry.
nk[ne] += 1; key[ne,nk[ne]] = k; val[ne,nk[ne]] = v
# key prefix if only-digits key
# NOTE(review): a key returned by cc2() always contains "_", so this test
# cannot match here.
if(k ~ /^[0-9]+$/) pfx[ne] = e
}
# Rule for lines containing "public static final int": same bookkeeping as the
# String rule, but the enum name and key come from cc3(). Arrays filled here
# and read by the END rule: ename, cname, nk, key, val, pfx.
/public static final int/ {
# compute enum name (e), key (k), value without final ";" (v)
cc3($5, ek); e = ek[1]; k = ek[2]; v = $NF; sub(/;[[:space:]]*$/, "", v)
# if new enum name
if(!(e in seen)) { seen[e] = 1; ne += 1; ename[ne] = e; cname[ne] = $5 }
# add key and value
# NOTE(review): ne is the index of the most recently created entry, so a line
# whose enum name was already seen earlier is appended to the latest entry.
nk[ne] += 1; key[ne,nk[ne]] = k; val[ne,nk[ne]] = v
# key prefix if only-digits key
# NOTE(review): a key returned by cc3() always contains "_", so this test
# cannot match here.
if(k ~ /^[0-9]+$/) pfx[ne] = e
}
# END rule: print every recorded entry in the order it was first seen.
# An entry with exactly one key-value pair is printed as "export const" using
# the original identifier (cname); any other entry is printed as "export enum"
# with one tab-indented "key = value," line per pair.
# sep is empty for the first entry and "\n" afterwards, so every entry after
# the first is preceded by an empty line.
END {
for(i = 1; i <= ne; i++) { # for all enum/const
# if only one key-value pair => const
if(nk[i] == 1) print sep "export const " cname[i] " = " val[i,1] ";"
else { # enum
print sep "export enum " ename[i] " {"
for(j = 1; j <= nk[i]; j++) print "\t" pfx[i] key[i,j] " = " val[i,j] ","
print "}"
}
sep = "\n"
}
}
Output of `awk -V`: GNU Awk 5.0.1, API: 2.0 (GNU MPFR 4.0.2, GNU MP 6.2.0)
---------
EDIT: here is the above code formatted legibly by `gawk -o-`:
# Rule for lines containing "public static final String": record the constant
# under the enum name computed by cc2() from field 5 (the identifier in the
# sample input). Fills ename, cname, nk, key, val and pfx for the END rule.
/public static final String/ {
# compute enum name (e), key (k), value without final ";" (v)
cc2($5, ek)
e = ek[1]
k = ek[2]
v = $NF
sub(/;[[:space:]]*$/, "", v)
# if new enum name
if (! (e in seen)) {
seen[e] = 1
ne += 1
ename[ne] = e
cname[ne] = $5
}
# add key and value
# NOTE(review): ne is the index of the most recently created entry, so a line
# whose enum name was already seen earlier is appended to the latest entry.
nk[ne] += 1
key[ne, nk[ne]] = k
val[ne, nk[ne]] = v
# key prefix if only-digits key
# NOTE(review): a key returned by cc2() always contains "_", so this test
# cannot match here.
if (k ~ /^[0-9]+$/) {
pfx[ne] = e
}
}
# Rule for lines containing "public static final int": same bookkeeping as the
# String rule, but the enum name and key come from cc3(). Fills ename, cname,
# nk, key, val and pfx for the END rule.
/public static final int/ {
# compute enum name (e), key (k), value without final ";" (v)
cc3($5, ek)
e = ek[1]
k = ek[2]
v = $NF
sub(/;[[:space:]]*$/, "", v)
# if new enum name
if (! (e in seen)) {
seen[e] = 1
ne += 1
ename[ne] = e
cname[ne] = $5
}
# add key and value
# NOTE(review): ne is the index of the most recently created entry, so a line
# whose enum name was already seen earlier is appended to the latest entry.
nk[ne] += 1
key[ne, nk[ne]] = k
val[ne, nk[ne]] = v
# key prefix if only-digits key
# NOTE(review): a key returned by cc3() always contains "_", so this test
# cannot match here.
if (k ~ /^[0-9]+$/) {
pfx[ne] = e
}
}
# END rule: print every recorded entry in the order it was first seen.
# An entry with exactly one key-value pair is printed as "export const" using
# the original identifier (cname); any other entry is printed as "export enum"
# with one tab-indented "key = value," line per pair.
# sep is empty for the first entry and "\n" afterwards, so every entry after
# the first is preceded by an empty line.
END {
for (i = 1; i <= ne; i++) { # for all enum/const
# if only one key-value pair => const
if (nk[i] == 1) {
print sep "export const " cname[i] " = " val[i, 1] ";"
} else { # enum
print sep "export enum " ename[i] " {"
for (j = 1; j <= nk[i]; j++) {
print "\t" pfx[i] key[i, j] " = " val[i, j] ","
}
print "}"
}
sep = "\n"
}
}
# cap(s): return s with its first character kept as-is and the rest lower-cased
# (e.g. "TYPE" -> "Type"). The first character is not upper-cased, so the
# result is only capitalized when s already starts with an upper-case letter.
function cap(s)
{
return (substr(s, 1, 1) tolower(substr(s, 2)))
}
# capitalization
# cc(s, a): split s on "_" and fill the caller's array a:
#   a[1] = all parts except the last, each passed through cap() and concatenated
#   a[2] = the last part passed through cap()
# b, n and i are extra parameters used only as local variables.
# No rule shown here calls cc(); the rules use cc2() and cc3().
function cc(s, a, b, n, i)
{
# return a[1] = enum name, a[2] = key
n = split(s, b, /_/)
a[1] = ""
for (i = 1; i < n; i++) {
a[1] = a[1] cap(b[i]) # camel-case
}
a[2] = cap(b[n]) # key
}
# cc2(s, a): split s on "_" and fill the caller's array a:
#   a[1] = parts 1 .. n-2, each passed through cap() and concatenated
#   a[2] = part n-1 unchanged, then "_", then cap() of part n
# e.g. CUSTOMER_TYPE_CD_T_01 -> a[1] = "CustomerTypeCd", a[2] = "T_01".
# With only two parts (TEST_ING) the loop does not run and a[1] stays "".
# b, n and i are extra parameters used only as local variables.
function cc2(s, a, b, n, i)
{
# return a[1] = enum name, a[2] = key
n = split(s, b, /_/)
a[1] = ""
for (i = 1; i < n - 1; i++) {
a[1] = a[1] cap(b[i]) # camel-case
}
a[2] = b[n - 1] "_" cap(b[n]) # key
}
# cc3(s, a): split s on "_" and fill the caller's array a. As written:
#   a[1] = the first part of s, unchanged (the camel-cased value built by the
#          loop is overwritten by the final assignment)
#   a[2] = part n-1 unchanged, then "_", then cap() of part n
# e.g. BEGIN_A_BB_C_D_EE_FFF_01 -> a[1] = "BEGIN", a[2] = "FFF_01".
# NOTE(review): enumkey is not in the parameter list, so it is a global variable.
# b, n and i are extra parameters used only as local variables.
function cc3(s, a, b, n, i)
{
# return a[1] = enum name, a[2] = key
n = split(s, b, /_/)
enumkey = b[1]
a[1] = ""
for (i = 1; i < n - 1; i++) {
a[1] = a[1] cap(b[i]) # camel-case
}
a[2] = b[n - 1] "_" cap(b[n]) # key
a[1] = enumkey
}
I'd blame tiredness for those problems: while the carefully crafted `cc2()` works as intended, `cc3()` seems to be just three typos away from working:
- The loop indexing and the return handling in `cc3()` overlook that a `public static final int` name is laid out the opposite way to a `public static final String` name: first the unique key, then the common enum name. The loop therefore has to camel-case parts 2 to n, and the first part (`enumkey`) belongs in `a[2]`, not in `a[1]`.
- `a[2] = b[n - 1] "_" cap(b[n]) # key` is a copy-paste from `cc2()` and has absolutely no use here.

So by reindexing the loop, removing the unwanted line, and putting the key in `a[2]`, you'll have your awk script working.
Diff:
- for(i = 1; i < n - 1; i++) a[1] = a[1] cap(b[i]) # camel-case
- a[2] = b[n - 1] "_" cap(b[n]) # key
- a[1] = enumkey
+ for(i = 2; i <= n; i++) a[1] = a[1] cap(b[i]) # camel-case
+ a[2] = enumkey