I can't get the required output. I think the loop in this code needs to be changed, but I don't know how.
Any help would be appreciated.
# BEGIN: load the 23 column names into array[] and print them as the header row.
# NOTE(review): print_array2() only prints array[1]..array[17], so the last six
# names (GC3MW..TSMSMW) never appear in the header.
BEGIN {
split("PMP,OPTTYPE,PTD,SK,ASG,DEH,CCH,I,ASGLNUM,GGMW,MCMW,SSLO,GC2MW,MC2MW,TIF,PSTNMW,OSMSMW,GC3MW,MC3MW,GC4MW,MC4MW,MMMW,TSMSMW",array,",");
print_array2(array)
}
# "PMP" header line: read the following line and keep its first 7 characters
# (trimmed) as the PMP value.
/^PMP/ { getline
array[1]=trim_blank(substr($0,1,7)) #PMP
#print_array1(array)
}
# "OPTTYPE PTD SK ..." header line: read the following lines for as long as they
# have more than one field, slicing each one into fixed-position columns.
# NOTE(review): every pass overwrites array[2]..array[9] and nothing is printed
# inside the loop (the print_array2 call is commented out), so only the values of
# the last data line are still present when the loop ends.
# NOTE(review): the return value of getline is not checked, so the loop has no
# way to notice the end of the input.
/^OPTTYPE PTD SK / { getline
while (NF>1)
{
array[2]=trim_blank(substr($0,1,8)) #OPTTYPE
array[3]=trim_blank(substr($0,10,5))#PTD
array[4]=trim_blank(substr($0,15,10)) #SK
array[5]=trim_blank(substr($0,26,15)) #ASG
array[6]=trim_blank(substr($0,43,3)) #DEH
array[7]=trim_blank(substr($0,49,4)) #CCH
array[8]=trim_blank(substr($0,53,1)) #I
array[9]=trim_blank(substr($0,56,10)) #ASGLNUM
#print_array2(array)
getline
}
}
# NOTE(review): as laid out here the pattern stands alone on its line, so awk
# gives it the default action (print the matching line). The { ... } block that
# starts on the next line therefore has no pattern and runs for every input line.
/^GGMW MCMW SSLO/
# Read the next line, slice it into the GGMW..OSMSMW columns, read one more line
# and then print a single output row.
{ getline
array[10]=trim_blank(substr($0,1,5)) #GGMW
array[11]=trim_blank(substr($0,7,4)) #MCMW
array[12]=trim_blank(substr($0,13,4)) #SSLO
array[13]=trim_blank(substr($0,19,5)) #GC2MW
array[14]=trim_blank(substr($0,26,5)) #MC2MW
array[15]=trim_blank(substr($0,34,3)) #TIF
array[16]=trim_blank(substr($0,39,7)) #PSTNMW
array[17]=trim_blank(substr($0,47,6)) #OSMSMW
#print_array2(array)
getline
print_array2(array)
}
# Nothing is done at end of input.
END{}
# print_array1: write array[1] left-justified in a 5-character column,
# followed by a newline.
function print_array1(array)
{
printf("%-5s\n", array[1])
}
# print_array2: write one row made of array[1]..array[17], each value padded to
# its fixed column width, and finish the row with a newline.
function print_array2(array)
{
# Columns 1-9. array[7] is the only right-justified column and is followed by
# one literal space.
printf("%-5s%-8s%-6s%-10s%-15s%-4s%4s %-4s%-10s", array[1], array[2], array[3], array[4], array[5], array[6], array[7], array[8], array[9])
# Columns 10-17, then the end of the row.
printf("%-5s%-5s%-5s%-6s%-6s%-4s%-7s%-6s\n", array[10], array[11], array[12], array[13], array[14], array[15], array[16], array[17])
}
# trim_blank: return str with every leading and trailing SPACE or TAB removed.
# Blanks inside the string are left alone.
function trim_blank(str){
# blk and tab are not declared as locals, so they stay set after the call.
blk=" "
tab="\t"
# Drop the leading run first, then the trailing run.
sub("^[" blk tab "]+", "", str)
sub("[" blk tab "]+$", "", str)
return str
}
The input is as shown:
PMP
5
OPTTYPE PTD SK ASG DEH CCH I ASGLNUM
PGWPW 2 9091 224477695101 0 2
MGMPW 12 9091 224477695101 0 2 Y
GGMW MCMW SSLO GC2MW MC2MW TIF PSTNMW OSMSMW
0 0 0 1 1 0 0 0
GC3MW MC3MW GC4MW MC4MW
0 0 0 0
MMMW TSMSMW
0 0
END
PMP
7
OPTTYPE PTD SK ASG DEH CCH I ASGLNUM
PGWPW 2 9096 224477695106 0 2
MGMPW 12 9096 224477695106 0 2 Y
GGMW MCMW SSLO GC2MW MC2MW TIF PSTNMW OSMSMW
0 0 1 0 1 0 0 0
GC3MW MC3MW GC4MW MC4MW
0 0 0 0
MMMW TSMSMW
0 0
END
PMP
300
OPTTYPE PTD SK ASG DEH CCH I ASGLNUM
GGMW MCMW SSLO GC2MW MC2MW TIF PSTNMW OSMSMW
0 0 0 0 0 0 0 0
GC3MW MC3MW GC4MW MC4MW
0 0 0 0
MMMW TSMSMW
0 0
END
PMP
236
OPTTYPE PTD SK ASG DEH CCH I ASGLNUM
MGMPW 12 8025 224477606916 0 2 Y
GGMW MCMW SSLO GC2MW MC2MW TIF PSTNMW OSMSMW
0 0 1 1 1 0 0 0
GC3MW MC3MW GC4MW MC4MW
0 0 0 0
MMMW TSMSMW
0 0
END
The required output is shown below. The main point of the code is to check the lines after the header matched by `/^OPTTYPE PTD SK ASG/`:
1) If there is no data (`NF=0`), the `PMP` value and the other parameters are not shown in the printout. So no printout is shown for `PMP=300`.
2) If there is data (`NF>0`), a loop goes through every line and then prints all the data. So for `PMP=5` and `PMP=7` there are 2 records of output each; for `PMP=236` there is only one printout.
PMP OPTTYPE PTD SK ASG DEH CCH I ASGLNUM GGMW MCMW SSLO GC2MW MC2MW TIF PSTNMW OSMSMW GC3MW MC3MW GC4MW MC4MW MMMW TSMSMW
5 PGWPW 2 9091 224477695101 0 2 0 0 0 1 1 0 0 0 0 0 0 0 0 0
5 MGMPW 12 9091 224477695101 0 2 Y 0 0 0 1 1 0 0 0 0 0 0 0 0 0
7 PGWPW 2 9096 224477695106 0 2 0 0 1 0 1 0 0 0 0 0 0 0 0 0
7 MGMPW 12 9096 224477695106 0 2 Y 0 0 1 0 1 0 0 0 0 0 0 0 0 0
236 MGMPW 12 8025 224477606916 0 2 Y 0 0 1 1 1 0 0 0 0 0 0 0 0 0
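Modifying OP's current code:
- adjust the `printf` formats to match OP's expected output
- store the `OPTTYPE` block data (one entry per data line)
- add processing for the `GC3MW` and `MMMW` blocks
- simplify the `trim_blank()` function
- replace the `print_array1()` and `print_array2()` functions

One `awk` idea: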
$ cat parse.1.awk
# Set-up: array[] holds the n column names, fmt[] the matching printf widths
# (negative = left-justified). The header row is printed straight away.
BEGIN {
    n = split("PMP,OPTTYPE,PTD,SK,ASG,DEH,CCH,I,ASGLNUM,GGMW,MCMW,SSLO,GC2MW,MC2MW,TIF,PSTNMW,OSMSMW,GC3MW,MC3MW,GC4MW,MC4MW,MMMW,TSMSMW", array, ",")
    split("-8,-9,-5,-6,-15,-6,-5,-3,-10,-5,-5,-5,-6,-6,-4,-7,-7,-6,-6,-6,-6,-5,-6", fmt, ",")

    i = 1
    while (i <= n) {
        printf "%*s", fmt[i], array[i]      # "%*s" takes the width from fmt[i]
        i++
    }
    print ""                                # end of the header row
}
# Record rules. arr1[] holds the values that occur once per PMP set (column 1
# and columns 10..23); arr2[row][col] holds the OPTTYPE data rows (columns 2..9).
# cnt is the number of OPTTYPE data rows collected for the current set.

# "PMP" header: the next line carries the PMP value. Start a fresh set.
/^PMP/          { getline
                  delete arr1
                  delete arr2
                  cnt = 0                     # no data rows yet, even if no OPTTYPE header follows
                  arr1[1] = $1                                          # PMP
                }

# "OPTTYPE" header: collect every following line that has more than one field.
# getline's return value is tested so the loop also stops at end of input;
# otherwise NF would keep its last value and the loop would never finish.
/^OPTTYPE/      { cnt = 0
                  while ((getline) > 0 && NF > 1) {
                      cnt++
                      arr2[cnt][2] = trim_blank(substr($0,  1,  8))    # OPTTYPE
                      arr2[cnt][3] = trim_blank(substr($0, 10,  5))    # PTD
                      arr2[cnt][4] = trim_blank(substr($0, 15, 10))    # SK
                      arr2[cnt][5] = trim_blank(substr($0, 26, 15))    # ASG
                      arr2[cnt][6] = trim_blank(substr($0, 43,  3))    # DEH
                      arr2[cnt][7] = trim_blank(substr($0, 49,  4))    # CCH
                      arr2[cnt][8] = trim_blank(substr($0, 53,  1))    # I
                      arr2[cnt][9] = trim_blank(substr($0, 56, 10))    # ASGLNUM
                  }
                }

# "GGMW" header: the next line carries columns 10..17.
/^GGMW/         { getline
                  arr1[10] = trim_blank(substr($0,  1, 5))             # GGMW
                  arr1[11] = trim_blank(substr($0,  7, 4))             # MCMW
                  arr1[12] = trim_blank(substr($0, 13, 4))             # SSLO
                  arr1[13] = trim_blank(substr($0, 19, 5))             # GC2MW
                  arr1[14] = trim_blank(substr($0, 26, 5))             # MC2MW
                  arr1[15] = trim_blank(substr($0, 33, 4))             # TIF
                  arr1[16] = trim_blank(substr($0, 38, 7))             # PSTNMW
                  arr1[17] = trim_blank(substr($0, 46, 7))             # OSMSMW
                }

# "GC3MW" header: the next line carries columns 18..21.
/^GC3MW/        { getline
                  arr1[18] = trim_blank(substr($0,  1, 6))             # GC3MW
                  arr1[19] = trim_blank(substr($0,  8, 6))             # MC3MW
                  arr1[20] = trim_blank(substr($0, 15, 6))             # GC4MW
                  arr1[21] = trim_blank(substr($0, 22, 6))             # MC4MW
                }

# "MMMW" header: the next line carries columns 22..23.
/^MMMW/         { getline
                  arr1[22] = trim_blank(substr($0, 1, 5))              # MMMW
                  arr1[23] = trim_blank(substr($0, 7, 7))              # TSMSMW
                }

# "END" trailer: print one output line per collected OPTTYPE row.
/^END/          { print_lines() }
# print_lines: write one output line per collected OPTTYPE row (1..cnt).
# Column 1 and columns 10..n come from arr1[], columns 2..9 from arr2[row][].
# i and j are locals.
function print_lines(      i,j) {
    for (i = 1; i <= cnt; i++) {
        printf "%*s", fmt[1], arr1[1]

        j = 2
        while (j <= 9) {                    # per-row columns
            printf "%*s", fmt[j], arr2[i][j]
            j++
        }
        while (j <= n) {                    # j is 10 here: per-set columns
            printf "%*s", fmt[j], arr1[j]
            j++
        }
        print ""
    }
}
# trim_blank: return str without its leading and trailing whitespace.
function trim_blank(str) {
    sub(/^[[:space:]]+/, "", str)       # leading run
    sub(/[[:space:]]+$/, "", str)       # trailing run
    return str
}
NOTES:
- requires GNU `awk` for two-dimensional array support (`arr2[][]`)
- while there are potential pitfalls when using `getline` (see link in Ed Morton's comment), there aren't any issues (in this case) that keep us from generating the desired results

This generates:
$ awk -f parse.1.awk input.txt
PMP OPTTYPE PTD SK ASG DEH CCH I ASGLNUM GGMW MCMW SSLO GC2MW MC2MW TIF PSTNMW OSMSMW GC3MW MC3MW GC4MW MC4MW MMMW TSMSMW
5 PGWPW 2 9091 224477695101 0 2 0 0 0 1 1 0 0 0 0 0 0 0 0 0
5 MGMPW 12 9091 224477695101 0 2 Y 0 0 0 1 1 0 0 0 0 0 0 0 0 0
7 PGWPW 2 9096 224477695106 0 2 0 0 1 0 1 0 0 0 0 0 0 0 0 0
7 MGMPW 12 9096 224477695106 0 2 Y 0 0 1 0 1 0 0 0 0 0 0 0 0 0
236 MGMPW 12 8025 224477606916 0 2 Y 0 0 1 1 1 0 0 0 0 0 0 0 0 0
An alternative approach:
- replace `arr1[]` with 4x variables (`col_1`, `col_10_17`, `col_18_21` and `col_22_23`)
- replace `arr2[][]` with a one-dimensional array `arr[]` (eliminates need for GNU `awk`)
- replace the `printf` calls with comparable `sprintf()` calls
- eliminate the `getline` calls

One `awk` idea:
$ cat parse.2.awk
# Set-up: array[] holds the n column names, fmt[] the matching printf widths
# (negative = left-justified). The header row is printed straight away.
BEGIN {
    n = split("PMP,OPTTYPE,PTD,SK,ASG,DEH,CCH,I,ASGLNUM,GGMW,MCMW,SSLO,GC2MW,MC2MW,TIF,PSTNMW,OSMSMW,GC3MW,MC3MW,GC4MW,MC4MW,MMMW,TSMSMW", array, ",")
    split("-8,-9,-5,-6,-15,-6,-5,-3,-10,-5,-5,-5,-6,-6,-4,-7,-7,-6,-6,-6,-6,-5,-6", fmt, ",")

    i = 1
    while (i <= n) {
        printf "%*s", fmt[i], array[i]      # "%*s" takes the width from fmt[i]
        i++
    }
    print ""                                # end of the header row
}
# State machine. blk names the block whose data lines are currently being read
# ("" = none). Header lines only switch the state; data lines are formatted by
# the rule for the current state. cnt counts the OPTTYPE rows of the current set.

# A blank line closes the current block.
NF==0           { blk = "" }

# "PMP" header: start a new set and forget everything from the previous one, so
# a section that is missing from this set cannot reuse stale values.
/^PMP/          { blk = "PMP"
                  cnt = 0
                  delete arr
                  col_1 = col_10_17 = col_18_21 = col_22_23 = ""
                  next
                }
/^OPTTYPE/      { blk = "OPTTYPE" ; next }
/^GGMW/         { blk = "GGMW"    ; next }
/^GC3MW/        { blk = "GC3MW"   ; next }
/^MMMW/         { blk = "MMMW"    ; next }

# "END" trailer: print the set once, then go idle so that a later non-blank,
# non-header line cannot trigger the same output again.
/^END/          { print_lines()
                  blk = ""
                  next
                }

# Data lines, one rule per state. Every value is trimmed and padded to its
# column width, so the stored strings can simply be concatenated when printing.
blk == "PMP"     { col_1 = sprintf( "%*s", fmt[1], $1 ) }

blk == "OPTTYPE" { arr[++cnt] = sprintf( "%*s", fmt[2], trim_blank(substr($0, 1, 8)) ) \
                                sprintf( "%*s", fmt[3], trim_blank(substr($0,10, 5)) ) \
                                sprintf( "%*s", fmt[4], trim_blank(substr($0,15,10)) ) \
                                sprintf( "%*s", fmt[5], trim_blank(substr($0,26,15)) ) \
                                sprintf( "%*s", fmt[6], trim_blank(substr($0,43, 3)) ) \
                                sprintf( "%*s", fmt[7], trim_blank(substr($0,49, 4)) ) \
                                sprintf( "%*s", fmt[8], trim_blank(substr($0,53, 1)) ) \
                                sprintf( "%*s", fmt[9], trim_blank(substr($0,56,10)) )
                 }

blk == "GGMW"    { col_10_17 = sprintf( "%*s", fmt[10], trim_blank(substr($0, 1,5)) ) \
                               sprintf( "%*s", fmt[11], trim_blank(substr($0, 7,4)) ) \
                               sprintf( "%*s", fmt[12], trim_blank(substr($0,13,4)) ) \
                               sprintf( "%*s", fmt[13], trim_blank(substr($0,19,5)) ) \
                               sprintf( "%*s", fmt[14], trim_blank(substr($0,26,5)) ) \
                               sprintf( "%*s", fmt[15], trim_blank(substr($0,33,4)) ) \
                               sprintf( "%*s", fmt[16], trim_blank(substr($0,38,7)) ) \
                               sprintf( "%*s", fmt[17], trim_blank(substr($0,46,7)) )
                 }

blk == "GC3MW"   { col_18_21 = sprintf( "%*s", fmt[18], trim_blank(substr($0, 1,6)) ) \
                               sprintf( "%*s", fmt[19], trim_blank(substr($0, 8,6)) ) \
                               sprintf( "%*s", fmt[20], trim_blank(substr($0,15,6)) ) \
                               sprintf( "%*s", fmt[21], trim_blank(substr($0,22,6)) )
                 }

blk == "MMMW"    { col_22_23 = sprintf( "%*s", fmt[22], trim_blank(substr($0, 1,5)) ) \
                               sprintf( "%*s", fmt[23], trim_blank(substr($0, 7,7)) )
                 }
# print_lines: write one output line per collected OPTTYPE row (arr[1..cnt]),
# each wrapped in the per-set column strings. i and j are locals (j is unused).
function print_lines(      i,j) {
    i = 0
    while (++i <= cnt)
        printf "%s%s%s%s%s%s", col_1, arr[i], col_10_17, col_18_21, col_22_23, ORS
}
# trim_blank: return str without its leading and trailing whitespace.
function trim_blank(str) {
    sub(/^[[:space:]]+/, "", str)       # leading run
    sub(/[[:space:]]+$/, "", str)       # trailing run
    return str
}
This generates:
$ awk -f parse.2.awk input.txt
PMP OPTTYPE PTD SK ASG DEH CCH I ASGLNUM GGMW MCMW SSLO GC2MW MC2MW TIF PSTNMW OSMSMW GC3MW MC3MW GC4MW MC4MW MMMW TSMSMW
5 PGWPW 2 9091 224477695101 0 2 0 0 0 1 1 0 0 0 0 0 0 0 0 0
5 MGMPW 12 9091 224477695101 0 2 Y 0 0 0 1 1 0 0 0 0 0 0 0 0 0
7 PGWPW 2 9096 224477695106 0 2 0 0 1 0 1 0 0 0 0 0 0 0 0 0
7 MGMPW 12 9096 224477695106 0 2 Y 0 0 1 0 1 0 0 0 0 0 0 0 0 0
236 MGMPW 12 8025 224477606916 0 2 Y 0 0 1 1 1 0 0 0 0 0 0 0 0 0
Yet another alternative:
- replace the `substr()` calls by parsing based on fixed width fields

One `awk` idea:
$ cat parse.3.awk
# Set-up: array[] holds the n column names, fmt[] the matching printf widths
# (negative = left-justified). The header row is printed straight away.
BEGIN {
    n = split("PMP,OPTTYPE,PTD,SK,ASG,DEH,CCH,I,ASGLNUM,GGMW,MCMW,SSLO,GC2MW,MC2MW,TIF,PSTNMW,OSMSMW,GC3MW,MC3MW,GC4MW,MC4MW,MMMW,TSMSMW", array, ",")
    split("-8,-9,-5,-6,-15,-6,-5,-3,-10,-5,-5,-5,-6,-6,-4,-7,-7,-6,-6,-6,-6,-5,-6", fmt, ",")

    i = 1
    while (i <= n) {
        printf "%*s", fmt[i], array[i]      # "%*s" takes the width from fmt[i]
        i++
    }
    print ""                                # end of the header row
}
# State machine (GNU awk). blk names the block whose data lines are currently
# being read ("" = none). Each header line switches the state and sets
# FIELDWIDTHS so that the following data lines are split into fixed-width
# fields. cnt counts the OPTTYPE rows of the current set.

# A blank line closes the current block.
NF==0           { blk = "" }

# "PMP" header: start a new set and forget everything from the previous one, so
# a section that is missing from this set cannot reuse stale values.
/^PMP/          { cnt = 0
                  delete arr
                  col_1 = col_10_17 = col_18_21 = col_22_23 = ""
                  blk = "PMP"     ; FIELDWIDTHS = "20"                 ; next
                }
/^OPTTYPE/      { blk = "OPTTYPE" ; FIELDWIDTHS = "9 5 11 16 6 4 3 10" ; next }
/^GGMW/         { blk = "GGMW"    ; FIELDWIDTHS = "6 6 6 7 7 5 8 7"    ; next }
/^GC3MW/        { blk = "GC3MW"   ; FIELDWIDTHS = "7 7 7 6"            ; next }
/^MMMW/         { blk = "MMMW"    ; FIELDWIDTHS = "6 7"                ; next }

# "END" trailer: print the set once, then go idle so that a later non-blank,
# non-header line cannot trigger the same output again.
/^END/          { print_lines()
                  blk = ""
                  FS = FS         # gawk's documented way to return to FS-based splitting
                  next
                }

# Data lines, one rule per state. Every field is trimmed and padded to its
# column width, so the stored strings can simply be concatenated when printing.
blk == "PMP"     { col_1 = sprintf( "%*s", fmt[1], trim_blank($1) ) }

blk == "OPTTYPE" { arr[++cnt] = sprintf( "%*s", fmt[2], trim_blank($1) ) \
                                sprintf( "%*s", fmt[3], trim_blank($2) ) \
                                sprintf( "%*s", fmt[4], trim_blank($3) ) \
                                sprintf( "%*s", fmt[5], trim_blank($4) ) \
                                sprintf( "%*s", fmt[6], trim_blank($5) ) \
                                sprintf( "%*s", fmt[7], trim_blank($6) ) \
                                sprintf( "%*s", fmt[8], trim_blank($7) ) \
                                sprintf( "%*s", fmt[9], trim_blank($8) )
                 }

blk == "GGMW"    { col_10_17 = sprintf( "%*s", fmt[10], trim_blank($1) ) \
                               sprintf( "%*s", fmt[11], trim_blank($2) ) \
                               sprintf( "%*s", fmt[12], trim_blank($3) ) \
                               sprintf( "%*s", fmt[13], trim_blank($4) ) \
                               sprintf( "%*s", fmt[14], trim_blank($5) ) \
                               sprintf( "%*s", fmt[15], trim_blank($6) ) \
                               sprintf( "%*s", fmt[16], trim_blank($7) ) \
                               sprintf( "%*s", fmt[17], trim_blank($8) )
                 }

blk == "GC3MW"   { col_18_21 = sprintf( "%*s", fmt[18], trim_blank($1) ) \
                               sprintf( "%*s", fmt[19], trim_blank($2) ) \
                               sprintf( "%*s", fmt[20], trim_blank($3) ) \
                               sprintf( "%*s", fmt[21], trim_blank($4) )
                 }

blk == "MMMW"    { col_22_23 = sprintf( "%*s", fmt[22], trim_blank($1) ) \
                               sprintf( "%*s", fmt[23], trim_blank($2) )
                 }
# print_lines: write one output line per collected OPTTYPE row (arr[1..cnt]),
# each wrapped in the per-set column strings. i and j are locals (j is unused).
function print_lines(      i,j) {
    i = 0
    while (++i <= cnt)
        printf "%s%s%s%s%s%s", col_1, arr[i], col_10_17, col_18_21, col_22_23, ORS
}
# trim_blank: return str without its leading and trailing whitespace.
function trim_blank(str) {
    sub(/^[[:space:]]+/, "", str)       # leading run
    sub(/[[:space:]]+$/, "", str)       # trailing run
    return str
}
NOTE: requires GNU `awk` for `FIELDWIDTHS` support
This generates:
$ awk -f parse.3.awk input.txt
PMP OPTTYPE PTD SK ASG DEH CCH I ASGLNUM GGMW MCMW SSLO GC2MW MC2MW TIF PSTNMW OSMSMW GC3MW MC3MW GC4MW MC4MW MMMW TSMSMW
5 PGWPW 2 9091 224477695101 0 2 0 0 0 1 1 0 0 0 0 0 0 0 0 0
5 MGMPW 12 9091 224477695101 0 2 Y 0 0 0 1 1 0 0 0 0 0 0 0 0 0
7 PGWPW 2 9096 224477695106 0 2 0 0 1 0 1 0 0 0 0 0 0 0 0 0
7 MGMPW 12 9096 224477695106 0 2 Y 0 0 1 0 1 0 0 0 0 0 0 0 0 0
236 MGMPW 12 8025 224477606916 0 2 Y 0 0 1 1 1 0 0 0 0 0 0 0 0 0