We are using a script that takes in some data and converts it to a csv file, the script we are using is below:
!/bin/bash
read -p "Enter /DIR/PATH/FILENAME where you wish to copy the data: " FILENAME
cat test.txt |
awk '
BEGIN {
OFS = ","
numTags = split("insert_job job_type box_name command machine owner date_conditions condition run_calendar exclude_calendar days_of_week run_window start_times start_mins resources profile term_run_time watch_file watch_interval",tags)
for ( tagNr=1; tagNr<=numTags; tagNr++ ) {
tag = tags[tagNr]
printf "\"%s\"%s", tag, (tagNr<numTags ? OFS : ORS)
}
}
!NF || /^\/\*/ { next }
{ gsub(/^[[:space:]]+|[[:space:]]+$/,"") }
match($0,/[[:space:]]job_type:/) {
if ( jobNr++ ) {
prt()
delete tag2val
}
# save "insert_job" value
tag = substr($1,1,length($1)-1)
val = substr($0,length($1)+1,RSTART-(length($1)+2))
gsub(/^[[:space:]]+|[[:space:]]+$/,"",val)
tag2val[tag] = val
# update $0 to start with "job_type" to look like all other input
$0 = substr($0,RSTART+1)
}
{
tag = val = $0
sub(/:.*/,"",tag)
sub(/[^:]+:[[:space:]]*/,"",val)
tag2val[tag] = val
}
END { prt() }
function prt( tagNr,tag,val) {
for ( tagNr=1; tagNr<=numTags; tagNr++ ) {
tag = tags[tagNr]
val = tag2val[tag]
printf "\"%s\"%s", val, (tagNr<numTags ? OFS : ORS)
}
}
' > $FILENAME.csv
And below is the test data:
$ cat test.txt
/* ----------------- Test_A ----------------- */
insert_job: Test_A job_type: CMD
command: sleep 3000
machine: machine1
owner: user1
permission:
date_conditions: 1
days_of_week: mo,tu,we,th,fr
start_times: “06:00,08:00,10:00,12:00,14:00,16:00”
term_run_time: 1
alarm_if_fail: 1
alarm_if_terminated: 1
/* ----------------- Test_B ----------------- */
insert_job: Test_B job_type: CMD
command: echo
machine: machine2
owner: user2
permission:
date_conditions: 0
description: "Test"
std_out_file: "/tmp/$AUTO_JOB_NAME.$AUTORUN.out"
std_err_file: "/tmp/$AUTO_JOB_NAME.$AUTORUN.err"
max_run_alarm: 1
alarm_if_fail: 0
alarm_if_terminated: 0
send_notification: 1
/* ----------------- Test_c ----------------- */
insert_job: Test_c job_type: CMD
command: sleep 10
machine: machine3
owner: user3
permission:
date_conditions: 0
alarm_if_fail: 0
alarm_if_terminated: 0
/* ----------------- Test_d ----------------- */
insert_job: Test_d job_type: CMD
command: ls
machine: machine4
owner: user4
permission:
date_conditions: 0
alarm_if_fail: 1
alarm_if_terminated: 1
I’m executing the script I am getting the below results:
"insert_job","job_type","box_name","command","machine","owner","date_conditions","condition","run_calendar","exclude_calendar","days_of_week","run_window","start_times","start_mins","resources","profile","term_run_time","watch_file","watch_interval"
"Test_A","CMD","","sleep 3000","machine1","user1","1","","","","mo,tu,we,th,fr","",""06:00,08:00,10:00,12:00,14:00,16:00"","","","","1","",""
"Test_B","CMD","","echo","machine2","user2","0","","","","","","","","","","","",""
"Test_c","CMD","","sleep 10","machine3","user3","0","","","","","","","","","","","",""
"Test_d","CMD","","ls","machine4","user4","0","","","","","","","","","","","",""
If you see the output I am getting extra "" under start_times
column.
The output I am looking for is:
"insert_job","job_type","box_name","command","machine","owner","date_conditions","condition","run_calendar","exclude_calendar","days_of_week","run_window","start_times","start_mins","resources","profile","term_run_time","watch_file","watch_interval"
"Test_A","CMD","","sleep 3000","machine1","user1","1","","","","mo,tu,we,th,fr","","06:00,08:00,10:00,12:00,14:00,16:00","","","","1","",""
"Test_B","CMD","","echo","machine2","user2","0","","","","","","","","","","","",""
"Test_c","CMD","","sleep 10","machine3","user3","0","","","","","","","","","","","",""
"Test_d","CMD","","ls","machine4","user4","0","","","","","","","","","","","",""
What changes do I need to make to the script so it compensates the extra "" in the input data
You need to add gsub(/^"*|"*$/, "", val)
to your value extraction in the block below to strip leading or trailing double quotes from the value:
{
tag = val = $0
sub(/:.*/,"",tag)
sub(/[^:]+:[[:space:]]*/,"",val)
gsub(/^"*|"*$/, "", val)
tag2val[tag] = val
}
Here is your fixed script:
cat test2csv
#!/usr/bin/env -S awk -f
BEGIN {
OFS = ","
numTags = split("insert_job job_type box_name command machine owner date_conditions condition run_calendar exclude_calendar days_of_week run_window start_times start_mins resources profile term_run_time watch_file watch_interval",tags)
for ( tagNr=1; tagNr<=numTags; tagNr++ ) {
tag = tags[tagNr]
printf "\"%s\"%s", tag, (tagNr<numTags ? OFS : ORS)
}
}
!NF || /^\/\*/ { next }
{ gsub(/^[[:space:]]+|[[:space:]]+$/,"") }
match($0,/[[:space:]]job_type:/) {
if ( jobNr++ ) {
prt()
delete tag2val
}
# save "insert_job" value
tag = substr($1,1,length($1)-1)
val = substr($0,length($1)+1,RSTART-(length($1)+2))
gsub(/^[[:space:]]+|[[:space:]]+$/,"",val)
tag2val[tag] = val
# update $0 to start with "job_type" to look like all other input
$0 = substr($0,RSTART+1)
}
{
tag = val = $0
sub(/:.*/,"",tag)
sub(/[^:]+:[[:space:]]*/,"",val)
# Delete leading or trailing double quotes
gsub(/^"*|"*$/, "", val)
tag2val[tag] = val
}
END { prt() }
function prt(tagNr,tag,val) {
for ( tagNr=1; tagNr<=numTags; tagNr++ ) {
tag = tags[tagNr]
val = tag2val[tag]
printf "\"%s\"%s", val, (tagNr<numTags ? OFS : ORS)
}
}
Usage:
chmod +x test2csv
./test2csv test.txt