Search code examples
linuxbashshellawkscripting

Text file to csv script


We are using a script that takes in some data and converts it to a csv file, the script we are using is below:

!/bin/bash

  read -p "Enter /DIR/PATH/FILENAME where you wish to copy the data: " FILENAME


 cat test.txt |
    awk '
    BEGIN {
        OFS = ","
        numTags = split("insert_job job_type box_name command machine owner date_conditions condition run_calendar exclude_calendar days_of_week run_window start_times start_mins resources profile term_run_time watch_file watch_interval",tags)
        for ( tagNr=1; tagNr<=numTags; tagNr++ ) {
            tag = tags[tagNr]
            printf "\"%s\"%s", tag, (tagNr<numTags ? OFS : ORS)
        }
    }

    !NF || /^\/\*/ { next }
    { gsub(/^[[:space:]]+|[[:space:]]+$/,"") }

    match($0,/[[:space:]]job_type:/) {
        if ( jobNr++ ) {
            prt()
            delete tag2val
        }

        # save "insert_job" value
        tag = substr($1,1,length($1)-1)
        val = substr($0,length($1)+1,RSTART-(length($1)+2))
        gsub(/^[[:space:]]+|[[:space:]]+$/,"",val)
        tag2val[tag] = val

        # update $0 to start with "job_type" to look like all other input
        $0 = substr($0,RSTART+1)
    }

    {
        tag = val = $0
        sub(/:.*/,"",tag)
        sub(/[^:]+:[[:space:]]*/,"",val)
        tag2val[tag] = val
    }

    END { prt() }

    function prt(    tagNr,tag,val) {
        for ( tagNr=1; tagNr<=numTags; tagNr++ ) {
            tag = tags[tagNr]
            val = tag2val[tag]
            printf "\"%s\"%s", val, (tagNr<numTags ? OFS : ORS)
        }
    }
' > $FILENAME.csv

And below is the test data:


$ cat test.txt
/* ----------------- Test_A ----------------- */

insert_job: Test_A  job_type: CMD
command: sleep 3000
machine: machine1
owner: user1
permission:
date_conditions: 1
days_of_week: mo,tu,we,th,fr
start_times: “06:00,08:00,10:00,12:00,14:00,16:00”
term_run_time: 1
alarm_if_fail: 1
alarm_if_terminated: 1


/* ----------------- Test_B ----------------- */

insert_job: Test_B    job_type: CMD
command: echo
machine: machine2
owner: user2
permission:
date_conditions: 0
description: "Test"
std_out_file: "/tmp/$AUTO_JOB_NAME.$AUTORUN.out"
std_err_file: "/tmp/$AUTO_JOB_NAME.$AUTORUN.err"
max_run_alarm: 1
alarm_if_fail: 0
alarm_if_terminated: 0
send_notification: 1



/* ----------------- Test_c ----------------- */

insert_job: Test_c   job_type: CMD
command: sleep 10
machine: machine3
owner: user3
permission:
date_conditions: 0
alarm_if_fail: 0
alarm_if_terminated: 0


/* ----------------- Test_d ----------------- */

insert_job: Test_d   job_type: CMD
command: ls
machine: machine4
owner: user4
permission:
date_conditions: 0
alarm_if_fail: 1
alarm_if_terminated: 1



I’m executing the script I am getting the below results:


"insert_job","job_type","box_name","command","machine","owner","date_conditions","condition","run_calendar","exclude_calendar","days_of_week","run_window","start_times","start_mins","resources","profile","term_run_time","watch_file","watch_interval"
"Test_A","CMD","","sleep 3000","machine1","user1","1","","","","mo,tu,we,th,fr","",""06:00,08:00,10:00,12:00,14:00,16:00"","","","","1","",""
"Test_B","CMD","","echo","machine2","user2","0","","","","","","","","","","","",""
"Test_c","CMD","","sleep 10","machine3","user3","0","","","","","","","","","","","",""
"Test_d","CMD","","ls","machine4","user4","0","","","","","","","","","","","",""

If you see the output I am getting extra "" under start_times column.

The output I am looking for is:


"insert_job","job_type","box_name","command","machine","owner","date_conditions","condition","run_calendar","exclude_calendar","days_of_week","run_window","start_times","start_mins","resources","profile","term_run_time","watch_file","watch_interval"
"Test_A","CMD","","sleep 3000","machine1","user1","1","","","","mo,tu,we,th,fr","","06:00,08:00,10:00,12:00,14:00,16:00","","","","1","",""
"Test_B","CMD","","echo","machine2","user2","0","","","","","","","","","","","",""
"Test_c","CMD","","sleep 10","machine3","user3","0","","","","","","","","","","","",""
"Test_d","CMD","","ls","machine4","user4","0","","","","","","","","","","","",""

What changes do I need to make to the script so it compensates the extra "" in the input data


Solution

  • You need to add gsub(/^"*|"*$/, "", val) to your value extraction in the block below to strip leading or trailing double quotes from the value:

    {
        tag = val = $0
        sub(/:.*/,"",tag)
        sub(/[^:]+:[[:space:]]*/,"",val)
        gsub(/^"*|"*$/, "", val)
        tag2val[tag] = val
    }
    

    Here is your fixed script: cat test2csv

    #!/usr/bin/env -S awk -f
    
    BEGIN {
        OFS = ","
        numTags = split("insert_job job_type box_name command machine owner date_conditions condition run_calendar exclude_calendar days_of_week run_window start_times start_mins resources profile term_run_time watch_file watch_interval",tags)
        for ( tagNr=1; tagNr<=numTags; tagNr++ ) {
            tag = tags[tagNr]
            printf "\"%s\"%s", tag, (tagNr<numTags ? OFS : ORS)
        }
    }
    
    !NF || /^\/\*/ { next }
    { gsub(/^[[:space:]]+|[[:space:]]+$/,"") }
    
    match($0,/[[:space:]]job_type:/) {
        if ( jobNr++ ) {
            prt()
            delete tag2val
        }
    
        # save "insert_job" value
        tag = substr($1,1,length($1)-1)
        val = substr($0,length($1)+1,RSTART-(length($1)+2))
        gsub(/^[[:space:]]+|[[:space:]]+$/,"",val)
        tag2val[tag] = val
    
        # update $0 to start with "job_type" to look like all other input
        $0 = substr($0,RSTART+1)
    }
    
    {
        tag = val = $0
        sub(/:.*/,"",tag)
        sub(/[^:]+:[[:space:]]*/,"",val)
        # Delete leading or trailing double quotes
        gsub(/^"*|"*$/, "", val)
        tag2val[tag] = val
    }
    
    END { prt() }
    
    function prt(tagNr,tag,val) {
        for ( tagNr=1; tagNr<=numTags; tagNr++ ) {
            tag = tags[tagNr]
            val = tag2val[tag]
            printf "\"%s\"%s", val, (tagNr<numTags ? OFS : ORS)
        }
    }
    

    Usage:

    chmod +x test2csv
    ./test2csv test.txt