I am wondering if somebody could help me re-write this in a more sensible and smarter way?
sed -e '1d; $d' <someinputfile> |
awk -F"\t" '{split($2,a,/-/); print $1","a[1]","a[2]","$3","$4","$5","$6}' |
sed -e "s/,/\",\"/g" |
sed 's/^/"/;s/$/"/' |
sed -e $'1i\\\"field_one","field_two","field_three","field_four","field_five","field_six","field_seven"'
It is possible to write the correct output already with awk and I assume there are much better ways to write this.
Shorter way? More efficient way? More correct way? POSIX compliant? GNU compliant?
If you can help please also try to explain the changes as I really want to understand "how" and "what is what" :)
Thanks!
What it does is:
If somebody wants to play with inputfile here is an example:
START 9 1997-07-27T13:37:01Z
X1 24087-27 Axgma8PYjc1yRJlUr41688 1997-07-27T13:09:00Z 9876 OK
X1 642-68 6nwPtLQTqAAKufH3ejoEeg 1997-07-27T14:31:00Z 9876 OK
X1 642-31 qfKH99UnxZTcp2AN8NNB21 1997-07-27T16:15:00Z 9876 OK
X1 642-24 PouJBByqUJkqhKHBynUesD 1997-07-27T16:15:00Z 9876 OK
X1 642-30 J7t2sJKKtcxWJr18I84A46 1997-07-27T16:15:00Z 9876 OK
X1 642-29 g7hPkNpUywvk6FvGqgpHsx 1997-07-27T16:15:00Z 9876 OK
X1 642-26 W2KM24xvmy0Q8cLV950tXq 1997-07-27T16:15:00Z 9876 OK
X1 642-25 dqu8jB5tUthIKevNAQXgld 1997-07-27T16:15:00Z 9876 OK
X1 753-32 Gh0kZkIJr8j6FSYljbpyyy 1997-07-27T16:15:00Z 9876 OK
X1 753-23 Jvl8LMh6SDHfgvLfJIHi5l 1997-07-27T16:15:00Z 9876 OK
X1 753-28 IZ83996cthjhZGYcAk97iJ 1997-07-27T16:15:00Z 9876 OK
X1 753-22 YJwokU0Dq6xiydkf3EDyxl 1997-07-27T16:15:00Z 9876 OK
X1 753-36 OZHOMirRKjA3LcXTbPJL31 1997-07-27T16:15:00Z 9876 OK
X1 753-34 LvMgT6ed1b1e3uwasGi48G 1997-07-27T16:15:00Z 9877 OK
X1 753-35 VJk4x8sTG1BJTnZYvgu6px 1997-07-27T16:15:00Z 9876 OK
X1 663-27 mkZXgTHKBjmAplrDeoQZXo 1997-07-27T16:15:00Z 9875 ERR
X1 f1K1PzQ9sp2QAv1AX0Zix4 1997-07-27T16:27:00Z 9875 ERR
DONE 69 3QXFXKQAFRSZXJLJ6JZ9NWMXR00B1V1J1FUMBQAA9DQSRCTZF8JXAWWSGHSDIPQ9
Thanks!
PS: Since I'm not sure if you will get the same output on your computer here is how it correctly looks for me when I run it and how I want it:
"field_one","field_two","field_three","field_four","field_five","field_six","field_seven"
"X1","24087","27","Axgma8PYjc1yRJlUr41688","1997-07-27T13:09:00Z","9876","OK"
"X1","642","68","6nwPtLQTqAAKufH3ejoEeg","1997-07-27T14:31:00Z","9876","OK"
"X1","642","31","qfKH99UnxZTcp2AN8NNB21","1997-07-27T16:15:00Z","9876","OK"
"X1","642","24","PouJBByqUJkqhKHBynUesD","1997-07-27T16:15:00Z","9876","OK"
"X1","642","30","J7t2sJKKtcxWJr18I84A46","1997-07-27T16:15:00Z","9876","OK"
"X1","642","29","g7hPkNpUywvk6FvGqgpHsx","1997-07-27T16:15:00Z","9876","OK"
"X1","642","26","W2KM24xvmy0Q8cLV950tXq","1997-07-27T16:15:00Z","9876","OK"
"X1","642","25","dqu8jB5tUthIKevNAQXgld","1997-07-27T16:15:00Z","9876","OK"
"X1","753","32","Gh0kZkIJr8j6FSYljbpyyy","1997-07-27T16:15:00Z","9876","OK"
"X1","753","23","Jvl8LMh6SDHfgvLfJIHi5l","1997-07-27T16:15:00Z","9876","OK"
"X1","753","28","IZ83996cthjhZGYcAk97iJ","1997-07-27T16:15:00Z","9876","OK"
"X1","753","22","YJwokU0Dq6xiydkf3EDyxl","1997-07-27T16:15:00Z","9876","OK"
"X1","753","36","OZHOMirRKjA3LcXTbPJL31","1997-07-27T16:15:00Z","9876","OK"
"X1","753","34","LvMgT6ed1b1e3uwasGi48G","1997-07-27T16:15:00Z","9877","OK"
"X1","753","35","VJk4x8sTG1BJTnZYvgu6px","1997-07-27T16:15:00Z","9876","OK"
"X1","663","27","mkZXgTHKBjmAplrDeoQZXo","1997-07-27T16:15:00Z","9875","ERR"
"X1","","","f1K1PzQ9sp2QAv1AX0Zix4","1997-07-27T16:27:00Z","9875","ERR"
One awk
idea:
awk '
BEGIN { FS="\t"
OFS="\",\"" # define output field delimiter as <doublequote> <comma> <doublequote>
# print header
print "\"field_one","field_two","field_three","field_four","field_five","field_six","field_seven\""
}
FNR>1 { if (prev) print prev
split($2,a,"-")
# reformat current line and save in variable "prev", to be printed on next pass; add <doublequote> on ends
prev= "\"" $1 OFS a[1] OFS a[2] OFS $3 OFS $4 OFS $5 OFS $6 "\""
}
' input.dat
This generates:
"field_one","field_two","field_three","field_four","field_five","field_six","field_seven"
"X1","24087","27","Axgma8PYjc1yRJlUr41688","1997-07-27T13:09:00Z","9876","OK"
"X1","642","68","6nwPtLQTqAAKufH3ejoEeg","1997-07-27T14:31:00Z","9876","OK"
"X1","642","31","qfKH99UnxZTcp2AN8NNB21","1997-07-27T16:15:00Z","9876","OK"
"X1","642","24","PouJBByqUJkqhKHBynUesD","1997-07-27T16:15:00Z","9876","OK"
"X1","642","30","J7t2sJKKtcxWJr18I84A46","1997-07-27T16:15:00Z","9876","OK"
"X1","642","29","g7hPkNpUywvk6FvGqgpHsx","1997-07-27T16:15:00Z","9876","OK"
"X1","642","26","W2KM24xvmy0Q8cLV950tXq","1997-07-27T16:15:00Z","9876","OK"
"X1","642","25","dqu8jB5tUthIKevNAQXgld","1997-07-27T16:15:00Z","9876","OK"
"X1","753","32","Gh0kZkIJr8j6FSYljbpyyy","1997-07-27T16:15:00Z","9876","OK"
"X1","753","23","Jvl8LMh6SDHfgvLfJIHi5l","1997-07-27T16:15:00Z","9876","OK"
"X1","753","28","IZ83996cthjhZGYcAk97iJ","1997-07-27T16:15:00Z","9876","OK"
"X1","753","22","YJwokU0Dq6xiydkf3EDyxl","1997-07-27T16:15:00Z","9876","OK"
"X1","753","36","OZHOMirRKjA3LcXTbPJL31","1997-07-27T16:15:00Z","9876","OK"
"X1","753","34","LvMgT6ed1b1e3uwasGi48G","1997-07-27T16:15:00Z","9877","OK"
"X1","753","35","VJk4x8sTG1BJTnZYvgu6px","1997-07-27T16:15:00Z","9876","OK"
"X1","663","27","mkZXgTHKBjmAplrDeoQZXo","1997-07-27T16:15:00Z","9875","ERR"
"X1","","","f1K1PzQ9sp2QAv1AX0Zix4","1997-07-27T16:27:00Z","9875","ERR"