rookie looking for help
I have a big file (2000+ rows, 1455 columns)
awk '{for (i=3; i<=NF; i++) if ($i<200) print} {next}' table.txt
This is what I'm working with now. For each row it loops over the columns (starting at column 3, i.e. sample 1) and prints the row once for every value less than 200 it finds in that row, instead of printing the row a single time and going straight to the next row.
awk '{for (i=3; i<=NF; i++) if ($i<200) print} {next}' table.txt
table.txt (small selection)
Gene | targ_bp | s_1 | s_2 | s_3 |
---|---|---|---|---|
GNB1 | 217 | 53 | 102 | 1121 |
GNB1 | 202 | 1112 | 96 | 1226 |
GNB1 | 163 | 1141 | 1162 | 1181 |
Output with current code:
| GNB1 | 217 | 53 | 102 | 1121 |
| GNB1 | 217 | 53 | 102 | 1121 |
| GNB1 | 202 | 1112 | 96 | 1226 |
Desired:
Gene | targ_bp | s_1 | s_2 |
---|---|---|---|
GNB1 | 217 | 53 | 102 |
GNB1 | 202 | 1112 | 96 |
Hope I am clear enough. 0=)
Use a 2-pass approach: in the first pass, identify the rows and columns that need to be printed; in the second pass, print them. For example, using any awk:
$ cat tst.awk
# Two-pass filter: the same input file is named twice on the command line.
#   Pass 1 (NR == FNR) records which rows and which columns to keep.
#   Pass 2 prints only those rows, restricted to those columns.
# A row is kept when any field from column firstSampleCol onward is < 200;
# a column is kept when it holds such a value in at least one row.
# The header row and the columns before firstSampleCol are always kept.

BEGIN {
    OFS = "\t"
    firstSampleCol = 3
}

# Pass 1: collect the row numbers and column numbers to keep.
NR == FNR {
    if ( FNR > 1 ) {
        for ( col = firstSampleCol; col <= NF; col++ ) {
            if ( $col < 200 ) {
                keepRow[FNR]
                keepCol[col]
            }
        }
    }
    else {
        # Header row: always printed, together with the leading columns.
        keepRow[FNR]
        for ( col = 1; col < firstSampleCol; col++ ) {
            keepCol[col]
        }
    }
    next
}

# Pass 2: ignore every row that was not selected in pass 1.
!(FNR in keepRow) {
    next
}

# On the header row, build the ordered list of input columns to print
# (output position -> input column number).
FNR == 1 {
    for ( col = 1; col <= NF; col++ ) {
        if ( col in keepCol ) {
            colAt[++numKept] = col
        }
    }
}

# Print the kept columns of this row, separated by OFS and ended by ORS.
{
    for ( pos = 1; pos <= numKept; pos++ ) {
        printf "%s%s", $(colAt[pos]), (pos < numKept ? OFS : ORS)
    }
}
$ awk -f tst.awk table.txt table.txt
Gene targ_bp s_1 s_2
GNB1 217 53 102
GNB1 202 1112 96