Search code examples
bash sh

Remove duplicate "from unit" lines, keeping only the last one before the data


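I want to remove consecutive header lines (those containing "from unit" and, as the expected output below shows, "within unit"), keeping only the last one before each block of data, like this: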

input file:

 within unit  1
   9. BD (   2) C   3 - C   4        /***. BD*(   2) C   3 - C   4           14.88    0.25    
 173. LP (   2) O  11                /***. BD*(   1) C   3 - C   4           11.15    0.81 
 from unit  1 to unit  2
   2. BD (   1) C   1 - C   4        /171. LP*(   1) H   8                    8.16    0.66 
 ***. BD*(   2) C   3 - C   4        /171. LP*(   1) H   8                  204.76    0.07 
 from unit  1 to unit  3
 from unit  1 to unit  4
 172. LP (   1) O  11                /***. BD*(   1) N  53 - H  73           10.56    1.15    
 173. LP (   2) O  11                /***. BD*(   1) N  53 - H  73           15.67    0.74    
 from unit  1 to unit  5
 from unit  1 to unit  6
 172. LP (   1) O  11                /***. BD*(   1) O  84 - H  85           11.04    1.19    
 from unit  1 to unit  7
 from unit  2 to unit  1
 within unit  2
 from unit  2 to unit  3
 from unit  3 to unit  1
 from unit  3 to unit  2
  40. BD (   2) C  21 - N  22        /171. LP*(   1) H   8                   40.26    0.40  

output file:

 within unit  1
   9. BD (   2) C   3 - C   4        /***. BD*(   2) C   3 - C   4           14.88    0.25   
 173. LP (   2) O  11                /***. BD*(   1) C   3 - C   4           11.15    0.81  
 from unit  1 to unit  2
   2. BD (   1) C   1 - C   4        /171. LP*(   1) H   8                    8.16    0.66   
 ***. BD*(   2) C   3 - C   4        /171. LP*(   1) H   8                  204.76    0.07  
 from unit  1 to unit  4
 172. LP (   1) O  11                /***. BD*(   1) N  53 - H  73           10.56    1.15   
 173. LP (   2) O  11                /***. BD*(   1) N  53 - H  73           15.67    0.74  
 from unit  1 to unit  6
 172. LP (   1) O  11                /***. BD*(   1) O  84 - H  85           11.04    1.19  
 from unit  3 to unit  2
  40. BD (   2) C  21 - N  22        /171. LP*(   1) H   8                   40.26    0.40 

#!/bin/bash

# Global associative arrays addressed with "row,col" string keys:
#   m - cells parsed from the input file (filled by read_matrix)
#   f - spare matrix using the same key scheme
declare -A m
declare -A f

#######################################
# Read whitespace-separated fields from stdin into the matrix m.
# Cell m[row,col] holds field number col (0-based) of input line number
# row (0-based). A blank line consumes a row index but stores no cells.
# Globals:   m (written) - must already be declared with `declare -A`
# Arguments: none (input is taken from stdin)
# Returns:   0
#######################################
read_matrix() {
    local row=0
    local col
    local -a fields

    # `read -a` splits on IFS without pathname expansion, so tokens such as
    # "***." or "*" are stored literally instead of being matched against
    # files in the current directory. The `||` clause keeps a final line
    # that lacks a trailing newline.
    while read -r -a fields || (( ${#fields[@]} > 0 )); do
        for col in "${!fields[@]}"; do
            m[$row,$col]=${fields[col]}
        done
        row=$((row + 1))
    done
}

 #######################################
 # Copy stdin to stdout, dropping every "from unit"/"within unit" header
 # line that is immediately followed by another such header, so each run
 # of consecutive headers collapses to its last member. Every other line
 # is passed through with its whitespace intact, which keeps the column
 # alignment of the data rows. A header run at the very end of the input
 # still yields its last header.
 # Arguments: none
 # Outputs:   filtered lines on stdout
 # Returns:   0
 #######################################
 filter_unit_headers() {
     local -r header_re='^[[:space:]]*(from|within)[[:space:]]+unit([[:space:]]|$)'
     local line
     local held=''
     local have_held=0

     while IFS= read -r line || [[ -n "$line" ]]; do
         if [[ "$line" =~ $header_re ]]; then
             # A newer header replaces the one being held back.
             held=$line
             have_held=1
             continue
         fi
         # First non-header line after a run: release the surviving header.
         if (( have_held )); then
             printf '%s\n' "$held"
             have_held=0
         fi
         printf '%s\n' "$line"
     done

     if (( have_held )); then
         printf '%s\n' "$held"
     fi
 }

 # Usage: <script> input_file   -- the filtered text is written to ./out.1
 # The filter works on whole lines, so the field matrix built by
 # read_matrix is not needed here.
 if (( $# >= 1 )); then
     # Truncate rather than append so re-running does not pile results up.
     filter_unit_headers < "$1" > out.1
 else
     printf 'usage: %s input_file\n' "${0##*/}" >&2
     # Final command of the script: leaves a non-zero exit status.
     false
 fi

Solution

  • Using GNU sed

    # How it works: whenever the pattern space contains a "from unit" or
    # "within unit" header, N appends the next input line; if that produces
    # two consecutive header lines, s/// keeps only the second one; `ba`
    # then loops back to label `a`. Because the pattern space still holds a
    # header after each pass, it keeps growing until the end of the input.
    # NOTE(review): relies on GNU sed printing the pattern space when N
    # runs out of input - confirm before using another sed.
    $ sed -E ':a;/(from|within) unit/{N;s/ *(from|within) unit[^\n]*\n( *(from|within) unit.*)/\2/;ba}' input_file
     within unit  1
       9. BD (   2) C   3 - C   4        /***. BD*(   2) C   3 - C   4           14.88    0.25
     173. LP (   2) O  11                /***. BD*(   1) C   3 - C   4           11.15    0.81
     from unit  1 to unit  2
       2. BD (   1) C   1 - C   4        /171. LP*(   1) H   8                    8.16    0.66
     ***. BD*(   2) C   3 - C   4        /171. LP*(   1) H   8                  204.76    0.07
     from unit  1 to unit  4
     172. LP (   1) O  11                /***. BD*(   1) N  53 - H  73           10.56    1.15
     173. LP (   2) O  11                /***. BD*(   1) N  53 - H  73           15.67    0.74
     from unit  1 to unit  6
     172. LP (   1) O  11                /***. BD*(   1) O  84 - H  85           11.04    1.19
     from unit  3 to unit  2
      40. BD (   2) C  21 - N  22        /171. LP*(   1) H   8                   40.26    0.40