Search code examples
regex, awk, find, replace, ls

Replace arbitrary number of spaces with '#' (Awk for >1 spaces)


I'm trying to:

  • Take a file/directory listing and replace every run of more than one space (NOT \t) with '#', for all files modified within the last 30 minutes.

Example output of: find / -mmin -30 -ls

310116371    0 -r--r--r--   1 root     root            0 Jan 14 0814 /proc/4732/wchan
310116373    0 -r--r--r--   1 root     root            0 Jan 14 0814 /proc/4732/schedstat
310116374    0 -r--r--r--   1 root     root            0 Jan 14 0814 /proc/4732/cpuset
310116383    0 -r--r--r--   1 root     root            0 Jan 14 0814 /proc/4732/oom_score
310116384    0 -rw-r--r--   1 root     root            0 Jan 14 0814 /proc/4732/oom_adj
310116382    0 -rw-r--r--   1 root     root            0 Jan 14 0814 /proc/4732/loginuid
310116416    0 -r--------   1 root     root            0 Jan 14 0814 /proc/4732/limits
310116418    0 -r--r--r--   1 root     root            0 Jan 14 0814 /proc/4732/io

What I want:

310116371#0#-r--r--r--#1#root#root#0#Jan 14 0814#/proc/4732/wchan

Specifically, I want to use {awk, sed, tr} to replace spaces where the amount of space is greater than 1. The only problem is, there is a single space after the timestamp arg for the directory listing...

Is there an existing method that can do this?

  • Log file being parsed consists of ~26k entries
  • Output being pasted into an .XLS file

What I've tried:

find / -mmin -5 -ls |  awk '{gsub(/s+/,"#",$0); print;}'
find / -mmin -5 -ls |  awk '{gsub(/[' ']+/,"#")}1'
find / -mmin -5 -ls |  awk '{gsub(/["  "]+/,"#")}1'
find / -mmin -5 -ls | sed "s/^ *//;s/ *$//;s/ \{1,\}/#/g"
find / -mmin -5 -ls |  awk -D '{gsub([ +],"#",$0); print;}' 
find / -mmin -5 -ls |  awk '{gsub(/\t/,"#",$0); print;}'

The problem: - The output of find / -mmin -5 -ls is not {tab, comma} delimited by default


Any suggestions about where I am going wrong?


Solution

  • This does the trick for me, using awk 'gsub(/\s+/,"#")' (note that \s is a GNU awk extension):

    $ awk 'gsub(/\s+/,"#")' file
    310116371#0#-r--r--r--#1#root#root#0#Jan#14#0814#/proc/4732/wchan
    310116373#0#-r--r--r--#1#root#root#0#Jan#14#0814#/proc/4732/schedstat
    310116374#0#-r--r--r--#1#root#root#0#Jan#14#0814#/proc/4732/cpuset
    310116383#0#-r--r--r--#1#root#root#0#Jan#14#0814#/proc/4732/oom_score
    310116384#0#-rw-r--r--#1#root#root#0#Jan#14#0814#/proc/4732/oom_adj
    310116382#0#-rw-r--r--#1#root#root#0#Jan#14#0814#/proc/4732/loginuid
    310116416#0#-r--------#1#root#root#0#Jan#14#0814#/proc/4732/limits
    310116418#0#-r--r--r--#1#root#root#0#Jan#14#0814#/proc/4732/io
    

    Or awk 'gsub(/\s{2,}/,"#")' for:

    Specifically, I want to use {awk, sed, tr} to replace spaces where the amount of space is greater than 1 (so, I can preserve the timestamp arg)

    $ awk 'gsub(/\s{2,}/,"#")' file
    310116371#0 -r--r--r--#1 root#root#0 Jan 14 0814 /proc/4732/wchan
    310116373#0 -r--r--r--#1 root#root#0 Jan 14 0814 /proc/4732/schedstat
    310116374#0 -r--r--r--#1 root#root#0 Jan 14 0814 /proc/4732/cpuset
    310116383#0 -r--r--r--#1 root#root#0 Jan 14 0814 /proc/4732/oom_score
    310116384#0 -rw-r--r--#1 root#root#0 Jan 14 0814 /proc/4732/oom_adj
    310116382#0 -rw-r--r--#1 root#root#0 Jan 14 0814 /proc/4732/loginuid
    310116416#0 -r--------#1 root#root#0 Jan 14 0814 /proc/4732/limits
    310116418#0 -r--r--r--#1 root#root#0 Jan 14 0814 /proc/4732/io
    
    # Single spacing
    $ awk 'gsub(/\s{2,}/," ")' file
    310116371 0 -r--r--r-- 1 root root 0 Jan 14 0814 /proc/4732/wchan
    310116373 0 -r--r--r-- 1 root root 0 Jan 14 0814 /proc/4732/schedstat
    310116374 0 -r--r--r-- 1 root root 0 Jan 14 0814 /proc/4732/cpuset
    310116383 0 -r--r--r-- 1 root root 0 Jan 14 0814 /proc/4732/oom_score
    310116384 0 -rw-r--r-- 1 root root 0 Jan 14 0814 /proc/4732/oom_adj
    310116382 0 -rw-r--r-- 1 root root 0 Jan 14 0814 /proc/4732/loginuid
    310116416 0 -r-------- 1 root root 0 Jan 14 0814 /proc/4732/limits
    310116418 0 -r--r--r-- 1 root root 0 Jan 14 0814 /proc/4732/io
    

    Edit:

    How about just setting the OFS variable:

    # Hash separated
    $ awk 'BEGIN{OFS="#"}{print $1,$2,$3,$4,$5,$6,$7,$8,$9,$10,$11}' file
    310116371#0#-r--r--r--#1#root#root#0#Jan#14#0814#/proc/4732/wchan
    310116373#0#-r--r--r--#1#root#root#0#Jan#14#0814#/proc/4732/schedstat
    310116374#0#-r--r--r--#1#root#root#0#Jan#14#0814#/proc/4732/cpuset
    310116383#0#-r--r--r--#1#root#root#0#Jan#14#0814#/proc/4732/oom_score
    310116384#0#-rw-r--r--#1#root#root#0#Jan#14#0814#/proc/4732/oom_adj
    310116382#0#-rw-r--r--#1#root#root#0#Jan#14#0814#/proc/4732/loginuid
    310116416#0#-r--------#1#root#root#0#Jan#14#0814#/proc/4732/limits
    310116418#0#-r--r--r--#1#root#root#0#Jan#14#0814#/proc/4732/io
    
    # Hash separated, accounting for the spaces in the date
    $ awk 'BEGIN{OFS="#"}{print $1,$2,$3,$4,$5,$6,$7,$8" "$9" "$10,$11}' file
    310116371#0#-r--r--r--#1#root#root#0#Jan 14 0814#/proc/4732/wchan
    310116373#0#-r--r--r--#1#root#root#0#Jan 14 0814#/proc/4732/schedstat
    310116374#0#-r--r--r--#1#root#root#0#Jan 14 0814#/proc/4732/cpuset
    310116383#0#-r--r--r--#1#root#root#0#Jan 14 0814#/proc/4732/oom_score
    310116384#0#-rw-r--r--#1#root#root#0#Jan 14 0814#/proc/4732/oom_adj
    310116382#0#-rw-r--r--#1#root#root#0#Jan 14 0814#/proc/4732/loginuid
    310116416#0#-r--------#1#root#root#0#Jan 14 0814#/proc/4732/limits
    310116418#0#-r--r--r--#1#root#root#0#Jan 14 0814#/proc/4732/io
    
    # Single space separated
    $ awk 'BEGIN{OFS=" "}{print $1,$2,$3,$4,$5,$6,$7,$8,$9,$10,$11}' file
    310116371 0 -r--r--r-- 1 root root 0 Jan 14 0814 /proc/4732/wchan
    310116373 0 -r--r--r-- 1 root root 0 Jan 14 0814 /proc/4732/schedstat
    310116374 0 -r--r--r-- 1 root root 0 Jan 14 0814 /proc/4732/cpuset
    310116383 0 -r--r--r-- 1 root root 0 Jan 14 0814 /proc/4732/oom_score
    310116384 0 -rw-r--r-- 1 root root 0 Jan 14 0814 /proc/4732/oom_adj
    310116382 0 -rw-r--r-- 1 root root 0 Jan 14 0814 /proc/4732/loginuid
    310116416 0 -r-------- 1 root root 0 Jan 14 0814 /proc/4732/limits
    310116418 0 -r--r--r-- 1 root root 0 Jan 14 0814 /proc/4732/io