I have this simple script to attempt to traverse the file system and read files line-by-line to match lines on a regex:
package main
import (
"bufio"
"fmt"
"io/ioutil"
"log"
"os"
"regexp"
"sync"
)
type FileWithLine struct{
Line int
Path string
}
var set = map[string]FileWithLine{}
var rgx = regexp.MustCompile("ErrId\\s*:\\s*\"[[:alnum:]]+\"");
func traverseDir(d string, wg *sync.WaitGroup){
fmt.Println("traversing dir:", d)
if d == ".git"{
return
}
wg.Add(1)
go func(wg *sync.WaitGroup){
defer wg.Done()
files, err := ioutil.ReadDir(d)
if err != nil {
log.Fatal(err)
}
for _, f := range files {
fmt.Println("we see file:", f.Name())
if f.IsDir() {
traverseDir(f.Name(), wg)
return
}
file, err := os.Open(f.Name())
if err != nil {
log.Fatalf("failed opening file: %s", err)
}
scanner := bufio.NewScanner(file)
scanner.Split(bufio.ScanLines)
for scanner.Scan() {
var line = scanner.Text()
if rgx.MatchString(line) {
fmt.Println("line matches:", line);
}
}
file.Close()
}
}(wg)
}
func main() {
var wg sync.WaitGroup
traverseDir(".", &wg)
fmt.Println("Main: Waiting for workers to finish")
wg.Wait()
fmt.Println("Main: Completed")
}
the problem is that it's exiting before it reads all the files, I get this output:
traversing dir: .
Main: Waiting for workers to finish
we see file: .git
traversing dir: .git
Main: Completed
but there are more files in the current directory, than just the .git folder. It just so happens that the .git folder is the first item in the current working dir and it exits right after that. Anyone know why my program is exciting early?
It is stopping processing because of these lines:
if f.IsDir() {
traverseDir(f.Name(), wg)
return
}
When it sees a directory, it goes in it and immediately returns, without processing the remaining files in the current directory. And when the first seen directory is ".git", since you handle it as an exception, the nested traverseDir
also returns.