Search code examples
go goroutine

How to write a directory-traversing program that reports the total number of subdirectories, files, etc.?


I am trying to write a directory-traversing program using goroutines and channels, but I am unable to get the results I need. I expect to get the total number of subdirectories and the total file count. But when I run the code below, it gets stuck at "dirCount <- 1". PS: is it possible to write such a program so that it traverses to unlimited depth?

package main

import (
    "encoding/json"
    "fmt"
    "io/ioutil"
    "log"
    "net/http"

    "github.com/gorilla/mux"
)

// DirectoryItem is one entry of a directory listing as exchanged in JSON.
// Name is the entry's base name, IsDir marks subdirectories, and Size is the
// size in bytes. Every field is dropped from the JSON output when it holds
// its zero value (the original tag spelled the option "omitemty", which
// encoding/json ignores, so an empty name was still emitted).
type DirectoryItem struct {
    Name  string `json:"name,omitempty"`
    IsDir bool   `json:"isDir,omitempty"`
    Size  int64  `json:"size,omitempty"`
}

// DirectoryInfo is the JSON form of a single directory level: Path is the
// directory that was listed and Dirs holds its entries (files as well as
// subdirectories). Both fields are omitted from the JSON output when empty;
// the original tag on Path misspelled the option as "omitemty", which
// encoding/json ignores.
type DirectoryInfo struct {
    Path string          `json:"path,omitempty"`
    Dirs []DirectoryItem `json:"dirs,omitempty"`
}

// dirItems is declared but not referenced by any function in this listing.
var dirItems []DirectoryItem
// dirInfo is a single package-level value, so every goroutine that touches it
// shares the same instance; unsynchronized concurrent writes to it are a data
// race. Functions that declare their own dirInfo with := shadow this variable.
var dirInfo DirectoryInfo

// GetOneDirItems handles GET /api/GetOneDirItems?path=<relative path>.
//
// It lists one level of the directory "E:\" + path via CheckEachItem and
// writes the resulting DirectoryInfo to w as JSON.
//
// Responses:
//   - 400 when the "path" query parameter is absent (indexing
//     query["path"][0] used to panic in that case);
//   - 500 when the directory cannot be read (the error used to be discarded
//     and an empty listing returned with status 200);
//   - 200 with the JSON listing otherwise.
func GetOneDirItems(w http.ResponseWriter, req *http.Request) {
    values := req.URL.Query()["path"]
    if len(values) == 0 {
        http.Error(w, `missing "path" query parameter`, http.StatusBadRequest)
        return
    }

    // NOTE(review): the client-supplied path is concatenated unchecked, so
    // ".." segments are not rejected — confirm that is acceptable.
    fpath := "E:\\" + values[0]

    listing, err := CheckEachItem(fpath)
    if err != nil {
        http.Error(w, err.Error(), http.StatusInternalServerError)
        return
    }

    if err := json.NewEncoder(w).Encode(listing); err != nil {
        // The status line is already sent at this point; logging is all
        // that is left to do.
        log.Printf("GetOneDirItems: encoding listing of %q: %v", fpath, err)
    }
}

// CheckEachItem reads the entries of dirPath (one level, via ioutil.ReadDir)
// and returns them wrapped in a DirectoryInfo whose Path is dirPath.
//
// Subdirectories are recorded with Size 0; for every other entry Size is the
// value reported by the entry's Size method. If the directory cannot be
// read, the zero DirectoryInfo and the read error are returned.
func CheckEachItem(dirPath string) (directory DirectoryInfo, err error) {
    entries, err := ioutil.ReadDir(dirPath)
    if err != nil {
        return directory, err
    }

    var listing []DirectoryItem
    for _, entry := range entries {
        item := DirectoryItem{Name: entry.Name(), IsDir: entry.IsDir()}
        if !item.IsDir {
            // Only non-directories carry a size.
            item.Size = entry.Size()
        }
        listing = append(listing, item)
    }

    return DirectoryInfo{Path: dirPath, Dirs: listing}, nil
}

// CalcDirInfo handles GET /api/GetDirInfo?path=<relative path>. It starts
// Recr on the matching GetOneDirItems URL in a goroutine and sums the values
// received on dirCount.
//
// NOTE(review): as written this handler never returns and never writes to w:
//   - nothing closes dirCount, so the range loop below cannot terminate;
//   - nothing receives from fileCount, which is unbuffered, so any goroutine
//     that sends on it blocks forever.
// In addition, query["path"][0] panics when the "path" parameter is absent.
func CalcDirInfo(w http.ResponseWriter, req *http.Request) {
    query := req.URL.Query()

    path := query["path"][0]

    url := "http://localhost:8090/api/GetOneDirItems?path="

    url += path

    dirCount := make(chan int)
    fileCount := make(chan int)

    go Recr(url, dirCount, fileCount)

    // Add up the directory notifications; this loop only ends when dirCount
    // is closed, which no code in this listing does.
    dirTotalCount := 0

    for i := range dirCount {
        dirTotalCount += i
    }

    fmt.Println(dirTotalCount)

}

func Recr(url string, dirCount chan int, fileCount chan int) {

    fmt.Println(url)

    resp, _ := http.Get(url)

    dirInfo = DirectoryInfo{}

    body, _ := ioutil.ReadAll(resp.Body)

    defer resp.Body.Close()

    json.Unmarshal([]byte(body), &dirInfo)

    for _, itm := range dirInfo.Dirs {
        fmt.Println("--")
        if itm.IsDir {
            newUrl := url + "/" + itm.Name

            //// looks like stuck in here
            dirCount <- 1
            go Recr(newUrl, dirCount, fileCount)
        } else {
            fileCount <- 1
        }
    }
}

// main registers both API endpoints on a gorilla/mux router and serves them
// on port 8090; it exits through log.Fatal once the server stops.
func main() {
    r := mux.NewRouter()

    // Endpoint 1: list a single directory level.
    // Sample result given by the author:
    // {"path":"E:\\code","dirs":[{"name":"A","isDir":true},{"name":"B","isDir":false}]}
    r.HandleFunc("/api/GetOneDirItems", GetOneDirItems).Methods("GET")

    // Endpoint 2: calls endpoint 1 recursively to total a whole tree.
    // Expected result given by the author:
    // {"path":"E:\\code","dirCount":2,"fileCount":3}
    r.HandleFunc("/api/GetDirInfo", CalcDirInfo).Methods("GET")

    if err := http.ListenAndServe(":8090", r); err != nil {
        log.Fatal(err)
    }
}

I found a code example and adapted it, but it does not report the right numbers...

package main

import (
    "encoding/json"
    "fmt"
    "io/ioutil"
    "log"
    "net/http"
    "os"
    "path/filepath"
    "sync"

    "github.com/gorilla/mux"
)

//!+1
// done is the cancellation channel: closing it makes every receive from it
// succeed immediately. No code in this listing closes it.
var done = make(chan struct{})

// cancelled reports, without blocking, whether done has been closed.
func cancelled() bool {
    select {
    case <-done:
        return true
    default:
        // done is still open: fall through to the negative answer.
    }
    return false
}

//!-1

// DirectoryItem is one entry of a directory listing as exchanged in JSON.
// Name is the entry's base name, IsDir marks subdirectories, and Size is the
// size in bytes. Every field is dropped from the JSON output when it holds
// its zero value (the original tag spelled the option "omitemty", which
// encoding/json ignores, so an empty name was still emitted).
type DirectoryItem struct {
    Name  string `json:"name,omitempty"`
    IsDir bool   `json:"isDir,omitempty"`
    Size  int64  `json:"size,omitempty"`
}

// DirectoryInfo is the JSON form of a single directory level: Path is the
// directory that was listed and Dirs holds its entries (files as well as
// subdirectories). Both fields are omitted from the JSON output when empty;
// the original tag on Path misspelled the option as "omitemty", which
// encoding/json ignores.
type DirectoryInfo struct {
    Path string          `json:"path,omitempty"`
    Dirs []DirectoryItem `json:"dirs,omitempty"`
}

// dirItems is declared but not referenced by any function in this listing.
var dirItems []DirectoryItem
// dirInfo is a single package-level value, so every goroutine that touches it
// shares the same instance; unsynchronized concurrent writes to it are a data
// race. Functions that declare their own dirInfo with := shadow this variable.
var dirInfo DirectoryInfo

// GetOneDirItems handles GET /api/GetOneDirItems?path=<relative path>.
//
// It lists one level of the directory "E:\" + path via CheckEachItem and
// writes the resulting DirectoryInfo to w as JSON.
//
// Responses:
//   - 400 when the "path" query parameter is absent (indexing
//     query["path"][0] used to panic in that case);
//   - 500 when the directory cannot be read (the error used to be discarded
//     and an empty listing returned with status 200);
//   - 200 with the JSON listing otherwise.
func GetOneDirItems(w http.ResponseWriter, req *http.Request) {
    values := req.URL.Query()["path"]
    if len(values) == 0 {
        http.Error(w, `missing "path" query parameter`, http.StatusBadRequest)
        return
    }

    // NOTE(review): the client-supplied path is concatenated unchecked, so
    // ".." segments are not rejected — confirm that is acceptable.
    fpath := "E:\\" + values[0]

    listing, err := CheckEachItem(fpath)
    if err != nil {
        http.Error(w, err.Error(), http.StatusInternalServerError)
        return
    }

    if err := json.NewEncoder(w).Encode(listing); err != nil {
        // The status line is already sent at this point; logging is all
        // that is left to do.
        log.Printf("GetOneDirItems: encoding listing of %q: %v", fpath, err)
    }
}

// CheckEachItem reads the entries of dirPath (one level, via ioutil.ReadDir)
// and returns them wrapped in a DirectoryInfo whose Path is dirPath.
//
// Subdirectories are recorded with Size 0; for every other entry Size is the
// value reported by the entry's Size method. If the directory cannot be
// read, the zero DirectoryInfo and the read error are returned.
func CheckEachItem(dirPath string) (directory DirectoryInfo, err error) {
    entries, err := ioutil.ReadDir(dirPath)
    if err != nil {
        return directory, err
    }

    var listing []DirectoryItem
    for _, entry := range entries {
        item := DirectoryItem{Name: entry.Name(), IsDir: entry.IsDir()}
        if !item.IsDir {
            // Only non-directories carry a size.
            item.Size = entry.Size()
        }
        listing = append(listing, item)
    }

    return DirectoryInfo{Path: dirPath, Dirs: listing}, nil
}

// CalcDirInfo handles GET /api/GetDirInfo?path=<relative path>. It lists the
// top level of "E:\" + path directly, starts one Recr goroutine per top-level
// entry, tallies what those goroutines send on fileSizes and dirCount, and
// prints the totals with printDiskUsage. Nothing is written to w.
//
// NOTE(review): several things here can make the printed totals wrong:
//   - url is extended in place on every iteration, so the second entry is
//     requested as <path>/<first>/<second>, the third as
//     <path>/<first>/<second>/<third>, and so on;
//   - Recr is started for every top-level entry, files included, and the
//     top-level entries themselves are never counted;
//   - the select loop stops at the first closed channel it observes;
//     fileSizes is closed before dirCount, and dirCount is buffered (100),
//     so values may still be sitting unread in dirCount when the loop exits.
// query["path"][0] panics when "path" is absent, and the CheckEachItem error
// is discarded.
func CalcDirInfo(w http.ResponseWriter, req *http.Request) {
    query := req.URL.Query()

    path := query["path"][0]

    url := "http://localhost:8090/api/GetOneDirItems?path="

    url += path

    fpath := "E:\\"

    fpath = fpath + path

    dirInfo, _ := CheckEachItem(fpath)

    fileSizes := make(chan int64)
    dirCount := make(chan int, 100)
    var n sync.WaitGroup
    for _, item := range dirInfo.Dirs {

        n.Add(1)
        // url keeps the suffix added for the previous entry (see NOTE above).
        url = url + "/" + item.Name
        go Recr(url, &n, dirCount, fileSizes)
    }
    // Closer goroutine: once every Recr has called Done, close both channels
    // so the receive loop below can observe the end of the traversal.
    go func() {
        n.Wait()
        close(fileSizes)
        close(dirCount)
    }()

    // Periodic progress output is disabled:
    // tick := time.Tick(500 * time.Millisecond)
    var nfiles, ndirs, nbytes int64
loop:
    // Receive until cancellation or until one of the channels is closed.
    for {
        select {
        case <-done:
            // Drain fileSizes to allow existing goroutines to finish.
            for range fileSizes {
                // Do nothing.
            }

            return
        case size, ok := <-fileSizes:
            // One file: count it and add its size.
            if !ok {
                break loop // fileSizes was closed
            }
            nfiles++
            nbytes += size

        case _, ok := <-dirCount:
            // One subdirectory: count it (the value itself is ignored).
            if !ok {
                break loop // dirCount was closed
            }
            ndirs++

            // case <-tick:
            // printDiskUsage(nfiles, ndirs, nbytes)
        }
    }
    printDiskUsage(nfiles, ndirs, nbytes) // final totals

}

func Recr(url string, n *sync.WaitGroup, dirCount chan<- int, fileSizes chan<- int64) {

    defer n.Done()

    resp, _ := http.Get(url)

    dirInfo = DirectoryInfo{}

    body, _ := ioutil.ReadAll(resp.Body)

    defer resp.Body.Close()

    json.Unmarshal([]byte(body), &dirInfo)

    for _, itm := range dirInfo.Dirs {

        if itm.IsDir {
            dirCount <- 1
            n.Add(1)

            newUrl := url + "/" + itm.Name

            go Recr(newUrl, n, dirCount, fileSizes)
        } else {
            fileSizes <- itm.Size
        }
    }

}

func main() {
    // Determine the initial directories.
    roots := os.Args[1:]
    if len(roots) == 0 {
        roots = []string{"."}
    }

    // API Services
    router := mux.NewRouter()

    router.HandleFunc("/api/GetOneDirItems", GetOneDirItems).Methods("GET")

    router.HandleFunc("/api/GetDirInfo", CalcDirInfo).Methods("GET")

    log.Fatal(http.ListenAndServe(":8090", router))
}

// printDiskUsage writes a one-line summary to standard output: the number of
// files, their total size in gigabytes (10^9 bytes, one decimal place) and
// the number of directories.
func printDiskUsage(nfiles, ndirs, nbytes int64) {
    const bytesPerGB = 1e9
    gigabytes := float64(nbytes) / bytesPerGB
    fmt.Printf("%d files  %.1f GB   %d dirs\n", nfiles, gigabytes, ndirs)
}

// walkDir processes the entries of dir as returned by dirents: for each
// subdirectory it sends 1 on dirCount and walks it in a new goroutine, and
// for every other entry it sends the entry's size on fileSizes.
//
// n counts outstanding walkDir calls: walkDir calls n.Done when it returns
// and n.Add(1) before starting each child. If cancelled() reports true on
// entry, nothing is read and nothing is sent.
func walkDir(dir string, n *sync.WaitGroup, fileSizes chan<- int64, dirCount chan<- int) {
    defer n.Done()
    if cancelled() {
        return
    }
    for _, entry := range dirents(dir) {
        if !entry.IsDir() {
            fileSizes <- entry.Size()
            continue
        }
        // Announce the subdirectory, register the child, then start it.
        dirCount <- 1
        n.Add(1)
        go walkDir(filepath.Join(dir, entry.Name()), n, fileSizes, dirCount)
    }
}

//!-4

// sema is a counting semaphore: its buffer of 20 bounds how many dirents
// calls may be reading a directory at the same time.
var sema = make(chan struct{}, 20)

// dirents returns the entries of directory dir.
//
// It first waits for a semaphore token, giving up and returning nil if done
// is closed in the meantime; the token is released when dirents returns.
// Errors are reported on standard error: a failed open yields nil, while a
// failed read still returns whatever entries were obtained.
func dirents(dir string) []os.FileInfo {
    select {
    case <-done:
        return nil // cancelled
    case sema <- struct{}{}:
        // Token acquired; hand it back on return.
        defer func() { <-sema }()
    }

    handle, openErr := os.Open(dir)
    if openErr != nil {
        fmt.Fprintf(os.Stderr, "du: %v\n", openErr)
        return nil
    }
    defer handle.Close()

    // A count of 0 asks Readdir for all entries at once.
    listing, readErr := handle.Readdir(0)
    if readErr != nil {
        // Keep going: Readdir may have produced partial results.
        fmt.Fprintf(os.Stderr, "du: %v\n", readErr)
    }
    return listing
}


Solution

  • The problem here is that your program has no way of ending. Basically, whenever the code recurses into another directory, you need to count that, and then when it finishes processing a directory, you need to push 1 to a done channel. When the number done equals the number of directories recursed into plus one (for the root directory), you can exit the channel select (that's the other missing part):

    package main
    
    import (
        "encoding/json"
        "fmt"
        "io/ioutil"
        "log"
        "net/http"
    
        "github.com/gorilla/mux"
    )
    
    // DirectoryItem is one entry of a directory listing as exchanged in JSON.
    // Name is the entry's base name, IsDir marks subdirectories, and Size is the
    // size in bytes. Every field is dropped from the JSON output when it holds
    // its zero value (the original tag spelled the option "omitemty", which
    // encoding/json ignores, so an empty name was still emitted).
    type DirectoryItem struct {
        Name  string `json:"name,omitempty"`
        IsDir bool   `json:"isDir,omitempty"`
        Size  int64  `json:"size,omitempty"`
    }
    
    // DirectoryInfo is the JSON form of a single directory level: Path is the
    // directory that was listed and Dirs holds its entries (files as well as
    // subdirectories). Both fields are omitted from the JSON output when empty;
    // the original tag on Path misspelled the option as "omitemty", which
    // encoding/json ignores.
    type DirectoryInfo struct {
        Path string          `json:"path,omitempty"`
        Dirs []DirectoryItem `json:"dirs,omitempty"`
    }
    
    // dirItems is declared but not referenced by any function in this listing.
    var dirItems []DirectoryItem
    // dirInfo is a single package-level value, so every goroutine that touches it
    // shares the same instance; unsynchronized concurrent writes to it are a data
    // race. Functions that declare their own dirInfo with := shadow this variable.
    var dirInfo DirectoryInfo
    
    // GetOneDirItems handles GET /api/GetOneDirItems?path=<relative path>.
    //
    // It lists one level of the directory "E:\" + path via CheckEachItem and
    // writes the resulting DirectoryInfo to w as JSON.
    //
    // Responses:
    //   - 400 when the "path" query parameter is absent (indexing
    //     query["path"][0] used to panic in that case);
    //   - 500 when the directory cannot be read (this used to panic inside
    //     the handler instead of answering the request);
    //   - 200 with the JSON listing otherwise.
    func GetOneDirItems(w http.ResponseWriter, req *http.Request) {
        values := req.URL.Query()["path"]
        if len(values) == 0 {
            http.Error(w, `missing "path" query parameter`, http.StatusBadRequest)
            return
        }

        // NOTE(review): the client-supplied path is concatenated unchecked, so
        // ".." segments are not rejected — confirm that is acceptable.
        fpath := "E:\\" + values[0]

        listing, err := CheckEachItem(fpath)
        if err != nil {
            http.Error(w, err.Error(), http.StatusInternalServerError)
            return
        }

        if err := json.NewEncoder(w).Encode(listing); err != nil {
            // The status line is already sent at this point; logging is all
            // that is left to do.
            log.Printf("GetOneDirItems: encoding listing of %q: %v", fpath, err)
        }
    }
    
    // CheckEachItem reads the entries of dirPath (one level, via ioutil.ReadDir)
    // and returns them wrapped in a DirectoryInfo whose Path is dirPath.
    //
    // Subdirectories are recorded with Size 0; for every other entry Size is the
    // value reported by the entry's Size method. If the directory cannot be
    // read, the zero DirectoryInfo and the read error are returned.
    func CheckEachItem(dirPath string) (directory DirectoryInfo, err error) {
        entries, err := ioutil.ReadDir(dirPath)
        if err != nil {
            return directory, err
        }

        var listing []DirectoryItem
        for _, entry := range entries {
            item := DirectoryItem{Name: entry.Name(), IsDir: entry.IsDir()}
            if !item.IsDir {
                // Only non-directories carry a size.
                item.Size = entry.Size()
            }
            listing = append(listing, item)
        }

        return DirectoryInfo{Path: dirPath, Dirs: listing}, nil
    }
    
    // CalcDirInfo handles GET /api/GetDirInfo?path=<relative path>.
    //
    // It starts Recr on the matching GetOneDirItems URL, tallies the
    // subdirectory, file and completion signals the traversal sends back, and
    // answers with JSON of the form
    //   {"path":"E:\\code","dirCount":2,"fileCount":3}
    // A missing "path" parameter yields 400 instead of a panic.
    //
    // Termination: one Recr call runs for the root and one for every
    // subdirectory announced on dirCount, so the traversal is complete once the
    // number of completion signals equals dirTotalCount+1.
    //
    // The channels are deliberately unbuffered. A send then completes only
    // after this loop has received it, so every subdirectory is counted before
    // its goroutine is even started and every file is counted before the
    // goroutine that found it reports completion. With buffered channels,
    // select could pick a pending completion signal ahead of a pending
    // dirCount value and end the loop too early.
    func CalcDirInfo(w http.ResponseWriter, req *http.Request) {
        values := req.URL.Query()["path"]
        if len(values) == 0 {
            http.Error(w, `missing "path" query parameter`, http.StatusBadRequest)
            return
        }
        path := values[0]

        url := "http://localhost:8090/api/GetOneDirItems?path=" + path

        dirCount := make(chan int)
        fileCount := make(chan int)
        doneCount := make(chan int)

        go Recr(url, dirCount, fileCount, doneCount)

        dirTotalCount, fileTotalCount, doneTotalCount := 0, 0, 0
        // +1 accounts for the root directory, which runs a Recr call of its
        // own but is never announced on dirCount.
        for doneTotalCount < dirTotalCount+1 {
            select {
            case dir := <-dirCount:
                dirTotalCount += dir
            case file := <-fileCount:
                fileTotalCount += file
            case done := <-doneCount:
                doneTotalCount += done
            }
        }
        fmt.Println("ALL DONE")
        fmt.Printf("TOTAL=%d\n", dirTotalCount)

        result := struct {
            Path      string `json:"path"`
            DirCount  int    `json:"dirCount"`
            FileCount int    `json:"fileCount"`
        }{
            // Same root prefix that GetOneDirItems applies to the parameter.
            Path:      "E:\\" + path,
            DirCount:  dirTotalCount,
            FileCount: fileTotalCount,
        }
        w.Header().Set("Content-Type", "application/json")
        if err := json.NewEncoder(w).Encode(result); err != nil {
            log.Printf("CalcDirInfo: encoding totals for %q: %v", path, err)
        }
    }
    
    func Recr(url string, dirCount chan int, fileCount chan int, doneCount chan int) {
    
        // fmt.Printf("url=%s\n", url)
    
        resp, _ := http.Get(url)
    
        dirInfo = DirectoryInfo{}
    
        body, _ := ioutil.ReadAll(resp.Body)
    
        defer resp.Body.Close()
    
        json.Unmarshal([]byte(body), &dirInfo)
        // fmt.Printf("dirInfo=%+v body=%s", dirInfo, string(body))
    
        for _, itm := range dirInfo.Dirs {
            if itm.IsDir {
                newUrl := url + "/" + itm.Name
    
                //// looks like stuck in here
                // fmt.Printf("pushing one dir from %s\n", url)
                dirCount <- 1
                go Recr(newUrl, dirCount, fileCount, doneCount)
            } else {
                // fmt.Println("pushing one file")
                fileCount <- 1
            }
        }
        doneCount <- 1
    }
    
    // main registers both API endpoints on a gorilla/mux router and serves them
    // on port 8090; it exits through log.Fatal once the server stops.
    func main() {
        r := mux.NewRouter()

        // Endpoint 1: list a single directory level.
        // Sample result given by the author:
        // {"path":"E:\\code","dirs":[{"name":"A","isDir":true},{"name":"B","isDir":false}]}
        r.HandleFunc("/api/GetOneDirItems", GetOneDirItems).Methods("GET")

        // Endpoint 2: calls endpoint 1 recursively to total a whole tree.
        // Expected result given by the author:
        // {"path":"E:\\code","dirCount":2,"fileCount":3}
        r.HandleFunc("/api/GetDirInfo", CalcDirInfo).Methods("GET")

        if err := http.ListenAndServe(":8090", r); err != nil {
            log.Fatal(err)
        }
    }