Search code examples
recursionrusttreehierarchyhierarchical

How to convert a flat list of directory paths to hierarchical struct in Rust?


My input is a flat list of filesystem paths that are all sub-directories (or file therein) of a single top-level directory.

My ultimate output should be:

  1. A textual hierarchical display of the paths, like that of unix tree command.
  2. A hierarchical JSON serialization of the paths with matching logical structure to (1)

I have created an intermediate data structure, which is a self-referencing struct Dir that has a name and a vector of Box'ed child struct Dir.

I can successfully use Dir to represent an arbitrary directory tree as shown in the output below.

I was thinking to use a stack to process the list, adding a Dir for each sub-directory and popping when ascending, but I can't seem to make it work with Rust the way it would with C or other languages. I get compiler errors no matter what I try.

How can I turn the flat list into a Dir and keep the compiler happy? Or alternatively, how to achieve (1) and (2) in a different way?

Code:

// A type to represent a path, split into its component parts
#[derive(Debug)]
struct Path {
    parts: Vec<String>,
}
impl Path {
    pub fn new(path: &str) -> Path {
        Path {
            parts: path.to_string().split("/").map(|s| s.to_string()).collect(),
        }
    }
}

// A recursive type to represent a directory tree.
// Simplification: If it has children, it is considered
// a directory, else considered a file.
#[derive(Debug)]
struct Dir {
    name: String,
    children: Vec<Box<Dir>>,
}

impl Dir {
    fn new(name: &str) -> Dir {
        Dir {
            name: name.to_string(),
            children: Vec::<Box<Dir>>::new(),
        }
    }

    fn has_child(&self, name: &str) -> bool {
        for c in self.children.iter() {
            if c.name == name {
                return true;
            }
        }
        false
    }

    fn add_child<T>(mut self, leaf: T) -> Self
    where
        T: Into<Dir>,
    {
        self.children.push(Box::new(leaf.into()));
        self
    }
}

fn dir(val: &str) -> Dir {
    Dir::new(val)
}

fn main() {
    // Form our INPUT:  a list of paths.
    let paths = vec![
        Path::new("root/child1/grandchild1.txt"),
        Path::new("root/child1/grandchild2.json"),
        Path::new("root/child2/grandchild3.pdf"),
        Path::new("root/child3"),
    ];
    println!("Input Paths:\n{:#?}\n", paths);

    // Transformation:
    // I need an algorithm here that converts the list of paths
    // above to a recursive struct (tree) below.
    // ie: paths --> dir
    let top = dir("root");
    let mut cwd = &top;
    for p in paths.iter() {
        for part in p.parts.iter() {
            if !cwd.has_child(part) {
                // cwd.add_child(dir(part));
                // cwd = &cwd.children[cwd.children.len() - 1];
            }
        }
    }

    // Intermediate Representation:
    // The above transformation should result in the following
    // hierarchical structure.
    let top = dir("root")
        .add_child(
            dir("child1")
                .add_child(dir("grandchild1.txt"))
                .add_child(dir("grandchild2.json")),
        )
        .add_child(dir("child2").add_child(dir("grandchild3.pdf")))
        .add_child(dir("child3"));
    println!("Intermediate Representation of Dirs:\n{:#?}\n\nOutput Tree Format:\n", top);

    // Output:  textual `tree` format
    print_dir(&top, 0);
}

// A function to print a Dir in format similar to unix `tree` command.
fn print_dir(dir: &Dir, depth: u32) {
    if depth == 0 {
        println!("{}", dir.name);
    } else {
        println!(
            "{:indent$}{} {}",
            "",
            "└──",
            dir.name,
            indent = ((depth as usize) - 1) * 4
        );
    }

    for child in dir.children.iter() {
        print_dir(child, depth + 1)
    }
}

Output:

$ ./target/debug/rust-tree                  
Input Paths:
[
    Path {
        parts: [
            "root",
            "child1",
            "grandchild1.txt",
        ],
    },
    Path {
        parts: [
            "root",
            "child1",
            "grandchild2.json",
        ],
    },
    Path {
        parts: [
            "root",
            "child2",
            "grandchild3.pdf",
        ],
    },
    Path {
        parts: [
            "root",
            "child3",
        ],
    },
]

Intermediate Representation of Dirs:
Dir {
    name: "root",
    children: [
        Dir {
            name: "child1",
            children: [
                Dir {
                    name: "grandchild1.txt",
                    children: [],
                },
                Dir {
                    name: "grandchild2.json",
                    children: [],
                },
            ],
        },
        Dir {
            name: "child2",
            children: [
                Dir {
                    name: "grandchild3.pdf",
                    children: [],
                },
            ],
        },
        Dir {
            name: "child3",
            children: [],
        },
    ],
}

Output Tree Format:

root
└── child1
    └── grandchild1.txt
    └── grandchild2.json
└── child2
    └── grandchild3.pdf
└── child3

Solution

  • Since someone deleted my previous answer with a link to the working code, I will post the full working code here.

    // A type to represent a path, split into its component parts
    #[derive(Debug)]
    struct Path {
        parts: Vec<String>,
    }
    impl Path {
        pub fn new(path: &str) -> Path {
            Path {
                parts: path.to_string().split("/").map(|s| s.to_string()).collect(),
            }
        }
    }
    
    // A recursive type to represent a directory tree.
    // Simplification: If it has children, it is considered
    // a directory, else considered a file.
    #[derive(Debug, Clone)]
    struct Dir {
        name: String,
        children: Vec<Box<Dir>>,
    }
    
    impl Dir {
        fn new(name: &str) -> Dir {
            Dir {
                name: name.to_string(),
                children: Vec::<Box<Dir>>::new(),
            }
        }
    
        fn find_child(&mut self, name: &str) -> Option<&mut Dir> {
            for c in self.children.iter_mut() {
                if c.name == name {
                    return Some(c);
                }
            }
            None
        }
    
        fn add_child<T>(&mut self, leaf: T) -> &mut Self
        where
            T: Into<Dir>,
        {
            self.children.push(Box::new(leaf.into()));
            self
        }
    }
    
    fn dir(val: &str) -> Dir {
        Dir::new(val)
    }
    
    fn main() {
        // Form our INPUT:  a list of paths.
        let paths = vec![
            Path::new("child1/grandchild1.txt"),
            Path::new("child1/grandchild2.json"),
            Path::new("child2/grandchild3.pdf"),
            Path::new("child3"),
        ];
        println!("Input Paths:\n{:#?}\n", paths);
    
        // Transformation:
        // A recursive algorithm that converts the list of paths
        // above to Dir (tree) below.
        // ie: paths --> dir
        let mut top = dir("root");
        for path in paths.iter() {
            build_tree(&mut top, &path.parts, 0);
        }
    
        println!(
            "Intermediate Representation of Dirs:\n{:#?}\n\nOutput Tree Format:\n",
            top
        );
    
        // Output:  textual `tree` format
        print_dir(&top, 0);
    }
    
    fn build_tree(node: &mut Dir, parts: &Vec<String>, depth: usize) {
        if depth < parts.len() {
            let item = &parts[depth];
    
            let mut dir = match node.find_child(&item) {
                Some(d) => d,
                None => {
                    let d = Dir::new(&item);
                    node.add_child(d);
                    match node.find_child(&item) {
                        Some(d2) => d2,
                        None => panic!("Got here!"),
                    }
                }
            };
            build_tree(&mut dir, parts, depth + 1);
        }
    }
    
    // A function to print a Dir in format similar to unix `tree` command.
    fn print_dir(dir: &Dir, depth: u32) {
        if depth == 0 {
            println!("{}", dir.name);
        } else {
            println!(
                "{:indent$}{} {}",
                "",
                "└──",
                dir.name,
                indent = ((depth as usize) - 1) * 4
            );
        }
    
        for child in dir.children.iter() {
            print_dir(child, depth + 1)
        }
    }