Search code examples
python, filesystems, file-traversal

Python File Traversal


I am trying to create a function that takes in the name of a root directory, then traverses through that directory and returns a nested list like this:

[["folder1",[
    ["subfolder1",[
        "file1",
        "file2"
    ]],
    ["subfolder2",[
        "file3",
        "file4"
    ]]
],"file5","file6"]

Below is my attempt at the function:

def traverse(rootdir):
    """Attempt to build a nested ``[name, children]`` listing of ``rootdir``.

    NOTE: this is the non-working attempt. ``os.walk`` already visits every
    directory below ``rootdir`` on its own, and this function additionally
    calls itself for each subdirectory, so the contents of nested
    directories are recorded more than once.
    """
    names = []
    for cdirname, dirnames, filenames in os.walk(rootdir):
        # Record each subdirectory as [name, contents]. The recursive call
        # walks the subtree again even though os.walk will also reach it in a
        # later iteration of this loop -- this is the source of the duplicates.
        for subdirname in dirnames:
            names.append([subdirname,traverse(os.path.join(cdirname, subdirname))])

        # Record the files of the directory currently being walked. The entry
        # is cdirname joined with the file name, i.e. a path that includes
        # rootdir, not the bare name. Files of every walked directory land in
        # the same flat ``names`` list.
        for filename in filenames:
            names.append(os.path.join(cdirname, filename))

    return names

My problem is that I always end up with duplicates of the same files/folders being recorded by the function, and that the paths are always shown relative to "rootdir" instead of just the names of the respective files/folders. How do I weed out the duplicates? Additionally, how can I make it so that the full path is not what gets recorded?


Solution

  • `sorted` is used to make directories come first. If you don't mind the order, just return `names`.

    def traverse(rootdir):
        """Return the contents of ``rootdir`` as a nested list of names.

        Each subdirectory is represented as ``[name, children]``, where
        ``children`` is the listing of that subdirectory built by a recursive
        call; every other entry is represented by its bare name. Within each
        level, directories come first and files second, each group ordered by
        name.

        Errors raised by ``os.listdir`` (for example when ``rootdir`` does not
        exist) propagate to the caller.
        """
        names = []
        for filename in os.listdir(rootdir):
            filepath = os.path.join(rootdir, filename)
            if os.path.isdir(filepath):
                # One recursive call per subdirectory; os.listdir only lists a
                # single level, so nothing is visited twice.
                names.append([filename, traverse(filepath)])
            else:
                names.append(filename)
        # Directory entries are lists and file entries are strings. The leading
        # 0/1 in the key puts directories first and also means a list is never
        # compared directly with a string.
        return sorted(names, key=lambda x: (0, x[0]) if isinstance(x, list) else (1, x))
    

    Another version that uses os.walk:

    def traverse(rootdir):
        """Return the contents of ``rootdir`` as a nested list of names.

        Each subdirectory is represented as ``[name, children]`` and every
        file by its bare name. Within each level, directories come first and
        files second, each group sorted by name. The tree is built with a
        single ``os.walk`` pass rather than recursion.

        ``rootdir`` may be a ``str``, ``bytes`` or ``os.PathLike`` path.
        """
        # os.walk yields directory paths in their os.fspath() form. Normalise
        # rootdir the same way so the first cdirname is found in the mapping
        # below; a pathlib.Path key would otherwise raise KeyError.
        rootdir = os.fspath(rootdir)
        names = []
        # Maps each directory path to the list that holds its children. The
        # same list object is also stored inside the parent's entry, so filling
        # it when os.walk reaches that directory completes the nested result.
        dir_to_names = {rootdir: names}
        for cdirname, dirnames, filenames in os.walk(rootdir):
            subnames = dir_to_names[cdirname]
            for subdirname in sorted(dirnames):
                # Register the child's (still empty) list before os.walk
                # descends into it; the key matches the cdirname os.walk will
                # report for that directory.
                subnames2 = dir_to_names[os.path.join(cdirname, subdirname)] = []
                subnames.append([subdirname, subnames2])
            subnames.extend(sorted(filenames))
        return names