Python File Traversal

I am trying to create a function that takes in the name of a root directory, then traverses through that directory and returns a list like this:

[["folder1",[
    ["subfolder1",[
        "file1",
        "file2"
    ]],
    ["subfolder2",[
        "file3",
        "file4"
    ]]
]],"file5","file6"]

Below is my attempt at the function:

def traverse(rootdir):
    """Collect directory and file entries found beneath ``rootdir``.

    For every directory that ``os.walk`` visits, each sub-directory is
    appended as ``[name, traverse(sub-directory path)]`` and each file is
    appended as its path joined onto the directory being visited.

    Note that ``os.walk`` already descends into sub-directories on its own,
    so combining it with the recursive call records nested entries more
    than once, and files appear as joined paths rather than bare names.
    """
    entries = []
    for current_dir, child_dirs, child_files in os.walk(rootdir):
        # Sub-directories first: pair each name with its own listing.
        entries.extend(
            [child, traverse(os.path.join(current_dir, child))]
            for child in child_dirs
        )
        # Then the files, stored as paths below the walked directory.
        entries.extend(
            os.path.join(current_dir, child) for child in child_files
        )
    return entries

My problem is that I always end up getting duplicates of the same files/folders recorded by the function, and that the paths are always shown relative to "rootdir" instead of just the names of the respective files/folders. How do I weed out the duplicates? Additionally, how could I make it so that only the name, rather than the full path, gets recorded?

Solution

sorted is used to make directories come first. If you don't care about the order, just return names directly.

def traverse(rootdir):
    """Return the contents of ``rootdir`` as a nested list of names.

    Each sub-directory becomes a ``[name, contents]`` pair, where
    ``contents`` is the listing of that sub-directory built the same way;
    every other entry is recorded by its bare name.  Within each level the
    directories come first and the remaining entries follow, both groups
    sorted by name.
    """
    names = []
    for filename in os.listdir(rootdir):
        filepath = os.path.join(rootdir, filename)
        if os.path.isdir(filepath):
            # Recurse so the pair carries the sub-directory's own listing.
            names.append([filename, traverse(filepath)])
        else:
            names.append(filename)
    # Key tuples start with 0 for directory pairs and 1 for plain names, so
    # directories sort ahead of files; ties are broken by name.
    return sorted(
        names,
        key=lambda x: (0, x[0]) if isinstance(x, list) else (1, x),
    )

Another version that uses os.walk:

def traverse(rootdir):
    """Return the contents of ``rootdir`` as a nested list of names.

    Each sub-directory reported by ``os.walk`` becomes a
    ``[name, contents]`` pair and each file is recorded by its bare name.
    Within each level the directories come first, then the files, both
    groups sorted by name.

    ``rootdir`` may be a ``str``/``bytes`` path or any path-like object.
    """
    # os.walk yields directories as plain str/bytes paths, so normalise a
    # path-like root first; otherwise the root key below would never match
    # the first directory yielded and the lookup would raise KeyError.
    rootdir = os.fspath(rootdir)
    names = []
    # Maps each directory path to the list that collects its contents.
    dir_to_names = {rootdir: names}
    for cdirname, dirnames, filenames in os.walk(rootdir):
        subnames = dir_to_names[cdirname]
        for subdirname in sorted(dirnames):
            # Register the child's list now; it is filled in later, when
            # os.walk yields that sub-directory.
            subnames2 = dir_to_names[os.path.join(cdirname, subdirname)] = []
            subnames.append([subdirname, subnames2])
        subnames.extend(sorted(filenames))
    return names