I am trying to create a function that takes in the name of a root directory, then traverses that directory and returns a nested list like this:
[["folder1",[
["subfolder1",[
"file1",
"file2"
]],
["subfolder2",[
"file3",
"file4"
]]
]],"file5","file6"]
Below is my attempt at the function:
def traverse(rootdir):
    """Attempt at building a nested listing of ``rootdir`` (known to be flawed).

    Returns a list mixing ``[subdirname, <nested list>]`` entries and file
    path strings.

    Known problems with this version:
    - ``os.walk`` already visits every directory below ``rootdir``, and this
      function additionally calls itself for each subdirectory, so entries
      below the top level are recorded more than once.
    - Files are appended as ``os.path.join(cdirname, filename)``, i.e. a path
      that includes ``rootdir``, rather than the bare file name.
    """
    names = []
    for cdirname, dirnames, filenames in os.walk(rootdir):
        # Record each subdirectory first, paired with its own traversal.
        for subdirname in dirnames:
            names.append([subdirname,traverse(os.path.join(cdirname, subdirname))])
        # Then record the files of the directory currently being walked.
        for filename in filenames:
            names.append(os.path.join(cdirname, filename))
    return names
My problem is that I always end up with the same files/folders being recorded more than once, and that the paths are always shown relative to "rootdir" instead of just the names of the respective files/folders. How do I weed out the duplicates? Additionally, how could I make it so that only the name, not the full path, gets recorded?
`sorted` is used to make directories come first. If you don't mind the order, just return `names`.
def traverse(rootdir):
    """Return a nested listing of the directory ``rootdir``.

    Each subdirectory is represented as ``[name, listing]``, where
    ``listing`` is the result of traversing that subdirectory; every other
    entry is represented by its bare name. In each listing the
    subdirectories come first, followed by the other entries, and each
    group is sorted by name.

    Errors raised by ``os.listdir`` (for example when ``rootdir`` is not a
    readable directory) propagate to the caller.
    """

    def _dirs_first(entry):
        # Directory entries are lists; rank them before plain file names
        # and order each group alphabetically by name.
        return (0, entry[0]) if isinstance(entry, list) else (1, entry)

    names = []
    for filename in os.listdir(rootdir):
        filepath = os.path.join(rootdir, filename)
        if os.path.isdir(filepath):
            names.append([filename, traverse(filepath)])
        else:
            names.append(filename)
    return sorted(names, key=_dirs_first)
Another version that uses `os.walk`:
def traverse(rootdir):
    """Return a nested listing of the directory ``rootdir`` using ``os.walk``.

    Each subdirectory is represented as ``[name, listing]`` and each file by
    its bare name. In every listing the subdirectories come first, followed
    by the files, each group sorted by name.

    ``rootdir`` may be a string or any ``os.PathLike`` object.
    """
    # os.walk yields directory paths in plain (fspath'd) form, so normalise
    # the root the same way; otherwise a pathlib.Path root would not match
    # the first path yielded and the lookup below would raise KeyError.
    root = os.fspath(rootdir)
    names = []
    # Maps each directory path to the (shared, mutable) list that holds its
    # listing, so that a later walk step can fill in a list created earlier.
    dir_to_names = {root: names}
    for cdirname, dirnames, filenames in os.walk(root):
        subnames = dir_to_names[cdirname]
        for subdirname in sorted(dirnames):
            # Create the child's listing now and register it under the same
            # path os.walk will yield when it descends into that child.
            child_listing = []
            dir_to_names[os.path.join(cdirname, subdirname)] = child_listing
            subnames.append([subdirname, child_listing])
        subnames.extend(sorted(filenames))
    return names