I wrote a function to get the total size and total files/subdirs of the entire disk or specific folder. The recursive function was very simple:
# Make the python dirSize function a non recursive one:
from pathlib import Path
obj = {}
def dirSize(dir: Path, size = 0, files = 0, dirs = 0):
for sub_dir in dir.glob('*'):
if sub_dir.is_dir():
dirs += 1
res_size, res_files, res_dirs = dirSize(sub_dir)
size += res_size
files += res_files
dirs += res_dirs
elif sub_dir.is_file():
files += 1
size += sub_dir.stat().st_size
obj[dir.as_posix()] = {
'size': size,
'files': files,
'dirs': dirs,
}
return size, files, dirs
dirSize(Path('~/Documents').expanduser())
#Line above is for tests. Final code should run in elevated cmd:
#dirSize(Path('C:/'))
with open(Path('~/Desktop/results.tsv').expanduser().as_posix(), 'w') as file:
file.write('Directory\tBytes\tFiles\tDirs\n')
for key in sorted(obj):
dir = obj[key]
file.write(key)
file.write('\t')
file.write(str(dir['size']))
file.write('\t')
file.write(str(dir['files']))
file.write('\t')
file.write(str(dir['dirs']))
file.write('\n')
I tried to use AI to convert it to a non recursive function, but AI seems to ignore these lines are a bottom up sum, not a simple sum:
size += res_size
files += res_files
dirs += res_dirs
So I tried a stack loop, like in the Dijkstra algorithm, but it turns up that I couldn't find a simple solution to make the bottom up sum. Alternatively I could add level/parent/child properties to the obj variable to make a bottom up sum after the queue processing. But this seems very cumbersome. Is there a simpler way to achieve this?
Replying questions asked below:
Based on answer from @awarrier99 I came up with this version that simplifies the logic a bit. I hope this helps future researchers:
def dirSizeIterative(dir: Path):
dirs = [(dir, {'size': 0, 'files': 0, 'dirs': 0}, None)]
i = 0
#step 1, read dir/files info and add to array
while i < len(dirs):
dir, info, _ = dirs[i]
i += 1
for sub_dir in dir.glob('*'):
if sub_dir.is_dir():
info['dirs'] += 1
dirs.append((sub_dir, {'size': 0, 'files': 0, 'dirs': 0}, info))
elif sub_dir.is_file():
info['files'] += 1
info['size'] += sub_dir.stat().st_size
#step 2, bottom up sum using the parent
for _, info, parent in dirs[-1:0:-1]:
parent['size'] += info['size']
parent['files'] += info['files']
parent['dirs'] += info['dirs']
return {dir.as_posix(): info for dir, info, _ in dirs}