Search code examples
python, sorting, python-os

Python Custom sort files read from directory


I have a directory with the following structure :

Main directory:
|--2001
   |--200101
      |--feed_013_01.zip
      |--feed_restr_013_01.zip
      |--feed_013_04.zip
      |--feed_restr_013_04.zip
      ...
      |--feed_013_30.zip
      |--feed_restr_013_30.zip
...
|--2021
   |--202101
      |--feed_013_01.zip
      |--feed_restr_013_01.zip
      |--feed_013_04.zip
      |--feed_restr_013_04.zip
      ...
      |--feed_013_30.zip
      |--feed_restr_013_30.zip

I need to read and sort the zip files in order:

feed_restr_013_30.zip, feed_013_30.zip.....feed_restr_013_01.zip, feed_013_01.zip

I am currently doing something like this :

def atoi(text):
    """Convert ``text`` to an ``int`` when it consists only of decimal digits.

    ``str.isdecimal`` is used instead of ``str.isdigit`` because ``isdigit``
    also accepts characters such as superscript digits (``'²'``), which
    ``int()`` rejects with ``ValueError``.

    Args:
        text: A string chunk.

    Returns:
        ``int(text)`` if every character is a decimal digit, otherwise
        ``text`` unchanged (the empty string is returned as-is).
    """
    return int(text) if text.isdecimal() else text

def natural_keys(text):
    """Return a natural-sort key for ``text``.

    The string is split on runs of digits (the runs themselves are kept), and
    every chunk is passed through ``atoi`` so numeric chunks compare as
    numbers rather than as text.
    """
    chunks = re.split(r'(\d+)', text)
    return list(map(atoi, chunks))

for path, subdirs, files in os.walk(directory):
    # Slice assignment rewrites the very lists os.walk handed out, just like
    # the in-place sort()/reverse() pair; the order is "natural sort, then
    # reversed" for both directory names and file names.
    subdirs[:] = sorted(subdirs, key=natural_keys)[::-1]
    files[:] = sorted(files, key=natural_keys)[::-1]

This puts all the "restr" files first, so I get the list as:

feed_restr_013_30.zip,feed_restr_013_01.zip.....feed_013_30.zip, feed_013_01.zip

UPDATE

I was able to solve this using buran's and SCKU's answers, coupled with my existing logic:

def atoi(text):
    """Return ``text`` as an ``int`` if it is purely decimal digits, else as-is.

    The check is ``str.isdecimal`` rather than ``str.isdigit``: ``isdigit`` is
    also true for characters like ``'²'`` that ``int()`` cannot convert, which
    would raise ``ValueError`` here.

    Args:
        text: A string chunk.

    Returns:
        The integer value for an all-decimal-digit string; otherwise the
        original string (including the empty string).
    """
    return int(text) if text.isdecimal() else text

def parse(fname):
    """Build a sort key from an underscore-separated file name.

    The name is split on ``'_'``: the first field is the prefix, the last two
    fields are ``n1`` and ``n2``, and any fields in between form the middle
    part. When the name has only two fields, the last one is taken as ``n1``
    and ``n2`` is treated as empty.

    Args:
        fname: File name (or path) containing at least one underscore.

    Returns:
        A tuple ``(prefix, n1, n2_key, middle)`` where ``n2_key`` is ``n2``
        split into digit and non-digit chunks, each passed through ``atoi``,
        and ``middle`` is the middle fields concatenated.

    Raises:
        ValueError: If ``fname`` contains no underscore at all.
    """
    fields = fname.split('_')
    try:
        prefix, *middle, n1, n2 = fields
    except ValueError:
        # Only a failed unpacking (too few fields) should trigger the
        # fallback; a bare ``except`` would also swallow KeyboardInterrupt
        # and unrelated errors.
        prefix, *middle, n1 = fields
        n2 = ''
    return (prefix, n1, [atoi(c) for c in re.split(r'(\d+)', n2)], ''.join(middle))

def get_Files(self, directory, source, keywords):
    """Return the joined path of every file found under ``directory``.

    The tree is traversed with ``os.walk``; each file name is joined to the
    directory it was found in. ``source`` and ``keywords`` are accepted but
    not used by this function.
    """
    return [
        os.path.join(root, name)
        for root, _subdirs, names in os.walk(directory)
        for name in names
    ]

# NOTE(review): get_Files is declared with a leading ``self`` parameter, so
# this call was presumably made as a method call in the real code — confirm.
# The collected paths are ordered by the parse() key, largest key first.
files = sorted(get_Files(directory, source, keywords), key=parse, reverse=True)

Solution

  • files = ['feed_013_01.zip', 'feed_restr_013_01.zip',
            'feed_013_04.zip', 'feed_restr_013_04.zip', 
            'feed_013_30.zip', 'feed_restr_013_30.zip']  # sample names from the question's directory listing
    
    def parse(fname):
        """Return the sort key ``(prefix, n1, n2, middle)`` for a file name.

        The name is split on ``'_'``: the first field is the prefix, the last
        two fields are the numbers ``n1`` and ``n2``, and any fields in
        between (e.g. ``'restr'``) are concatenated into ``middle``.

        Args:
            fname: Name such as ``'feed_restr_013_30.zip'``; an extension on
                the last field is optional.

        Returns:
            ``(prefix, int(n1), int(n2), middle)``.

        Raises:
            ValueError: If the name has fewer than three fields or a numeric
                field is not a valid integer.
        """
        prefix, *middle, n1, n2 = fname.split('_')
        # The last field still carries the extension (e.g. '01.zip'), which
        # int() cannot parse; keep only the part before the first dot.
        n2 = n2.partition('.')[0]
        return (prefix, int(n1), int(n2), ''.join(middle))
    
    # Descending order on the parse() key, then show the resulting list.
    files = sorted(files, key=parse, reverse=True)
    print(files)
    

    output

    ['feed_restr_013_30.zip', 'feed_013_30.zip', 'feed_restr_013_04.zip', 'feed_013_04.zip',
     'feed_restr_013_01.zip', 'feed_013_01.zip']