Search code examples
pythonpython-repython-os

I want to move a file based on part of the name to a folder with that name


I have a directory with a large number of files that I want to move into folders based on part of the file name. My list of files looks like this:

001-020-012B-B.nc
001-022-151-A.nc
001-023-022-PY-T1.nc.nc
001-096-016B-A.nc

I want to move the files I have into separate folders based on the first part of the file name (001-096-016B, 001-023-022, 001-022-151). The first part of the file name always has the same number of digits and always consists of 3 groups separated by a hyphen '-'.

The folder names are named like this \oe-xxxx\xxxx\xxxx\001-Disc-PED\020-Rotor-parts-1200.

So for example, this file should be placed in the above folder, based on the folder name (the numbers):

001-020-012B-B.nc

File path divided into column to show where the above file has to be moved to:
(001)-Disc-PED\(020)-Rotor-parts-1200.

Therefore:

(001)-Disc-PED\(020)-Rotor-parts-1200 (001)-(020)-012B-B.nc

This is what I have tried from looking online but it does not work: My thinking is I want to loop through the folders and look for matches.

import os
import glob
import itertools
import re

# Source folder holding the .nc files, and root of the destination folder tree.
sourcefile = r'C:\Users\cah\Desktop\000Turning'
destinationPath = r'C:\Users\cah\Desktop\08-CAM'

# Separation
# NOTE(review): this pattern is relative to the current working directory, and
# the result is overwritten by the `dirs` loop variable of os.walk() below.
dirs = glob.glob('*-*')

# Every file with file extension .nc
# NOTE(review): also relative to the current working directory, and also
# overwritten by the `files` loop variable of os.walk() below.
files = glob.glob('*.nc')

# Walk the source tree; for every .nc file, scan the whole destination tree.
for root, dirs, files in os.walk(sourcefile):
    for file in files:
        if file.endswith(".nc"):

            # Characters 0-2 and 4-6 of the file name, i.e. the first two
            # groups of a name such as "001-020-012B-B.nc" ("001" and "020").
            first3Char = str(file[0:3])
            last3Char = str(file[4:7])

            # NOTE(review): os.walk() yields (dirpath, dirnames, filenames)
            # tuples, so `root` is a 3-tuple here rather than a path string.
            # It also rebinds the `root` of the outer loop.
            for root in os.walk(destinationPath):

                # Slices the tuple (not the path string); the value is unused.
                first33CharsOfRoot = str(root[0:33])

                # Strip "[", "]", ")", "'" and "," from the tuple's string form.
                cleanRoot1 = str(root).replace("[", "")
                cleanRoot2 = str(cleanRoot1).replace("]", "")
                cleanRoot3 = str(cleanRoot2).replace(")", "")
                cleanRoot4 = str(cleanRoot3).replace("'", "")
                cleanRoot5 = str(cleanRoot4).replace(",", "")

                # Captures the last three characters of the cleaned string
                # (ignoring trailing whitespace).
                firstCharOfRoot = re.findall(r'(.{3})\s*$', str(cleanRoot5))

                # NOTE(review): re.findall() returns a list, so comparing it to
                # the str `first3Char` is always False.
                print(firstCharOfRoot==first3Char)

                if(firstCharOfRoot == first3Char):
                    print("Hello")
                    for root in os.walk(destinationPath):
                        # NOTE(review): `root` is a tuple here as well;
                        # os.path.basename() expects a path.
                        print(os.path.basename(root))

                     #   if(os.path)

I realized that I should not look for the last 3 chars in the path, because it is the leading numbers (001, etc.) at the beginning of the folder name that I need to look for in order to find the first path that I need to go to.

EDIT:

    import os
import glob
import itertools
import re

# Source folder holding the .nc files, and root of the destination folder tree.
sourcefile = r'C:\Users\cah\Desktop\000Turning'
destinationPath = r'C:\Users\cah\Desktop\08-CAM'

# Separation
# NOTE(review): relative to the current working directory; the result is
# overwritten by the `dirs` loop variable of os.walk() below.
dirs = glob.glob('*-*')

# Every file with file extension .nc
# NOTE(review): relative to the current working directory; the result is
# overwritten by the `files` loop variable of os.walk() below.
files = glob.glob('*.nc')



# Walk the source tree; for every .nc file, scan the whole destination tree.
for root, dirs, files in os.walk(sourcefile):
    for file in files:
        if file.endswith(".nc"):

            # Characters 0-2 and 4-6 of the file name, i.e. the first two
            # groups of a name such as "001-020-012B-B.nc" ("001" and "020").
            first3Char = str(file[0:3])
            last3Char = str(file[4:7])


            # NOTE(review): os.walk() yields (dirpath, dirnames, filenames)
            # tuples, so `root` is a 3-tuple here rather than a path string.
            for root in os.walk(destinationPath):




                # Strip "[", "]", ")", "'" and "," from the tuple's string form.
                cleanRoot1 = str(root).replace("[", "")
                cleanRoot2 = str(cleanRoot1).replace("]", "")
                cleanRoot3 = str(cleanRoot2).replace(")", "")
                cleanRoot4 = str(cleanRoot3).replace("'", "")
                cleanRoot5 = str(cleanRoot4).replace(",", "")

                # Skip five (resp. six) path components, each followed by two
                # literal backslashes (backslashes are doubled in the tuple's
                # string form), then capture the digits that start the next one.
                firstCharOfRoot = re.findall(r'^(?:[^\\]+\\\\){5}(\d+).*$', str(cleanRoot5))
                secondCharOfRoot = re.findall(r'^(?:[^\\]+\\\\){6}(\d+).*$', str(cleanRoot5))


                # re.findall() returns lists; join them into plain strings
                # (an empty string when nothing matched).
                firstCharOfRootCleaned = ''.join(firstCharOfRoot)
                secondCharOfRoot = ''.join(secondCharOfRoot)


                # Same string with "(" removed as well; only referenced by the
                # commented-out loop below.
                cleanRoot6 = str(cleanRoot5).replace("(", "")

                # NOTE(review): `&` binds tighter than `==`, so this evaluates
                # `str(first3Char) & secondCharOfRoot` (a TypeError for two
                # strings); the logical operator `and` looks intended.
                if(firstCharOfRootCleaned == str(first3Char) & secondCharOfRoot == str(last3Char)):

                    print("BINGOf")
                  #  for root1 in os.walk(cleanRoot6):
                   

Solution

  • Solution

    There is an improved solution in the next section, but let's walk through the straightforward solution first.

    First, get the complete list of subfolders.

    # (parent path, folder name) pair for every directory under destinationPath,
    # at any depth.
    all_folders_splitted = [
        os.path.split(path)
        for path in glob.iglob(os.path.join(destinationPath, "**"), recursive=True)
        if os.path.isdir(path)
    ]
    

    Then, call a function on each of your files to find its matching folder, or to build a new folder path if no matching folder exists. This function, called find_folder(), is included in the rest of the script:

    import os
    import glob
    import shutil
    
    # Folder holding the files to sort, and root of the destination folder tree.
    sourcefile = r'C:\Users\cah\Desktop\000Turning'
    destinationPath = r'C:\Users\cah\Desktop\08-CAM'

    # (parent path, folder name) pair for every directory under destinationPath,
    # at any depth. Built once, up front.
    all_folders_splitted = [
        os.path.split(path)
        for path in glob.iglob(os.path.join(destinationPath, "**"), recursive=True)
        if os.path.isdir(path)
    ]
    
    def find_folder(part1, part2):
        """Return the folder a file prefixed ``part1-part2`` belongs in.

        The lookup uses the module-level ``all_folders_splitted`` list of
        ``(parent path, folder name)`` pairs:

        * if a folder whose name starts with ``part2`` sits in a parent whose
          name starts with ``part1``, that folder is returned;
        * otherwise, if some parent starting with ``part1`` was found, a new
          ``part2`` folder is created inside it;
        * otherwise a new ``part1/part2`` tree is created under
          ``destinationPath``.

        Returns the path of the (possibly newly created) folder.
        """
        # Folders whose *parent* directory name starts with part1.
        candidates = [folder for folder in all_folders_splitted
                      if os.path.split(folder[0])[-1].startswith(part1)]

        for parent, name in candidates:
            if name.startswith(part2):
                return os.path.join(parent, name)

        if not candidates:
            # Whole new folder tree
            dest = os.path.join(destinationPath, part1, part2)
        else:
            # Inside the already existing "part1" folder (the last one found)
            dest = os.path.join(candidates[-1][0], part2)

        # all_folders_splitted is not refreshed after a folder is created, so a
        # later file with the same prefix ends up here again; exist_ok avoids
        # a FileExistsError in that case.
        os.makedirs(dest, exist_ok=True)
        return dest
    
    # All the files you want to transfer: every .nc file at any depth under
    # sourcefile whose name contains at least two "-".
    files_gen = glob.iglob(os.path.join(sourcefile, "**", "*-*-*.nc"), recursive=True)

    for file in files_gen:
        # Split on the first two "-" only,
        # e.g. "001-020-012B-B.nc" -> "001", "020", "012B-B.nc"
        basename = os.path.basename(file)
        part1, part2, _ = basename.split("-", 2)

        # Look up the destination folder (find_folder creates it if necessary)
        destination_folder = find_folder(part1, part2)

        # Copy the file; the original stays in the source folder.
        # Use shutil.move here instead if the file should be removed from it.
        shutil.copy2(file, os.path.join(destination_folder, basename))
    

    Improved solution

    In case you have a large number of files, it could be detrimental to "split and match" every folder at each iteration.

    We can store the folder, found given a pattern, in a dictionary. The dictionary will be updated if a new pattern is given, else it will return the previously found folder.

    import os
    import glob
    import shutil
    
    # Folder holding the files to sort, and root of the destination folder tree.
    sourcefile = r'C:\Users\cah\Desktop\000Turning'
    destinationPath = r'C:\Users\cah\Desktop\08-CAM'

    # Global dictionary storing the folder path found for each pattern
    found_pattern = {}

    # (parent path, folder name) pair for every directory under destinationPath,
    # at any depth. Built once, up front.
    all_folders_splitted = [
        os.path.split(path)
        for path in glob.iglob(os.path.join(destinationPath, "**"), recursive=True)
        if os.path.isdir(path)
    ]
    
    
    def find_folder(part1, part2):
        """Return the folder a file prefixed ``part1-part2`` belongs in, with caching.

        Results are stored in the module-level ``found_pattern`` dictionary,
        keyed by ``(part1, part2)``, so the folder list is scanned only once
        per distinct prefix.

        On a cache miss the module-level ``all_folders_splitted`` list of
        ``(parent path, folder name)`` pairs is searched:

        * if a folder whose name starts with ``part2`` sits in a parent whose
          name starts with ``part1``, that folder is returned;
        * otherwise, if some parent starting with ``part1`` was found, a new
          ``part2`` folder is created inside it;
        * otherwise a new ``part1/part2`` tree is created under
          ``destinationPath``.

        Returns the path of the (possibly newly created) folder.
        """
        current_key = (part1, part2)

        if current_key in found_pattern:
            # Already found previously.
            # We just return the folder path, stored as the value.
            return found_pattern[current_key]

        # Folders whose *parent* directory name starts with part1.
        candidates = [folder for folder in all_folders_splitted
                      if os.path.split(folder[0])[-1].startswith(part1)]

        for parent, name in candidates:
            if name.startswith(part2):
                dest = os.path.join(parent, name)
                # Update the dictionary
                found_pattern[current_key] = dest
                return dest

        if not candidates:
            # Whole new folder tree
            dest = os.path.join(destinationPath, part1, part2)
        else:
            # Inside the already existing "part1" folder (the last one found)
            dest = os.path.join(candidates[-1][0], part2)

        # Create the folder first, so that only usable paths get cached.
        os.makedirs(dest, exist_ok=True)

        # Update the dictionary
        found_pattern[current_key] = dest
        return dest
    
    # All the files you want to transfer: every .nc file at any depth under
    # sourcefile whose name contains at least two "-".
    files_gen = glob.iglob(os.path.join(sourcefile, "**", "*-*-*.nc"), recursive=True)

    for file in files_gen:
        # Split on the first two "-" only,
        # e.g. "001-020-012B-B.nc" -> "001", "020", "012B-B.nc"
        basename = os.path.basename(file)
        part1, part2, _ = basename.split("-", 2)

        # Look up the destination folder (find_folder creates it if necessary)
        destination_folder = find_folder(part1, part2)

        # Copy the file; the original stays in the source folder.
        # Use shutil.move here instead if the file should be removed from it.
        shutil.copy2(file, os.path.join(destination_folder, basename))
    

    This updated solution makes it more efficient (especially when many files should share the same folder) and you could also make use of the dictionary later, if you save it.