I have a directory with a large number of files that I want to move into folders based on part of the file name. My list of files looks like this:
001-020-012B-B.nc
001-022-151-A.nc
001-023-022-PY-T1.nc.nc
001-096-016B-A.nc
I want to move the files I have into separate folders based on the first part of the file name (001-096-016B
, 001-023-022
, 001-022-151
). The first parts of the file name always have the same number of numbers and are always in 3 parts separated by an underscore '-'
.
The folder names are named like this \oe-xxxx\xxxx\xxxx\001-Disc-PED\020-Rotor-parts-1200
.
So for example, this file should be placed in the above folder, based on the folder name (the numbers):
001-020-012B-B.nc
File path divided into column to show where the above file has to be moved to:
(001)-Disc-PED\(020)-Rotor-parts-1200
.
Therefore:
(001)-Disc-PED\(020)-Rotor-parts-1200 (001)-(020)-012B-B.nc
This is what I have tried from looking online but it does not work: My thinking is I want to loop through the folders and look for matches.
import os
import glob
import itertools
import re
#Source file
sourcefile = r'C:\Users\cah\Desktop\000Turning'
destinationPath = r'C:\Users\cah\Desktop\08-CAM'
#Seperation
dirs = glob.glob('*-*')
#Every file with file extension .nc
files = glob.glob('*.nc')
for root, dirs, files in os.walk(sourcefile):
for file in files:
if file.endswith(".nc"):
first3Char = str(file[0:3])
last3Char = str(file[4:7])
for root in os.walk(destinationPath):
first33CharsOfRoot = str(root[0:33])
cleanRoot1 = str(root).replace("[", "")
cleanRoot2 = str(cleanRoot1).replace("]", "")
cleanRoot3 = str(cleanRoot2).replace(")", "")
cleanRoot4 = str(cleanRoot3).replace("'", "")
cleanRoot5 = str(cleanRoot4).replace(",", "")
firstCharOfRoot = re.findall(r'(.{3})\s*$', str(cleanRoot5))
print(firstCharOfRoot==first3Char)
if(firstCharOfRoot == first3Char):
print("Hello")
for root in os.walk(destinationPath):
print(os.path.basename(root))
# if(os.path)
I realized that I should not look for the last 3 chars in the path, because it is the first (001) etc. Numbers that I need to look for in the beginning to find the first path that I need to go to.
EDIT:
import os
import glob
import itertools
import re
#Source file
sourcefile = r'C:\Users\cah\Desktop\000Turning'
destinationPath = r'C:\Users\cah\Desktop\08-CAM'
#Seperation
dirs = glob.glob('*-*')
#Every file with file extension .nc
files = glob.glob('*.nc')
for root, dirs, files in os.walk(sourcefile):
for file in files:
if file.endswith(".nc"):
first3Char = str(file[0:3])
last3Char = str(file[4:7])
for root in os.walk(destinationPath):
cleanRoot1 = str(root).replace("[", "")
cleanRoot2 = str(cleanRoot1).replace("]", "")
cleanRoot3 = str(cleanRoot2).replace(")", "")
cleanRoot4 = str(cleanRoot3).replace("'", "")
cleanRoot5 = str(cleanRoot4).replace(",", "")
firstCharOfRoot = re.findall(r'^(?:[^\\]+\\\\){5}(\d+).*$', str(cleanRoot5))
secondCharOfRoot = re.findall(r'^(?:[^\\]+\\\\){6}(\d+).*$', str(cleanRoot5))
firstCharOfRootCleaned = ''.join(firstCharOfRoot)
secondCharOfRoot = ''.join(secondCharOfRoot)
cleanRoot6 = str(cleanRoot5).replace("(", "")
if(firstCharOfRootCleaned == str(first3Char) & secondCharOfRoot == str(last3Char)):
print("BINGOf")
# for root1 in os.walk(cleanRoot6):
There is an improved solution in the next section. But let's decompose the straightforward solution before.
First, get the complete list of subfolders.
all_folders_splitted = [os.path.split(f)\
for f in glob.iglob(os.path.join(destinationPath, "**"), recursive=True)\
if os.path.isdir(f)]
Then, use a function on each of your file to find its matching folder, or a new filepath if it doesn't exist. I include this function called find_folder()
in the rest of the script:
import os
import glob
import shutil
sourcefile= r'C:\Users\cah\Desktop\000Turning'
destinationPath = r'C:\Users\cah\Desktop\08-CAM'
all_folders_splitted = [os.path.split(f)\
for f in glob.iglob(os.path.join(destinationPath , "**"), recursive=True)\
if os.path.isdir(f)]
# It will create and return a new directory if no directory matches
def find_folder(part1, part2):
matching_folders1 = [folder for folder in all_folders_splitted\
if os.path.split(folder[0])[-1].startswith(part1)]
matching_folder2 = None
for matching_folder2 in matching_folders1:
if matching_folder2[-1].startswith(part2):
return os.path.join(*matching_folder2)
# Whole new folder tree
if matching_folder2 is None:
dest = os.path.join(destinationPath, part1, part2)
os.makedirs(dest)
return dest
# Inside the already existing folder part "1"
dest = os.path.join(matching_folder2[0], part2)
os.makedirs(dest)
return dest
# All the files you want to move
files_gen = glob.iglob(os.path.join(source_path, "**", "*-*-*.nc"), recursive=True)
for file in files_gen:
# Split the first two "-"
basename = os.path.basename(file)
splitted = basename.split("-", 2)
# Format the destination folder.
# Creates it if necessary
destination_folder = find_folder(splitted[0], splitted[1])
# Copying the file
shutil.copy2(file, os.path.join(destination_folder, basename))
In case you have a large number of files, it could be detrimental to "split and match" every folder at each iteration.
We can store the folder, found given a pattern, in a dictionary. The dictionary will be updated if a new pattern is given, else it will return the previously found folder.
import os
import glob
import shutil
sourcefile= r'C:\Users\cah\Desktop\000Turning'
destinationPath = r'C:\Users\cah\Desktop\08-CAM'
# Global dictionary to store folder paths, relative to a pattern
found_pattern = dict()
all_folders_splitted = [os.path.split(f)\
for f in glob.iglob(os.path.join(destinationPath , "**"), recursive=True)\
if os.path.isdir(f)]
def find_folder(part1, part2):
current_key = tuple([part1, part2])
if current_key in pattern_match:
# Already found previously.
# We just return the folder path, stored as the value.
return pattern_match[current_key]
matching_folders1 = [folder for folder in all_folders_splitted\
if os.path.split(folder[0])[-1].startswith(part1)]
matching_folder2 = None
for matching_folder2 in matching_folders1:
if matching_folder2[-1].startswith(part2):
dest = os.path.join(*matching_folder2)
# Update the dictionary
pattern_match[current_key] = dest
return dest
if matching_folder2 is None:
dest = os.path.join(destinationPath, part1, part2)
else:
dest = os.path.join(matching_folder2[0], part2)
# Update the dictionary
pattern_match[current_key] = dest
os.makedirs(dest, exist_ok = True)
return dest
# All the files you want to move
files_gen = glob.iglob(os.path.join(source_path, "**", "*-*-*.nc"), recursive=True)
for file in files_gen:
# Split the first two "-"
basename = os.path.basename(file)
splitted = basename.split("-", 2)
# Format the destination folder.
# Creates it if necessary
destination_folder = find_folder(splitted[0], splitted[1])
# Copying the file
shutil.copy2(file, os.path.join(destination_folder, basename))
This updated solution makes it more efficient (especially when many files should share the same folder) and you could also make use of the dictionary later, if you save it.