Search code examples
python python-2.7 unicode glob python-os

How to rename/replace a particular keyword with unicode character for all files and the associated folders?


I have the following files and subdirectories in a directory ('input_folder'). In the names of all the files with a '.dat' extension, and of all the folders, I would like to replace a particular keyword (e.g., ABC) with a Unicode character. A MWE is given below:

import os
import random
import errno    
#--------------------------------------
# Create random folders and files

# tzot's forced directory create hack https://stackoverflow.com/a/600612/4576447
def mkdir_p(path):
    """Create ``path`` together with any missing parent directories.

    Behaves like ``mkdir -p``: if ``path`` already exists as a directory the
    call is a silent no-op. Any other ``OSError`` (for example, ``path``
    exists but is a regular file) is re-raised to the caller.
    """
    try:
        os.makedirs(path)
    except OSError as err:
        # Only "already exists, and it really is a directory" is tolerated.
        already_a_directory = err.errno == errno.EEXIST and os.path.isdir(path)
        if not already_a_directory:
            raise


# Make sure the top-level working directory exists before populating it.
if not os.path.isdir('./input_folder'):
    os.makedirs('input_folder')
# Create up to 10 sub-folders named folder_ABC_<3-digit number>; a repeated
# random number is harmless because mkdir_p tolerates existing directories.
for i in range(10):
    mkdir_p('./input_folder/folder_ABC_' + str(random.randint(100,999)))


# Drop two empty .dat files, both containing the keyword 'ABC', into every
# directory that os.walk reports beneath input_folder.
for root, dirs, files in os.walk('./input_folder'):
    for dir in dirs:
        # NOTE: neither handle is closed explicitly; `result` is rebound by each
        # open() and still refers to the last file opened once the loop ends.
        result = open(os.path.join(root,dir) + '/ABC ' + str(random.randint(100,999)) + '.dat','w')
        result = open(os.path.join(root,dir) + '/XYZ-ABC ' + str(random.randint(100,999)) + '.dat','w')

#--------------------------------------
# Main rename code

# Walk the tree and try to replace 'ABC' with the UTF-8 encoded bytes of
# U+2714 in the name of every .dat file. Only `files` is processed here;
# directory names are not touched.
for root, dirs, files in os.walk('./input_folder'):
    for file in files:  
        if file.endswith((".dat")):
            # NOTE: `file` is only the bare file name; it is not joined with
            # `root`, so os.rename resolves it against the current working
            # directory rather than the directory being walked.
            os.rename(file, file.replace('ABC', u'\u2714'.encode('utf-8')))

This MWE gives the following error:

os.rename(file, file.replace('ABC', u'\u2714'.encode('utf-8')))
WindowsError: [Error 2] The system cannot find the file specified

How do I correctly rename all the files and folders that contain ABC, replacing it with a Unicode character, in Python 2.7?


Solution

  • There are at least five issues:

    1. When dealing with Unicode, use it everywhere. os.walk will return Unicode filenames if passed a Unicode path. from __future__ import unicode_literals will default strings to Unicode.
    2. When opening files, close them. You'll run into problems later when renaming. result still exists and has a reference to the last file opened.
    3. As mentioned in a comment, use os.path.join on the root and the file for both the before and after name.
    4. Use os.walk with topdown=False. This will process the leaf nodes first, so the directory tree isn't corrupted (and keeping root and dirs valid) while traversing it.
    5. Rename the files first, then the directories, again to not corrupt the directory tree while traversing it.

    Result:

    from __future__ import unicode_literals
    
    # Skipping unchanged code...
    
    for root, dirs, files in os.walk('./input_folder'):
        for subdir in dirs:
            parent = os.path.join(root,subdir)
            # Two empty .dat files per directory, one for each name prefix.
            for prefix in ('/ABC ', '/XYZ-ABC '):
                target = parent + prefix + str(random.randint(100,999)) + '.dat'
                # The with-block closes the handle as soon as the file has
                # been created, so no open handle is left behind.
                with open(target,'w'):
                    pass
    
    #--------------------------------------
    # Main rename code
    
    for root, dirs, files in os.walk('./input_folder',topdown=False):
        # Bottom-up walk: everything below `root` has already been visited, so
        # renaming the entries of `root` cannot invalidate a path that is
        # still waiting to be processed.
        dat_files = [name for name in files if name.endswith(".dat")]
        # Files are renamed first, then the sub-directories.
        for names in (dat_files, dirs):
            for old_name in names:
                new_name = old_name.replace('ABC', '\u2714')
                # Always use full paths, both for the source and the target.
                os.rename(os.path.join(root,old_name), os.path.join(root,new_name))