Search code examples
pythondatasetreadfile

Using os.listdir() to read images from the LFW dataset only reads images from the first folder


The LFW dataset stores its images in separate folders, one per person name. I want to read all of the images and move them into a single folder. I used a for loop with the os.listdir() function to read the files, but it only returns an image from the first folder in the LFW dataset.

'''code is showing below the line'''

    import os
    from os.path import join as pjoin
    from skimage import io
    img_dir = 'new path'
    def load_data(data_dir): #dataset is the originalPath
        """Read images from the sub-folders of data_dir and save them into img_dir.

        Prints each folder name, each image path and the running count, and
        returns the number of images saved up to the point where it returns.
        """

        count = 0
        for guys in os.listdir(data_dir):
            print(guys)
            person_dir = pjoin(data_dir, guys)
            for i in os.listdir(person_dir):
                image_dir = pjoin(person_dir, i)
                img22 = io.imread(image_dir, mode='RGB')
                print(image_dir)
                # Saved under the bare file name, so the per-person folder level is dropped.
                io.imsave(pjoin(img_dir, i), img22)
                count = count+1
            print(person_dir)
            print(count)
            # NOTE: this return is indented inside the outer loop, so it runs
            # at the end of the first iteration over data_dir.
            return count

    # 'originalPath' is a placeholder for the directory passed to load_data as data_dir.
    sourcepath = 'originalPath'
    load_data(sourcepath)

The expected result is that all images from all folders of the LFW dataset are written to the new path (as loose image files, not inside any sub-folder). The code runs with no error, but the returned count is '1', and the new path contains only one image: the one from the first folder of the LFW dataset.


Solution

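  • One obvious problem is the indentation of your return statement: it sits inside the outer for loop, so the function returns as soon as the first folder has been processed. Move it out of the loop: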

    '''code is showing below the line'''
    
        import os
        from os.path import join as pjoin
        from skimage import io
        img_dir = 'new path'
        def load_data(data_dir):
            """Flatten a folder-per-person image dataset into the single folder img_dir.

            Every entry of data_dir that is a directory is scanned, and each
            file inside it is read with io.imread and written to img_dir under
            its bare file name. Progress is printed along the way.

            Args:
                data_dir: Path of the original dataset root (one sub-folder per
                    person).

            Returns:
                The total number of images written to img_dir.
            """
            count = 0
            for guys in os.listdir(data_dir):
                person_dir = pjoin(data_dir, guys)
                # Stray files next to the person folders (e.g. an index file)
                # would make os.listdir() raise NotADirectoryError; skip them.
                if not os.path.isdir(person_dir):
                    continue
                print(guys)
                for i in os.listdir(person_dir):
                    image_dir = pjoin(person_dir, i)
                    img22 = io.imread(image_dir, mode='RGB')
                    print(image_dir)
                    io.imsave(pjoin(img_dir, i), img22)
                    count += 1
                print(person_dir)
            print(count)
            # Return only after every folder has been visited.
            return count
    
        # 'originalPath' is a placeholder for the directory passed to load_data as data_dir.
        sourcepath = 'originalPath'
        load_data(sourcepath)
    

    Can you also check and confirm whether the first folder (guys), the one it does work for, contains more than one image?