Search code examples
python-3.xglob

How to iterate over multiple files by name within given range?


So I'm trying to iterate over multiple XML files from a library which contains more than 100k files, and I need to list files by their last 3 digits. The expected result is a list of files named from 'asset-PD471090' to 'asset-PD471110', or from 'asset-GT888185' to 'asset-GT888209', and so on.

My Code -

'''

import glob

# Prompt for the first ID of the range and keep its last three characters (still a str).
strtid = input('From ID: ') # First file in range
seps = strtid[-3:]
# Same for the last ID of the range.
endid = input('To ID: ') # Last file in range
eeps = endid[-3:] 
# First five characters of the start ID, used to build the glob pattern below.
FileId = strtid[:5] # always same File Id for whole range

# NOTE(review): seps and eeps are both str, so `seps-eeps` raises
# "TypeError: unsupported operand type(s) for -: 'str' and 'str'"
# while the pattern argument is being built, before glob.iglob is called.
for name in glob.iglob('asset-' + FileId + [seps-eeps] + '.xml', recursive=True):
    print(name) # iterate over every file in given range and print file names.

'''

The error I'm getting is:

TypeError: unsupported operand type(s) for -: 'str' and 'str'

How can I load a specific range of input files?


Solution

  • As the error tells you, you are trying to use - on strings:

    strtid = input('From ID: ') # string
    seps = strtid[-3:]          # part of a string
    
    endid = input('To ID: ')    # string 
    eeps = endid[-3:]           # part of a string
    
    FileId = strtid[:5]         # also part of a string 
    
    # [seps-eeps]: trying to subtract a string from a string:
    for name in glob.iglob('asset-' + FileId + [seps-eeps] + '.xml', recursive=True):
    

    You can convert the string to an integer using int("1234") - that won't help you much though, because then you only have one (wrong) number for your iglob.

    If you wanted to pass them as a glob pattern you would need to enclose them in string delimiters - and glob does not work that way with number ranges:

    • "[123-678]" would be one digit of 1,2,3,4,5,6,7,8 - not 123 up to 678

    However, you can test your files yourself:

    import os
    
    def get_files(directory, prefix, postfix, numbers):
        """Yield paths of files named ``<prefix><number><postfix>.xml`` under *directory*.

        The directory tree is walked recursively with ``os.walk``; within each
        directory the file names are visited in sorted order.

        Args:
            directory: Root directory to search.
            prefix: Leading part of the file name, e.g. ``"asset-GT"``.
            postfix: Part of the file name between the number and ``".xml"``.
            numbers: Container of ints (e.g. a ``range``); a file is yielded
                when the integer value of its number part is in it.

        Yields:
            ``os.path.join(root, file)`` for every matching file.
        """
        suffix = postfix + ".xml"
        lp = len(prefix)
        li = len(suffix)
        for root, dirs, files in os.walk(directory):
            for file in sorted(files):  # sorted to get files in order
                # Ignore files that do not follow the naming scheme at all;
                # otherwise an unrelated file would be sliced at the wrong
                # offsets and either crash int() or match by accident.
                if not (file.startswith(prefix) and file.endswith(suffix)):
                    continue
                try:
                    number = int(file[lp:len(file) - li])
                except ValueError:
                    # Middle part is empty or not numeric: not one of ours.
                    continue
                if number in numbers:
                    yield os.path.join(root, file)
    
    # Demo driver: build a throw-away directory of sample files, then list
    # the ones whose number part falls inside a user-chosen inclusive range.
    d = "test"
    prefix = "asset-GT"  # input("Basename: ")
    postfix = "185"      # input("Id: ")

    # create demo files to search into
    # exist_ok=True so that re-running the demo does not fail with
    # FileExistsError once the directory has been created.
    os.makedirs(d, exist_ok=True)
    for i in range(50, 100):
        # {i:03} zero-pads the number to three digits, e.g. 75 -> "075"
        with open(os.path.join(d, f"{prefix}{i:03}{postfix}.xml"), "w") as f:
            f.write("")

    # search params
    fromto = "75 92"     # input("From To (space separated numbers): ")

    fr, to = map(int, fromto.strip().split())
    to += 1 # range upper limit is exclusive, so need to add 1 to include it

    # search the directory created above instead of repeating its name
    all_searched = list(get_files(f"./{d}", prefix, postfix, range(fr, to)))
    print(*all_searched, sep="\n")
    

    Output:

    ./test/asset-GT075185.xml
    ./test/asset-GT076185.xml
    ./test/asset-GT077185.xml
    ./test/asset-GT078185.xml
    ./test/asset-GT079185.xml
    ./test/asset-GT080185.xml
    ./test/asset-GT081185.xml
    ./test/asset-GT082185.xml
    ./test/asset-GT083185.xml
    ./test/asset-GT084185.xml
    ./test/asset-GT085185.xml
    ./test/asset-GT086185.xml
    ./test/asset-GT087185.xml
    ./test/asset-GT088185.xml
    ./test/asset-GT089185.xml
    ./test/asset-GT090185.xml
    ./test/asset-GT091185.xml
    ./test/asset-GT092185.xml