So I'm trying to iterate over multiple XML files from a library which contains more than 100k files, and I need to select files by the last 3 digits of their IDs. The expected result is a list of files named from 'asset-PD471090' to 'asset-PD471110', or from 'asset-GT888185' to 'asset-GT888209', and so on.
My Code -
'''
# Attempt from the question: print every file whose ID lies between two
# user-supplied IDs. NOTE: this snippet does not work as written -- see the
# comment on the glob pattern below.
import glob
strtid = input('From ID: ') # First file in range
seps = strtid[-3:]  # last three characters of the start ID (still a str)
endid = input('To ID: ') # Last file in range
eeps = endid[-3:]  # last three characters of the end ID (still a str)
FileId = strtid[:5] # always same File Id for whole range
# `seps-eeps` subtracts one str from another, which raises
# "TypeError: unsupported operand type(s) for -: 'str' and 'str'"
# before glob.iglob is ever called.
for name in glob.iglob('asset-' + FileId + [seps-eeps] + '.xml', recursive=True):
    print(name) # iterate over every file in given range and print file names.
'''
The error I'm getting is:
TypeError: unsupported operand type(s) for -: 'str' and 'str'
How can I load a specific range of input files?
As the error tells you, you are trying to use the `-` operator on strings:
strtid = input('From ID: ')   # string
seps = strtid[-3:]            # part of a string
endid = input('To ID: ')      # string
eeps = endid[-3:]             # part of a string
FileId = strtid[:5]           # also part of a string

# [seps-eeps]: trying to subtract a string from a string:
for name in glob.iglob('asset-' + FileId + [seps-eeps] + '.xml', recursive=True):
You can convert a string to an integer using int("1234") - that won't help you much though, because then you only have one (wrong) number for your iglob.
If you wanted to pass them as a glob pattern, you would need to enclose them in string delimiters - and glob does not work that way with number ranges:
"[123-678]"
would match a single character out of 1,2,3,4,5,6,7,8 - not the numbers 123 up to 678.
However, you can test your files yourself:
import os
def get_files(directory, prefix, postfix, numbers, extension=".xml"):
    """Yield paths of files named ``<prefix><number><postfix><extension>``.

    Walks *directory* recursively and yields the joined path of every file
    whose name starts with *prefix*, ends with *postfix* + *extension*, and
    whose middle part is a decimal number contained in *numbers*.

    Args:
        directory: Root directory to search (searched recursively).
        prefix: Leading part of the file name, e.g. ``"asset-GT"``.
        postfix: Part of the name between the number and the extension,
            e.g. ``"185"``.
        numbers: Container of accepted integers; anything supporting ``in``
            works (a ``range`` or ``set`` keeps the membership test cheap).
        extension: File extension including the dot. Defaults to ``".xml"``.

    Yields:
        ``os.path.join(root, file)`` for each matching file, with the files
        of each directory in sorted order.
    """
    suffix = postfix + extension
    start = len(prefix)
    for root, dirs, files in os.walk(directory):
        # Sorting in place makes os.walk descend into sub-directories in a
        # deterministic order instead of whatever the file system returns.
        dirs.sort()
        for file in sorted(files):
            # Skip files that do not follow the naming scheme at all; slicing
            # them blindly would either crash int() or match a foreign prefix.
            if not (file.startswith(prefix) and file.endswith(suffix)):
                continue
            middle = file[start:len(file) - len(suffix)]
            # isdecimal() is False for "" and for non-numeric text, so int()
            # is only ever called on something it can parse.
            if middle.isdecimal() and int(middle) in numbers:
                yield os.path.join(root, file)
# Demo: create a directory of sample files, then list the ones whose number
# lies in a given inclusive range.
d = "test"
prefix = "asset-GT"  # input("Basename: ")
postfix = "185"  # input("Id: ")

# create demo files to search into
# exist_ok=True: re-running the demo must not fail with FileExistsError
os.makedirs(d, exist_ok=True)
for i in range(50, 100):
    # {i:03} zero-pads the number to three digits, e.g. asset-GT050185.xml
    with open(os.path.join(d, f"{prefix}{i:03}{postfix}.xml"), "w") as f:
        f.write("")

# search params
fromto = "75 92"  # input("From To (space separated numbers): ")
fr, to = map(int, fromto.strip().split())
to += 1  # range upper limit is exclusive, so need to add 1 to include it

# Search the directory that was just populated instead of repeating its
# name as a separate literal.
search_root = os.path.join(".", d)
all_searched = list(get_files(search_root, prefix, postfix, range(fr, to)))
print(*all_searched, sep="\n")
Output:
./test/asset-GT075185.xml
./test/asset-GT076185.xml
./test/asset-GT077185.xml
./test/asset-GT078185.xml
./test/asset-GT079185.xml
./test/asset-GT080185.xml
./test/asset-GT081185.xml
./test/asset-GT082185.xml
./test/asset-GT083185.xml
./test/asset-GT084185.xml
./test/asset-GT085185.xml
./test/asset-GT086185.xml
./test/asset-GT087185.xml
./test/asset-GT088185.xml
./test/asset-GT089185.xml
./test/asset-GT090185.xml
./test/asset-GT091185.xml
./test/asset-GT092185.xml