I am trying to find a given pattern in an input file, but sometimes I get no match when I open the file with 'r', and sometimes I get a Unicode error.
def extract_files(filename):
    r"""Print every occurrence of one fixed ``<Compile Include=...>`` tag.

    The file is read as text and searched for the literal tag
    ``<Compile Include="src\asf\preprocessor\string.h">``. Each occurrence
    is printed on its own line.

    Args:
        filename: Path of the file to search.

    Raises:
        SystemExit: If the tag does not occur in the file. The text
            ``no match`` is written to stderr first.
        OSError: If the file cannot be opened or read.
    """
    # Written as a raw string: in a normal string literal "\a" is the BEL
    # control character, so the path would never match the file's text.
    target = r'<Compile Include="src\asf\preprocessor\string.h">'

    # NOTE(review): assumes the input is UTF-8, optionally with a BOM --
    # confirm against the real files. Undecodable bytes are replaced rather
    # than raising UnicodeDecodeError; the tag itself is pure ASCII, so the
    # replacement cannot hide a match in UTF-8 or single-byte encodings.
    with open(filename, 'r', encoding='utf-8-sig', errors='replace') as source:
        text = source.read()

    # re.escape() makes the backslashes and the dot match literally; left
    # unescaped, "\p" is rejected by the regex compiler and "." matches any
    # character.
    files_match = re.findall(re.escape(target), text)
    if not files_match:
        sys.stderr.write('no match')
        sys.exit()
    for f in files_match:
        print(f)
It looks like you're trying to pull out every string that comes after <Compile Include=" and runs until the closing ">. We can do that, but be aware that this will probably break on edge cases!
import re
import sys
def extract_files(filename):
    r"""Print the path from every ``<Compile Include="...">`` attribute.

    The file is read as text, and each path found directly after
    ``<Compile Include="`` and directly before the closing double quote
    is printed on its own line.

    Only paths made up entirely of ASCII letters, dashes (``-``), dots
    (``.``) and backslashes (``\``) are recognised. A path containing any
    other character (digits, underscores, spaces, forward slashes, ...) is
    skipped.

    Args:
        filename: Path of the file to search.

    Raises:
        SystemExit: If no path is found. The text ``no match`` is written
            to stderr first.
        OSError: If the file cannot be opened or read.
    """
    # NOTE(review): assumes the input is UTF-8, optionally with a BOM --
    # confirm against the real files. Undecodable bytes are replaced rather
    # than raising UnicodeDecodeError.
    with open(filename, 'r', encoding='utf-8-sig', errors='replace') as file:
        # read() must be *called*; passing the bound method itself to
        # re.findall raises TypeError.
        text = file.read()

    # The lookbehind and lookahead anchor the match between the attribute's
    # opening text and its closing double quote (") without including
    # either in the result. A single quote (') does not end a match.
    matches = re.findall(r'(?<=<Compile Include=")[-.A-Za-z\\]+(?=")', text)
    if not matches:
        sys.stderr.write("no match")
        sys.exit()
    for match in matches:
        print(match)