I'd like to make a `QAbstractItemModel` that gets its data from a series of XML files, all situated in the same directory. Since PyQt5 no longer supports `QDomDocument` (or at least I couldn't find a way to make it work), I've had to resort to a `QXmlStreamReader`. I'm putting the data itself in a giant Python dictionary (well... not exactly giant by computer science standards) that contains other dictionaries under various keys to create a tree-like structure.
This is my code so far:
# NOTE(review): the listing had lost its indentation; the nesting below is the
# one that reproduces the output quoted in the question. The position of the
# final ``print(self.data)`` (after the file loop) is an assumption.
class DataModel(QtCore.QAbstractItemModel):
    """First attempt: read every ``*.xml`` file of a directory into ``self.data``.

    All values end up in ONE flat dictionary. Nothing in the loop below ever
    creates a nested dictionary for a parent element or remembers which
    elements are currently open, so a tree cannot emerge from it.
    """

    def __init__(self, settingsDirectory, parent = None):
        """Parse all XML files found in ``settingsDirectory``.

        ``settingsDirectory`` is used through ``setNameFilters``,
        ``entryList`` and ``absolutePath``; ``parent`` is forwarded to the
        base-class constructor.
        """
        super(DataModel, self).__init__(parent)
        # Restrict the directory listing to XML files.
        settingsDirectory.setNameFilters(["*.xml"])
        files = settingsDirectory.entryList()
        print(files)
        self.data = {}
        for i in range(len(files)):
            filePath = str(files[i])
            file = QtCore.QFile(settingsDirectory.absolutePath() + "/" + str(filePath))
            fileOpens = file.open(file.ReadOnly | file.Text)
            if fileOpens:
                parser = QtCore.QXmlStreamReader(file)
                print("--------Beginning parsing----------")
                print("Reading file: "+str(filePath))
                while not parser.atEnd():
                    parser.readNext()
                    token = parser.tokenType()
                    print("Reading tag: " + str(parser.name()))
                    print("Tag type is: " + str(token))
                    if token == parser.StartDocument:
                        self.data["XML Version"] = str(parser.documentVersion())
                        self.data["XML Encoding"] = str(parser.documentEncoding())
                    if token == parser.StartElement:
                        # Only the most recent start tag is remembered: a child's
                        # start tag overwrites the name of its still-open parent.
                        tokenName = parser.name()
                    if parser.tokenType() == parser.Characters:
                        # Whitespace between tags also arrives as Characters, which
                        # is where the '\n\t\t' values in the output come from.
                        tokenText = parser.text()
                        print("This tag has a text value: " + str(tokenText))
                        print("current data: " + str(self.data))
                    if token == parser.EndElement:
                        # The value is always written to the top-level dict, never
                        # into a dict belonging to the enclosing element.
                        if tokenText != None:
                            self.data[tokenName] = tokenText
                        else:
                            self.data[tokenName] = {}
                        # After the reset, the end tag of an enclosing element is
                        # stored under the key None.
                        tokenName = None
                        tokenText = None
            else:
                print(self.tr("xml file did not open properly"))
        print(self.data)
While this code doesn't crash or anything, it does have a few issues, and I have no idea why they're happening or how to fix them:
1. The `tokenName` never changes from `None` for some reason - solved
2. The structure of the `self.data` dictionary does not turn into a tree-like one, no idea why :|
example data:
<?xml version="1.0" encoding="UTF-8"?>
<tag>
<description>This is a text</description>
<types>
<typesAllowed></typesAllowed>
<typesEnabled></typesEnabled>
</types>
</tag>
yields the final result:
{'XML Encoding': 'UTF-8', 'XML Version': '1.0', 'typesAllowed': '\n\t\t', None: '\n', 'typesEnabled': '\n\t\t', 'description': 'This is a text'}
instead of the wanted:
{'XML Encoding': 'UTF-8', 'XML Version': '1.0', 'tag': {'description': 'this is a text', 'typesAllowed': '\n\t\t', 'typesEnabled': '\n\t\t'}}
I know these issues are most likely a result of my poor understanding of how a `StreamReader` works, so any and all tips would be welcome :)
edit 1:
The `tokenName` change was a silly positioning error, silly me. The code reflects the fix.
edit 2:
added an example and example output
This question is now solved; I took a different approach to the problem.
I basically took a list into which I appended tuples `(name, {})` if the `StartElement` token had the attribute `parseAs == "element"`, and put an evaluated string (`parseText` function) into the last tuple's dictionary. When it meets an `EndElement` token, it finds the tuple with `name == tokenName` (the name of the current token) and puts it into the previous tuple's dictionary as an entry with key `name`.
There are a few more details as to how it works, but I'd probably just overly complicate my explanation if I included them (how it knows when to submit `currData` to `self.data`, etc.).
class DataModel(QtCore.QAbstractItemModel):
    """Model whose content is loaded from the ``*.xml`` files of a directory.

    Every file is read with a ``QXmlStreamReader``:

    * an element with ``parseAs="element"`` becomes a nested dictionary,
    * an element with ``parseAs="text"`` becomes a leaf value converted by
      ``parseText`` and stored in the dictionary of the enclosing element,
    * any other element only produces a warning; its children are still read.

    The finished tree of a file is stored in ``self.data`` under the name of
    the file's outermost ``parseAs="element"`` tag. The XML version and
    encoding of every file are recorded in ``self.parsingLog``.
    """

    def __init__(self, settingsDirectory, parent = None):
        """Parse every XML file found in ``settingsDirectory``.

        ``settingsDirectory`` is used through ``setNameFilters``,
        ``entryList`` and ``absoluteFilePath`` (the name filter set here stays
        active on the object that was passed in). ``parent`` is forwarded to
        the base-class constructor.
        """
        super(DataModel, self).__init__(parent)
        settingsDirectory.setNameFilters(["*.xml"])
        files = settingsDirectory.entryList()
        print(files)
        # NOTE(review): the attribute name ``data`` hides the ``data()`` method
        # that QAbstractItemModel subclasses are expected to provide. It is
        # kept because it is this class's existing interface - confirm before
        # attaching the model to a view.
        self.data = {}
        self.parsingLog = {}
        for fileName in files:
            filePath = str(fileName)
            self._parseFile(settingsDirectory.absoluteFilePath(filePath), filePath)

    def _parseFile(self, absolutePath, filePath):
        """Open one XML file, parse it, report parse errors and close it."""
        xmlFile = QtCore.QFile(absolutePath)
        if not xmlFile.open(QtCore.QIODevice.ReadOnly | QtCore.QIODevice.Text):
            print(self.tr("xml file did not open properly"))
            return
        try:
            parser = QtCore.QXmlStreamReader(xmlFile)
            print(self.tr("--------Beginning parsing--------"))
            print(self.tr("Reading file: "+str(filePath)))
            print(self.tr("---------------------------------"))
            self._readTokens(parser, filePath)
            # atEnd() is also true once an error has occurred, so an error can
            # only be detected after the token loop has finished.
            if parser.hasError():
                print(self.tr("XML file is not well-formatted"))
                print(self.tr("Parser error: " + str(parser.errorString())))
        finally:
            # Runs on every exit path, including the sys.exit() below.
            xmlFile.close()

    def _readTokens(self, parser, filePath):
        """Consume all tokens of ``parser`` and dispatch them by type."""
        reader = QtCore.QXmlStreamReader
        # Stack of (name, dict) tuples, one per open parseAs="element" tag.
        currData = []
        # One flag per open element that will produce an EndElement token:
        # True if the element owns the tuple on top of currData.
        isContainer = []
        while not parser.atEnd():
            token = parser.readNext()
            print(self.tr("--------------------"))
            print(self.tr("Token type: " + str(self.printTokenType(token))))
            if token == reader.StartElement:
                self._handleStartElement(parser, currData, isContainer)
            elif token == reader.EndElement:
                self._handleEndElement(parser, currData, isContainer)
            elif token == reader.Characters:
                print(self.tr("Characters value: " + str(parser.text())))
                print(self.tr("--------------------"))
            elif token == reader.StartDocument:
                logKey = "File: "+str(filePath)
                self.parsingLog[logKey] = {
                    "XML Version": str(parser.documentVersion()),
                    "XML Encoding": str(parser.documentEncoding()),
                }
                print(self.tr("File Version: " + str(self.parsingLog[logKey]["XML Version"])))
                print(self.tr("File Encoding: " + str(self.parsingLog[logKey]["XML Encoding"])))
            elif token == reader.EndDocument:
                print(self.tr("Cleaning up"), end = "")
                print(self.tr(".") * 5, end = "")
                print(self.tr("done."))
                print(self.tr("self.data: " + str(self.data)))
                print(self.tr("--------------------"))

    def _handleStartElement(self, parser, currData, isContainer):
        """Store a text leaf or open a new container, depending on ``parseAs``."""
        tokenName = parser.name()
        parseAs = parser.attributes().value("parseAs")
        print(self.tr("Reading StartElement: " + str(tokenName)))
        print(self.tr("parseAs: " + str(parseAs)))
        if parseAs == "text":
            # readElementText() reads up to and including the matching end
            # tag, so no EndElement token follows and nothing is pushed.
            textValue = self.parseText(parser.readElementText())
            print(self.tr("Text Value: " + str(textValue)))
            if currData:
                currData[-1][1][tokenName] = textValue
            else:
                print(self.tr("*******Terminating application*******"))
                print(self.tr("Reason: currData is empty"))
                print(self.tr("*******Terminating application*******"))
                sys.exit()
        elif parseAs == "element":
            currData.append((tokenName, {}))
            isContainer.append(True)
        else:
            # Remember that this element's end tag must not close a container.
            isContainer.append(False)
            print(self.tr("******WARNING******"))
            print(self.tr("parseAs attribute is not given correctly"))
            print(self.tr("******WARNING******"))
        print(self.tr("--------------------"))

    def _handleEndElement(self, parser, currData, isContainer):
        """Close the innermost container when its end tag is reached."""
        tokenName = parser.name()
        print(self.tr("Reading EndElement: " + str(tokenName)))
        print(self.tr("currData before: " + str(currData)))
        closesContainer = isContainer.pop() if isContainer else False
        if not closesContainer or not currData:
            return
        # The element being closed is always the most recently opened
        # container, so it is taken from the top of the stack instead of
        # being searched for by name (names may repeat at different depths).
        name, content = currData.pop()
        print(self.tr("Closing token: " + str(tokenName)))
        if currData:
            currData[-1][1][name] = content
            print(self.tr("currData after: " + str(currData)))
        else:
            print(self.tr("This is the final token, writing to self.data"), end = "")
            self.data[name] = content
            print(self.tr(".") * 5, end = "")
            print(self.tr("done."))
        print(self.tr("--------------------"))

    def parseText(self, text):
        """Convert element text to ``int``, ``float`` or ``str``.

        Text made only of the characters ``0-9`` becomes an ``int``; text made
        of ``0-9`` and ``.`` becomes a ``float`` when it is a valid number.
        Everything else - including the empty string, signed numbers and
        strings such as ``"1.2.3"`` - is returned unchanged as ``str``.

        Raises ``ValueError`` if ``text`` is not a string.
        """
        if not isinstance(text, str):
            raise ValueError("parseText expects a str, got " + type(text).__name__)
        if text == "" or any(char not in "0123456789." for char in text):
            return str(text)
        try:
            return float(text) if "." in text else int(text)
        except ValueError:
            # e.g. "." or "1.2.3": allowed characters only, but not a number.
            return str(text)

    def printTokenType(self, token):
        """Return the name of a ``QXmlStreamReader`` token type.

        Returns ``None`` for a value that is not one of the known token types.
        """
        reader = QtCore.QXmlStreamReader
        tokenNames = (
            (reader.NoToken, "NoToken"),
            (reader.Invalid, "Invalid"),
            (reader.StartDocument, "StartDocument"),
            (reader.EndDocument, "EndDocument"),
            (reader.StartElement, "StartElement"),
            (reader.EndElement, "EndElement"),
            (reader.Characters, "Characters"),
            (reader.Comment, "Comment"),
            (reader.DTD, "DTD"),
            (reader.EntityReference, "EntityReference"),
            (reader.ProcessingInstruction, "ProcessingInstruction"),
        )
        for tokenType, tokenName in tokenNames:
            if token == tokenType:
                return tokenName
        return None