My app currently reads about 10k lines into a variable and then uses SwiftyJSON to parse them into Realm.
Source: https://github.com/skishore/makemeahanzi/blob/master/graphics.txt https://github.com/skishore/makemeahanzi/blob/master/dictionary.txt
Problem: It takes far too long (2 minutes 28 seconds), and it also uses 400 MB of memory!
Question: How can I make this faster? Does anyone have experience with FlatBuffers or Protobuf?
Help would be very welcome!
Cheers, Dom
This is the code:
/// Parses the graphics and dictionary files line by line and hands the
/// resulting characters to `realmActions.writeCharsToRealm` in batches.
///
/// Line `i` of the graphics file is paired with line `i` of the dictionary
/// file. Nothing is written unless both files have the same number of lines
/// and that number is greater than one.
func parseToRealm() {
    // each of these files have 9500+ lines of data
    // (basically dictionaries with word definitions)
    let graphicsFileContents = readFile_Graphics()
    let dictFileContents = readFile_Dict()
    // check if counts of two source files match
    guard graphicsFileContents.count == dictFileContents.count,
          graphicsFileContents.count > 1 else {
        print ("two files have different counts of lines. aborting...")
        return
    }
    // Number of characters collected before each write.
    let batchSize = 150
    let lastIndex = graphicsFileContents.count - 1
    // Start with a truly empty batch so no placeholder object is written.
    var characterArr = [Characters]()
    // Index of the first line contained in the current batch (for logging).
    var batchStart = 0
    // loop through two files to get all chars
    for (i, jsonString) in graphicsFileContents.enumerated() {
        // parse data from string into json
        // (UTF-8 encoding of a Swift String cannot fail, so the unwrap is safe)
        let dataFromString = jsonString.data(using: .utf8)
        let singleCharJson = try? JSON(data: dataFromString!)
        // parse stuff from file1
        // ... deleted lines for legal reasons
        // DICT information
        let dictDataFromString = dictFileContents[i].data(using: .utf8)
        let singleDictJson = try? JSON(data: dictDataFromString!)
        // parse stuff from that dictionary
        // ... deleted lines for legal reasons
        characterArr.append(Character)
        // Flush when the batch is full, and also on the very last line so the
        // trailing partial batch is not lost.
        if characterArr.count == batchSize || i == lastIndex {
            realmActions.writeCharsToRealm(characterArr: characterArr)
            print("Writing \(batchStart)-\(i)")
            // reset array to save memory
            characterArr = [Characters]()
            batchStart = i + 1
        }
    } // end loop file contents
}
// Loads the bundled "graphics.txt" resource and splits it on newlines.
// Returns the lines as an array of strings; the array is empty when the
// resource cannot be located or read.
func readFile_Graphics () -> [String] {
    guard let resourcePath = Bundle.main.path(forResource: "graphics", ofType: "txt") else {
        // Resource missing from the bundle: nothing to return.
        return [String]()
    }
    do {
        let contents = try String(contentsOfFile: resourcePath, encoding: .utf8)
        return contents.components(separatedBy: .newlines)
    } catch {
        print("cannot get file graphics.txt. Error message:")
        print(error)
        return [String]()
    }
}
// Loads the bundled "dictionary.txt" resource and splits it on newlines.
// Returns the lines as an array of strings; when the resource cannot be
// located or read, the result is an array holding a single empty string.
func readFile_Dict () -> [String]{
    let fallback = [""]
    guard let resourcePath = Bundle.main.path(forResource: "dictionary", ofType: "txt") else {
        return fallback
    }
    do {
        let contents = try String(contentsOfFile: resourcePath, encoding: .utf8)
        return contents.components(separatedBy: .newlines)
    } catch {
        print("cannot get file dictionary.txt. Error message:")
        print(error)
        return fallback
    }
}
// Streams "graphics.txt" line by line on a background queue, parses every
// line as JSON, echoes it with a running counter, and finally prints the
// elapsed time in whole minutes.
DispatchQueue.global(qos: .background).async {
    guard let path = Bundle.main.path(forResource: "graphics", ofType: "txt") else {
        print("Dang! File wasn't found!")
        return
    }
    let cal = Calendar.current
    let d1 = Date()
    guard let streamReader = StreamReader(path: path) else {
        print("Dang! StreamReader couldn't be created!")
        return
    }
    var counter = 0
    // nextLine() returns nil once the file is exhausted, which ends the loop
    // without reporting a spurious error for newline-terminated files.
    while let nextLine = streamReader.nextLine() {
        // The parsed value is not used here; the call is kept so the timing
        // below still includes the JSON parsing work.
        _ = JSON(parseJSON: nextLine)
        counter += 1
        print("\(counter): \(nextLine)")
    }
    let d2 = Date()
    // Measure from the start date to the end date so the difference is
    // non-negative.
    let components = cal.dateComponents([.minute], from: d1, to: d2)
    print("Diff: \(components.minute ?? 0)")
}
}
import Foundation
/// Reads a file incrementally and hands back one delimiter-separated line at
/// a time, so the whole file never has to be loaded at once.
class StreamReader {
    /// Encoding used for the delimiter and for decoding each line.
    let encoding : String.Encoding
    /// Number of bytes requested from the file per read.
    let chunkSize : Int
    /// Open handle on the file; `nil` after `close()`.
    var fileHandle : FileHandle!
    /// The delimiter, encoded with `encoding`.
    let delimData : Data
    /// Bytes read from the file that have not been returned as a line yet.
    var buffer : Data
    /// Set to `true` once a read from the file returned no data.
    var atEof : Bool

    /// Opens the file at `path` for reading.
    ///
    /// Fails (returns `nil`) when the file cannot be opened or when
    /// `delimiter` cannot be represented in `encoding`.
    init?(path: String, delimiter: String = "\n", encoding: String.Encoding = .utf8,
          chunkSize: Int = 4096) {
        guard let handle = FileHandle(forReadingAtPath: path) else {
            return nil
        }
        guard let separator = delimiter.data(using: encoding) else {
            return nil
        }
        self.encoding = encoding
        self.chunkSize = chunkSize
        self.fileHandle = handle
        self.delimData = separator
        self.buffer = Data(capacity: chunkSize)
        self.atEof = false
    }

    deinit {
        close()
    }

    /// Return next line, or nil on EOF.
    func nextLine() -> String? {
        precondition(fileHandle != nil, "Attempt to read from closed file")
        // Keep pulling chunks from the file until the buffer holds a delimiter.
        while !atEof {
            if let delimiterRange = buffer.range(of: delimData) {
                // Everything before the delimiter is the line; drop the line
                // and its delimiter from the buffer before returning.
                let lineBytes = buffer.subdata(in: 0..<delimiterRange.lowerBound)
                buffer.removeSubrange(0..<delimiterRange.upperBound)
                return String(data: lineBytes, encoding: encoding)
            }
            let chunk = fileHandle.readData(ofLength: chunkSize)
            if !chunk.isEmpty {
                buffer.append(chunk)
                continue
            }
            // EOF or read error.
            atEof = true
            if buffer.isEmpty {
                break
            }
            // Whatever is left is the last line (not terminated by delimiter).
            let lastLine = String(data: buffer, encoding: encoding)
            buffer.count = 0
            return lastLine
        }
        return nil
    }

    /// Start reading from the beginning of file.
    func rewind() -> Void {
        fileHandle.seek(toFileOffset: 0)
        buffer.count = 0
        atEof = false
    }

    /// Close the underlying file. No reading must be done after calling this method.
    func close() -> Void {
        if let handle = fileHandle {
            handle.closeFile()
        }
        fileHandle = nil
    }
}
// Sequence conformance: lets a StreamReader be used in a `for line in reader` loop.
extension StreamReader : Sequence {
    /// Builds an iterator whose every step asks this reader for its next line.
    func makeIterator() -> AnyIterator<String> {
        let lineSource: () -> String? = { self.nextLine() }
        return AnyIterator(lineSource)
    }
}
The StreamReader class reads the text file line by line, so there is no need to read the whole file at once. The first block reads the contents of the file. Try the code above; it should solve your problem. Please note that I've used a background thread, whereas Realm doesn't work on a background thread (AFAIK). Let me know if that helps.