So I'm working on an app that can patch words that are broken.
Let's take:
mny people say there is a error in this sentence
With Swift we can use UITextChecker
and get a wonderful result of what the word mny
could actually be... However, I actually get a couple of choices, one of which is many
and among the others you have money
So obviously money wouldn't fit very well in this sentence. Is there any way to check whether the sentence itself is logical?
Note that this still needs to be improved. I updated this Swift 3 solution to Swift 5. It is worth mentioning that it was originally inspired by this Python tutorial.
Create a new iOS project and add to it a text file named bigtext.txt
which will contain this text. This will be our "learning" dictionary.
Then in ViewController
:
import UIKit
import NaturalLanguage
/// Demo view controller that spell-checks a hard-coded sentence word by word
/// and prints the per-word result and the corrected sentence to the console.
class ViewController: UIViewController {
    override func viewDidLoad() {
        super.viewDidLoad()
        let inputString = "mny people say there is a error in this sentence"

        // Read a text file and "study" the model
        guard let path = Bundle.main.path(forResource: "bigtext", ofType: "txt") else {
            print("Path not available")
            return
        }
        // `checker` is nil when the file cannot be read; every word is then
        // reported with a nil correction and the sentence is left as typed.
        let checker = SpellChecker(contentsOfFile: path)

        // better to use this to iterate between words in a sentence
        let tokenizer = NLTokenizer(unit: .word)
        tokenizer.string = inputString

        // The corrected sentence is rebuilt from scratch rather than edited in
        // place: token ranges are indices into `inputString`, and they stop
        // lining up with a mutated copy as soon as one replacement changes the
        // length of a word.
        var newString = ""
        var copiedUpTo = inputString.startIndex

        tokenizer.enumerateTokens(in: inputString.startIndex..<inputString.endIndex) { tokenRange, _ in
            let word = String(inputString[tokenRange])
            let checked = checker?.correct(word: word)
            let candidates = checker?.candidates(word: word)

            // Copy the untouched text between the previous token and this one
            // (spaces, punctuation), then the corrected or original word.
            newString.append(contentsOf: inputString[copiedUpTo..<tokenRange.lowerBound])
            newString.append(checked ?? word)
            copiedUpTo = tokenRange.upperBound

            if word == checked {
                print("\(word) unchanged")
            } else {
                print("Correct:\t\(word) -> \(String(describing: checked))")
                print("Candidates:\t\(word) -> \(String(describing: candidates))")
            }
            return true
        }

        // Keep whatever follows the last token.
        newString.append(contentsOf: inputString[copiedUpTo...])
        print("Result: \(newString)")
    }
}
/// Returns every string that is one simple edit away from `word`.
///
/// The edits are: deleting one character, swapping two adjacent characters,
/// replacing one character with a letter `a`–`z`, and inserting a letter
/// `a`–`z` at any position, including after the last character.
/// Replacing a character with itself is one of the replacements, so the
/// result also contains `word` itself.
///
/// - Parameter word: The word to edit.
/// - Returns: The set of edited strings; empty when `word` is empty.
func edits(word: String) -> Set<String> {
    if word.isEmpty { return [] }

    let alphabet = "abcdefghijklmnopqrstuvwxyz"
    // Every split point of the word, including the one after the last
    // character so that a letter can be appended at the end.
    let splitPoints = Array(word.indices) + [word.endIndex]

    var result = Set<String>()
    for index in splitPoints {
        let left = word[..<index]
        let right = word[index...]

        // Insertion: a letter placed between the two halves.
        for letter in alphabet {
            result.insert("\(left)\(letter)\(right)")
        }

        // The remaining edits consume the first character of the right half.
        guard let first = right.first else { continue }
        let rest = right.dropFirst()

        // Deletion: drop that character.
        result.insert("\(left)\(rest)")

        // Replacement: substitute that character with each letter.
        for letter in alphabet {
            result.insert("\(left)\(letter)\(rest)")
        }

        // Transposition: swap that character with the one after it.
        if let second = rest.first {
            result.insert("\(left)\(second)\(first)\(rest.dropFirst())")
        }
    }
    return result
}
/// A word-frequency spelling corrector trained from a plain-text file.
///
/// Candidate corrections are known words within one or two edits of the input
/// (see `edits(word:)`); the candidate seen most often in the training text wins.
struct SpellChecker {
    /// Number of times each word was seen in the training text.
    var knownWords: [String: Int] = [:]

    /// Records one more occurrence of `word`.
    mutating func train(word: String) {
        knownWords[word, default: 0] += 1
    }

    /// Trains a checker from the UTF-8 text file at `file`.
    ///
    /// The text is lowercased and split on every scalar outside `a`–`z`, so
    /// only runs of ASCII letters are counted as words.
    /// Returns `nil` when the file cannot be read.
    init?(contentsOfFile file: String) {
        do {
            let text = try String(contentsOfFile: file, encoding: .utf8).lowercased()
            let letters: ClosedRange<Unicode.Scalar> = "a"..."z"
            let words = text.unicodeScalars.split(whereSeparator: { !letters.contains($0) })
            for word in words { train(word: String(word)) }
        }
        catch {
            return nil
        }
    }

    /// Returns the known words that are two edits away from `word`,
    /// or `nil` when there are none.
    func knownEdits2(word: String) -> Set<String>? {
        var knownEdits: Set<String> = []
        for edit in edits(word: word) {
            if let matches = known(words: edits(word: edit)) {
                knownEdits.formUnion(matches)
            }
        }
        return knownEdits.isEmpty ? nil : knownEdits
    }

    /// Returns the subset of `words` present in `knownWords`,
    /// or `nil` when none of them is known.
    func known<S: Sequence>(words: S) -> Set<String>? where S.Iterator.Element == String {
        let found = Set(words.filter { knownWords[$0] != nil })
        return found.isEmpty ? nil : found
    }

    /// Returns the possible corrections for `word`.
    ///
    /// Tried in order, stopping at the first non-empty result: the word itself
    /// if known, known words one edit away, known words two edits away.
    /// Returns an empty set when nothing matches.
    func candidates(word: String) -> Set<String> {
        guard let result = known(words: [word])
            ?? known(words: edits(word: word))
            ?? knownEdits2(word: word) else {
            return Set<String>()
        }
        return result
    }

    /// Returns the most frequent candidate for `word`, or `word` itself when
    /// there are no candidates.
    func correct(word: String) -> String {
        let best = candidates(word: word).max { lhs, rhs in
            // Unknown words count as 0 so that any candidate from the
            // training text, even one seen only once, is preferred.
            let lhsCount = knownWords[lhs] ?? 0
            let rhsCount = knownWords[rhs] ?? 0
            if lhsCount != rhsCount { return lhsCount < rhsCount }
            // Equal counts: prefer the alphabetically first word so the result
            // does not depend on the set's iteration order.
            return lhs > rhs
        }
        return best ?? word
    }
}
This will output something like:
Correct: mny -> Optional("may")
Candidates: mny -> Optional(Set(["any", "ny", "may", "many"]))
people unchanged
say unchanged
there unchanged
is unchanged
a unchanged
error unchanged
in unchanged
this unchanged
sentence unchanged
Result: may people say there is a error in this sentence
Please note that we simply took the top-ranked correction candidate, without looking at the rest of the sentence. To pick the right word, we first need to consider the word order and understand the sentence context.