The NLTagger.enumerateTags
method returns "otherWord" for every word. This problem appears in my simulator. Most of my code is copied directly from the docs.
Here's an example app:
// ContentView.swift
import SwiftUI
import NaturalLanguage
/// Root view of the sample app: displays the text returned by `example()`.
struct ContentView: View {
    var body: some View {
        // Produce the report first so the layout below only deals with views.
        let report = example()
        VStack {
            Text(report)
        }
        .padding()
    }
}
/// Preview provider that renders a default `ContentView`.
struct ContentView_Previews: PreviewProvider {
    static var previews: some View { ContentView() }
}
/// Tags every word of a fixed sample sentence with its lexical class.
///
/// - Returns: One `"<word>: <tag raw value>"` entry per tagged word, each
///   terminated by a newline. Tokens for which the tagger yields no tag are
///   skipped.
func example() -> String {
    let sentence = "The ripe taste of cheese improves with age."
    let wholeSentence = sentence.startIndex..<sentence.endIndex

    let tagger = NLTagger(tagSchemes: [.lexicalClass])
    tagger.string = sentence

    var lines: [String] = []
    tagger.enumerateTags(
        in: wholeSentence,
        unit: .word,
        scheme: .lexicalClass,
        options: [.omitPunctuation, .omitWhitespace]
    ) { tag, tokenRange in
        // Returning true keeps the enumeration going, also for untagged tokens.
        guard let tag = tag else { return true }
        lines.append("\(sentence[tokenRange]): \(tag.rawValue)\n")
        return true
    }
    return lines.joined()
}
I'm on macOS 13.3.1 (a) and Xcode 14.3.1.
I cannot reproduce your problem: your code works for me (on a US-configured device). So I suspect the issue is related to the language assets installed on the device.
I would first make sure your device has the necessary assets. Check with `availableTagSchemes(for:language:)`, and if you don’t have `.lexicalClass` for `.english`, then call the asynchronous method `requestAssets(for:tagScheme:completionHandler:)`, or its `async` counterpart.
For example:
import SwiftUI
import NaturalLanguage
/// Shows the lexical class of every word in a sample sentence, requesting the
/// tagger assets first when the scheme is not reported as available.
struct ContentView: View {
    /// Status message while working, then the per-word report or an error text.
    @State var result: String = "Processing"

    var body: some View {
        Text(result)
            .padding()
            .task {
                await example()
            }
    }

    /// Checks that the `.lexicalClass` scheme is available for English,
    /// requests the assets if it is not, then tags the sample sentence and
    /// stores the outcome in `result`.
    ///
    /// On failure `result` is set to a string starting with `"error: "` and no
    /// tagging is attempted.
    func example() async {
        let unit: NLTokenUnit = .word
        let scheme: NLTagScheme = .lexicalClass
        let language: NLLanguage = .english
        let options: NLTagger.Options = [.omitPunctuation, .omitWhitespace]
        let text = "The ripe taste of cheese improves with age."
        let range = text.startIndex..<text.endIndex

        if !NLTagger.availableTagSchemes(for: unit, language: language).contains(scheme) {
            do {
                result = "Requesting assets"
                let status = try await NLTagger.requestAssets(for: language, tagScheme: scheme)
                // The request can finish without throwing and still report that
                // the assets are not available; surface that instead of tagging
                // without them.
                guard status == .available else {
                    result = "error: assets for \(language.rawValue) are not available"
                    return
                }
            } catch {
                print(error)
                result = "error: " + error.localizedDescription
                return
            }
        }

        let tagger = NLTagger(tagSchemes: [scheme])
        tagger.string = text
        // tagger.setLanguage(language, range: range)
        result = tagger
            // Use the same `unit` that was checked for availability above.
            .tags(in: range, unit: unit, scheme: scheme, options: options)
            .map { tag, tokenRange in "\(text[tokenRange]): \(tag?.rawValue ?? "Unknown")" }
            .joined(separator: "\n")
    }
}
If that still doesn’t work, I’d suggest temporarily changing the scheme to `.language` and seeing which language the tagger detects. You will get `OtherWord` if the detected language doesn’t match the language actually used in the text (though in my experience, it does a good job figuring this out on its own). But if you want, you can explicitly set the language (as shown in the line of code that has been commented out).