The code below can only recognize the words; I don't know how to get their positions. I can get the text but not the bounding boxes.
From: https://medium.com/@jakir/text-recognition-or-ocr-using-vision-framework-ios-swiftui-b9c5df36ec32
import SwiftUI
import Vision

struct ContentView: View {
    @State var recognizedText = ""

    var body: some View {
        VStack {
            Text("OCR using Vision")
                .font(.title)
            Image("quote")
                .resizable()
                .scaledToFit()
            Button("Recognize Text") {
                ocr()
            }
            TextEditor(text: $recognizedText)
        }
        .padding()
    }

    func ocr() {
        let image = UIImage(named: "quote")
        if let cgImage = image?.cgImage {
            // Request handler
            let handler = VNImageRequestHandler(cgImage: cgImage)

            let recognizeRequest = VNRecognizeTextRequest { (request, error) in
                // Parse the results as text observations
                guard let result = request.results as? [VNRecognizedTextObservation] else {
                    return
                }
                // Extract the recognized strings
                let stringArray = result.compactMap { result in
                    result.topCandidates(1).first?.string
                }
                // Update the UI
                DispatchQueue.main.async {
                    recognizedText = stringArray.joined(separator: "\n")
                }
            }

            // Process the request
            recognizeRequest.recognitionLevel = .accurate
            do {
                try handler.perform([recognizeRequest])
            } catch {
                print(error)
            }
        }
    }
}
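From what I understand, each VNRecognizedTextObservation exposes a normalized boundingBox (values between 0 and 1, origin at the bottom-left), so I assume I would also need a conversion helper roughly like the untested sketch below (imageRect(for:in:) is my own name, not part of the tutorial) to turn it into pixel coordinates:

import UIKit
import Vision

// Sketch (my assumption): convert Vision's normalized, bottom-left-origin rect
// into pixel coordinates with UIKit's top-left origin for a given CGImage.
func imageRect(for normalizedBox: CGRect, in cgImage: CGImage) -> CGRect {
    // VNImageRectForNormalizedRect scales the normalized rect to pixel units,
    // but keeps the bottom-left origin.
    let rect = VNImageRectForNormalizedRect(normalizedBox, cgImage.width, cgImage.height)
    // Flip the y-axis to get UIKit's top-left origin.
    return CGRect(x: rect.origin.x,
                  y: CGFloat(cgImage.height) - rect.origin.y - rect.height,
                  width: rect.width,
                  height: rect.height)
}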
I have visited many websites but found no solution.
This answer (Extracting Word-Level BoundingBoxes with VNRecognizeTextRequest's .accurate in Vision Framework - SwiftUI) shows how to get bounding boxes for whole sentences/lines, but not for individual words:
func recognizeText(image: UIImage, completion: @escaping ([String], [CGRect]) -> Void) {
    var texts: [String] = []
    var positions: [CGRect] = []

    guard let cgImage = image.cgImage else { return }

    let request = VNRecognizeTextRequest { (request, error) in
        guard let observations = request.results as? [VNRecognizedTextObservation], error == nil else {
            print("Text recognition error: \(error?.localizedDescription ?? "Unknown error")")
            return
        }
        for observation in observations {
            guard let topCandidate = observation.topCandidates(1).first else { continue }
            texts.append(topCandidate.string)
            // observation.boundingBox is the normalized box of the whole recognized line
            positions.append(observation.boundingBox)
        }
        DispatchQueue.main.async {
            print(texts)
            print(positions)
            completion(texts, positions)
        }
    }
    request.recognitionLevel = .accurate

    let handler = VNImageRequestHandler(cgImage: cgImage)
    try? handler.perform([request])
}
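What I'm actually after is something per word. I noticed that VNRecognizedText has a boundingBox(for:) method that takes a string range, so I assume word-level boxes could be built roughly like this (untested sketch; recognizeWords is my own name):

import UIKit
import Vision

// Untested sketch: enumerate the words of each line's top candidate and ask the
// candidate for the bounding box of each word's range.
func recognizeWords(image: UIImage, completion: @escaping ([(String, CGRect)]) -> Void) {
    guard let cgImage = image.cgImage else { return }

    let request = VNRecognizeTextRequest { request, error in
        guard let observations = request.results as? [VNRecognizedTextObservation], error == nil else {
            DispatchQueue.main.async { completion([]) }
            return
        }

        var words: [(String, CGRect)] = []
        for observation in observations {
            guard let candidate = observation.topCandidates(1).first else { continue }
            let line = candidate.string

            // Walk the recognized line word by word.
            line.enumerateSubstrings(in: line.startIndex..<line.endIndex,
                                     options: .byWords) { word, wordRange, _, _ in
                guard let word = word,
                      let box = try? candidate.boundingBox(for: wordRange) else { return }
                // box.boundingBox is normalized (0...1, bottom-left origin);
                // convert it to pixel coordinates of the CGImage.
                let rect = VNImageRectForNormalizedRect(box.boundingBox,
                                                        cgImage.width,
                                                        cgImage.height)
                words.append((word, rect))
            }
        }

        DispatchQueue.main.async {
            completion(words)
        }
    }
    request.recognitionLevel = .accurate

    let handler = VNImageRequestHandler(cgImage: cgImage)
    try? handler.perform([request])
}

Is this the right approach for word-level boxes, or does Vision expose something more direct?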