Search code examples
swiftswiftuiios-vision

SwiftUI - How can I recognize words and get positions in Vision


The code below can recognize the words in an image, but I don't know how to get their positions. I can get the recognized text, but not the bounding boxes.

From: https://medium.com/@jakir/text-recognition-or-ocr-using-vision-framework-ios-swiftui-b9c5df36ec32

import SwiftUI
import Vision

/// Demo screen that runs Vision text recognition on the bundled "quote" image
/// and shows the recognized strings in an editable text area.
struct ContentView: View {
    
    /// Recognized text, one observation's top candidate per line.
    @State var recognizedText = ""
    
    var body: some View {
        VStack {
            Text("OCR using Vision")
                .font(.title)
            
            Image("quote")
                .resizable()
                .scaledToFit()
            
            Button("Recognize Text") {
                ocr()
            }
            
            TextEditor(text: $recognizedText)
        }
        .padding()
    }
    
    /// Recognizes the text in the "quote" image and stores it in `recognizedText`.
    ///
    /// `VNImageRequestHandler.perform(_:)` is synchronous, so the request is
    /// performed on a background queue; running `.accurate` recognition straight
    /// from the button action would block the main thread until it finishes.
    /// The UI state is still updated on the main queue.
    func ocr() {
        guard let cgImage = UIImage(named: "quote")?.cgImage else {
            print("OCR skipped: image \"quote\" could not be loaded")
            return
        }
        
        let recognizeRequest = VNRecognizeTextRequest { request, error in
            // Report a failed request instead of silently producing no output.
            if let error = error {
                print(error)
                return
            }
            
            guard let observations = request.results as? [VNRecognizedTextObservation] else {
                return
            }
            
            // Keep the best candidate string of every observation.
            let lines = observations.compactMap { observation in
                observation.topCandidates(1).first?.string
            }
            
            // `@State` must be mutated on the main queue.
            DispatchQueue.main.async {
                recognizedText = lines.joined(separator: "\n")
            }
        }
        recognizeRequest.recognitionLevel = .accurate
        
        let handler = VNImageRequestHandler(cgImage: cgImage)
        DispatchQueue.global(qos: .userInitiated).async {
            do {
                try handler.perform([recognizeRequest])
            } catch {
                print(error)
            }
        }
    }
}

I have searched many websites but could not find a solution.


Solution

  • This answer shows how to get the bounding boxes of whole lines (sentences) of text, but not of individual words. See also: "Extracting Word-Level BoundingBoxes with VNRecognizeTextRequest's .accurate in Vision Framework - SwiftUI".

    /// Recognizes text in `image` and reports every recognized string together
    /// with the bounding box of its observation.
    ///
    /// The request is performed synchronously on the calling thread, so call this
    /// off the main thread for large images. `completion` is always invoked
    /// exactly once, on the main queue.
    ///
    /// - Parameters:
    ///   - image: The image to scan. It must be backed by a `CGImage`.
    ///   - completion: Receives two parallel arrays: the top candidate string of
    ///     each observation and that observation's `boundingBox`. Both arrays are
    ///     empty when the image has no `CGImage` or recognition fails.
    ///
    /// NOTE(review): `observation.boundingBox` covers the whole observation, not
    /// single words. Per Apple's Vision documentation the rect is normalized to the
    /// image with a lower-left origin — confirm before drawing overlays. Word-level
    /// rects would need `VNRecognizedText.boundingBox(for:)` per word range.
    func recognizeText(image: UIImage, completion: @escaping([String], [CGRect]) -> Void) {
        // A failing request can surface both through the request's handler and
        // through the error thrown by `perform`; this flag keeps `completion`
        // from firing twice in that case.
        var didDeliver = false
        let deliver: ([String], [CGRect]) -> Void = { texts, positions in
            guard !didDeliver else { return }
            didDeliver = true
            DispatchQueue.main.async {
                print(texts)
                print(positions)
                completion(texts, positions)
            }
        }
        
        guard let cgImage = image.cgImage else {
            print("Text recognition error: image has no CGImage")
            deliver([], [])
            return
        }
        
        let request = VNRecognizeTextRequest { request, error in
            guard let observations = request.results as? [VNRecognizedTextObservation], error == nil else {
                print("Text recognition error: \(error?.localizedDescription ?? "Unknown error")")
                deliver([], [])
                return
            }
            
            var texts: [String] = []
            var positions: [CGRect] = []
            texts.reserveCapacity(observations.count)
            positions.reserveCapacity(observations.count)
            
            // Appending in pairs keeps both arrays index-aligned even when an
            // observation has no candidate and is skipped.
            for observation in observations {
                guard let topCandidate = observation.topCandidates(1).first else { continue }
                texts.append(topCandidate.string)
                positions.append(observation.boundingBox)
            }
            deliver(texts, positions)
        }
        request.recognitionLevel = .accurate
        
        let handler = VNImageRequestHandler(cgImage: cgImage)
        do {
            try handler.perform([request])
        } catch {
            print("Text recognition error: \(error.localizedDescription)")
            deliver([], [])
        }
    }