I have the following code. It simply runs the Vision API to get the text from an image. I use simple GCD to dispatch the heavy Vision operation to a background queue, and then dispatch back to the main queue for the completion handler:
/// One piece of text recognized by Vision, together with where it was found.
public struct TextRecognitionResult {
// The raw Vision observation this result was built from.
let observation: VNRecognizedTextObservation
// The top recognition candidate (`topCandidates(1).first`) for the observation.
let text: VNRecognizedText
// The observation's bounding box transformed out of Vision's normalized,
// bottom-left-origin space into image coordinates.
let rect: CGRect
}
public enum TextRecognitionUtil {
    /// Serial background queue on which the Vision request is performed.
    private static let queue = DispatchQueue(label: "text_recognition", qos: .userInitiated)

    /// Runs Vision text recognition on `image` off the main thread.
    ///
    /// - Parameters:
    ///   - image: Source image; must be backed by a `CGImage`.
    ///   - recognitionLevel: Vision recognition level (`.fast` or `.accurate`).
    ///   - completion: Always invoked exactly once, on the main queue.
    ///     Receives an empty array when the image has no `CGImage`, when the
    ///     request fails, or when nothing is recognized.
    public static func process(
        image: UIImage,
        recognitionLevel: VNRequestTextRecognitionLevel,
        completion: @Sendable @escaping ([TextRecognitionResult]) -> Void)
    {
        guard let cgImage = image.cgImage else {
            // Deliver on the main queue so every completion path is consistent
            // (the original called this one synchronously on the caller's thread).
            DispatchQueue.main.async { completion([]) }
            return
        }
        let request = VNRecognizeTextRequest { request, error in
            guard
                error == nil,
                let observations = request.results as? [VNRecognizedTextObservation]
            else {
                DispatchQueue.main.async { completion([]) }
                return
            }
            // Vision reports normalized bounding boxes with the origin at the
            // bottom left; flip vertically, then scale up to image coordinates.
            let transform = CGAffineTransform.identity
                .scaledBy(x: 1, y: -1)
                .translatedBy(x: 0, y: -image.size.height)
                .scaledBy(x: image.size.width, y: image.size.height)
            let results = observations.compactMap { observation -> TextRecognitionResult? in
                guard let text = observation.topCandidates(1).first else { return nil }
                return TextRecognitionResult(
                    observation: observation,
                    text: text,
                    rect: observation.boundingBox.applying(transform))
            }
            DispatchQueue.main.async { completion(results) }
        }
        request.recognitionLevel = recognitionLevel
        queue.async {
            let handler = VNImageRequestHandler(cgImage: cgImage, options: [:])
            do {
                try handler.perform([request])
            } catch {
                // If `perform` throws, the request's completion handler never
                // runs; the original's `try?` silently dropped the callback.
                DispatchQueue.main.async { completion([]) }
            }
        }
    }
}
This code violates Swift 6 strict concurrency checking, because TextRecognitionResult is not Sendable
:
Sending 'results' risks causing data races; this is an error in the Swift 6 language mode
However, my TextRecognitionResult
can't be directly marked as Sendable, because VNRecognizedTextObservation
and VNRecognizedText
are not Sendable, and they are both types defined in Vision that I cannot change. This is a pretty common pattern with GCD. I don't know what to do here.
A few observations:
I would declare your struct
as Sendable
:
/// One piece of text recognized by Vision, together with where it was found.
/// The `Sendable` conformance is accepted because Vision is imported with
/// `@preconcurrency`, which suppresses the non-Sendable-member diagnostics
/// for `VNRecognizedTextObservation` and `VNRecognizedText`.
public struct TextRecognitionResult: Sendable {
// The raw Vision observation this result was built from.
let observation: VNRecognizedTextObservation
// The top recognition candidate for the observation.
let text: VNRecognizedText
// Bounding box mapped from Vision's normalized, bottom-left-origin space
// into image coordinates.
let rect: CGRect
}
I would import Vision
as @preconcurrency
:
@preconcurrency import Vision
I would make results
immutable (using compactMap
rather than a for
loop):
public enum TextRecognitionUtil {
    /// Failures specific to this utility (Vision errors are passed through as-is).
    enum TextRecognitionUtilError: Error {
        case noCgImage
        case notVNRecognizedTextObservation
    }

    /// Serial background queue on which the Vision request is performed.
    private static let queue = DispatchQueue(label: "text_recognition", qos: .userInitiated)

    /// Runs Vision text recognition on `image` off the main thread.
    ///
    /// - Parameters:
    ///   - image: Source image; must be backed by a `CGImage`.
    ///   - recognitionLevel: Vision recognition level; defaults to `.accurate`.
    ///   - completion: Invoked exactly once with either the recognized results
    ///     or an error. All asynchronous paths deliver on the main queue.
    public static func process(
        image: UIImage,
        recognitionLevel: VNRequestTextRecognitionLevel = .accurate,
        completion: @Sendable @escaping (Result<[TextRecognitionResult], Error>) -> Void)
    {
        guard let cgImage = image.cgImage else {
            completion(.failure(TextRecognitionUtilError.noCgImage))
            return
        }
        let request = VNRecognizeTextRequest { request, error in
            guard
                error == nil,
                let observations = request.results as? [VNRecognizedTextObservation]
            else {
                DispatchQueue.main.async {
                    completion(.failure(error ?? TextRecognitionUtilError.notVNRecognizedTextObservation))
                }
                return
            }
            // Vision reports normalized bounding boxes with the origin at the
            // bottom left; flip vertically, then scale up to image coordinates.
            let transform = CGAffineTransform.identity
                .scaledBy(x: 1, y: -1)
                .translatedBy(x: 0, y: -image.size.height)
                .scaledBy(x: image.size.width, y: image.size.height)
            let results = observations.compactMap { observation -> TextRecognitionResult? in
                guard let text = observation.topCandidates(1).first else { return nil }
                let rect = observation.boundingBox.applying(transform)
                return TextRecognitionResult(observation: observation, text: text, rect: rect)
            }
            DispatchQueue.main.async {
                completion(.success(results))
            }
        }
        request.recognitionLevel = recognitionLevel
        queue.async {
            let handler = VNImageRequestHandler(cgImage: cgImage)
            do {
                // Bug fix: the original created the handler but never called
                // `perform`, so the request never ran and `completion` was
                // never invoked.
                try handler.perform([request])
            } catch {
                DispatchQueue.main.async { completion(.failure(error)) }
            }
        }
    }
}
I also changed the closure's parameter type to a Result
so the caller can disambiguate between an error and an empty set of recognition results.
Swift 6 will probably simplify this a bit, but the above works in Xcode 15.4.