Search code examples
ios · swift · text-classification · coreml · apple-vision

Text classification / extraction from an image: how to get a single text frame and its string from an image using Core ML


I need to draw rectangular boxes around each recognized string in the image, and then get that string when its box is tapped.


Solution

  • import UIKit
    import Vision
    
    
    /// Shows a picked image, runs Vision text recognition on it, outlines every
    /// recognized piece of text with a tappable box and presents the text of a
    /// tapped box in an alert (with a "Copy" action).
    class ViewController: UIViewController, ImageGet {

        // MARK: - Outlets

        @IBOutlet weak var selectButton: UIButton!

        // MARK: - Variables

        /// Helper asked to fetch an image in `selectButtontapped(_:)`.
        /// NOTE(review): presumably reports back through `img(image:)` — confirm against `UtilityClass`.
        var objU = UtilityClass()
        /// The (resized) image currently displayed and analysed.
        var image: UIImage?
        /// The most recently processed recognized string.
        var str: String?
        /// The most recently created overlay button.
        var uiButton: UIButton?
        /// Recognized strings for the current image; the overlay button with tag `n` maps to index `n - 1`.
        var arrayString = [String]()
        /// Hosts the image and the overlay buttons drawn on top of it.
        var imageView: UIImageView = UIImageView()

        // MARK: - Delegate function

        /// Receives a newly selected image and starts text recognition on it.
        /// - Parameter image: The image to display and analyse.
        func img(image: UIImage) {
            self.image = image
            imageView.image = image
            setUp()
        }

        override func viewDidLoad() {
            super.viewDidLoad()
            // The overlay buttons are subviews of the image view, so it must accept touches.
            imageView.isUserInteractionEnabled = true
        }

        // MARK: - Set up UI

        /// Resizes the current image to fit the view, displays it and starts an
        /// accurate text-recognition request for it.
        ///
        /// Recognition is performed on a background queue so the UI is not blocked;
        /// the results are handed to `recognizeTextHandler(request:error:)` on the
        /// main queue. Does nothing when the image view has no image.
        func setUp() {
            guard let sourceImage = imageView.image else { return }

            let realImg = resizeImage(
                image: sourceImage,
                targetSize: CGSize(width: view.frame.width, height: view.frame.height)
            )
            image = realImg
            imageView.image = realImg
            imageView.isUserInteractionEnabled = true
            imageView.frame = CGRect(x: 0, y: 0, width: realImg.size.width, height: realImg.size.height)
            view.addSubview(imageView)

            // Boxes belonging to a previously shown image must not linger over the new one
            // while recognition is still running.
            clearOverlays()

            guard let cgimg = realImg.cgImage else { return }
            let requestHandler = VNImageRequestHandler(cgImage: cgimg)
            let req = VNRecognizeTextRequest { [weak self] request, error in
                DispatchQueue.main.async {
                    // Drop results that belong to an image which has been replaced in the meantime.
                    guard let self = self, self.image === realImg else { return }
                    self.recognizeTextHandler(request: request, error: error)
                }
            }
            req.recognitionLevel = .accurate

            DispatchQueue.global(qos: .userInitiated).async {
                do {
                    try requestHandler.perform([req])
                } catch {
                    print("Unable to perform the request: \(error)")
                }
            }
        }

        // MARK: - Select the image

        @IBAction func selectButtontapped(_ sender: Any) {
            objU.delegate = self
            objU.obj = self
            objU.ImageGet()
        }

        // MARK: - Recognition results

        /// Handles a finished text-recognition request: replaces the existing
        /// overlay boxes with one tappable box per recognized text observation.
        ///
        /// Touches UIKit views, so it must be called on the main thread.
        /// - Parameters:
        ///   - request: The completed request; its results are expected to be
        ///     `VNRecognizedTextObservation`s.
        ///   - error: The error reported by Vision, if any.
        func recognizeTextHandler(request: VNRequest, error: Error?) {
            if let error = error {
                print("Text recognition failed: \(error)")
                return
            }
            guard let observations = request.results as? [VNRecognizedTextObservation] else {
                return
            }

            clearOverlays()

            guard let imageSize = image?.size else { return }
            let containerSize = imageView.bounds.size

            var boundingRect = [CGRect]()
            for observation in observations {
                // Observations without any candidate string carry nothing to show or tap.
                guard let candidate = observation.topCandidates(1).first else { continue }

                let text = candidate.string
                str = text
                // Append before drawing: `drawBoundBox(rect:)` derives the button tag from the count.
                arrayString.append(text)

                let convertedRect = getConvertedRect(
                    boundingBox: observation.boundingBox,
                    inImage: imageSize,
                    containedIn: containerSize
                )
                drawBoundBox(rect: convertedRect)

                // Box of the whole candidate string in image-view pixels (debug output only).
                let stringRange = text.startIndex..<text.endIndex
                let boundingBox = (try? candidate.boundingBox(for: stringRange))?.boundingBox ?? .zero
                boundingRect.append(
                    VNImageRectForNormalizedRect(
                        boundingBox,
                        Int(imageView.frame.size.width),
                        Int(imageView.frame.size.height)
                    )
                )
            }

            print(arrayString)
            print(boundingRect)
        }

        /// Removes every overlay button and forgets the strings they referred to,
        /// so button tags always index into the strings of the current image.
        private func clearOverlays() {
            for subView in imageView.subviews {
                subView.removeFromSuperview()
            }
            arrayString.removeAll()
        }

        /// Adds a bordered, tappable button over `rect` inside the image view.
        ///
        /// The button's tag is the current count of `arrayString`, i.e. it refers
        /// to the string appended last.
        /// - Parameter rect: Frame of the box in the image view's coordinate space.
        func drawBoundBox(rect: CGRect) {
            let button = UIButton(type: .custom)
            button.frame = rect
            button.layer.borderColor = UIColor.systemPink.cgColor
            button.layer.borderWidth = 2
            button.setTitle("", for: .normal)
            button.tag = arrayString.count
            button.addTarget(self, action: #selector(pressed(_:)), for: .touchUpInside)
            imageView.addSubview(button)
            uiButton = button
        }

        /// Shows the recognized string that belongs to the tapped overlay button.
        @objc func pressed(_ sender: UIButton) {
            // Tags are 1-based; ignore buttons whose tag does not map to a stored string.
            let index = sender.tag - 1
            guard arrayString.indices.contains(index) else { return }
            alert(key: arrayString[index])
        }

        // MARK: - Convert the normalised bounding rect

        /// Converts a normalized Vision bounding box into a rect in the coordinate
        /// space of a container that shows the image scaled to fill it (centered,
        /// overflowing on one axis when the aspect ratios differ).
        /// - Parameters:
        ///   - boundingBox: Normalized box as reported by Vision; its y axis is
        ///     flipped here to get a top-left origin.
        ///   - imageSize: Size of the analysed image.
        ///   - containerSize: Size of the view displaying the image.
        /// - Returns: The box in container coordinates, or `.zero` when either
        ///   size has a non-positive dimension.
        func getConvertedRect(boundingBox: CGRect, inImage imageSize: CGSize, containedIn containerSize: CGSize) -> CGRect {
            // A zero dimension would produce NaN/infinite aspect ratios and trap in the Int conversions below.
            guard imageSize.width > 0, imageSize.height > 0,
                  containerSize.width > 0, containerSize.height > 0 else {
                return .zero
            }

            let imageAspect = imageSize.width / imageSize.height
            let containerAspect = containerSize.width / containerSize.height

            let rectOfImage: CGRect
            if imageAspect > containerAspect {
                // Image extends past the left and right edges of the container.
                let newImageWidth = containerSize.height * imageAspect // width of the overflowing image
                let newX = -(newImageWidth - containerSize.width) / 2
                rectOfImage = CGRect(x: newX, y: 0, width: newImageWidth, height: containerSize.height)
            } else {
                // Image extends past the top and bottom edges of the container.
                let newImageHeight = containerSize.width * (1 / imageAspect) // height of the overflowing image
                let newY = -(newImageHeight - containerSize.height) / 2
                rectOfImage = CGRect(x: 0, y: newY, width: containerSize.width, height: newImageHeight)
            }

            // Flip the y axis so the box is measured from the top edge.
            let newOriginBoundingBox = CGRect(
                x: boundingBox.origin.x,
                y: 1 - boundingBox.origin.y - boundingBox.height,
                width: boundingBox.width,
                height: boundingBox.height
            )

            var convertedRect = VNImageRectForNormalizedRect(
                newOriginBoundingBox,
                Int(rectOfImage.width),
                Int(rectOfImage.height)
            )

            // Add the margins of the overflowing image.
            convertedRect.origin.x += rectOfImage.origin.x
            convertedRect.origin.y += rectOfImage.origin.y

            return convertedRect
        }

        // MARK: - Resize the image according to the device

        /// Scales `image` to fit inside `targetSize` while keeping its aspect ratio.
        /// - Parameters:
        ///   - image: The image to resize.
        ///   - targetSize: The maximum size of the result.
        /// - Returns: The resized image rendered at scale 1, or `image` unchanged
        ///   when either size has a non-positive dimension.
        func resizeImage(image: UIImage, targetSize: CGSize) -> UIImage {
            let size = image.size
            guard size.width > 0, size.height > 0,
                  targetSize.width > 0, targetSize.height > 0 else {
                return image
            }

            // The smaller ratio guarantees that both dimensions fit into the target.
            let ratio = min(targetSize.width / size.width, targetSize.height / size.height)
            let newSize = CGSize(width: size.width * ratio, height: size.height * ratio)

            let format = UIGraphicsImageRendererFormat()
            format.scale = 1
            format.opaque = false

            return UIGraphicsImageRenderer(size: newSize, format: format).image { _ in
                image.draw(in: CGRect(origin: .zero, size: newSize))
            }
        }

        // MARK: - Popping alert with string

        /// Presents an alert showing `key`, with "Copy" (puts `key` on the general
        /// pasteboard) and "OK" actions.
        /// - Parameter key: The text to display.
        func alert(key: String) {
            let alertController = UIAlertController(title: "String", message: key, preferredStyle: .alert)
            let copyAction = UIAlertAction(title: "Copy", style: .default) { _ in
                UIPasteboard.general.string = key
            }
            let okAction = UIAlertAction(title: "OK", style: .default, handler: nil)
            alertController.addAction(copyAction)
            alertController.addAction(okAction)
            present(alertController, animated: true, completion: nil)
        }

    }