Search code examples
javascriptnode.jsazureazure-cognitive-services

Azure Computer Vision : Recognize Printed Text


I'm using Azure Computer Vision with Node.js, and I would like to extract text from images. It works as expected, but I'm facing some challenges. The code:

'use strict';

// Sends an image URL to the Computer Vision OCR endpoint and prints the
// JSON response, pretty-printed, to the console.

const request = require('request');

// Placeholders: replace with your own subscription key and endpoint.
const subscriptionKey = 'key';
const endpoint = 'endpoint';

// The path is appended directly, so `endpoint` must end with a slash.
const uriBase = `${endpoint}vision/v3.1/ocr`;

const imageUrl = 'https://livesimply.me/wp-content/uploads/2015/09/foods-to-avoid-real-food-3036-2-1024x683.jpg';

// Request parameters, sent as the query string.
// NOTE(review): 'unk' presumably asks the service to auto-detect the
// language - confirm against the OCR API reference.
const params = {
  language: 'unk',
  detectOrientation: 'true',
};

const options = {
  uri: uriBase,
  qs: params,
  // JSON.stringify escapes quotes and backslashes in the URL; building the
  // body by hand-concatenating strings would produce invalid JSON for them.
  body: JSON.stringify({ url: imageUrl }),
  headers: {
    'Content-Type': 'application/json',
    'Ocp-Apim-Subscription-Key': subscriptionKey,
  },
};

request.post(options, (error, response, body) => {
  if (error) {
    console.log('Error: ', error);
    return;
  }

  // A non-JSON body (e.g. an HTML error page from a wrong endpoint) would
  // make JSON.parse throw inside the callback, so report it instead.
  let parsed;
  try {
    parsed = JSON.parse(body);
  } catch (parseError) {
    console.log('Error: ', parseError);
    return;
  }

  console.log('JSON Response\n');
  console.log(JSON.stringify(parsed, null, '  '));
});

the output :

"regions": [

{

  "boundingBox": "0,191,277,281",

  "lines": [

    {

      "boundingBox": "53,191,23,49",

      "words": [

        {

          "boundingBox": "53,191,23,49",

          "text": "in"

        }

      ]

    },

    {

      "boundingBox": "0,285,277,82",

      "words": [

        {

          "boundingBox": "0,285,150,82",

          "text": ")arb.0g"

        },

        {

          "boundingBox": "214,288,63,63",

          "text": "0%"

        }

      ]

    },

    {

      "boundingBox": "14,393,45,79",

      "words": [

        {

          "boundingBox": "14,393,45,79",

          "text": "Og"

        }

      ]

    },

    {

      "boundingBox": "213,394,63,63",

      "words": [

        {

          "boundingBox": "213,394,63,63",

          "text": "00/0"

        }

      ]

    }

  ]

},

{

  "boundingBox": "322,184,352,457",

  "lines": [

    {

      "boundingBox": "326,184,348,54",

      "words": [

        {

          "boundingBox": "326,184,239,52",

          "text": "INGREDIENTS:"

        },

        {

          "boundingBox": "588,188,86,50",

          "text": "WHITE"

        }

      ]

    },

    {

      "boundingBox": "325,248,281,59",

      "words": [

        {

          "boundingBox": "325,248,83,56",

          "text": "TUNA,"

        },

        {

          "boundingBox": "417,250,127,51",

          "text": "SOYBEAN"

        },

        {

          "boundingBox": "555,252,51,55",

          "text": "OIL,"

        }

      ]

    },

    {

      "boundingBox": "324,313,341,60",

      "words": [

        {

          "boundingBox": "324,313,155,52",

          "text": "VEGETABLE"

        },

        {

          "boundingBox": "489,316,101,56",

          "text": "BROTH,"

        },

        {

          "boundingBox": "598,317,67,56",

          "text": "SALT,"

        }

      ]

    },

    {

      "boundingBox": "324,378,334,53",

      "words": [

        {

          "boundingBox": "324,378,235,52",

          "text": "PYROPHOSPHATE"

        },

        {

          "boundingBox": "566,381,92,50",

          "text": "ADDED"

        }

      ]

    },

    {

      "boundingBox": "323,519,248,52",

      "words": [

        {

          "boundingBox": "323,519,193,51",

          "text": "DISTRIBUTED"

        },

        {

          "boundingBox": "528,521,43,50",

          "text": "BY:"

        }

      ]

    },

    {

      "boundingBox": "322,584,298,57",

      "words": [

        {

          "boundingBox": "322,584,124,50",

          "text": "BUMBLE"

        },

        {

          "boundingBox": "457,585,52,50",

          "text": "BEE"

        },

        {

          "boundingBox": "519,585,101,56",

          "text": "FOODS,"

        }

      ]

    }

  ]

},

{

  "boundingBox": "791,400,198,117",

  "lines": [

    {

      "boundingBox": "921,400,68,45",

      "words": [

        {

          "boundingBox": "921,400,68,45",

          "text": ",11."

        }

      ]

    },

    {

      "boundingBox": "791,464,128,53",

      "words": [

        {

          "boundingBox": "791,464,75,53",

          "text": "PRC:"

        },

        {

          "boundingBox": "874,467,45,48",

          "text": "x"

        }

      ]

    }

  ]

}

  ]

  }

but I'm facing some challenges with this code :

  1. I want the output as a plain string, not a JSON tree.
  2. I would like to extract just the ingredients, not all of the text.
  3. In some cases the image may list ingredients without the "ingredients" keyword. How can I extract the ingredients in that case?

image : enter image description here

Thanks for your help, experts.


Solution

  • We extract printed text with optical character recognition (OCR) from an image using the Computer Vision REST API, and a successful response is returned as JSON. You can't get direct string output from this Azure Cognitive Service.

    For the problem -

    I want the output as a string and not JSON tree.

    We can't directly print the ingredients like a string as seen in the image. To extract the content and display it in particular format, after you get the JSON string, parse that into a JSON object and run a loop to extract data from it. After that use the split function to get the data stored into arrays. As shown in the below snippet.

    /**
     * Flattens a parsed OCR response into plain text.
     *
     * Walks the regions -> lines -> words nesting of the response. Words on
     * the same line are joined with a single space and lines are joined
     * with "\n".
     *
     * @param {object} ocrResult - Parsed JSON response from the OCR call.
     * @returns {string} The recognized text, one OCR line per text line;
     *     an empty string when there are no regions.
     */
    function extractText(ocrResult) {
        // Start from an empty list: appending to an uninitialised string
        // would prefix the output with "undefined".
        const textLines = [];
        for (const region of ocrResult.regions || []) {
            for (const line of region.lines || []) {
                const words = (line.words || []).map((word) => word.text);
                textLines.push(words.join(" "));
            }
        }
        return textLines.join("\n");
    }

    /**
     * Callback for the OCR request, e.g. request.post(options, handleOcrResponse).
     *
     * @param {Error|null} error - Transport error, if any.
     * @param {object} response - The HTTP response (unused here).
     * @param {string} body - The raw JSON response body.
     */
    function handleOcrResponse(error, response, body) {
        if (error) {
            console.log(error);
            return;
        }
        // Parse the JSON string, then flatten it into plain text.
        const str = extractText(JSON.parse(body));
        // One array entry per recognized line, ready for further filtering.
        const arr = str.split("\n");
        console.log(arr);
    }
    

    Put your logic based on the JSON structure you are getting.

    For your second and third problem -

    I would like to extract just the ingredients and not the all text.

    In some cases the images may have ingredients without specifying the ingredient key-word, how can I extract the ingredients in this case ?

    Computer Vision will ingest all the printed text from the image and return it to you as JSON; you can't ask it to extract only particular text. You can achieve the required result by using the approach described above and then keeping only the ingredients from the extracted text.

    I suggest reading the "Extract printed text (OCR) using the Computer Vision REST API and Node.js" document on GitHub for more information.