This working CodePen demo shows a number which is represented as a vector.
How can I convert the vector-drawn number (SVG format) shown in the demo into an actual numeric value?
From my search on the web, Tesseract does not read vector graphics (SVG). Can anyone show a demo or sample of how to read a number from a vector graphic?
For example, the vector graphics code below should be read as the number 9 and printed with console.log.
<!--
  Sample input from the question: an SVG document (id "svg2", viewBox 0 0 1066.6667 800)
  holding one stroke-only path ("path20": fill:none, stroke-width 0.074).
  The wrapping group "g10" applies matrix(1.3333333,0,0,-1.3333333,0,800), i.e. it scales
  the path by 1.3333333 and mirrors the y-axis around the 800-unit image height.
  According to the accompanying text, the path is meant to be read as the digit 9.
-->
<svg
xmlns:dc="http://purl.org/dc/elements/1.1/"
xmlns:cc="http://creativecommons.org/ns#"
xmlns:rdf="http://www.w3.org/1999/02/22-rdf-syntax-ns#"
xmlns:svg="http://www.w3.org/2000/svg"
xmlns="http://www.w3.org/2000/svg"
viewBox="0 0 1066.6667 800"
height="800"
width="1066.6667"
xml:space="preserve"
id="svg2"
version="1.1"><metadata
id="metadata8"><rdf:RDF><cc:Work
rdf:about=""><dc:format>image/svg+xml</dc:format><dc:type
rdf:resource="http://purl.org/dc/dcmitype/StillImage" /></cc:Work></rdf:RDF></metadata><defs
id="defs6" /><g
transform="matrix(1.3333333,0,0,-1.3333333,0,800)"
id="g10"><path
id="path20"
style="fill:none;stroke:#000000;stroke-width:0.074;stroke-linecap:round;stroke-linejoin:round;stroke-miterlimit:10;stroke-dasharray:none;stroke-opacity:1"
d="m 294.09,426.05 -0.46,-1.38 -0.92,-0.92 -1.38,-0.46 h -0.46 l -1.37,0.46 -0.92,0.92 -0.46,1.38 v 0.46 l 0.46,1.37 0.92,0.92 1.37,0.46 h 0.46 l 1.38,-0.46 0.92,-0.92 0.46,-1.83 v -2.3 l -0.46,-2.29 -0.92,-1.38 -1.38,-0.46 h -0.91 l -1.38,0.46 -0.46,0.92" /></g></svg>
As already commented, you need to convert the svg to a raster image for OCR.
Fortunately, there are many ways to create a temporary bitmap using <canvas>.
Apart from that, Tesseract's OCR might struggle with unusual shapes or low contrast – you might not get the desired result if your SVG contains many shapes.
svg2PngAndOCR("svg");

/**
 * Rasterize an inline SVG element to a PNG via <canvas> and run Tesseract OCR on it.
 *
 * Steps:
 *   1. Serialize the matched SVG element and load it into a temporary Image
 *      through a blob object URL.
 *   2. Draw that image onto an off-screen canvas and export it as a PNG data URL.
 *   3. Show the PNG inside a `.img-wrp` wrapper (created on demand).
 *   4. Pass the PNG <img> to `Tesseract.recognize` and write the recognized
 *      text into the element with id "ocrOutput".
 *
 * The work is asynchronous; the function itself returns nothing.
 *
 * @param {string} selector CSS selector; the first matching element is used
 *                          and is expected to be an <svg> element.
 */
function svg2PngAndOCR(selector) {
  const svgEl = document.querySelector(selector);
  if (!svgEl) {
    console.error(`svg2PngAndOCR: no element matches selector "${selector}"`);
    return;
  }

  // Prefer the viewBox dimensions; fall back to the rendered bounding box
  // when the viewBox is missing or has a zero width/height.
  const viewBox = svgEl.viewBox ? svgEl.viewBox.baseVal : null;
  const svgW = (viewBox && viewBox.width) || svgEl.getBBox().width;
  const svgH = (viewBox && viewBox.height) || svgEl.getBBox().height;

  /**
   * convert svg to png via canvas
   */
  const blob = new Blob([svgEl.outerHTML], { type: "image/svg+xml" });
  const blobURL = window.URL.createObjectURL(blob);
  const tmpImg = new Image();
  tmpImg.width = svgW;
  tmpImg.height = svgH;

  tmpImg.onerror = () => {
    // Release the blob even when the SVG could not be decoded.
    window.URL.revokeObjectURL(blobURL);
    console.error("svg2PngAndOCR: the serialized SVG could not be loaded as an image");
  };

  tmpImg.onload = () => {
    const canvas = document.createElement("canvas");
    canvas.width = svgW;
    canvas.height = svgH;
    const context = canvas.getContext("2d");
    context.drawImage(tmpImg, 0, 0, svgW, svgH);

    // The pixels now live on the canvas, so the object URL is no longer needed.
    window.URL.revokeObjectURL(blobURL);

    const pngDataUrl = canvas.toDataURL();
    const svgImg = document.createElement("img");
    svgImg.width = svgW;
    svgImg.height = svgH;
    // `class` is not a DOM property; `className` is what sets the class attribute.
    svgImg.className = "svgImg";
    svgImg.src = pngDataUrl;

    // just additional wrapping for example usage:
    // reuse an existing wrapper, otherwise create one, and always attach the image
    let imgWrp = document.querySelector(".img-wrp");
    if (!imgWrp) {
      imgWrp = document.createElement("div");
      imgWrp.setAttribute("class", "img-wrp img-wrp-vanilla");
      document.body.appendChild(imgWrp);
    }
    imgWrp.appendChild(svgImg);

    /**
     * OCR: recognize text
     * via tesseract
     */
    Tesseract.recognize(svgImg, "eng", {
      // optional progress logging, e.g. logger: (m) => console.log(m)
    })
      .then(({ data: { text } }) => {
        // Look the output element up explicitly instead of relying on the
        // implicit window property browsers create for element ids.
        const outputEl = document.getElementById("ocrOutput");
        if (outputEl) {
          outputEl.textContent = text;
        } else {
          console.log(text);
        }
      })
      .catch((err) => {
        console.error("svg2PngAndOCR: OCR failed", err);
      });
  };

  // Assign the source last so both handlers are registered before loading starts.
  tmpImg.src = blobURL;
}
/* Source SVG and the wrapper of the rasterized copy: bordered inline boxes, 40% wide. */
svg,
.img-wrp {
  display: inline-block;
  width: 40%;
  height: auto;
  border: 1px solid #ccc;
}

/* Images never grow wider than their container; height follows automatically. */
img {
  max-width: 100%;
  height: auto;
}

/* Utility class: take an element out of rendering. */
.hidden {
  display: none;
}
<!-- Tesseract.js 4.0.2 from unpkg; exposes the global `Tesseract` used by the OCR script. -->
<script src="https://unpkg.com/tesseract.js@4.0.2/dist/tesseract.min.js"></script>
<!-- The span is filled asynchronously once OCR finishes; role="status" announces the update to assistive technology. -->
<p>OCR Output: <span id="ocrOutput" role="status"></span></p>
<!--
  OCR input: SVG document "svg2" (viewBox 0 0 1066.6667 800) with a single stroke-only path.
-->
<svg xmlns:dc="http://purl.org/dc/elements/1.1/" xmlns:cc="http://creativecommons.org/ns#" xmlns:rdf="http://www.w3.org/1999/02/22-rdf-syntax-ns#" xmlns:svg="http://www.w3.org/2000/svg" xmlns="http://www.w3.org/2000/svg" viewBox="0 0 1066.6667 800" height="800" width="1066.6667" xml:space="preserve" id="svg2" version="1.1">
<!-- RDF metadata: declares the format image/svg+xml and the type StillImage. -->
<metadata id="metadata8">
<rdf:RDF>
<cc:Work rdf:about="">
<dc:format>image/svg+xml</dc:format>
<dc:type rdf:resource="http://purl.org/dc/dcmitype/StillImage" />
</cc:Work>
</rdf:RDF>
</metadata>
<defs id="defs6" />
<!-- The matrix scales by 1.3333333 and mirrors the y-axis around the 800-unit image height. -->
<g transform="matrix(1.3333333,0,0,-1.3333333,0,800)" id="g10">
<!-- Unfilled outline drawn with a very thin stroke (stroke-width 0.074). -->
<path id="path20" style="fill:none;stroke:#000000;stroke-width:0.074;stroke-linecap:round;stroke-linejoin:round;stroke-miterlimit:10;stroke-dasharray:none;stroke-opacity:1" d="m 294.09,426.05 -0.46,-1.38 -0.92,-0.92 -1.38,-0.46 h -0.46 l -1.37,0.46 -0.92,0.92 -0.46,1.38 v 0.46 l 0.46,1.37 0.92,0.92 1.37,0.46 h 0.46 l 1.38,-0.46 0.92,-0.92 0.46,-1.83 v -2.3 l -0.46,-2.29 -0.92,-1.38 -1.38,-0.46 h -0.91 l -1.38,0.46 -0.46,0.92" />
</g>
</svg>
svg2PngAndOCR("svg");

/**
 * Rasterize an inline SVG element to a PNG via <canvas> and run Tesseract OCR on it.
 *
 * Steps:
 *   1. Serialize the matched SVG element and load it into a temporary Image
 *      through a blob object URL.
 *   2. Draw that image onto an off-screen canvas and export it as a PNG data URL.
 *   3. Show the PNG inside a `.img-wrp` wrapper (created on demand).
 *   4. Pass the PNG <img> to `Tesseract.recognize` and write the recognized
 *      text into the element with id "ocrOutput".
 *
 * The work is asynchronous; the function itself returns nothing.
 *
 * @param {string} selector CSS selector; the first matching element is used
 *                          and is expected to be an <svg> element.
 */
function svg2PngAndOCR(selector) {
  const svgEl = document.querySelector(selector);
  if (!svgEl) {
    console.error(`svg2PngAndOCR: no element matches selector "${selector}"`);
    return;
  }

  // Prefer the viewBox dimensions; fall back to the rendered bounding box
  // when the viewBox is missing or has a zero width/height.
  const viewBox = svgEl.viewBox ? svgEl.viewBox.baseVal : null;
  const svgW = (viewBox && viewBox.width) || svgEl.getBBox().width;
  const svgH = (viewBox && viewBox.height) || svgEl.getBBox().height;

  /**
   * convert svg to png via canvas
   */
  const blob = new Blob([svgEl.outerHTML], { type: "image/svg+xml" });
  const blobURL = window.URL.createObjectURL(blob);
  const tmpImg = new Image();
  tmpImg.width = svgW;
  tmpImg.height = svgH;

  tmpImg.onerror = () => {
    // Release the blob even when the SVG could not be decoded.
    window.URL.revokeObjectURL(blobURL);
    console.error("svg2PngAndOCR: the serialized SVG could not be loaded as an image");
  };

  tmpImg.onload = () => {
    const canvas = document.createElement("canvas");
    canvas.width = svgW;
    canvas.height = svgH;
    const context = canvas.getContext("2d");
    context.drawImage(tmpImg, 0, 0, svgW, svgH);

    // The pixels now live on the canvas, so the object URL is no longer needed.
    window.URL.revokeObjectURL(blobURL);

    const pngDataUrl = canvas.toDataURL();
    const svgImg = document.createElement("img");
    svgImg.width = svgW;
    svgImg.height = svgH;
    // `class` is not a DOM property; `className` is what sets the class attribute.
    svgImg.className = "svgImg";
    svgImg.src = pngDataUrl;

    // just additional wrapping for example usage:
    // reuse an existing wrapper, otherwise create one, and always attach the image
    let imgWrp = document.querySelector(".img-wrp");
    if (!imgWrp) {
      imgWrp = document.createElement("div");
      imgWrp.setAttribute("class", "img-wrp img-wrp-vanilla");
      document.body.appendChild(imgWrp);
    }
    imgWrp.appendChild(svgImg);

    /**
     * OCR: recognize text
     * via tesseract
     */
    Tesseract.recognize(svgImg, "eng", {
      // optional progress logging, e.g. logger: (m) => console.log(m)
    })
      .then(({ data: { text } }) => {
        // Look the output element up explicitly instead of relying on the
        // implicit window property browsers create for element ids.
        const outputEl = document.getElementById("ocrOutput");
        if (outputEl) {
          outputEl.textContent = text;
        } else {
          console.log(text);
        }
      })
      .catch((err) => {
        console.error("svg2PngAndOCR: OCR failed", err);
      });
  };

  // Assign the source last so both handlers are registered before loading starts.
  tmpImg.src = blobURL;
}
/* Source SVG and the wrapper of the rasterized copy: bordered inline boxes, 40% wide. */
svg,
.img-wrp {
  display: inline-block;
  width: 40%;
  height: auto;
  border: 1px solid #ccc;
}

/* Images never grow wider than their container; height follows automatically. */
img {
  max-width: 100%;
  height: auto;
}

/* Utility class: take an element out of rendering. */
.hidden {
  display: none;
}
<!-- Tesseract.js 4.0.2 from unpkg; exposes the global `Tesseract` used by the OCR script. -->
<script src="https://unpkg.com/tesseract.js@4.0.2/dist/tesseract.min.js"></script>
<!-- The span is filled asynchronously once OCR finishes; role="status" announces the update to assistive technology. -->
<p>OCR Output: <span id="ocrOutput" role="status"></span></p>
<!--
  OCR input: SVG document "svg2" (viewBox 0 0 1066.6667 800) containing a stroke-only path,
  a filled path with id "hello", and a <use> element that draws "hello" a second time.
-->
<svg xmlns:dc="http://purl.org/dc/elements/1.1/" xmlns:cc="http://creativecommons.org/ns#" xmlns:rdf="http://www.w3.org/1999/02/22-rdf-syntax-ns#" xmlns:svg="http://www.w3.org/2000/svg" xmlns="http://www.w3.org/2000/svg" viewBox="0 0 1066.6667 800" height="800" width="1066.6667" xml:space="preserve" id="svg2" version="1.1">
<!-- RDF metadata: declares the format image/svg+xml and the type StillImage. -->
<metadata id="metadata8">
<rdf:RDF>
<cc:Work rdf:about="">
<dc:format>image/svg+xml</dc:format>
<dc:type rdf:resource="http://purl.org/dc/dcmitype/StillImage" />
</cc:Work>
</rdf:RDF>
</metadata>
<defs id="defs6" />
<!-- The matrix scales by 1.3333333 and mirrors the y-axis around the 800-unit image height. -->
<g transform="matrix(1.3333333,0,0,-1.3333333,0,800)" id="g10">
<!-- Unfilled outline; stroke-width is 0.5 in this version of the path. -->
<path id="path20" style="fill:none;stroke:#000000;stroke-width:0.5;stroke-linecap:round;stroke-linejoin:round;stroke-miterlimit:10;stroke-dasharray:none;stroke-opacity:1" d="m 294.09,426.05 -0.46,-1.38 -0.92,-0.92 -1.38,-0.46 h -0.46 l -1.37,0.46 -0.92,0.92 -0.46,1.38 v 0.46 l 0.46,1.37 0.92,0.92 1.37,0.46 h 0.46 l 1.38,-0.46 0.92,-0.92 0.46,-1.83 v -2.3 l -0.46,-2.29 -0.92,-1.38 -1.38,-0.46 h -0.91 l -1.38,0.46 -0.46,0.92" />
</g>
<!-- Path with default fill, id "hello", class "glyph"; presumably outlined text used as an extra OCR sample (confirm by rendering). -->
<path id="hello" transform="scale(1)" class="glyph" d="M4.8 200.5h-0.9v-3.3h-3v3.3h-0.9v-6.9h0.9v2.8h3v-2.8h0.9v6.9zm5.9-2.8q0 0.2 0 0.4l0 0h-3.4q0.1 0.9 0.5 1.3t1 0.4l0 0q0.4 0 0.7-0.1t0.6-0.3l0 0l0.4 0.5q-0.8 0.7-1.8 0.7l0 0q-1.1 0-1.7-0.7q-0.6-0.8-0.6-2l0 0q0-0.8 0.2-1.5q0.3-0.6 0.8-1q0.5-0.3 1.2-0.3l0 0q1 0 1.6 0.7q0.5 0.6 0.5 1.9l0 0zm-0.9-0.2v-0.1q0-0.8-0.3-1.2t-0.9-0.4l0 0q-1.1 0-1.3 1.7l0 0h2.5zm3.5 3.1q-0.6 0-0.9-0.3q-0.2-0.3-0.2-0.9l0 0v-6.3l0.9-0.1v6.4q0 0.2 0 0.3q0.1 0.1 0.3 0.1l0 0q0.2 0 0.3 0l0 0l0.3 0.6q-0.3 0.2-0.7 0.2l0 0zm2.9 0q-0.5 0-0.8-0.3t-0.3-0.9l0 0v-6.3l0.9-0.1v6.4q0 0.2 0.1 0.3q0 0.1 0.2 0.1l0 0q0.2 0 0.4 0l0 0l0.2 0.6q-0.3 0.2-0.7 0.2l0 0zm3.8-5.5q1.1 0 1.8 0.7q0.6 0.7 0.6 2l0 0q0 0.8-0.3 1.5q-0.3 0.6-0.8 0.9q-0.5 0.4-1.3 0.4l0 0q-1.1 0-1.7-0.8q-0.6-0.7-0.6-2l0 0q0-0.8 0.3-1.4q0.2-0.6 0.8-1q0.5-0.3 1.2-0.3l0 0zm0 0.7q-1.3 0-1.3 2l0 0q0 2 1.3 2l0 0q1.4 0 1.4-2l0 0q0-2-1.4-2l0 0zm12.7-2.2h0.9l-1.4 6.9h-1.2l-1.3-5.8l-1.3 5.8h-1.2l-1.4-6.9h1l1.1 6l1.3-6h1l1.4 6l1.1-6zm4.1 1.5q1.1 0 1.7 0.7t0.6 2l0 0q0 0.8-0.2 1.5q-0.3 0.6-0.9 0.9q-0.5 0.4-1.2 0.4l0 0q-1.1 0-1.8-0.8q-0.6-0.7-0.6-2l0 0q0-0.8 0.3-1.4t0.8-1q0.5-0.3 1.3-0.3l0 0zm0 0.7q-1.4 0-1.4 2l0 0q0 2 1.4 2l0 0q1.3 0 1.3-2l0 0q0-2-1.3-2l0 0zm6.1-0.7q0.3 0 0.6 0l0 0l-0.2 0.9q-0.2 0-0.5 0l0 0q-0.5 0-0.8 0.3q-0.3 0.4-0.4 1.1l0 0v3.1h-0.9v-5.3h0.7l0.1 1.1q0.2-0.6 0.6-0.9t0.8-0.3l0 0zm2.7 5.5q-0.5 0-0.8-0.3t-0.3-0.9l0 0v-6.3l0.9-0.1v6.4q0 0.2 0.1 0.3q0 0.1 0.2 0.1l0 0q0.2 0 0.4 0l0 0l0.2 0.6q-0.3 0.2-0.7 0.2l0 0zm5-7.6l0.9 0.1v7.4h-0.8l-0.1-0.8q-0.2 0.4-0.6 0.6q-0.4 0.3-0.9 0.3l0 0q-0.9 0-1.5-0.8q-0.5-0.7-0.5-2l0 0q0-0.8 0.3-1.4q0.2-0.6 0.7-1q0.5-0.3 1.1-0.3l0 0q0.8 0 1.4 0.6l0 0v-2.7zm-1.3 6.8q0.4 0 0.7-0.2q0.3-0.1 0.6-0.5l0 0v-2.6q-0.3-0.4-0.6-0.5q-0.2-0.2-0.6-0.2l0 0q-0.6 0-1 0.5q-0.3 0.5-0.3 1.5l0 0q0 1 0.3 1.5t0.9 0.5l0 0z " />
<!-- Draws the "hello" path a second time with x="200" and transform="scale(2)". -->
<use href="#hello" x="200" transform="scale(2)"/>
</svg>