As you can see down there i made a programme that scans a document and optionally get the page info and material & size infos and date info.
When i use OCR scanning like this:
Dim Mdoc As MODI.Document
Dim Mlay As MODI.Layout
Dim fso As Scripting.FileSystemObject
Dim logfile As Object
Public Function ScanMan(ByVal Name As String, ByVal Path As String) As String
Set Mdoc = New MODI.Document
'Set Mdoc = CreateObject("MODI.Document")
Set fso = New Scripting.FileSystemObject
DoEvents
''''''''''''''''''''''''''''''''''''''''''''''''''''''''''
''''''''''''''''''' Create OCRLog File '''''''''''''''''''
OCRPath = App.Path & "\OCR Results Log\"
OCRName = Str(DateTime.Date) & " OCRresults"
If fso.FolderExists(OCRPath) = False Then
fso.CreateFolder (OCRPath)
End If
If fso.FileExists(OCRPath & OCRName & ".txt") = False Then
fso.CreateTextFile OCRPath & OCRName & ".txt"
End If
Set logfile = fso.OpenTextFile(OCRPath & OCRName & ".txt", ForAppending)
''''''''''''''''''''''''''''''''''''''''''''''''''''''''''
''''''''''''''''''''''''''''''''''''''''''''''''''''''''''
On Error GoTo OCRErr
DoEvents
Mdoc.Create Path & "\" & Name
Mdoc.Images(0).OCR miLANG_ENGLISH, True, True
logfile.Write Mdoc.Images(0).Layout.Text
ScanMan = Mlay.Text
Mdoc.Close False
Set Mlay = Nothing
Set Mdoc = Nothing
Exit Function
OCRErr:
logfile.WriteLine "OCR given (" & Err.Number & ") numbered (" & Err.Description & ") error."
logfile.Close
End Function
This gets the whole page but i just want those 3 spesific area to be scanned so how can i achive that? Is there any function for that? Which scans only X,Y coordinates?
A vb6 snippet
Sub TestTextSelection()
Dim miTextSel As MODI.IMiSelectableItem
Dim miSelectRects As MODI.miSelectRects
Dim miSelectRect As MODI.miSelectRect
Dim strTextSelInfo As String
Set miTextSel = MiDocView1.TextSelection
Set miSelectRects = miTextSel.GetSelectRects
strTextSelInfo = _
"Bounding rectangle page & coordinates: " & vbCrLf
For Each miSelectRect In miSelectRects
With miSelectRect
strTextSelInfo = strTextSelInfo & _
.PageNumber & ", " & .Top & ", " & _
.Left & ", " & .Bottom & ", " & _
.Right & vbCrLf
End With
Next
MsgBox strTextSelInfo, vbInformation + vbOKOnly, _
"Text Selection Info"
Set miSelectRect = Nothing
Set miSelectRects = Nothing
Set miTextSel = Nothing
End Sub
Though the question is tagged as vb6
but answer is from vb.Net 2010
. I hope vb.NET
could easily be converted to vb6
, only matters is just a few more time.
x1,y1
) and (x2,y2
).The core class
Imports System
Imports System.IO
Imports System.Xml
Imports System.Linq
Imports MODI
Public Class clsCore
Public Sub New()
'blah blah blah
End Sub
Public Function GetTextFromCoordinates(ByVal iPath$, ByVal x1&, ByVal y1&, ByVal x2&, ByVal y2&) As String
Try
Dim xDoc As XElement = Me.ConvertImage2XML(iPath)
If IsNothing(xDoc) = False Then
Dim result As New XElement(<text/>)
Dim query = xDoc...<wd>.Where(Function(c) Val(CStr(c.@left)) >= x1 And Val(CStr(c.@right)) <= x2 And Val(CStr(c.@top)) >= y1 And Val(CStr(c.@bottom)) <= y2)
For Each ele As XElement In query
result.Add(CStr(ele.Value) & " ")
Next ele
Return Trim(result.Value)
Else
Return ""
End If
Catch ex As Exception
Console.WriteLine(ex.ToString)
Return ex.ToString
End Try
End Function
Private Function ConvertImage2XML(ByVal iPath$) As XElement
Try
If File.Exists(iPath) = True Then
Dim miDoc As New MODI.Document
Dim result As New XElement(<image path=<%= iPath %>/>)
miDoc.Create(iPath)
For Each miImg As MODI.Image In miDoc.Images
Dim page As New XElement(<page id=<%= result...<page>.Count + 1 %>/>)
miImg.OCR()
For Each miWord As MODI.Word In miImg.Layout.Words
Dim wd As New XElement(<wd block=<%= miWord.RegionId.ToString %>><%= miWord.Text %></wd>)
For Each miRect As MODI.MiRect In miWord.Rects
wd.Add(New XAttribute("left", miRect.Left))
wd.Add(New XAttribute("top", miRect.Top))
wd.Add(New XAttribute("right", miRect.Right))
wd.Add(New XAttribute("bottom", miRect.Bottom))
Next miRect
page.Add(wd)
Next miWord
result.Add(page)
Next miImg
Return result
Else
Return Nothing
End If
Catch ex As Exception
Console.WriteLine(ex.ToString)
Return Nothing
End Try
End Function
End Class
the main module
Imports System
Imports System.IO
Imports System.Text.RegularExpressions
Module modMain
Sub Main()
Dim iPath$ = "", iPos$ = "150,825,1400,1200"
Console.WriteLine("Enter path to file:")
iPath = Console.ReadLine()
Console.WriteLine("")
Console.WriteLine("Enter co-ordinates(i.e., x1,y1,x2,y2 or 150,825,1400,1200):")
iPos = Console.ReadLine()
Dim tmp As String() = Regex.Split(iPos, "\D+")
Dim outText$ = New clsCore().GetTextFromCoordinates(iPath, tmp(0), tmp(1), tmp(2), tmp(3))
Console.WriteLine("")
Console.WriteLine(String.Format("{0}[({1},{2})-({3},{4})]:{5}{5}{6}", Dir(iPath), tmp(0), tmp(1), tmp(2), tmp(3), vbCrLf, outText))
Console.ReadLine()
End Sub
End Module
UPDATE
The following example reports the page number and the coordinates of the bounding rectangle around the user's image selection in the viewer control. And which can be used later within picturebox.
Sub TestImageSelection()
Dim miImageSel As MODI.IMiSelectableImage
Dim lngPageNo As Long
Dim lngLeft As Long, lngTop As Long
Dim lngRight As Long, lngBottom As Long
Dim strImageSelInfo As String
Set miImageSel = MiDocView1.ImageSelection
miImageSel.GetBoundingRect lngPageNo, _
lngLeft, lngTop, lngRight, lngBottom
strImageSelInfo = _
"Page number: " & lngPageNo & vbCrLf & _
"Bounding rectangle coordinates: " & vbCrLf & _
lngLeft & ", " & lngTop & ", " & _
lngRight & ", " & lngBottom
MsgBox strImageSelInfo, vbInformation + vbOKOnly, _
"Image Selection Info"
Set miImageSel = Nothing
End Sub
Hope this helps.