The OcrInput class gives C# and .NET developers granular control to preprocess image input for speed and accuracy before OCR is performed.
This removes the need for the common practice of using Photoshop batch scripts or ImageMagick to prepare images for OCR.
C#:
using IronOcr;
// Create the OCR engine that will read the prepared input.
var ocr = new IronTesseract();

// The using block disposes the OcrInput once reading has finished.
using (var input = new OcrInput(@"images\image.png"))
{
    input.WithTitle("My Document");

    // Image filters applied to the input before it is read.
    input.Binarize();
    input.Contrast();
    input.Deskew();
    input.DeNoise();
    input.Dilate();
    input.EnhanceResolution(300);
    input.Invert();
    input.Rotate(90);
    input.Scale(150); // or input.Scale(3000, 2000);
    input.Sharpen();
    input.ToGrayScale();
    // You don't need all of them;
    // most users only need Deskew() and occasionally DeNoise().

    // Optional: export the modified images so you can view them.
    // Each page gets its own file name so later pages do not overwrite earlier ones.
    var pageIndex = 0;
    foreach (var page in input.Pages)
    {
        page.SaveAsImage($"filtered_{pageIndex}.bmp");
        pageIndex++;
    }

    var result = ocr.Read(input);
    Console.WriteLine(result.Text);
}
VB:
Imports IronOcr
' Create the OCR engine that will read the prepared input.
Dim Ocr = New IronTesseract()

' The Using block disposes the OcrInput once reading has finished.
Using Input = New OcrInput("images\image.png")
    Input.WithTitle("My Document")

    ' Image filters applied to the input before it is read.
    Input.Binarize()
    Input.Contrast()
    Input.Deskew()
    Input.DeNoise()
    Input.Dilate()
    Input.EnhanceResolution(300)
    Input.Invert()
    Input.Rotate(90)
    Input.Scale(150) ' or Input.Scale(3000, 2000)
    Input.Sharpen()
    Input.ToGrayScale()
    ' You don't need all of them;
    ' most users only need Deskew() and occasionally DeNoise().

    ' Optional: export the modified images so you can view them.
    ' Each page gets its own file name so later pages do not overwrite earlier ones.
    Dim pageIndex = 0
    For Each page In Input.Pages
        page.SaveAsImage($"filtered_{pageIndex}.bmp")
        pageIndex += 1
    Next page

    Dim Result = Ocr.Read(Input)
    Console.WriteLine(Result.Text)
End Using
Top comments (0)