Skip to content

Commit

Permalink
Added properties CurrentEngineMode, InitLanguage, DataPath, LoadedLan…
Browse files Browse the repository at this point in the history
…guages and AvailableLanguages
Kees van Spelde committed Apr 10, 2022

Verified

This commit was signed with the committer’s verified signature.
evenyag Yingwen
1 parent b6203a6 commit b62bb27
Showing 9 changed files with 288 additions and 90 deletions.
3 changes: 3 additions & 0 deletions Tesseract.sln.DotSettings
Original file line number Diff line number Diff line change
@@ -104,6 +104,8 @@
<s:Boolean x:Key="/Default/UserDictionary/Words/=pageseg/@EntryIndexedValue">True</s:Boolean>
<s:Boolean x:Key="/Default/UserDictionary/Words/=pangle/@EntryIndexedValue">True</s:Boolean>
<s:Boolean x:Key="/Default/UserDictionary/Words/=Panjabi/@EntryIndexedValue">True</s:Boolean>
<s:Boolean x:Key="/Default/UserDictionary/Words/=paraids/@EntryIndexedValue">True</s:Boolean>

<s:Boolean x:Key="/Default/UserDictionary/Words/=Pashto/@EntryIndexedValue">True</s:Boolean>
<s:Boolean x:Key="/Default/UserDictionary/Words/=pconf/@EntryIndexedValue">True</s:Boolean>
<s:Boolean x:Key="/Default/UserDictionary/Words/=phototest/@EntryIndexedValue">True</s:Boolean>
@@ -148,6 +150,7 @@
<s:Boolean x:Key="/Default/UserDictionary/Words/=Tesseract_0027s/@EntryIndexedValue">True</s:Boolean>
<s:Boolean x:Key="/Default/UserDictionary/Words/=testregion/@EntryIndexedValue">True</s:Boolean>
<s:Boolean x:Key="/Default/UserDictionary/Words/=TEXTLINE/@EntryIndexedValue">True</s:Boolean>
<s:Boolean x:Key="/Default/UserDictionary/Words/=Textlines/@EntryIndexedValue">True</s:Boolean>
<s:Boolean x:Key="/Default/UserDictionary/Words/=textonly/@EntryIndexedValue">True</s:Boolean>
<s:Boolean x:Key="/Default/UserDictionary/Words/=textord/@EntryIndexedValue">True</s:Boolean>
<s:Boolean x:Key="/Default/UserDictionary/Words/=Thaana/@EntryIndexedValue">True</s:Boolean>
16 changes: 14 additions & 2 deletions TesseractOCR.Tests/BaseApiTests.cs
Original file line number Diff line number Diff line change
@@ -6,11 +6,23 @@ namespace Tesseract.Tests
public class BaseApiTests : TesseractTestBase
{
[TestMethod]
public void GetVersion_Is500()
public void GetVersion_Is510()
{
using var engine = CreateEngine();
var version = engine.Version;
Assert.IsTrue(version.StartsWith("5.0.0"));
Assert.IsTrue(version.StartsWith("5.1.0"));
}

[TestMethod]
public void LoadedLanguages()
{
    // Smoke test for the engine properties and cache clearing methods; every value that is read
    // is also asserted so that the test can actually fail
    using var engine = CreateEngine();

    // The engine created by the test base is expected to report the data path it uses
    var dataPath = engine.DataPath;
    Assert.IsFalse(string.IsNullOrEmpty(dataPath));

    // These calls have no return value; the test only checks that they do not throw
    engine.ClearAdaptiveClassifier();
    engine.ClearPersistentCache();

    // The property this test is named after
    var loadedLanguages = engine.LoadedLanguages;
    Assert.IsNotNull(loadedLanguages);
    Assert.IsTrue(loadedLanguages.Count > 0);

    var availableLanguages = engine.AvailableLanguages;
    Assert.IsNotNull(availableLanguages);
    Assert.IsTrue(availableLanguages.Count > 0);
}

}
}
22 changes: 1 addition & 21 deletions TesseractOCR.Tests/EngineTests.cs
Original file line number Diff line number Diff line change
@@ -141,27 +141,6 @@ public void CanProcessDifferentRegionsInSameImage()
Assert.AreEqual(region2Text, expectedTextRegion2);
}

[TestMethod]
public void CanGetSegmentedRegions()
{
    // Processes the test image and checks the number of regions found at text line level
    const int expectedCount = 8; // number of text lines in test image

    using var engine = CreateEngine();
    var imgPath = TestFilePath(TestImagePath);
    using var img = TesseractOCR.Pix.Image.LoadFromFile(imgPath);
    using var page = engine.Process(img);
    var boxes = page.GetSegmentedRegions(PageIteratorLevel.TextLine);

    // Dump every region so a failing run shows what was actually found
    for (var i = 0; i < boxes.Count; i++)
    {
        var box = boxes[i];
        Console.WriteLine($"Box[{i}]: x={box.X}, y={box.Y}, w={box.Width}, h={box.Height}");
    }

    // MSTest takes the expected value first and the actual value second; with the arguments the
    // other way around a failure message reports the two values swapped
    Assert.AreEqual(expectedCount, boxes.Count);
}

[TestMethod]
public void CanProcessEmptyPixUsingResultIterator()
{
@@ -265,6 +244,7 @@ public void CanProcessPixUsingResultIterator()

foreach (var paragraph in block.Paragraphs)
{
var regions = block.SegmentedRegions;
result.AppendLine($"Paragraph confidence: {paragraph.Confidence}");
if (paragraph.BoundingBox != null)
{
55 changes: 54 additions & 1 deletion TesseractOCR/Engine.cs
Original file line number Diff line number Diff line change
@@ -57,6 +57,31 @@ public class Engine : DisposableBase
/// Gets or sets default <see cref="PageSegMode" /> mode used by one of the Process methods
/// </summary>
public PageSegMode DefaultPageSegMode { get; set; }

/// <summary>
///     Gets the <see cref="EngineMode"/> that the native API reports for this engine
/// </summary>
public EngineMode CurrentEngineMode
{
    get { return TessApi.Native.BaseAPIOem(_handle); }
}

/// <summary>
///     Gets a <see cref="Language"/> by converting a string that is read from the native API
/// </summary>
/// <remarks>
///     NOTE(review): the converted string comes from <c>BaseApiGetDatapath</c>, the same native call that
///     <see cref="DataPath"/> uses, so a directory path instead of a language code is handed to
///     <c>LanguageHelper.StringToEnum</c>. Presumably the native call that returns the languages used in
///     the last valid initialization was intended here - TODO confirm and correct
/// </remarks>
public Language InitLanguage
{
    get
    {
        var nativeString = TessApi.Native.BaseApiGetDatapath(_handle);
        var text = MarshalHelper.PtrToString(nativeString);
        return LanguageHelper.StringToEnum(text);
    }
}

/// <summary>
///     Gets the data path as reported by the native API, with every forward slash replaced by
///     <see cref="Path.DirectorySeparatorChar"/>
/// </summary>
public string DataPath
{
    get
    {
        var nativeString = TessApi.Native.BaseApiGetDatapath(_handle);
        var rawPath = MarshalHelper.PtrToString(nativeString);
        return rawPath.Replace('/', Path.DirectorySeparatorChar);
    }
}

/// <summary>
///     Gets the list of loaded <see cref="Language"/>'s as returned by <c>TessApi.BaseApiLoadedLanguages</c>
/// </summary>
public List<Language> LoadedLanguages
{
    get { return TessApi.BaseApiLoadedLanguages(_handle); }
}

/// <summary>
///     Gets the list of available <see cref="Language"/>'s as returned by
///     <c>TessApi.BaseAPIGetAvailableLanguagesAsVector</c>
/// </summary>
public List<Language> AvailableLanguages
{
    get { return TessApi.BaseAPIGetAvailableLanguagesAsVector(_handle); }
}
#endregion

#region Constructors
@@ -630,7 +655,35 @@ private void Initialize(string dataPath, Language language, EngineMode engineMod
}
#endregion

#region BaseApiSetDebugVariable
#region ClearAdaptiveClassifier
/// <summary>
///     Clears the adaptive classifier. Call this between pages, documents, etc. to free up memory and to
///     forget the adaptive data
/// </summary>
public void ClearAdaptiveClassifier() => TessApi.Native.BaseAPIClearAdaptiveClassifier(_handle);
#endregion

#region ClearPersistentCache
/// <summary>
///     Clears any library-level memory caches. A variety of expensive-to-load constant data structures
///     (mostly language dictionaries) are cached globally and survive the Init() and End() of individual
///     TessBaseAPI's. This method allows these caches to be cleared
/// </summary>
public void ClearPersistentCache() => TessApi.Native.BaseAPIClearPersistentCache(_handle);
#endregion

#region SetDebugVariable
/// <summary>
/// Sets a debug variable.
/// </summary>
/// <param name="name"></param>
/// <param name="value"></param>
/// <returns></returns>
public bool SetDebugVariable(string name, string value)
{
return TessApi.BaseApiSetDebugVariable(_handle, name, value) != 0;
192 changes: 166 additions & 26 deletions TesseractOCR/Interop/TessApi.cs

Large diffs are not rendered by default.

35 changes: 35 additions & 0 deletions TesseractOCR/Layout/EnumeratorBase.cs
Original file line number Diff line number Diff line change
@@ -19,6 +19,8 @@
// limitations under the License.

using System;
using System.Collections.Generic;
using System.Drawing;
using System.Runtime.InteropServices;
using TesseractOCR.Enums;
using TesseractOCR.Helpers;
@@ -86,6 +88,39 @@ public class EnumeratorBase
/// <returns>The <see cref="Pix.Image"/> or <c>null</c> when it fails</returns>
public Pix.Image BinaryImage
{
    get
    {
        // Ask the native page iterator for the binary image at the current level and hand the
        // result to Pix.Image.Create
        var nativeImage = TessApi.Native.PageIteratorGetBinaryImage(IteratorHandleRef, PageIteratorLevel);
        return Pix.Image.Create(nativeImage);
    }
}

/// <summary>
///     Returns the segmented regions at the current <see cref="PageIteratorLevel"/>
/// </summary>
/// <remarks>
///     The regions are requested through the engine handle (<c>EngineHandleRef</c>) and not through the
///     iterator handle. NOTE(review): presumably this yields every region of the page at this level and
///     not only the regions inside the current element - confirm against the callers
/// </remarks>
/// <returns>
///     A list with one <see cref="Rectangle"/> for every box that could be read, empty when there are none
/// </returns>
public List<Rectangle> SegmentedRegions
{
    get
    {
        Logger.LogInformation("Getting segmented regions");

        var result = new List<Rectangle>();
        var boxArray = TessApi.Native.BaseApiGetComponentImages(EngineHandleRef, PageIteratorLevel, Constants.True, IntPtr.Zero, IntPtr.Zero);

        // When the native call returned nothing there is nothing to enumerate and nothing to release,
        // so the null handle is never passed on to Leptonica
        if (boxArray != IntPtr.Zero)
        {
            try
            {
                // The handle does not change while enumerating, so wrap it only once
                var boxArrayRef = new HandleRef(this, boxArray);
                var boxCount = LeptonicaApi.Native.boxaGetCount(boxArrayRef);

                for (var i = 0; i < boxCount; i++)
                {
                    var box = LeptonicaApi.Native.boxaGetBox(boxArrayRef, i, PixArrayAccessType.Clone);

                    if (box == IntPtr.Zero)
                        continue;

                    try
                    {
                        LeptonicaApi.Native.boxGetGeometry(new HandleRef(this, box), out var px, out var py, out var pw, out var ph);
                        result.Add(new Rectangle(px, py, pw, ph));
                    }
                    finally
                    {
                        // The box was requested as a clone, release it even when reading it failed
                        LeptonicaApi.Native.boxDestroy(ref box);
                    }
                }
            }
            finally
            {
                // Always release the native box array, also when an exception interrupts the loop
                LeptonicaApi.Native.boxaDestroy(ref boxArray);
            }
        }

        Logger.LogInformation($"Found {result.Count} region{(result.Count == 1 ? string.Empty : "s")}");

        return result;
    }
}

/// <summary>
/// Returns a <see cref="Pix.Image"/> from what is seen at the current <see cref="PageIteratorLevel"/>
/// </summary>
39 changes: 0 additions & 39 deletions TesseractOCR/Page.cs
Original file line number Diff line number Diff line change
@@ -20,8 +20,6 @@
// limitations under the License.

using System;
using System.Collections.Generic;
using System.Drawing;
using System.IO;
using System.Runtime.InteropServices;
using System.Text;
@@ -355,43 +353,6 @@ internal Page(
}
#endregion

#region GetSegmentedRegions
/// <summary>
///     Get segmented regions at specified page iterator level
/// </summary>
/// <param name="pageIteratorLevel">The <see cref="PageIteratorLevel"/> at which the regions are requested</param>
/// <remarks>
///     This method can be called without triggering the <see cref="Recognize"/> method
/// </remarks>
/// <returns>
///     A list with one <see cref="Rectangle"/> for every box that could be read, empty when there are none
/// </returns>
public List<Rectangle> GetSegmentedRegions(PageIteratorLevel pageIteratorLevel)
{
    Logger.LogInformation("Getting segmented regions");

    var boxList = new List<Rectangle>();
    var boxArray = TessApi.Native.BaseApiGetComponentImages(Engine.Handle, pageIteratorLevel, Constants.True, IntPtr.Zero, IntPtr.Zero);

    // When the native call returned nothing there is nothing to enumerate and nothing to release,
    // so the null handle is never passed on to Leptonica
    if (boxArray != IntPtr.Zero)
    {
        try
        {
            // The handle does not change while enumerating, so wrap it only once
            var boxArrayRef = new HandleRef(this, boxArray);
            var boxCount = LeptonicaApi.Native.boxaGetCount(boxArrayRef);

            for (var i = 0; i < boxCount; i++)
            {
                var box = LeptonicaApi.Native.boxaGetBox(boxArrayRef, i, PixArrayAccessType.Clone);

                if (box == IntPtr.Zero)
                    continue;

                try
                {
                    LeptonicaApi.Native.boxGetGeometry(new HandleRef(this, box), out var px, out var py, out var pw, out var ph);
                    boxList.Add(new Rectangle(px, py, pw, ph));
                }
                finally
                {
                    // The box was requested as a clone, release it even when reading it failed
                    LeptonicaApi.Native.boxDestroy(ref box);
                }
            }
        }
        finally
        {
            // Always release the native box array, also when an exception interrupts the loop
            LeptonicaApi.Native.boxaDestroy(ref boxArray);
        }
    }

    Logger.LogInformation($"Found {boxList.Count} region{(boxList.Count == 1 ? string.Empty : "s")}");

    return boxList;
}
#endregion

#region DetectOrientation
/// <summary>
/// Detects the page orientation, with corresponding confidence when using <see cref="PageSegMode.OsdOnly" />
1 change: 0 additions & 1 deletion TesseractOCR/Pix/Image.cs
Original file line number Diff line number Diff line change
@@ -28,7 +28,6 @@
using TesseractOCR.Helpers;
using TesseractOCR.Internal;
using TesseractOCR.Interop;
using TesseractOCR.Loggers;
using Math = System.Math;

// ReSharper disable InconsistentNaming
15 changes: 15 additions & 0 deletions TesseractOCR/TesseractOCR.csproj
Original file line number Diff line number Diff line change
@@ -47,4 +47,19 @@
<PackagePath>\</PackagePath>
</None>
</ItemGroup>

<!-- Updates the Content items for the x64 and x86 builds of leptonica-1.82.0.dll and tesseract51.dll
     so that they are always copied to the output directory -->
<ItemGroup>
<Content Update="x64\leptonica-1.82.0.dll">
<CopyToOutputDirectory>Always</CopyToOutputDirectory>
</Content>
<Content Update="x64\tesseract51.dll">
<CopyToOutputDirectory>Always</CopyToOutputDirectory>
</Content>
<Content Update="x86\leptonica-1.82.0.dll">
<CopyToOutputDirectory>Always</CopyToOutputDirectory>
</Content>
<Content Update="x86\tesseract51.dll">
<CopyToOutputDirectory>Always</CopyToOutputDirectory>
</Content>
</ItemGroup>
</Project>

0 comments on commit b62bb27

Please sign in to comment.