From e3cfe2f000b90683af2077530e7ba98690745292 Mon Sep 17 00:00:00 2001 From: Charles Weld Date: Sat, 20 Sep 2014 16:07:25 +1000 Subject: [PATCH] Back ported setvariable fix from 3.03 to dev (Issues #120 and #68) --- .gitignore | 1 + src/Tesseract.Tests/EngineTests.cs | 24 +++++++++++++++++++ src/Tesseract/Interop/BaseApi.cs | 33 +++++++++++++++++++++++--- src/Tesseract/Interop/MarshalHelper.cs | 14 +++++++++++ src/Tesseract/TesseractEngine.cs | 10 ++++---- 5 files changed, 74 insertions(+), 8 deletions(-) diff --git a/.gitignore b/.gitignore index bf282a4c..effb3fcf 100644 --- a/.gitignore +++ b/.gitignore @@ -187,3 +187,4 @@ pip-log.txt /*.Log *.sdsettings +*.orig diff --git a/src/Tesseract.Tests/EngineTests.cs b/src/Tesseract.Tests/EngineTests.cs index 9c5bb5ed..29896aca 100644 --- a/src/Tesseract.Tests/EngineTests.cs +++ b/src/Tesseract.Tests/EngineTests.cs @@ -314,6 +314,7 @@ public void CanSetDoubleVariable(string variableName, double variableValue) [TestCase("tessedit_char_whitelist", "0123456789abcdefghijklmnopqrstuvwxyzABCDEFGHIJKLMNOPQRSTUVWXYZ")] [TestCase("tessedit_char_whitelist", "")] [TestCase("tessedit_char_whitelist", "Test")] + [TestCase("tessedit_char_whitelist", "chinese 漢字")] // Issue 68: non-ASCII value public void CanSetStringVariable(string variableName, string variableValue) { using (var engine = new TesseractEngine(@"./tessdata", "eng", EngineMode.Default)) { @@ -328,6 +329,29 @@ public void CanSetStringVariable(string variableName, string variableValue) } } + + /// <summary> + /// As per Bug #52, setting the 'classify_bln_numeric_mode' variable to '1' causes the engine to fail during processing. 
+ /// </summary> + [Test, + Ignore("Broken in Tesseract 3.02")] + public void CanSetClassifyBlnNumericModeVariable() + { + using (var engine = new TesseractEngine(@"./tessdata", "eng", EngineMode.Default)) { + engine.SetVariable("classify_bln_numeric_mode", 1); + + using(var img = Pix.LoadFromFile("./Data/processing/numbers.png")) { + using(var page = engine.Process(img)) { + var text = page.GetText(); + + const string expectedText = "1234567890\n\n"; + + Assert.That(text, Is.EqualTo(expectedText)); + } + } + } + } + #endregion #region File Helpers diff --git a/src/Tesseract/Interop/BaseApi.cs b/src/Tesseract/Interop/BaseApi.cs index 3446d227..9b198050 100644 --- a/src/Tesseract/Interop/BaseApi.cs +++ b/src/Tesseract/Interop/BaseApi.cs @@ -59,11 +59,11 @@ int BaseApiInit(HandleRef handle, [RuntimeDllImport(Constants.TesseractDllName, CallingConvention = CallingConvention.Cdecl, EntryPoint = "TessBaseAPISetVariable")] - int BaseApiSetVariable(HandleRef handle, string name, string value); + int BaseApiSetVariable(HandleRef handle, string name, IntPtr valPtr); [RuntimeDllImport(Constants.TesseractDllName, CallingConvention = CallingConvention.Cdecl, EntryPoint = "TessBaseAPISetDebugVariable")] - int BaseApiSetDebugVariable(HandleRef handle, string name, string value); + int BaseApiSetDebugVariable(HandleRef handle, string name, IntPtr valPtr); [RuntimeDllImport(Constants.TesseractDllName, CallingConvention = CallingConvention.Cdecl, EntryPoint = "TessBaseAPIGetIntVariable")] int BaseApiGetIntVariable(HandleRef handle, string name, out int value); @@ -205,12 +205,39 @@ public static void Initialize() native = InteropRuntimeImplementer.CreateInstance(); } } + + public static int BaseApiSetVariable(HandleRef handle, string name, string value) + { + IntPtr valuePtr = IntPtr.Zero; + try { + valuePtr = MarshalHelper.StringToPtr(value, Encoding.UTF8); + return Native.BaseApiSetVariable(handle, name, valuePtr); + } finally { + if(valuePtr != IntPtr.Zero) { + 
Marshal.FreeHGlobal(valuePtr); + } + } + } + + public static int BaseApiSetDebugVariable(HandleRef handle, string name, string value) + { + IntPtr valuePtr = IntPtr.Zero; + try { + valuePtr = MarshalHelper.StringToPtr(value, Encoding.UTF8); + return Native.BaseApiSetDebugVariable(handle, name, valuePtr); + } finally { + if(valuePtr != IntPtr.Zero) { + Marshal.FreeHGlobal(valuePtr); + } + } + } + public static string BaseApiGetStringVariable(HandleRef handle, string name) { var resultHandle = Native.BaseApiGetStringVariableInternal(handle, name); + return MarshalHelper.PtrToString(resultHandle, Encoding.UTF8); - return Marshal.PtrToStringAnsi(resultHandle); } public static string BaseAPIGetUTF8Text(HandleRef handle) diff --git a/src/Tesseract/Interop/MarshalHelper.cs b/src/Tesseract/Interop/MarshalHelper.cs index db00e8c4..d75b9e0c 100644 --- a/src/Tesseract/Interop/MarshalHelper.cs +++ b/src/Tesseract/Interop/MarshalHelper.cs @@ -1,11 +1,25 @@ using System; using System.Collections.Generic; +using System.Runtime.InteropServices; using System.Text; namespace Tesseract.Interop { unsafe static class MarshalHelper { + public static IntPtr StringToPtr(string value, Encoding encoding) + { + var encoder = encoding.GetEncoder(); + var length = encoding.GetByteCount(value); + // The encoded value is null-terminated, hence the '+1': the extra trailing byte stays 0. 
+ var encodedValue = new byte[length + 1]; + encoding.GetBytes(value, 0, value.Length, encodedValue, 0); + var handle = Marshal.AllocHGlobal(new IntPtr(encodedValue.Length)); + Marshal.Copy(encodedValue, 0, handle, encodedValue.Length); + return handle; + } + + public static string PtrToString(IntPtr handle, Encoding encoding) { var length = StrLength(handle); diff --git a/src/Tesseract/TesseractEngine.cs b/src/Tesseract/TesseractEngine.cs index c0674bdc..3a411c15 100644 --- a/src/Tesseract/TesseractEngine.cs +++ b/src/Tesseract/TesseractEngine.cs @@ -82,7 +82,7 @@ public string Version /// Returns True if successful; otherwise False. public bool SetVariable(string name, string value) { - return Interop.TessApi.Native.BaseApiSetVariable(handle, name, value) != 0; + return Interop.TessApi.BaseApiSetVariable(handle, name, value) != 0; } /// @@ -94,7 +94,7 @@ public bool SetVariable(string name, string value) public bool SetVariable(string name, bool value) { var strEncodedValue = value ? "TRUE" : "FALSE"; - return Interop.TessApi.Native.BaseApiSetVariable(handle, name, strEncodedValue) != 0; + return Interop.TessApi.BaseApiSetVariable(handle, name, strEncodedValue) != 0; } /// @@ -106,7 +106,7 @@ public bool SetVariable(string name, bool value) public bool SetVariable(string name, int value) { var strEncodedValue = value.ToString("D", CultureInfo.InvariantCulture.NumberFormat); - return Interop.TessApi.Native.BaseApiSetVariable(handle, name, strEncodedValue) != 0; + return Interop.TessApi.BaseApiSetVariable(handle, name, strEncodedValue) != 0; } /// @@ -118,12 +118,12 @@ public bool SetVariable(string name, int value) public bool SetVariable(string name, double value) { var strEncodedValue = value.ToString("R", CultureInfo.InvariantCulture.NumberFormat); - return Interop.TessApi.Native.BaseApiSetVariable(handle, name, strEncodedValue) != 0; + return Interop.TessApi.BaseApiSetVariable(handle, name, strEncodedValue) != 0; } public bool SetDebugVariable(string name, 
string value) { - return Interop.TessApi.Native.BaseApiSetDebugVariable(handle, name, value) != 0; + return Interop.TessApi.BaseApiSetDebugVariable(handle, name, value) != 0; } ///