From 1c65c5dcb2e081d0beaa4beaeb0f5f4b9e40e5e3 Mon Sep 17 00:00:00 2001
From: nguyenq
Date: Fri, 4 Nov 2016 22:00:09 -0500
Subject: [PATCH] Use multiple renderers in same process

---
 src/Tesseract.Tests/ResultRendererTests.cs | 31 +++++++--
 src/Tesseract/ResultRenderer.cs            | 81 ++++++++++++++++++++++
 2 files changed, 106 insertions(+), 6 deletions(-)

diff --git a/src/Tesseract.Tests/ResultRendererTests.cs b/src/Tesseract.Tests/ResultRendererTests.cs
index 9c39494c..0faf4198 100644
--- a/src/Tesseract.Tests/ResultRendererTests.cs
+++ b/src/Tesseract.Tests/ResultRendererTests.cs
@@ -42,7 +42,7 @@ public void CanRenderResultsIntoTextFile()
             }

             var expectedOutputFilename = Path.ChangeExtension(resultPath, "txt");
-            Assert.That(File.Exists(expectedOutputFilename), $"Expected a Text file \"{expectedOutputFilename}\" to have been created; but non was found.");
+            Assert.That(File.Exists(expectedOutputFilename), $"Expected a Text file \"{expectedOutputFilename}\" to have been created; but none was found.");
         }

         [Test]
@@ -55,7 +55,7 @@ public void CanRenderResultsIntoPdfFile()
             }

             var expectedOutputFilename = Path.ChangeExtension(resultPath, "pdf");
-            Assert.That(File.Exists(expectedOutputFilename), $"Expected a PDF file \"{expectedOutputFilename}\" to have been created; but non was found.");
+            Assert.That(File.Exists(expectedOutputFilename), $"Expected a PDF file \"{expectedOutputFilename}\" to have been created; but none was found.");
         }

         [Test]
@@ -68,7 +68,7 @@ public void CanRenderMultiplePageDocumentToPdfFile()
             }

             var expectedOutputFilename = Path.ChangeExtension(resultPath, "pdf");
-            Assert.That(File.Exists(expectedOutputFilename), $"Expected a PDF file \"{expectedOutputFilename}\" to have been created; but non was found.");
+            Assert.That(File.Exists(expectedOutputFilename), $"Expected a PDF file \"{expectedOutputFilename}\" to have been created; but none was found.");
         }

         [Test]
@@ -81,7 +81,7 @@ public void CanRenderResultsIntoHOcrFile()
             }

             var expectedOutputFilename = Path.ChangeExtension(resultPath, "hocr");
-            Assert.That(File.Exists(expectedOutputFilename), $"Expected a HOCR file \"{expectedOutputFilename}\" to have been created; but non was found.");
+            Assert.That(File.Exists(expectedOutputFilename), $"Expected a HOCR file \"{expectedOutputFilename}\" to have been created; but none was found.");
         }

         [Test]
@@ -94,7 +94,7 @@ public void CanRenderResultsIntoUnlvFile()
             }

             var expectedOutputFilename = Path.ChangeExtension(resultPath, "unlv");
-            Assert.That(File.Exists(expectedOutputFilename), $"Expected a Unlv file \"{expectedOutputFilename}\" to have been created; but non was found.");
+            Assert.That(File.Exists(expectedOutputFilename), $"Expected a Unlv file \"{expectedOutputFilename}\" to have been created; but none was found.");
         }

         [Test]
@@ -107,7 +107,26 @@ public void CanRenderResultsIntoBoxFile()
             }

             var expectedOutputFilename = Path.ChangeExtension(resultPath, "box");
-            Assert.That(File.Exists(expectedOutputFilename), $"Expected a Box file \"{expectedOutputFilename}\" to have been created; but non was found.");
+            Assert.That(File.Exists(expectedOutputFilename), $"Expected a Box file \"{expectedOutputFilename}\" to have been created; but none was found.");
+        }
+
+        [Test]
+        public void CanRenderResultsIntoMultipleOutputFormats()
+        {
+            var resultPath = TestResultRunFile(@"ResultRenderers\PDF\phototest");
+            List<RenderedFormat> formats = new List<RenderedFormat> { RenderedFormat.HOCR, RenderedFormat.PDF, RenderedFormat.TEXT };
+            using (var renderer = ResultRenderer.CreateRenderers(resultPath, DataPath, formats))
+            {
+                var examplePixPath = this.TestFilePath("Ocr/phototest.tif");
+                ProcessFile(renderer, examplePixPath);
+            }
+
+            var expectedOutputFilename = Path.ChangeExtension(resultPath, "pdf");
+            Assert.That(File.Exists(expectedOutputFilename), $"Expected a PDF file \"{expectedOutputFilename}\" to have been created; but none was found.");
+            expectedOutputFilename = Path.ChangeExtension(resultPath, "hocr");
+            Assert.That(File.Exists(expectedOutputFilename), $"Expected a HOCR file \"{expectedOutputFilename}\" to have been created; but none was found.");
+            expectedOutputFilename = Path.ChangeExtension(resultPath, "txt");
+            Assert.That(File.Exists(expectedOutputFilename), $"Expected a TEXT file \"{expectedOutputFilename}\" to have been created; but none was found.");
         }

         private void ProcessMultipageTiff(IResultRenderer renderer, string filename)
diff --git a/src/Tesseract/ResultRenderer.cs b/src/Tesseract/ResultRenderer.cs
index 028b8bdb..46a5ce90 100644
--- a/src/Tesseract/ResultRenderer.cs
+++ b/src/Tesseract/ResultRenderer.cs
@@ -1,9 +1,18 @@
 using System;
+using System.Collections.Generic;
 using System.Runtime.InteropServices;
 using Tesseract.Internal;

 namespace Tesseract
 {
+    /// <summary>
+    /// Rendered formats supported by Tesseract.
+    /// </summary>
+    public enum RenderedFormat
+    {
+        TEXT, HOCR, PDF, UNLV, BOX
+    }
+
     /// <summary>
     /// Represents a native result renderer (e.g. text, pdf, etc).
     /// </summary>
@@ -16,6 +25,78 @@ public abstract class ResultRenderer : DisposableBase, IResultRenderer
     {
         #region Factory Methods

+        /// <summary>
+        /// Creates renderers for specified output formats.
+        /// </summary>
+        /// <param name="outputbase">Output path passed to every renderer that is created.</param>
+        /// <param name="dataPath">Data path passed to the PDF renderer; other formats ignore it.</param>
+        /// <param name="outputFormats">Formats to render; the first one becomes the returned renderer.</param>
+        /// <returns>
+        /// The renderer for the first format, with a renderer for each remaining format
+        /// inserted into it, or <c>null</c> when no renderer was created (e.g. an empty list).
+        /// </returns>
+        /// <exception cref="ArgumentNullException"><paramref name="outputFormats"/> is <c>null</c>.</exception>
+        public static IResultRenderer CreateRenderers(string outputbase, string dataPath, List<RenderedFormat> outputFormats)
+        {
+            if (outputFormats == null)
+            {
+                throw new ArgumentNullException("outputFormats");
+            }
+
+            ResultRenderer head = null;
+
+            foreach (RenderedFormat format in outputFormats)
+            {
+                ResultRenderer current = CreateRendererForFormat(format, outputbase, dataPath);
+                if (current == null)
+                {
+                    // Not a declared RenderedFormat value: skip it rather than fail.
+                    continue;
+                }
+
+                if (head == null)
+                {
+                    head = current;
+                }
+                else
+                {
+                    // NOTE(review): the managed wrapper for `current` is neither kept nor
+                    // disposed here. Tesseract documents insert() as taking ownership of
+                    // the inserted renderer, so confirm that the wrapper's finalizer cannot
+                    // release a native handle that the chain still owns.
+                    Interop.TessApi.Native.ResultRendererInsert(head.Handle, current.Handle);
+                }
+            }
+
+            return head;
+        }
+
+        /// <summary>
+        /// Creates the renderer for a single output format.
+        /// </summary>
+        /// <param name="format">Format that selects which factory method is called.</param>
+        /// <param name="outputbase">Output path handed to the selected factory method.</param>
+        /// <param name="dataPath">Data path; only forwarded for <see cref="RenderedFormat.PDF"/>.</param>
+        /// <returns>The new renderer, or <c>null</c> when <paramref name="format"/> is not a declared value.</returns>
+        private static ResultRenderer CreateRendererForFormat(RenderedFormat format, string outputbase, string dataPath)
+        {
+            switch (format)
+            {
+                case RenderedFormat.TEXT:
+                    return (ResultRenderer)CreateTextRenderer(outputbase);
+                case RenderedFormat.HOCR:
+                    return (ResultRenderer)CreateHOcrRenderer(outputbase);
+                case RenderedFormat.PDF:
+                    return (ResultRenderer)CreatePdfRenderer(outputbase, dataPath);
+                case RenderedFormat.BOX:
+                    return (ResultRenderer)CreateBoxRenderer(outputbase);
+                case RenderedFormat.UNLV:
+                    return (ResultRenderer)CreateUnlvRenderer(outputbase);
+                default:
+                    return null;
+            }
+        }
+
         /// <summary>
         /// Creates a result renderer that render that generates a searchable
         /// pdf file from tesseract's output.