Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

[FR] Add Language to Recognize Content #16889

Merged
merged 1 commit into from
Nov 11, 2020
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
1 change: 1 addition & 0 deletions sdk/formrecognizer/Azure.AI.FormRecognizer/CHANGELOG.md
Original file line number Diff line number Diff line change
Expand Up @@ -9,6 +9,7 @@
- Added support for pre-built business card recognition.
- Added support for pre-built invoices recognition.
- Added support for providing locale info when recognizing receipts and business cards. Supported locales include EN-US, EN-AU, EN-CA, EN-GB, EN-IN.
- Added support for providing the document language in `StartRecognizeContent` when recognizing a form.
- Added support to train and recognize custom forms with selection marks such as check boxes and radio buttons. This functionality is only available in train with labels scenarios.
- Added support to `StartRecognizeContent` to recognize selection marks such as check boxes and radio buttons.
- Added ability to create a composed model from the `FormTrainingClient` by calling method `StartCreateComposedModel`.
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -73,6 +73,7 @@ public partial class RecognizeContentOptions
{
public RecognizeContentOptions() { }
public Azure.AI.FormRecognizer.FormContentType? ContentType { get { throw null; } set { } }
public string Language { get { throw null; } set { } }
public System.Collections.Generic.IEnumerable<string> Pages { get { throw null; } set { } }
}
public partial class RecognizeCustomFormsOptions
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -140,7 +140,7 @@ public virtual RecognizeContentOperation StartRecognizeContent(Stream form, Reco
Response response = ServiceClient.AnalyzeLayoutAsync(
formContentType.ConvertToContentType1(),
form,
null,
recognizeContentOptions.Language == null ? (Language?)null : recognizeContentOptions.Language,
recognizeContentOptions.Pages,
cancellationToken);
string location = ClientCommon.GetResponseHeader(response.Headers, Constants.OperationLocationHeader);
Expand Down Expand Up @@ -178,7 +178,7 @@ public virtual async Task<RecognizeContentOperation> StartRecognizeContentAsync(
Response response = await ServiceClient.AnalyzeLayoutAsyncAsync(
formContentType.ConvertToContentType1(),
form,
null,
recognizeContentOptions.Language == null ? (Language?)null : recognizeContentOptions.Language,
recognizeContentOptions.Pages,
cancellationToken).ConfigureAwait(false);
string location = ClientCommon.GetResponseHeader(response.Headers, Constants.OperationLocationHeader);
Expand Down Expand Up @@ -213,7 +213,7 @@ public virtual RecognizeContentOperation StartRecognizeContentFromUri(Uri formUr
{
SourcePath sourcePath = new SourcePath() { Source = formUri.AbsoluteUri };
Response response = ServiceClient.AnalyzeLayoutAsync(
null,
recognizeContentOptions.Language == null ? (Language?)null : recognizeContentOptions.Language,
recognizeContentOptions.Pages,
sourcePath,
cancellationToken);
Expand Down Expand Up @@ -249,7 +249,7 @@ public virtual async Task<RecognizeContentOperation> StartRecognizeContentFromUr
{
SourcePath sourcePath = new SourcePath() { Source = formUri.AbsoluteUri };
Response response = await ServiceClient.AnalyzeLayoutAsyncAsync(
null,
recognizeContentOptions.Language == null ? (Language?)null : recognizeContentOptions.Language,
recognizeContentOptions.Pages,
sourcePath,
cancellationToken).ConfigureAwait(false);
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -24,6 +24,15 @@ public RecognizeContentOptions()
/// </summary>
public FormContentType? ContentType { get; set; } = null;

/// <summary>
/// The BCP-47 language code of the text in the document.
/// Recognize Content supports auto language identification and multi-language documents, so only
/// provide a language code if you would like to force the document to be processed as
/// that specific language.
/// <para>See supported language codes <a href="https://docs.microsoft.com/azure/cognitive-services/form-recognizer/language-support">here</a>.</para>
/// </summary>
public string Language { get; set; }

/// <summary>
/// Custom page numbers for multi-page documents(PDF/TIFF). Input the number of the
/// pages you want to get OCR result. For a range of pages, use a hyphen.
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -489,6 +489,35 @@ public async Task StartRecognizeContentWithMultiplePageArgument(string page1, st
Assert.AreEqual(expected, formPages.Count);
}

/// <summary>
/// Starts a Recognize Content operation from a URI with the given <paramref name="language"/>
/// set on the options, waits for it to complete, and validates the single page it returns.
/// </summary>
/// <param name="language">The value assigned to <see cref="RecognizeContentOptions.Language"/>.</param>
[Test]
[TestCase("en")]
[TestCase("")]
public async Task StartRecognizeContentWithLanguage(string language)
{
    var recognizerClient = CreateFormRecognizerClient();
    var formUri = FormRecognizerTestEnvironment.CreateUri(TestFile.Form1);
    var contentOptions = new RecognizeContentOptions() { Language = language };

    RecognizeContentOperation recognizeOperation =
        await recognizerClient.StartRecognizeContentFromUriAsync(formUri, contentOptions);
    await recognizeOperation.WaitForCompletionAsync(PollingInterval);

    Assert.IsTrue(recognizeOperation.HasValue);

    // Single() also enforces that exactly one page came back.
    var onlyPage = recognizeOperation.Value.Single();
    ValidateFormPage(onlyPage, includeFieldElements: true, expectedPageNumber: 1);
}

/// <summary>
/// Verifies that starting a Recognize Content operation with a language value of
/// "not language" fails with a <see cref="RequestFailedException"/> whose error code
/// is "NotSupportedLanguage".
/// </summary>
[Test]
public void StartRecognizeContentWithNoSupporttedLanguage()
{
    var recognizerClient = CreateFormRecognizerClient();
    var formUri = FormRecognizerTestEnvironment.CreateUri(TestFile.Form1);

    RequestFailedException failure = Assert.ThrowsAsync<RequestFailedException>(async () =>
    {
        var contentOptions = new RecognizeContentOptions() { Language = "not language" };
        await recognizerClient.StartRecognizeContentFromUriAsync(formUri, contentOptions);
    });

    Assert.AreEqual("NotSupportedLanguage", failure.ErrorCode);
}

#endregion

#region StartRecognizeReceipts
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -112,6 +112,30 @@ public async Task StartRecognizeContentFromUriEncodesBlankSpaces()
}
}

/// <summary>
/// Verifies that the value of <see cref="RecognizeContentOptions.Language"/> is sent as the
/// <c>language</c> query parameter of the request issued by <c>StartRecognizeContentAsync</c>.
/// </summary>
/// <param name="language">The language value set on the options and expected in the query string.</param>
[Test]
[TestCase("")]
[TestCase("en")]
public async Task StartRecognizeContentSendsUserSpecifiedLanguage(string language)
{
    // A 202 response carrying the operation-location header is what the client reads
    // to build the returned operation.
    var mockResponse = new MockResponse(202);
    mockResponse.AddHeader(new HttpHeader(Constants.OperationLocationHeader, "host/layout/analyzeResults/00000000000000000000000000000000"));

    // Two responses are queued; only a single request is expected and asserted below.
    var mockTransport = new MockTransport(new[] { mockResponse, mockResponse });
    var options = new FormRecognizerClientOptions() { Transport = mockTransport };
    var client = CreateInstrumentedClient(options);

    using var stream = FormRecognizerTestEnvironment.CreateStream(TestFile.Form1);
    var recognizeOptions = new RecognizeContentOptions { Language = language };
    await client.StartRecognizeContentAsync(stream, recognizeOptions);

    var requestUriQuery = mockTransport.Requests.Single().Uri.Query;

    // Ordinal search: the parameter name is a protocol token, not linguistic text.
    var languageQuery = "language=";
    var index = requestUriQuery.IndexOf(languageQuery, System.StringComparison.Ordinal);

    // Fail with a clear message instead of a confusing Substring result/exception
    // when the parameter is missing from the request.
    Assert.GreaterOrEqual(index, 0, $"The 'language' query parameter was not found in '{requestUriQuery}'.");

    // Read the value up to the next '&' (or the end of the query) so the check does not
    // depend on 'language' being the last query parameter.
    var valueStart = index + languageQuery.Length;
    var valueEnd = requestUriQuery.IndexOf('&', valueStart);
    if (valueEnd < 0)
    {
        valueEnd = requestUriQuery.Length;
    }

    Assert.AreEqual(language, requestUriQuery.Substring(valueStart, valueEnd - valueStart));
}

#endregion

#region Recognize Receipt
Expand Down
Loading