From a2f5bd10ed3b68ed2544239343ec2bccb4593253 Mon Sep 17 00:00:00 2001 From: Krista Pratico Date: Thu, 15 Oct 2020 17:49:22 -0700 Subject: [PATCH 1/3] removing all the spots where we say US sales receipts now that locale is supported --- sdk/formrecognizer/azure-ai-formrecognizer/README.md | 6 +++--- .../ai/formrecognizer/_form_recognizer_client.py | 12 +++++------- .../aio/_form_recognizer_client_async.py | 12 +++++------- .../async_samples/sample_recognize_receipts_async.py | 2 +- .../sample_recognize_receipts_from_url_async.py | 2 +- .../samples/sample_recognize_receipts.py | 2 +- .../samples/sample_recognize_receipts_from_url.py | 2 +- 7 files changed, 17 insertions(+), 21 deletions(-) diff --git a/sdk/formrecognizer/azure-ai-formrecognizer/README.md b/sdk/formrecognizer/azure-ai-formrecognizer/README.md index 72fc883c7338..71b6e47374a6 100644 --- a/sdk/formrecognizer/azure-ai-formrecognizer/README.md +++ b/sdk/formrecognizer/azure-ai-formrecognizer/README.md @@ -5,7 +5,7 @@ from form documents. It includes the following main functionalities: * Custom models - Recognize field values and table data from forms. These models are trained with your own data, so they're tailored to your forms. * Content API - Recognize text and table structures, along with their bounding box coordinates, from documents. Corresponds to the REST service's Layout API. -* Prebuilt receipt model - Recognize data from USA sales receipts using a prebuilt model. +* Prebuilt receipt model - Recognize data from sales receipts using a prebuilt model. * Prebuilt business card model - Recognize data from business cards using a prebuilt model. [Source code][python-fr-src] | [Package (PyPI)][python-fr-pypi] | [API reference documentation][python-fr-ref-docs]| [Product documentation][python-fr-product-docs] | [Samples][python-fr-samples] @@ -132,7 +132,7 @@ form_recognizer_client = FormRecognizerClient( `FormRecognizerClient` provides operations for: - Recognizing form fields and content using custom models trained to recognize your custom forms. These values are returned in a collection of `RecognizedForm` objects. - - Recognizing common fields from US receipts, using a pre-trained receipt model. These fields and metadata are returned in a collection of `RecognizedForm` objects. + - Recognizing common fields from sales receipts, using a pre-trained receipt model. These fields and metadata are returned in a collection of `RecognizedForm` objects. - Recognizing common fields from business cards, using a pre-trained business card model. These fields and metadata are returned in a collection of `RecognizedForm` objects. - Recognizing form content, including tables, lines and words, without the need to train a model. Form content is returned in a collection of `FormPage` objects. @@ -242,7 +242,7 @@ for cell in table.cells: ``` ### Recognize Receipts -Recognize data from USA sales receipts using a prebuilt model. Receipt fields recognized by the service can be found [here][service_recognize_receipt]. +Recognize data from sales receipts using a prebuilt model. Receipt fields recognized by the service can be found [here][service_recognize_receipt]. ```python from azure.ai.formrecognizer import FormRecognizerClient diff --git a/sdk/formrecognizer/azure-ai-formrecognizer/azure/ai/formrecognizer/_form_recognizer_client.py b/sdk/formrecognizer/azure-ai-formrecognizer/azure/ai/formrecognizer/_form_recognizer_client.py index 802ef6dc43c4..959c2fda7dc1 100644 --- a/sdk/formrecognizer/azure-ai-formrecognizer/azure/ai/formrecognizer/_form_recognizer_client.py +++ b/sdk/formrecognizer/azure-ai-formrecognizer/azure/ai/formrecognizer/_form_recognizer_client.py @@ -71,7 +71,7 @@ def _prebuilt_callback(self, raw_response, _, headers): # pylint: disable=unuse @distributed_trace def begin_recognize_receipts(self, receipt, **kwargs): # type: (Union[bytes, IO[bytes]], Any) -> LROPoller[List[RecognizedForm]] - """Extract field text and semantic values from a given US sales receipt. + """Extract field text and semantic values from a given sales receipt. The input document must be of one of the supported content types - 'application/pdf', 'image/jpeg', 'image/png' or 'image/tiff'. @@ -79,7 +79,6 @@ def begin_recognize_receipts(self, receipt, **kwargs): https://aka.ms/formrecognizer/receiptfields :param receipt: JPEG, PNG, PDF and TIFF type file stream or bytes. - Currently only supports US sales receipts. :type receipt: bytes or IO[bytes] :keyword bool include_field_elements: Whether or not to include field elements such as lines and words in addition to form fields. @@ -106,7 +105,7 @@ def begin_recognize_receipts(self, receipt, **kwargs): :end-before: [END recognize_receipts] :language: python :dedent: 8 - :caption: Recognize US sales receipt fields. + :caption: Recognize sales receipt fields. """ locale = kwargs.pop("locale", None) content_type = kwargs.pop("content_type", None) @@ -137,15 +136,14 @@ def begin_recognize_receipts(self, receipt, **kwargs): @distributed_trace def begin_recognize_receipts_from_url(self, receipt_url, **kwargs): # type: (str, Any) -> LROPoller[List[RecognizedForm]] - """Extract field text and semantic values from a given US sales receipt. + """Extract field text and semantic values from a given sales receipt. The input document must be the location (URL) of the receipt to be analyzed. See fields found on a receipt here: https://aka.ms/formrecognizer/receiptfields :param str receipt_url: The URL of the receipt to analyze. The input must be a valid, encoded URL - of one of the supported formats: JPEG, PNG, PDF and TIFF. Currently only supports - US sales receipts. + of one of the supported formats: JPEG, PNG, PDF and TIFF. :keyword bool include_field_elements: Whether or not to include field elements such as lines and words in addition to form fields. :keyword int polling_interval: Waiting time between two polls for LRO operations @@ -167,7 +165,7 @@ def begin_recognize_receipts_from_url(self, receipt_url, **kwargs): :end-before: [END recognize_receipts_from_url] :language: python :dedent: 8 - :caption: Recognize US sales receipt fields from a URL. + :caption: Recognize sales receipt fields from a URL. """ locale = kwargs.pop("locale", None) include_field_elements = kwargs.pop("include_field_elements", False) diff --git a/sdk/formrecognizer/azure-ai-formrecognizer/azure/ai/formrecognizer/aio/_form_recognizer_client_async.py b/sdk/formrecognizer/azure-ai-formrecognizer/azure/ai/formrecognizer/aio/_form_recognizer_client_async.py index 83127fb23e8c..e31d5dbde8a4 100644 --- a/sdk/formrecognizer/azure-ai-formrecognizer/azure/ai/formrecognizer/aio/_form_recognizer_client_async.py +++ b/sdk/formrecognizer/azure-ai-formrecognizer/azure/ai/formrecognizer/aio/_form_recognizer_client_async.py @@ -71,7 +71,7 @@ async def begin_recognize_receipts( receipt: Union[bytes, IO[bytes]], **kwargs: Any ) -> AsyncLROPoller[List[RecognizedForm]]: - """Extract field text and semantic values from a given US sales receipt. + """Extract field text and semantic values from a given sales receipt. The input document must be of one of the supported content types - 'application/pdf', 'image/jpeg', 'image/png' or 'image/tiff'. @@ -79,7 +79,6 @@ async def begin_recognize_receipts( https://aka.ms/formrecognizer/receiptfields :param receipt: JPEG, PNG, PDF and TIFF type file stream or bytes. - Currently only supports US sales receipts. :type receipt: bytes or IO[bytes] :keyword bool include_field_elements: Whether or not to include field elements such as lines and words in addition to form fields. @@ -106,7 +105,7 @@ async def begin_recognize_receipts( :end-before: [END recognize_receipts_async] :language: python :dedent: 8 - :caption: Recognize US sales receipt fields. + :caption: Recognize sales receipt fields. """ locale = kwargs.pop("locale", None) content_type = kwargs.pop("content_type", None) @@ -140,15 +139,14 @@ async def begin_recognize_receipts_from_url( receipt_url: str, **kwargs: Any ) -> AsyncLROPoller[List[RecognizedForm]]: - """Extract field text and semantic values from a given US sales receipt. + """Extract field text and semantic values from a given sales receipt. The input document must be the location (URL) of the receipt to be analyzed. See fields found on a receipt here: https://aka.ms/formrecognizer/receiptfields :param str receipt_url: The URL of the receipt to analyze. The input must be a valid, encoded URL - of one of the supported formats: JPEG, PNG, PDF and TIFF. Currently only supports - US sales receipts. + of one of the supported formats: JPEG, PNG, PDF and TIFF. :keyword bool include_field_elements: Whether or not to include field elements such as lines and words in addition to form fields. :keyword int polling_interval: Waiting time between two polls for LRO operations @@ -170,7 +168,7 @@ async def begin_recognize_receipts_from_url( :end-before: [END recognize_receipts_from_url_async] :language: python :dedent: 8 - :caption: Recognize US sales receipt fields from a URL. + :caption: Recognize sales receipt fields from a URL. """ locale = kwargs.pop("locale", None) diff --git a/sdk/formrecognizer/azure-ai-formrecognizer/samples/async_samples/sample_recognize_receipts_async.py b/sdk/formrecognizer/azure-ai-formrecognizer/samples/async_samples/sample_recognize_receipts_async.py index 99bb4031e4f3..d61aca783d02 100644 --- a/sdk/formrecognizer/azure-ai-formrecognizer/samples/async_samples/sample_recognize_receipts_async.py +++ b/sdk/formrecognizer/azure-ai-formrecognizer/samples/async_samples/sample_recognize_receipts_async.py @@ -10,7 +10,7 @@ FILE: sample_recognize_receipts_async.py DESCRIPTION: - This sample demonstrates how to recognize and extract common fields from US receipts, + This sample demonstrates how to recognize and extract common fields from receipts, using a pre-trained receipt model. For a suggested approach to extracting information from receipts, see sample_strongly_typed_recognized_form_async.py. diff --git a/sdk/formrecognizer/azure-ai-formrecognizer/samples/async_samples/sample_recognize_receipts_from_url_async.py b/sdk/formrecognizer/azure-ai-formrecognizer/samples/async_samples/sample_recognize_receipts_from_url_async.py index c941db33d218..ed5cfaa10f9d 100644 --- a/sdk/formrecognizer/azure-ai-formrecognizer/samples/async_samples/sample_recognize_receipts_from_url_async.py +++ b/sdk/formrecognizer/azure-ai-formrecognizer/samples/async_samples/sample_recognize_receipts_from_url_async.py @@ -10,7 +10,7 @@ FILE: sample_recognize_receipts_from_url_async.py DESCRIPTION: - This sample demonstrates how to recognize and extract common fields from a US receipt URL, + This sample demonstrates how to recognize and extract common fields from a receipt URL, using a pre-trained receipt model. For a suggested approach to extracting information from receipts, see sample_strongly_typed_recognized_form_async.py. diff --git a/sdk/formrecognizer/azure-ai-formrecognizer/samples/sample_recognize_receipts.py b/sdk/formrecognizer/azure-ai-formrecognizer/samples/sample_recognize_receipts.py index 12778847ab7d..7918e7b7fefe 100644 --- a/sdk/formrecognizer/azure-ai-formrecognizer/samples/sample_recognize_receipts.py +++ b/sdk/formrecognizer/azure-ai-formrecognizer/samples/sample_recognize_receipts.py @@ -10,7 +10,7 @@ FILE: sample_recognize_receipts.py DESCRIPTION: - This sample demonstrates how to recognize and extract common fields from US receipts, + This sample demonstrates how to recognize and extract common fields from receipts, using a pre-trained receipt model. For a suggested approach to extracting information from receipts, see sample_strongly_typed_recognized_form.py. diff --git a/sdk/formrecognizer/azure-ai-formrecognizer/samples/sample_recognize_receipts_from_url.py b/sdk/formrecognizer/azure-ai-formrecognizer/samples/sample_recognize_receipts_from_url.py index f9437a3bde95..a30efc8f33e3 100644 --- a/sdk/formrecognizer/azure-ai-formrecognizer/samples/sample_recognize_receipts_from_url.py +++ b/sdk/formrecognizer/azure-ai-formrecognizer/samples/sample_recognize_receipts_from_url.py @@ -10,7 +10,7 @@ FILE: sample_recognize_receipts_from_url.py DESCRIPTION: - This sample demonstrates how to recognize and extract common fields from a US receipt URL, + This sample demonstrates how to recognize and extract common fields from a receipt URL, using a pre-trained receipt model. For a suggested approach to extracting information from receipts, see sample_strongly_typed_recognized_form.py. From 1a4a366ad3077113c5056b9b4a436cb500fede52 Mon Sep 17 00:00:00 2001 From: Krista Pratico Date: Thu, 15 Oct 2020 19:29:44 -0700 Subject: [PATCH 2/3] set page number on ContactNames --- .../ai/formrecognizer/_form_recognizer_client.py | 12 ++++++++---- .../azure/ai/formrecognizer/_models.py | 15 ++++++++++----- .../azure/ai/formrecognizer/_response_handlers.py | 4 ++-- .../aio/_form_recognizer_client_async.py | 12 ++++++++---- .../tests/test_business_card.py | 6 ++++-- .../tests/test_business_card_async.py | 6 ++++-- .../tests/test_business_card_from_url.py | 6 ++++-- .../tests/test_business_card_from_url_async.py | 6 ++++-- 8 files changed, 44 insertions(+), 23 deletions(-) diff --git a/sdk/formrecognizer/azure-ai-formrecognizer/azure/ai/formrecognizer/_form_recognizer_client.py b/sdk/formrecognizer/azure-ai-formrecognizer/azure/ai/formrecognizer/_form_recognizer_client.py index 959c2fda7dc1..540b793642e2 100644 --- a/sdk/formrecognizer/azure-ai-formrecognizer/azure/ai/formrecognizer/_form_recognizer_client.py +++ b/sdk/formrecognizer/azure-ai-formrecognizer/azure/ai/formrecognizer/_form_recognizer_client.py @@ -64,9 +64,9 @@ class FormRecognizerClient(FormRecognizerClientBase): :caption: Creating the FormRecognizerClient with a token credential. """ - def _prebuilt_callback(self, raw_response, _, headers): # pylint: disable=unused-argument + def _prebuilt_callback(self, raw_response, _, headers, **kwargs): # pylint: disable=unused-argument analyze_result = self._deserialize(self._generated_models.AnalyzeOperationResult, raw_response) - return prepare_prebuilt_models(analyze_result) + return prepare_prebuilt_models(analyze_result, **kwargs) @distributed_trace def begin_recognize_receipts(self, receipt, **kwargs): @@ -232,7 +232,9 @@ def begin_recognize_business_cards( file_stream=business_card, content_type=content_type, include_text_details=include_field_elements, - cls=kwargs.pop("cls", self._prebuilt_callback), + cls=kwargs.pop("cls", lambda pipeline_response, _, response_headers: self._prebuilt_callback( + pipeline_response, _, response_headers, business_card=True + )), polling=True, **kwargs ) @@ -277,7 +279,9 @@ def begin_recognize_business_cards_from_url( return self._client.begin_analyze_business_card_async( # type: ignore file_stream={"source": business_card_url}, include_text_details=include_field_elements, - cls=kwargs.pop("cls", self._prebuilt_callback), + cls=kwargs.pop("cls", lambda pipeline_response, _, response_headers: self._prebuilt_callback( + pipeline_response, _, response_headers, business_card=True + )), polling=True, **kwargs ) diff --git a/sdk/formrecognizer/azure-ai-formrecognizer/azure/ai/formrecognizer/_models.py b/sdk/formrecognizer/azure-ai-formrecognizer/azure/ai/formrecognizer/_models.py index f68228b70c19..a7a05170b5bb 100644 --- a/sdk/formrecognizer/azure-ai-formrecognizer/azure/ai/formrecognizer/_models.py +++ b/sdk/formrecognizer/azure-ai-formrecognizer/azure/ai/formrecognizer/_models.py @@ -35,7 +35,7 @@ def resolve_element(element, read_result): raise ValueError("Failed to parse element reference.") -def get_field_value(field, value, read_result): # pylint: disable=too-many-return-statements +def get_field_value(field, value, read_result, **kwargs): # pylint: disable=too-many-return-statements if value is None: return value if value.type == "string": @@ -51,13 +51,18 @@ def get_field_value(field, value, read_result): # pylint: disable=too-many-retu if value.type == "time": return value.value_time if value.type == "array": + # business cards pre-built model doesn't return a page number for the `ContactNames` field + if "business_card" in kwargs and field == "ContactNames": + for val in value.value_array: + page_number = val.value_object["FirstName"].page + val.page = page_number return [ - FormField._from_generated(field, value, read_result) + FormField._from_generated(field, value, read_result, **kwargs) for value in value.value_array ] if value.type == "object": return { - key: FormField._from_generated(key, value, read_result) + key: FormField._from_generated(key, value, read_result, **kwargs) for key, value in value.value_object.items() } return None @@ -238,12 +243,12 @@ def __init__(self, **kwargs): self.confidence = kwargs.get("confidence", None) @classmethod - def _from_generated(cls, field, value, read_result): + def _from_generated(cls, field, value, read_result, **kwargs): return cls( value_type=adjust_value_type(value.type) if value else None, label_data=None, # not returned with receipt/supervised value_data=FieldData._from_generated(value, read_result), - value=get_field_value(field, value, read_result), + value=get_field_value(field, value, read_result, **kwargs), name=field, confidence=adjust_confidence(value.confidence) if value else None, ) diff --git a/sdk/formrecognizer/azure-ai-formrecognizer/azure/ai/formrecognizer/_response_handlers.py b/sdk/formrecognizer/azure-ai-formrecognizer/azure/ai/formrecognizer/_response_handlers.py index da9b8140ac12..5bbb9ed0cd4c 100644 --- a/sdk/formrecognizer/azure-ai-formrecognizer/azure/ai/formrecognizer/_response_handlers.py +++ b/sdk/formrecognizer/azure-ai-formrecognizer/azure/ai/formrecognizer/_response_handlers.py @@ -18,7 +18,7 @@ ) -def prepare_prebuilt_models(response): +def prepare_prebuilt_models(response, **kwargs): prebuilt_models = [] read_result = response.analyze_result.read_results document_result = response.analyze_result.document_results @@ -32,7 +32,7 @@ def prepare_prebuilt_models(response): pages=form_page[page.page_range[0]-1:page.page_range[1]], form_type=page.doc_type, fields={ - key: FormField._from_generated(key, value, read_result) + key: FormField._from_generated(key, value, read_result, **kwargs) for key, value in page.fields.items() } if page.fields else None, form_type_confidence=page.doc_type_confidence, diff --git a/sdk/formrecognizer/azure-ai-formrecognizer/azure/ai/formrecognizer/aio/_form_recognizer_client_async.py b/sdk/formrecognizer/azure-ai-formrecognizer/azure/ai/formrecognizer/aio/_form_recognizer_client_async.py index e31d5dbde8a4..6ff00141c8ce 100644 --- a/sdk/formrecognizer/azure-ai-formrecognizer/azure/ai/formrecognizer/aio/_form_recognizer_client_async.py +++ b/sdk/formrecognizer/azure-ai-formrecognizer/azure/ai/formrecognizer/aio/_form_recognizer_client_async.py @@ -61,9 +61,9 @@ class FormRecognizerClient(FormRecognizerClientBaseAsync): :caption: Creating the FormRecognizerClient with a token credential. """ - def _prebuilt_callback(self, raw_response, _, headers): # pylint: disable=unused-argument + def _prebuilt_callback(self, raw_response, _, headers, **kwargs): # pylint: disable=unused-argument analyze_result = self._deserialize(self._generated_models.AnalyzeOperationResult, raw_response) - return prepare_prebuilt_models(analyze_result) + return prepare_prebuilt_models(analyze_result, **kwargs) @distributed_trace_async async def begin_recognize_receipts( @@ -235,7 +235,9 @@ async def begin_recognize_business_cards( file_stream=business_card, content_type=content_type, include_text_details=include_field_elements, - cls=kwargs.pop("cls", self._prebuilt_callback), + cls=kwargs.pop("cls", lambda pipeline_response, _, response_headers: self._prebuilt_callback( + pipeline_response, _, response_headers, business_card=True + )), polling=True, **kwargs ) @@ -278,7 +280,9 @@ async def begin_recognize_business_cards_from_url( return await self._client.begin_analyze_business_card_async( # type: ignore file_stream={"source": business_card_url}, include_text_details=include_field_elements, - cls=kwargs.pop("cls", self._prebuilt_callback), + cls=kwargs.pop("cls", lambda pipeline_response, _, response_headers: self._prebuilt_callback( + pipeline_response, _, response_headers, business_card=True + )), polling=True, **kwargs ) diff --git a/sdk/formrecognizer/azure-ai-formrecognizer/tests/test_business_card.py b/sdk/formrecognizer/azure-ai-formrecognizer/tests/test_business_card.py index eb151b0fde2b..bf32204a4889 100644 --- a/sdk/formrecognizer/azure-ai-formrecognizer/tests/test_business_card.py +++ b/sdk/formrecognizer/azure-ai-formrecognizer/tests/test_business_card.py @@ -242,6 +242,7 @@ def test_business_card_jpg(self, client): business_card = result[0] # check dict values self.assertEqual(len(business_card.fields.get("ContactNames").value), 1) + self.assertEqual(business_card.fields.get("ContactNames").value[0].value_data.page_number, 1) self.assertEqual(business_card.fields.get("ContactNames").value[0].value['FirstName'].value, 'Avery') self.assertEqual(business_card.fields.get("ContactNames").value[0].value['LastName'].value, 'Smith') @@ -285,6 +286,7 @@ def test_business_card_png(self, client): business_card = result[0] # check dict values self.assertEqual(len(business_card.fields.get("ContactNames").value), 1) + self.assertEqual(business_card.fields.get("ContactNames").value[0].value_data.page_number, 1) self.assertEqual(business_card.fields.get("ContactNames").value[0].value['FirstName'].value, 'Avery') self.assertEqual(business_card.fields.get("ContactNames").value[0].value['LastName'].value, 'Smith') @@ -330,8 +332,8 @@ def test_business_card_jpg_include_field_elements(self, client): self.assertFormPagesHasValues(business_card.pages) for name, field in business_card.fields.items(): - if field.value_type not in ["list", "dictionary"]: - self.assertFieldElementsHasValues(field.value_data.field_elements, receipt.page_range.first_page_number) + for f in field.value: + self.assertFieldElementsHasValues(f.value_data.field_elements, business_card.page_range.first_page_number) @GlobalFormRecognizerAccountPreparer() @GlobalClientPreparer() diff --git a/sdk/formrecognizer/azure-ai-formrecognizer/tests/test_business_card_async.py b/sdk/formrecognizer/azure-ai-formrecognizer/tests/test_business_card_async.py index e2fdab1e34eb..04fcb17f29ac 100644 --- a/sdk/formrecognizer/azure-ai-formrecognizer/tests/test_business_card_async.py +++ b/sdk/formrecognizer/azure-ai-formrecognizer/tests/test_business_card_async.py @@ -258,6 +258,7 @@ async def test_business_card_jpg(self, client): business_card = result[0] # check dict values self.assertEqual(len(business_card.fields.get("ContactNames").value), 1) + self.assertEqual(business_card.fields.get("ContactNames").value[0].value_data.page_number, 1) self.assertEqual(business_card.fields.get("ContactNames").value[0].value['FirstName'].value, 'Avery') self.assertEqual(business_card.fields.get("ContactNames").value[0].value['LastName'].value, 'Smith') @@ -304,6 +305,7 @@ async def test_business_card_png(self, client): business_card = result[0] # check dict values self.assertEqual(len(business_card.fields.get("ContactNames").value), 1) + self.assertEqual(business_card.fields.get("ContactNames").value[0].value_data.page_number, 1) self.assertEqual(business_card.fields.get("ContactNames").value[0].value['FirstName'].value, 'Avery') self.assertEqual(business_card.fields.get("ContactNames").value[0].value['LastName'].value, 'Smith') @@ -349,8 +351,8 @@ async def test_business_card_jpg_include_field_elements(self, client): self.assertFormPagesHasValues(business_card.pages) for name, field in business_card.fields.items(): - if field.value_type not in ["list", "dictionary"]: - self.assertFieldElementsHasValues(field.value_data.field_elements, receipt.page_range.first_page_number) + for f in field.value: + self.assertFieldElementsHasValues(f.value_data.field_elements, business_card.page_range.first_page_number) @GlobalFormRecognizerAccountPreparer() @GlobalClientPreparer() diff --git a/sdk/formrecognizer/azure-ai-formrecognizer/tests/test_business_card_from_url.py b/sdk/formrecognizer/azure-ai-formrecognizer/tests/test_business_card_from_url.py index 5761dad143b9..b0f0c52715d4 100644 --- a/sdk/formrecognizer/azure-ai-formrecognizer/tests/test_business_card_from_url.py +++ b/sdk/formrecognizer/azure-ai-formrecognizer/tests/test_business_card_from_url.py @@ -172,6 +172,7 @@ def test_business_card_jpg(self, client): business_card = result[0] # check dict values self.assertEqual(len(business_card.fields.get("ContactNames").value), 1) + self.assertEqual(business_card.fields.get("ContactNames").value[0].value_data.page_number, 1) self.assertEqual(business_card.fields.get("ContactNames").value[0].value['FirstName'].value, 'Avery') self.assertEqual(business_card.fields.get("ContactNames").value[0].value['LastName'].value, 'Smith') @@ -213,6 +214,7 @@ def test_business_card_png(self, client): business_card = result[0] # check dict values self.assertEqual(len(business_card.fields.get("ContactNames").value), 1) + self.assertEqual(business_card.fields.get("ContactNames").value[0].value_data.page_number, 1) self.assertEqual(business_card.fields.get("ContactNames").value[0].value['FirstName'].value, 'Avery') self.assertEqual(business_card.fields.get("ContactNames").value[0].value['LastName'].value, 'Smith') @@ -256,8 +258,8 @@ def test_business_card_jpg_include_field_elements(self, client): self.assertFormPagesHasValues(business_card.pages) for name, field in business_card.fields.items(): - if field.value_type not in ["list", "dictionary"]: - self.assertFieldElementsHasValues(field.value_data.field_elements, receipt.page_range.first_page_number) + for f in field.value: + self.assertFieldElementsHasValues(f.value_data.field_elements, business_card.page_range.first_page_number) @GlobalFormRecognizerAccountPreparer() @GlobalClientPreparer() diff --git a/sdk/formrecognizer/azure-ai-formrecognizer/tests/test_business_card_from_url_async.py b/sdk/formrecognizer/azure-ai-formrecognizer/tests/test_business_card_from_url_async.py index bca47effc513..11e1df4d382b 100644 --- a/sdk/formrecognizer/azure-ai-formrecognizer/tests/test_business_card_from_url_async.py +++ b/sdk/formrecognizer/azure-ai-formrecognizer/tests/test_business_card_from_url_async.py @@ -184,6 +184,7 @@ async def test_business_card_jpg(self, client): business_card = result[0] # check dict values self.assertEqual(len(business_card.fields.get("ContactNames").value), 1) + self.assertEqual(business_card.fields.get("ContactNames").value[0].value_data.page_number, 1) self.assertEqual(business_card.fields.get("ContactNames").value[0].value['FirstName'].value, 'Avery') self.assertEqual(business_card.fields.get("ContactNames").value[0].value['LastName'].value, 'Smith') @@ -226,6 +227,7 @@ async def test_business_card_png(self, client): business_card = result[0] # check dict values self.assertEqual(len(business_card.fields.get("ContactNames").value), 1) + self.assertEqual(business_card.fields.get("ContactNames").value[0].value_data.page_number, 1) self.assertEqual(business_card.fields.get("ContactNames").value[0].value['FirstName'].value, 'Avery') self.assertEqual(business_card.fields.get("ContactNames").value[0].value['LastName'].value, 'Smith') @@ -270,8 +272,8 @@ async def test_business_card_jpg_include_field_elements(self, client): self.assertFormPagesHasValues(business_card.pages) for name, field in business_card.fields.items(): - if field.value_type not in ["list", "dictionary"]: - self.assertFieldElementsHasValues(field.value_data.field_elements, receipt.page_range.first_page_number) + for f in field.value: + self.assertFieldElementsHasValues(f.value_data.field_elements, business_card.page_range.first_page_number) @GlobalFormRecognizerAccountPreparer() @GlobalClientPreparer() From 24ee5fa8ab5ba7afd05199d03f8551b1cbc6b7ee Mon Sep 17 00:00:00 2001 From: Krista Pratico Date: Thu, 15 Oct 2020 19:39:17 -0700 Subject: [PATCH 3/3] move to helper function --- .../azure/ai/formrecognizer/_helpers.py | 12 ++++++++++++ .../azure/ai/formrecognizer/_models.py | 5 ++--- 2 files changed, 14 insertions(+), 3 deletions(-) diff --git a/sdk/formrecognizer/azure-ai-formrecognizer/azure/ai/formrecognizer/_helpers.py b/sdk/formrecognizer/azure-ai-formrecognizer/azure/ai/formrecognizer/_helpers.py index 4a2236d3a5fd..32500f619639 100644 --- a/sdk/formrecognizer/azure-ai-formrecognizer/azure/ai/formrecognizer/_helpers.py +++ b/sdk/formrecognizer/azure-ai-formrecognizer/azure/ai/formrecognizer/_helpers.py @@ -74,6 +74,18 @@ def adjust_text_angle(text_angle): return text_angle +def adjust_page_number(value): + """Adjusts the page number on the business card field + `ContactNames` to be set to the page number value found on `FirstName` + """ + for val in value.value_array: + if val.value_object.get("FirstName", None) and val.value_object.get("LastName", None): + if val.value_object["FirstName"].page == val.value_object["LastName"].page: + page_number = val.value_object["FirstName"].page + val.page = page_number + return value + + def get_authentication_policy(credential): authentication_policy = None if credential is None: diff --git a/sdk/formrecognizer/azure-ai-formrecognizer/azure/ai/formrecognizer/_models.py b/sdk/formrecognizer/azure-ai-formrecognizer/azure/ai/formrecognizer/_models.py index a7a05170b5bb..0e8e293521f4 100644 --- a/sdk/formrecognizer/azure-ai-formrecognizer/azure/ai/formrecognizer/_models.py +++ b/sdk/formrecognizer/azure-ai-formrecognizer/azure/ai/formrecognizer/_models.py @@ -12,6 +12,7 @@ adjust_value_type, adjust_text_angle, adjust_confidence, + adjust_page_number, get_element ) @@ -53,9 +54,7 @@ def get_field_value(field, value, read_result, **kwargs): # pylint: disable=too if value.type == "array": # business cards pre-built model doesn't return a page number for the `ContactNames` field if "business_card" in kwargs and field == "ContactNames": - for val in value.value_array: - page_number = val.value_object["FirstName"].page - val.page = page_number + value = adjust_page_number(value) return [ FormField._from_generated(field, value, read_result, **kwargs) for value in value.value_array