From 4ff9bb5db2ac433caa1b754137008ba2aa41987d Mon Sep 17 00:00:00 2001 From: Abby Hartman Date: Thu, 4 Feb 2021 10:16:25 -0800 Subject: [PATCH 1/6] Exposed the length property in classes impacted by exposing --- .../azure/ai/textanalytics/_models.py | 112 ++++++++++++++---- 1 file changed, 86 insertions(+), 26 deletions(-) diff --git a/sdk/textanalytics/azure-ai-textanalytics/azure/ai/textanalytics/_models.py b/sdk/textanalytics/azure-ai-textanalytics/azure/ai/textanalytics/_models.py index 9fc9097f3119..cef8ffe0e4ee 100644 --- a/sdk/textanalytics/azure-ai-textanalytics/azure/ai/textanalytics/_models.py +++ b/sdk/textanalytics/azure-ai-textanalytics/azure/ai/textanalytics/_models.py @@ -289,8 +289,13 @@ class CategorizedEntity(DictMixin): :vartype category: str :ivar subcategory: Entity subcategory, such as Age/Year/TimeRange etc :vartype subcategory: str + :ivar int length: The entity text length. This value depends on the value of the + `string_index_type` parameter set in the original request, which is UnicodeCodePoints + by default. Only returned for API versions v3.1-preview and up. :ivar int offset: The entity text offset from the start of the document. - Returned in unicode code points. Only returned for API versions v3.1-preview and up. + The value depends on the value of the `string_index_type` parameter + set in the original request, which is UnicodeCodePoints by default. Only returned for + API versions v3.1-preview and up. :ivar confidence_score: Confidence score between 0 and 1 of the extracted entity. 
:vartype confidence_score: float @@ -302,30 +307,35 @@ def __init__(self, **kwargs): self.text = kwargs.get('text', None) self.category = kwargs.get('category', None) self.subcategory = kwargs.get('subcategory', None) + self.length = kwargs.get('length', None) self.offset = kwargs.get('offset', None) self.confidence_score = kwargs.get('confidence_score', None) @classmethod def _from_generated(cls, entity): offset = entity.offset + length = entity.length if isinstance(entity, _v3_0_models.Entity): # we do not return offset for v3.0 since # the correct encoding was not introduced for v3.0 offset = None + length = None return cls( text=entity.text, category=entity.category, subcategory=entity.subcategory, + length=length, offset=offset, confidence_score=entity.confidence_score, ) def __repr__(self): return "CategorizedEntity(text={}, category={}, subcategory={}, "\ - "offset={}, confidence_score={})".format( + "length={}, offset={}, confidence_score={})".format( self.text, self.category, self.subcategory, + self.length, self.offset, self.confidence_score )[:1024] @@ -340,8 +350,12 @@ class PiiEntity(DictMixin): Identification/Social Security Number/Phone Number, etc. :ivar str subcategory: Entity subcategory, such as Credit Card/EU Phone number/ABA Routing Numbers, etc. + :ivar int length: The PII entity text length. This value depends on the value + of the `string_index_type` parameter specified in the original request, which + is UnicodeCodePoints by default. :ivar int offset: The PII entity text offset from the start of the document. - Returned in unicode code points. + This value depends on the value of the `string_index_type` parameter specified + in the original request, which is UnicodeCodePoints by default. :ivar float confidence_score: Confidence score between 0 and 1 of the extracted entity. 
""" @@ -350,6 +364,7 @@ def __init__(self, **kwargs): self.text = kwargs.get('text', None) self.category = kwargs.get('category', None) self.subcategory = kwargs.get('subcategory', None) + self.length = kwargs.get('length', None) self.offset = kwargs.get('offset', None) self.confidence_score = kwargs.get('confidence_score', None) @@ -359,17 +374,19 @@ def _from_generated(cls, entity): text=entity.text, category=entity.category, subcategory=entity.subcategory, + length=entity.length, offset=entity.offset, confidence_score=entity.confidence_score, ) def __repr__(self): return ( - "PiiEntity(text={}, category={}, subcategory={}, offset={}, "\ - "confidence_score={})".format( + "PiiEntity(text={}, category={}, subcategory={}, length={}, "\ + "offset={}, confidence_score={})".format( self.text, self.category, self.subcategory, + self.length, self.offset, self.confidence_score )[:1024] @@ -379,20 +396,26 @@ def __repr__(self): class HealthcareEntity(DictMixin): """HealthcareEntity contains information about a Healthcare entity found in text. - :ivar str text: Entity text as appears in the request. - :ivar str category: Entity category, such as Dosage or MedicationName, etc. - :ivar str subcategory: Entity subcategory. # TODO: add subcategory examples - :ivar int offset: The Healthcare entity text offset from the start of the document. - :ivar float confidence_score: Confidence score between 0 and 1 of the extracted + :ivar str text: Entity text as appears in the request. + :ivar str category: Entity category, such as Dosage or MedicationName, etc. + :ivar str subcategory: Entity subcategory. # TODO: add subcategory examples + :ivar int length: The entity text length. This value depends on the value + of the `string_index_type` parameter specified in the original request, which is + UnicodeCodePoints by default. + :ivar int offset: The entity text offset from the start of the document. 
+ This value depends on the value of the `string_index_type` parameter specified + in the original request, which is UnicodeCodePoints by default. + :ivar float confidence_score: Confidence score between 0 and 1 of the extracted entity. - :ivar links: A collection of entity references in known data sources. - :vartype links: list[~azure.ai.textanalytics.HealthcareEntityLink] + :ivar links: A collection of entity references in known data sources. + :vartype links: list[~azure.ai.textanalytics.HealthcareEntityLink] """ def __init__(self, **kwargs): self.text = kwargs.get("text", None) self.category = kwargs.get("category", None) self.subcategory = kwargs.get("subcategory", None) + self.length = kwargs.get("length", None) self.offset = kwargs.get("offset", None) self.confidence_score = kwargs.get("confidence_score", None) self.links = kwargs.get("links", []) @@ -403,6 +426,7 @@ def _from_generated(cls, healthcare_entity): text=healthcare_entity.text, category=healthcare_entity.category, subcategory=healthcare_entity.subcategory, + length=healthcare_entity.length, offset=healthcare_entity.offset, confidence_score=healthcare_entity.confidence_score, links=[ @@ -411,11 +435,12 @@ def _from_generated(cls, healthcare_entity): ) def __repr__(self): - return "HealthcareEntity(text={}, category={}, subcategory={}, offset={}, confidence_score={},\ - links={})".format( + return "HealthcareEntity(text={}, category={}, subcategory={}, length={}, offset={}, \ + confidence_score={}, links={})".format( self.text, self.category, self.subcategory, + self.length, self.offset, self.confidence_score, repr(self.links) @@ -835,8 +860,13 @@ class LinkedEntityMatch(DictMixin): returned. :vartype confidence_score: float :ivar text: Entity text as appears in the request. + :ivar int length: The linked entity match text length. This value depends on the value of the + `string_index_type` parameter set in the original request, which is UnicodeCodePoints by default. 
+ Only returned for API versions v3.1-preview and up. :ivar int offset: The linked entity match text offset from the start of the document. - Returned in unicode code points. Only returned for API versions v3.1-preview and up. + The value depends on the value of the `string_index_type` parameter + set in the original request, which is UnicodeCodePoints by default. + Only returned for API versions v3.1-preview and up. :vartype text: str .. versionadded:: v3.1-preview The *offset* property. @@ -845,24 +875,28 @@ class LinkedEntityMatch(DictMixin): def __init__(self, **kwargs): self.confidence_score = kwargs.get("confidence_score", None) self.text = kwargs.get("text", None) + self.length = kwargs.get("length", None) self.offset = kwargs.get("offset", None) @classmethod def _from_generated(cls, match): offset = match.offset + length = match.length if isinstance(match, _v3_0_models.Match): # we do not return offset for v3.0 since # the correct encoding was not introduced for v3.0 offset = None + length = None return cls( confidence_score=match.confidence_score, text=match.text, + length=length, offset=offset, ) def __repr__(self): - return "LinkedEntityMatch(confidence_score={}, text={}, offset={})".format( - self.confidence_score, self.text, self.offset + return "LinkedEntityMatch(confidence_score={}, text={}, length={}, offset={})".format( + self.confidence_score, self.text, self.length, self.offset )[:1024] @@ -943,8 +977,13 @@ class SentenceSentiment(DictMixin): and 1 for the sentence for all labels. :vartype confidence_scores: ~azure.ai.textanalytics.SentimentConfidenceScores - :ivar int offset: The sentence offset from the start of the document. Returned - in unicode code points. Only returned for API versions v3.1-preview and up. + :ivar int length: The sentence text length. This value depends on the value of the + `string_index_type` parameter set in the original request, which is UnicodeCodePoints + by default. 
Only returned for API versions v3.1-preview and up. + :ivar int offset: The sentence text offset from the start of the document. + The value depends on the value of the `string_index_type` parameter + set in the original request, which is UnicodeCodePoints by default. Only returned for + API versions v3.1-preview and up. :ivar mined_opinions: The list of opinions mined from this sentence. For example in the sentence "The food is good, but the service is bad", we would mine the two opinions "food is good" and "service is bad". Only returned @@ -960,16 +999,19 @@ def __init__(self, **kwargs): self.text = kwargs.get("text", None) self.sentiment = kwargs.get("sentiment", None) self.confidence_scores = kwargs.get("confidence_scores", None) + self.length = kwargs.get("length", None) self.offset = kwargs.get("offset", None) self.mined_opinions = kwargs.get("mined_opinions", None) @classmethod def _from_generated(cls, sentence, results, sentiment): offset = sentence.offset + length = sentence.length if isinstance(sentence, _v3_0_models.SentenceSentiment): # we do not return offset for v3.0 since # the correct encoding was not introduced for v3.0 offset = None + length = None if hasattr(sentence, "aspects"): mined_opinions = ( [MinedOpinion._from_generated(aspect, results, sentiment) for aspect in sentence.aspects] # pylint: disable=protected-access @@ -981,16 +1023,18 @@ def _from_generated(cls, sentence, results, sentiment): text=sentence.text, sentiment=sentence.sentiment, confidence_scores=SentimentConfidenceScores._from_generated(sentence.confidence_scores), # pylint: disable=protected-access + length=length, offset=offset, mined_opinions=mined_opinions ) def __repr__(self): return "SentenceSentiment(text={}, sentiment={}, confidence_scores={}, "\ - "offset={}, mined_opinions={})".format( + "length={}, offset={}, mined_opinions={})".format( self.text, self.sentiment, repr(self.confidence_scores), + self.length, self.offset, repr(self.mined_opinions) )[:1024] @@ 
-1057,14 +1101,19 @@ class AspectSentiment(DictMixin): for 'neutral' will always be 0 :vartype confidence_scores: ~azure.ai.textanalytics.SentimentConfidenceScores - :ivar int offset: The aspect offset from the start of the document. Returned - in unicode code points. + :ivar int length: The aspect text length. This value depends on the value of the + `string_index_type` parameter set in the original request, which is UnicodeCodePoints + by default. + :ivar int offset: The aspect text offset from the start of the document. + The value depends on the value of the `string_index_type` parameter + set in the original request, which is UnicodeCodePoints by default. """ def __init__(self, **kwargs): self.text = kwargs.get("text", None) self.sentiment = kwargs.get("sentiment", None) self.confidence_scores = kwargs.get("confidence_scores", None) + self.length = kwargs.get("length", None) self.offset = kwargs.get("offset", None) @classmethod @@ -1073,14 +1122,17 @@ def _from_generated(cls, aspect): text=aspect.text, sentiment=aspect.sentiment, confidence_scores=SentimentConfidenceScores._from_generated(aspect.confidence_scores), # pylint: disable=protected-access + length=aspect.length, offset=aspect.offset, ) def __repr__(self): - return "AspectSentiment(text={}, sentiment={}, confidence_scores={}, offset={})".format( + return "AspectSentiment(text={}, sentiment={}, confidence_scores={}, \ + length={}, offset={})".format( self.text, self.sentiment, repr(self.confidence_scores), + self.length, self.offset, )[:1024] @@ -1099,8 +1151,12 @@ class OpinionSentiment(DictMixin): for 'neutral' will always be 0 :vartype confidence_scores: ~azure.ai.textanalytics.SentimentConfidenceScores - :ivar int offset: The opinion offset from the start of the document. Returned - in unicode code points. + :ivar int length: The opinion text length. This value depends on the value of the + `string_index_type` parameter set in the original request, which is UnicodeCodePoints + by default. 
+ :ivar int offset: The opinion text offset from the start of the document. + The value depends on the value of the `string_index_type` parameter + set in the original request, which is UnicodeCodePoints by default. :ivar bool is_negated: Whether the opinion is negated. For example, in "The food is not good", the opinion "good" is negated. """ @@ -1109,6 +1165,7 @@ def __init__(self, **kwargs): self.text = kwargs.get("text", None) self.sentiment = kwargs.get("sentiment", None) self.confidence_scores = kwargs.get("confidence_scores", None) + self.length = kwargs.get("length", None) self.offset = kwargs.get("offset", None) self.is_negated = kwargs.get("is_negated", None) @@ -1118,16 +1175,19 @@ def _from_generated(cls, opinion): text=opinion.text, sentiment=opinion.sentiment, confidence_scores=SentimentConfidenceScores._from_generated(opinion.confidence_scores), # pylint: disable=protected-access + length=opinion.length, offset=opinion.offset, is_negated=opinion.is_negated ) def __repr__(self): return ( - "OpinionSentiment(text={}, sentiment={}, confidence_scores={}, offset={}, is_negated={})".format( + "OpinionSentiment(text={}, sentiment={}, confidence_scores={}, length={}, offset={}, \ + is_negated={})".format( self.text, self.sentiment, repr(self.confidence_scores), + self.length, self.offset, self.is_negated )[:1024] From ecb5aa77cea06e6fd38f26a46f0ebde34c25c148 Mon Sep 17 00:00:00 2001 From: Abby Hartman Date: Thu, 4 Feb 2021 10:45:11 -0800 Subject: [PATCH 2/6] Updated test_repr.py --- .../azure-ai-textanalytics/tests/test_repr.py | 18 ++++++++++++------ 1 file changed, 12 insertions(+), 6 deletions(-) diff --git a/sdk/textanalytics/azure-ai-textanalytics/tests/test_repr.py b/sdk/textanalytics/azure-ai-textanalytics/tests/test_repr.py index b25648e1c0ba..3a675e9ef93e 100644 --- a/sdk/textanalytics/azure-ai-textanalytics/tests/test_repr.py +++ b/sdk/textanalytics/azure-ai-textanalytics/tests/test_repr.py @@ -69,12 +69,13 @@ def categorized_entity(): text="Bill 
Gates", category="Person", subcategory="Age", + length=10, offset=0, confidence_score=0.899 ) model_repr = ( "CategorizedEntity(text=Bill Gates, category=Person, subcategory=Age, " - "offset=0, confidence_score=0.899)" + "length=10, offset=0, confidence_score=0.899)" ) assert repr(model) == model_repr return model, model_repr @@ -86,10 +87,11 @@ def pii_entity(): text="859-98-0987", category="SSN", subcategory=None, + length=11, offset=0, confidence_score=0.899 ) - model_repr = "PiiEntity(text=859-98-0987, category=SSN, subcategory=None, offset=0, confidence_score=0.899)" + model_repr = "PiiEntity(text=859-98-0987, category=SSN, subcategory=None, length=11, offset=0, confidence_score=0.899)" assert repr(model) == model_repr return model, model_repr @@ -100,8 +102,9 @@ def linked_entity_match(): confidence_score=0.999, text="Bill Gates", offset=0, + length=10 ) - model_repr = "LinkedEntityMatch(confidence_score=0.999, text=Bill Gates, offset=0)" + model_repr = "LinkedEntityMatch(confidence_score=0.999, text=Bill Gates, length=10, offset=0)" assert repr(model) == model_repr return model, model_repr @@ -155,9 +158,10 @@ def aspect_sentiment(aspect_opinion_confidence_score): text="aspect", sentiment="positive", confidence_scores=aspect_opinion_confidence_score[0], + length=6, offset=10, ) - model_repr = "AspectSentiment(text=aspect, sentiment=positive, confidence_scores={}, offset=10)".format( + model_repr = "AspectSentiment(text=aspect, sentiment=positive, confidence_scores={}, length=6, offset=10)".format( aspect_opinion_confidence_score[1] ) assert repr(model) == model_repr @@ -169,10 +173,11 @@ def opinion_sentiment(aspect_opinion_confidence_score): text="opinion", sentiment="positive", confidence_scores=aspect_opinion_confidence_score[0], + length=7, offset=3, is_negated=False ) - model_repr = "OpinionSentiment(text=opinion, sentiment=positive, confidence_scores={}, offset=3, is_negated=False)".format( + model_repr = "OpinionSentiment(text=opinion, 
sentiment=positive, confidence_scores={}, length=7, offset=3, is_negated=False)".format( aspect_opinion_confidence_score[1] ) assert repr(model) == model_repr @@ -194,12 +199,13 @@ def sentence_sentiment(sentiment_confidence_scores, mined_opinion): text="This is a sentence.", sentiment="neutral", confidence_scores=sentiment_confidence_scores[0], + length=19, offset=0, mined_opinions=[mined_opinion[0]] ) model_repr = ( "SentenceSentiment(text=This is a sentence., sentiment=neutral, confidence_scores={}, "\ - "offset=0, mined_opinions=[{}])".format( + "length=19, offset=0, mined_opinions=[{}])".format( sentiment_confidence_scores[1], mined_opinion[1] ) ) From 29d698a115152b370e6b60919eefe371eb12bac8 Mon Sep 17 00:00:00 2001 From: Abby Hartman Date: Thu, 4 Feb 2021 11:48:33 -0800 Subject: [PATCH 3/6] Fixed sentiment-related __repr__ method outputs --- .../azure/ai/textanalytics/_models.py | 12 ++++++------ 1 file changed, 6 insertions(+), 6 deletions(-) diff --git a/sdk/textanalytics/azure-ai-textanalytics/azure/ai/textanalytics/_models.py b/sdk/textanalytics/azure-ai-textanalytics/azure/ai/textanalytics/_models.py index cef8ffe0e4ee..1cca6f5510c8 100644 --- a/sdk/textanalytics/azure-ai-textanalytics/azure/ai/textanalytics/_models.py +++ b/sdk/textanalytics/azure-ai-textanalytics/azure/ai/textanalytics/_models.py @@ -435,8 +435,8 @@ def _from_generated(cls, healthcare_entity): ) def __repr__(self): - return "HealthcareEntity(text={}, category={}, subcategory={}, length={}, offset={}, \ - confidence_score={}, links={})".format( + return "HealthcareEntity(text={}, category={}, subcategory={}, length={}, offset={}, "\ + "confidence_score={}, links={})".format( self.text, self.category, self.subcategory, @@ -1127,8 +1127,8 @@ def _from_generated(cls, aspect): ) def __repr__(self): - return "AspectSentiment(text={}, sentiment={}, confidence_scores={}, \ - length={}, offset={})".format( + return "AspectSentiment(text={}, sentiment={}, confidence_scores={}, "\ + "length={}, 
offset={})".format( self.text, self.sentiment, repr(self.confidence_scores), @@ -1182,8 +1182,8 @@ def _from_generated(cls, opinion): def __repr__(self): return ( - "OpinionSentiment(text={}, sentiment={}, confidence_scores={}, length={}, offset={}, \ - is_negated={})".format( + "OpinionSentiment(text={}, sentiment={}, confidence_scores={}, length={}, offset={}, "\ + "is_negated={})".format( self.text, self.sentiment, repr(self.confidence_scores), From 001942aa283e1496801724cfd49e6c24c4496a7e Mon Sep 17 00:00:00 2001 From: Abby Hartman Date: Thu, 4 Feb 2021 14:27:57 -0800 Subject: [PATCH 4/6] Updated samples and changelog --- sdk/textanalytics/azure-ai-textanalytics/CHANGELOG.md | 4 +++- .../azure-ai-textanalytics/samples/sample_analyze.py | 7 +++++-- .../samples/sample_analyze_healthcare.py | 1 + 3 files changed, 9 insertions(+), 3 deletions(-) diff --git a/sdk/textanalytics/azure-ai-textanalytics/CHANGELOG.md b/sdk/textanalytics/azure-ai-textanalytics/CHANGELOG.md index 54d8067955de..ec47b71e09db 100644 --- a/sdk/textanalytics/azure-ai-textanalytics/CHANGELOG.md +++ b/sdk/textanalytics/azure-ai-textanalytics/CHANGELOG.md @@ -4,7 +4,9 @@ **New Features** - No longer need to specify `api_version=TextAnalyticsApiVersion.V3_1_PREVIEW_3` when calling `begin_analyze` and `begin_analyze_healthcare`. `begin_analyze_healthcare` is still in gated preview though. - +- Added a new parameter `string_index_type` to the service client methods `begin_analyze_healthcare`, `analyze_sentiment`, `recognize_entities`, `recognize_pii_entities`, and `recognize_linked_entities`. +- Added property `length` from `CategorizedEntity`, `SentenceSentiment`, `LinkedEntityMatch`, `AspectSentiment`, `OpinionSentiment`, `PiiEntity` and +`HealthcareEntity`. 
## 5.1.0b4 (2021-01-12) diff --git a/sdk/textanalytics/azure-ai-textanalytics/samples/sample_analyze.py b/sdk/textanalytics/azure-ai-textanalytics/samples/sample_analyze.py index 7b487e8bd4fc..d953ddd0f1c1 100644 --- a/sdk/textanalytics/azure-ai-textanalytics/samples/sample_analyze.py +++ b/sdk/textanalytics/azure-ai-textanalytics/samples/sample_analyze.py @@ -74,6 +74,7 @@ def analyze(self): print("Entity: {}".format(entity.text)) print("...Category: {}".format(entity.category)) print("...Confidence Score: {}".format(entity.confidence_score)) + print("...Length: {}".format(entity.length)) print("...Offset: {}".format(entity.offset)) print("------------------------------------------") @@ -85,8 +86,10 @@ def analyze(self): print("Document text: {}".format(documents[idx])) for entity in doc.entities: print("Entity: {}".format(entity.text)) - print("Category: {}".format(entity.category)) - print("Confidence Score: {}\n".format(entity.confidence_score)) + print("...Category: {}".format(entity.category)) + print("...Confidence Score: {}\n".format(entity.confidence_score)) + print("...Length: {}".format(entity.length)) + print("...Offset: {}".format(entity.offset)) print("------------------------------------------") for task in page.key_phrase_extraction_results: diff --git a/sdk/textanalytics/azure-ai-textanalytics/samples/sample_analyze_healthcare.py b/sdk/textanalytics/azure-ai-textanalytics/samples/sample_analyze_healthcare.py index 6b73431d86e5..81e255a61d84 100644 --- a/sdk/textanalytics/azure-ai-textanalytics/samples/sample_analyze_healthcare.py +++ b/sdk/textanalytics/azure-ai-textanalytics/samples/sample_analyze_healthcare.py @@ -56,6 +56,7 @@ def analyze_healthcare(self): print("Entity: {}".format(entity.text)) print("...Category: {}".format(entity.category)) print("...Subcategory: {}".format(entity.subcategory)) + print("...Length: {}".format(entity.length)) print("...Offset: {}".format(entity.offset)) print("...Confidence score: 
{}".format(entity.confidence_score)) if entity.links is not None: From cd5bc4cdc947ea15eea2541d0c1bc3e82a0acd76 Mon Sep 17 00:00:00 2001 From: iscai-msft Date: Fri, 5 Feb 2021 12:33:24 -0500 Subject: [PATCH 5/6] remove length from samples --- .../samples/sample_analyze.py | 109 ------------------ .../samples/sample_analyze_healthcare.py | 1 - 2 files changed, 110 deletions(-) delete mode 100644 sdk/textanalytics/azure-ai-textanalytics/samples/sample_analyze.py diff --git a/sdk/textanalytics/azure-ai-textanalytics/samples/sample_analyze.py b/sdk/textanalytics/azure-ai-textanalytics/samples/sample_analyze.py deleted file mode 100644 index d953ddd0f1c1..000000000000 --- a/sdk/textanalytics/azure-ai-textanalytics/samples/sample_analyze.py +++ /dev/null @@ -1,109 +0,0 @@ -# coding: utf-8 - -# ------------------------------------------------------------------------- -# Copyright (c) Microsoft Corporation. All rights reserved. -# Licensed under the MIT License. See License.txt in the project root for -# license information. -# -------------------------------------------------------------------------- - -""" -FILE: sample_analyze_text.py - -DESCRIPTION: - This sample demonstrates how to submit a collection of text documents for analysis, which consists of a variety - of text analysis tasks, such as Entity Recognition, PII Entity Recognition, Entity Linking, Sentiment Analysis, - or Key Phrase Extraction. The response will contain results from each of the individual tasks specified in the request. - -USAGE: - python sample_analyze_text.py - - Set the environment variables with your own values before running the sample: - 1) AZURE_TEXT_ANALYTICS_ENDPOINT - the endpoint to your Cognitive Services resource. 
- 2) AZURE_TEXT_ANALYTICS_KEY - your Text Analytics subscription key -""" - - -import os - - -class AnalyzeSample(object): - - def analyze(self): - # [START analyze] - from azure.core.credentials import AzureKeyCredential - from azure.ai.textanalytics import TextAnalyticsClient, \ - EntitiesRecognitionTask, \ - PiiEntitiesRecognitionTask, \ - KeyPhraseExtractionTask - - endpoint = os.environ["AZURE_TEXT_ANALYTICS_ENDPOINT"] - key = os.environ["AZURE_TEXT_ANALYTICS_KEY"] - - text_analytics_client = TextAnalyticsClient( - endpoint=endpoint, - credential=AzureKeyCredential(key), - ) - - documents = [ - "We went to Contoso Steakhouse located at midtown NYC last week for a dinner party, and we adore the spot! \ - They provide marvelous food and they have a great menu. The chief cook happens to be the owner (I think his name is John Doe) \ - and he is super nice, coming out of the kitchen and greeted us all. We enjoyed very much dining in the place! \ - The Sirloin steak I ordered was tender and juicy, and the place was impeccably clean. You can even pre-order from their \ - online menu at www.contososteakhouse.com, call 312-555-0176 or send email to order@contososteakhouse.com! \ - The only complaint I have is the food didn't come fast enough. Overall I highly recommend it!" 
- ] - - poller = text_analytics_client.begin_analyze( - documents, - display_name="Sample Text Analysis", - entities_recognition_tasks=[EntitiesRecognitionTask()], - pii_entities_recognition_tasks=[PiiEntitiesRecognitionTask()], - key_phrase_extraction_tasks=[KeyPhraseExtractionTask()] - ) - - result = poller.result() - - for page in result: - for task in page.entities_recognition_results: - print("Results of Entities Recognition task:") - - docs = [doc for doc in task.results if not doc.is_error] - for idx, doc in enumerate(docs): - print("\nDocument text: {}".format(documents[idx])) - for entity in doc.entities: - print("Entity: {}".format(entity.text)) - print("...Category: {}".format(entity.category)) - print("...Confidence Score: {}".format(entity.confidence_score)) - print("...Length: {}".format(entity.length)) - print("...Offset: {}".format(entity.offset)) - print("------------------------------------------") - - for task in page.pii_entities_recognition_results: - print("Results of PII Entities Recognition task:") - - docs = [doc for doc in task.results if not doc.is_error] - for idx, doc in enumerate(docs): - print("Document text: {}".format(documents[idx])) - for entity in doc.entities: - print("Entity: {}".format(entity.text)) - print("...Category: {}".format(entity.category)) - print("...Confidence Score: {}\n".format(entity.confidence_score)) - print("...Length: {}".format(entity.length)) - print("...Offset: {}".format(entity.offset)) - print("------------------------------------------") - - for task in page.key_phrase_extraction_results: - print("Results of Key Phrase Extraction task:") - - docs = [doc for doc in task.results if not doc.is_error] - for idx, doc in enumerate(docs): - print("Document text: {}\n".format(documents[idx])) - print("Key Phrases: {}\n".format(doc.key_phrases)) - print("------------------------------------------") - - # [END analyze] - - -if __name__ == "__main__": - sample = AnalyzeSample() - sample.analyze() \ No newline at 
end of file diff --git a/sdk/textanalytics/azure-ai-textanalytics/samples/sample_analyze_healthcare.py b/sdk/textanalytics/azure-ai-textanalytics/samples/sample_analyze_healthcare.py index 81e255a61d84..6b73431d86e5 100644 --- a/sdk/textanalytics/azure-ai-textanalytics/samples/sample_analyze_healthcare.py +++ b/sdk/textanalytics/azure-ai-textanalytics/samples/sample_analyze_healthcare.py @@ -56,7 +56,6 @@ def analyze_healthcare(self): print("Entity: {}".format(entity.text)) print("...Category: {}".format(entity.category)) print("...Subcategory: {}".format(entity.subcategory)) - print("...Length: {}".format(entity.length)) print("...Offset: {}".format(entity.offset)) print("...Confidence score: {}".format(entity.confidence_score)) if entity.links is not None: From 2a196bb3eaea63caaafeaf338107c74302991e64 Mon Sep 17 00:00:00 2001 From: iscai-msft Date: Fri, 5 Feb 2021 12:34:02 -0500 Subject: [PATCH 6/6] fix grammar nit --- sdk/textanalytics/azure-ai-textanalytics/CHANGELOG.md | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/sdk/textanalytics/azure-ai-textanalytics/CHANGELOG.md b/sdk/textanalytics/azure-ai-textanalytics/CHANGELOG.md index 65ff940755b6..0a988192e7a0 100644 --- a/sdk/textanalytics/azure-ai-textanalytics/CHANGELOG.md +++ b/sdk/textanalytics/azure-ai-textanalytics/CHANGELOG.md @@ -14,7 +14,7 @@ each action result. **New Features** - No longer need to specify `api_version=TextAnalyticsApiVersion.V3_1_PREVIEW_3` when calling `begin_analyze` and `begin_analyze_healthcare`. `begin_analyze_healthcare` is still in gated preview though. - Added a new parameter `string_index_type` to the service client methods `begin_analyze_healthcare`, `analyze_sentiment`, `recognize_entities`, `recognize_pii_entities`, and `recognize_linked_entities`. 
-- Added property `length` from `CategorizedEntity`, `SentenceSentiment`, `LinkedEntityMatch`, `AspectSentiment`, `OpinionSentiment`, `PiiEntity` and +- Added property `length` to `CategorizedEntity`, `SentenceSentiment`, `LinkedEntityMatch`, `AspectSentiment`, `OpinionSentiment`, `PiiEntity` and `HealthcareEntity`. ## 5.1.0b4 (2021-01-12)