From 70e35a992d0508bf96d17f2c6ca251680a54e473 Mon Sep 17 00:00:00 2001 From: Mohamed Shaban Date: Tue, 24 Aug 2021 23:47:58 +0200 Subject: [PATCH] improve docs and samples for glossaries and custom models (#18587) * update the readme * update readme file * added custom translation samples * fix 'no-locale' thing in links * update glossary docs * update glossaries * link to sample glossaries instead of writing code in readme * update custom model sample linking * remove relative linking in readme * make subheadings in bold text to be more readable * conform with 'Document Translation' naming * disambiguate container sas url * capitaliz Azure name * remove misplaced period * update samples -> custom model * update async sample -> custom model * remove localization from url * update readme with new file types for glossaries * adding sample glossaries -> xlf * white space * use simplified single input method * update 'job' terminology * update azure-core naming * update glossary blob file reference name * link to supported glossaries table * remove locale from url --- .../azure-ai-translation-document/README.md | 45 ++++++++++ .../samples/assets/glossary_sample.csv | 4 + .../samples/assets/glossary_sample.tsv | 4 + .../samples/assets/glossary_sample.xlf | 23 ++++++ ...ple_translation_with_custom_model_async.py | 82 +++++++++++++++++++ .../sample_translation_with_custom_model.py | 75 +++++++++++++++++ 6 files changed, 233 insertions(+) create mode 100644 sdk/translation/azure-ai-translation-document/samples/assets/glossary_sample.csv create mode 100644 sdk/translation/azure-ai-translation-document/samples/assets/glossary_sample.tsv create mode 100644 sdk/translation/azure-ai-translation-document/samples/assets/glossary_sample.xlf create mode 100644 sdk/translation/azure-ai-translation-document/samples/async_samples/sample_translation_with_custom_model_async.py create mode 100644 
sdk/translation/azure-ai-translation-document/samples/sample_translation_with_custom_model.py diff --git a/sdk/translation/azure-ai-translation-document/README.md b/sdk/translation/azure-ai-translation-document/README.md index 80dcc5ebec30..d533f60e5585 100644 --- a/sdk/translation/azure-ai-translation-document/README.md +++ b/sdk/translation/azure-ai-translation-document/README.md @@ -323,6 +323,40 @@ To see how to use the Document Translation client library with Azure Storage Blo for your containers, and download the finished translated documents, see this [sample][sample_translation_with_azure_blob]. Note that you will need to install the [azure-storage-blob][azure_storage_blob] library to run this sample. +## Advanced Topics + +The following section provides some insights into some of the advanced translation features such as glossaries and custom translation models. + +### **Glossaries** +Glossaries are domain-specific dictionaries. For example, if you want to translate some medical-related documents, you may need support for the many words, terminology, and idioms in the medical field which you can't find in the standard translation dictionary or you simply need a specific translation. This is why Document Translation provides support for glossaries. + +#### **How To Create a Glossary File** + +Document Translation supports glossaries in the following formats: + +|**File Type**|**Extension**|**Description**|**Samples**| +|---------------|---------------|---------------|---------------| +|Tab-Separated Values/TAB|.tsv, .tab|Read more on [wikipedia][tsv_files_wikipedia]|[glossary_sample.tsv][sample_tsv_file]| +|Comma-Separated Values|.csv|Read more on [wikipedia][csv_files_wikipedia]|[glossary_sample.csv][sample_csv_file]| +|Localization Interchange File Format|.xlf, .xliff|Read more on [wikipedia][xlf_files_wikipedia]|[glossary_sample.xlf][sample_xlf_file]| + +View all supported formats [here][supported_glossary_formats].
+ +#### **How To Use Glossaries in Document Translation** +In order to use glossaries with Document Translation, you first need to upload your glossary file to a blob container, and then provide the SAS URL of this glossary file to Document Translation as shown in the code sample [sample_translation_with_glossaries.py][sample_translation_with_glossaries]. + + +### **Custom Translation Models** +Instead of using Document Translation's engine for translation, you can use your own custom Azure machine/deep learning model. + +#### **How To Create a Custom Translation Model** +For more info on how to create, provision, and deploy your own custom Azure translation model, please follow the instructions here: [Build, deploy, and use a custom model for translation][custom_translation_article] + +#### **How To Use a Custom Translation Model With Document Translation** +In order to use a custom translation model with Document Translation, you first +need to create and deploy your model, then follow the code sample [sample_translation_with_custom_model.py][sample_translation_with_custom_model] to use it with Document Translation.
+ + ## Troubleshooting ### General @@ -436,6 +470,17 @@ This project has adopted the [Microsoft Open Source Code of Conduct][code_of_con [sample_translation_with_glossaries_async]: https://github.com/Azure/azure-sdk-for-python/tree/main/sdk/translation/azure-ai-translation-document/samples/async_samples/sample_translation_with_glossaries_async.py [sample_translation_with_azure_blob]: https://github.com/Azure/azure-sdk-for-python/tree/main/sdk/translation/azure-ai-translation-document/samples/sample_translation_with_azure_blob.py [sample_translation_with_azure_blob_async]: https://github.com/Azure/azure-sdk-for-python/tree/main/sdk/translation/azure-ai-translation-document/samples/async_samples/sample_translation_with_azure_blob_async.py +[sample_translation_with_custom_model]: https://github.com/Azure/azure-sdk-for-python/tree/main/sdk/translation/azure-ai-translation-document/samples/sample_translation_with_custom_model.py +[sample_translation_with_custom_model_async]: https://github.com/Azure/azure-sdk-for-python/tree/main/sdk/translation/azure-ai-translation-document/samples/async_samples/sample_translation_with_custom_model_async.py + +[supported_glossary_formats]: https://docs.microsoft.com/azure/cognitive-services/translator/document-translation/overview#supported-glossary-formats +[custom_translation_article]: https://docs.microsoft.com/azure/cognitive-services/translator/custom-translator/quickstart-build-deploy-custom-model +[tsv_files_wikipedia]: https://wikipedia.org/wiki/Tab-separated_values +[xlf_files_wikipedia]: https://wikipedia.org/wiki/XLIFF +[csv_files_wikipedia]: https://wikipedia.org/wiki/Comma-separated_values +[sample_tsv_file]: https://github.com/Azure/azure-sdk-for-python/tree/main/sdk/translation/azure-ai-translation-document/samples/assets/glossary_sample.tsv +[sample_csv_file]: https://github.com/Azure/azure-sdk-for-python/tree/main/sdk/translation/azure-ai-translation-document/samples/assets/glossary_sample.csv +[sample_xlf_file]: 
https://github.com/Azure/azure-sdk-for-python/tree/main/sdk/translation/azure-ai-translation-document/samples/assets/glossary_sample.xlf [cla]: https://cla.microsoft.com [code_of_conduct]: https://opensource.microsoft.com/codeofconduct/ diff --git a/sdk/translation/azure-ai-translation-document/samples/assets/glossary_sample.csv b/sdk/translation/azure-ai-translation-document/samples/assets/glossary_sample.csv new file mode 100644 index 000000000000..6883ab5d2d6d --- /dev/null +++ b/sdk/translation/azure-ai-translation-document/samples/assets/glossary_sample.csv @@ -0,0 +1,4 @@ +skull,le crâne +body,corps +heart,cœur +lungs,poumons diff --git a/sdk/translation/azure-ai-translation-document/samples/assets/glossary_sample.tsv b/sdk/translation/azure-ai-translation-document/samples/assets/glossary_sample.tsv new file mode 100644 index 000000000000..91ba49dd2374 --- /dev/null +++ b/sdk/translation/azure-ai-translation-document/samples/assets/glossary_sample.tsv @@ -0,0 +1,4 @@ +skull le crâne +body corps +heart cœur +lungs poumons diff --git a/sdk/translation/azure-ai-translation-document/samples/assets/glossary_sample.xlf b/sdk/translation/azure-ai-translation-document/samples/assets/glossary_sample.xlf new file mode 100644 index 000000000000..ef8fa9f37bdc --- /dev/null +++ b/sdk/translation/azure-ai-translation-document/samples/assets/glossary_sample.xlf @@ -0,0 +1,23 @@ + + + + + + skull + le crâne + + + body + corps + + + heart + cœur + + + lungs + poumons + + + + \ No newline at end of file diff --git a/sdk/translation/azure-ai-translation-document/samples/async_samples/sample_translation_with_custom_model_async.py b/sdk/translation/azure-ai-translation-document/samples/async_samples/sample_translation_with_custom_model_async.py new file mode 100644 index 000000000000..7456aaff0e72 --- /dev/null +++ b/sdk/translation/azure-ai-translation-document/samples/async_samples/sample_translation_with_custom_model_async.py @@ -0,0 +1,82 @@ +# coding=utf-8 +# 
------------------------------------ +# Copyright (c) Microsoft Corporation. +# Licensed under the MIT License. +# ------------------------------------ + +""" +FILE: sample_translation_with_custom_model_async.py + +DESCRIPTION: + This sample demonstrates how to create a translation operation and apply a custom Azure translation model when doing the translation. + + To set up your containers for translation and generate SAS tokens to your containers (or files) + with the appropriate permissions, see the README. + +USAGE: + python sample_translation_with_custom_model_async.py + + Set the environment variables with your own values before running the sample: + 1) AZURE_DOCUMENT_TRANSLATION_ENDPOINT - the endpoint to your Document Translation resource. + 2) AZURE_DOCUMENT_TRANSLATION_KEY - your Document Translation API key. + 3) AZURE_SOURCE_CONTAINER_URL - the container SAS URL to your source container which has the documents + to be translated. + 4) AZURE_TARGET_CONTAINER_URL - the container SAS URL to your target container where the translated documents + will be written. + 5) AZURE_CUSTOM_MODEL_ID - the category ID of your Azure custom translation model.
+""" + +import asyncio + + +async def sample_translation_with_custom_model_async(): + import os + from azure.core.credentials import AzureKeyCredential + from azure.ai.translation.document.aio import DocumentTranslationClient + + endpoint = os.environ["AZURE_DOCUMENT_TRANSLATION_ENDPOINT"] + key = os.environ["AZURE_DOCUMENT_TRANSLATION_KEY"] + source_container_url = os.environ["AZURE_SOURCE_CONTAINER_URL"] + target_container_url = os.environ["AZURE_TARGET_CONTAINER_URL"] + custom_model_id = os.environ["AZURE_CUSTOM_MODEL_ID"] + + client = DocumentTranslationClient(endpoint, AzureKeyCredential(key)) + + + + async with client: + poller = await client.begin_translation( + source_container_url, + target_container_url, + "es", + category_id=custom_model_id + ) + result = await poller.result() + + print("Operation status: {}".format(result.status)) + print("Operation created on: {}".format(result.created_on)) + print("Operation last updated on: {}".format(result.last_updated_on)) + print("Total number of translations on documents: {}".format(result.documents_total_count)) + + print("\nOf total documents...") + print("{} failed".format(result.documents_failed_count)) + print("{} succeeded".format(result.documents_succeeded_count)) + + doc_results = client.list_all_document_statuses(result.id) + async for document in doc_results: + print("Document ID: {}".format(document.id)) + print("Document status: {}".format(document.status)) + if document.status == "Succeeded": + print("Source document location: {}".format(document.source_document_url)) + print("Translated document location: {}".format(document.translated_document_url)) + print("Translated to language: {}\n".format(document.translate_to)) + else: + print("Error Code: {}, Message: {}\n".format(document.error.code, document.error.message)) + + +async def main(): + await sample_translation_with_custom_model_async() + +if __name__ == '__main__': + loop = asyncio.get_event_loop() + loop.run_until_complete(main()) diff 
--git a/sdk/translation/azure-ai-translation-document/samples/sample_translation_with_custom_model.py b/sdk/translation/azure-ai-translation-document/samples/sample_translation_with_custom_model.py new file mode 100644 index 000000000000..cf73444961b2 --- /dev/null +++ b/sdk/translation/azure-ai-translation-document/samples/sample_translation_with_custom_model.py @@ -0,0 +1,75 @@ +# coding=utf-8 +# ------------------------------------ +# Copyright (c) Microsoft Corporation. +# Licensed under the MIT License. +# ------------------------------------ + +""" +FILE: sample_translation_with_custom_model.py + +DESCRIPTION: + This sample demonstrates how to create a translation operation and apply a custom Azure translation model when doing the translation. + + To set up your containers for translation and generate SAS tokens to your containers (or files) + with the appropriate permissions, see the README. + +USAGE: + python sample_translation_with_custom_model.py + + Set the environment variables with your own values before running the sample: + 1) AZURE_DOCUMENT_TRANSLATION_ENDPOINT - the endpoint to your Document Translation resource. + 2) AZURE_DOCUMENT_TRANSLATION_KEY - your Document Translation API key. + 3) AZURE_SOURCE_CONTAINER_URL - the container SAS URL to your source container which has the documents + to be translated. + 4) AZURE_TARGET_CONTAINER_URL - the container SAS URL to your target container where the translated documents + will be written. + 5) AZURE_CUSTOM_MODEL_ID - the category ID of your Azure custom translation model.
+""" + + +def sample_translation_with_custom_model(): + import os + from azure.core.credentials import AzureKeyCredential + from azure.ai.translation.document import ( + DocumentTranslationClient + ) + + endpoint = os.environ["AZURE_DOCUMENT_TRANSLATION_ENDPOINT"] + key = os.environ["AZURE_DOCUMENT_TRANSLATION_KEY"] + source_container_url = os.environ["AZURE_SOURCE_CONTAINER_URL"] + target_container_url = os.environ["AZURE_TARGET_CONTAINER_URL"] + custom_model_id = os.environ["AZURE_CUSTOM_MODEL_ID"] + + client = DocumentTranslationClient(endpoint, AzureKeyCredential(key)) + + poller = client.begin_translation( + source_container_url, + target_container_url, + "es", + category_id=custom_model_id + ) + result = poller.result() + + print("Operation status: {}".format(result.status)) + print("Operation created on: {}".format(result.created_on)) + print("Operation last updated on: {}".format(result.last_updated_on)) + print("Total number of translations on documents: {}".format(result.documents_total_count)) + + print("\nOf total documents...") + print("{} failed".format(result.documents_failed_count)) + print("{} succeeded".format(result.documents_succeeded_count)) + + doc_results = client.list_all_document_statuses(result.id) + for document in doc_results: + print("Document ID: {}".format(document.id)) + print("Document status: {}".format(document.status)) + if document.status == "Succeeded": + print("Source document location: {}".format(document.source_document_url)) + print("Translated document location: {}".format(document.translated_document_url)) + print("Translated to language: {}\n".format(document.translate_to)) + else: + print("Error Code: {}, Message: {}\n".format(document.error.code, document.error.message)) + + +if __name__ == '__main__': + sample_translation_with_custom_model()