diff --git a/sdk/translation/azure-ai-translation-document/README.md b/sdk/translation/azure-ai-translation-document/README.md index 80dcc5ebec30..d533f60e5585 100644 --- a/sdk/translation/azure-ai-translation-document/README.md +++ b/sdk/translation/azure-ai-translation-document/README.md @@ -323,6 +323,40 @@ To see how to use the Document Translation client library with Azure Storage Blo for your containers, and download the finished translated documents, see this [sample][sample_translation_with_azure_blob]. Note that you will need to install the [azure-storage-blob][azure_storage_blob] library to run this sample. +## Advanced Topics + +The following section provides insights into some of the advanced translation features such as glossaries and custom translation models. + +### **Glossaries** +Glossaries are domain-specific dictionaries. For example, if you want to translate some medical-related documents, you may need support for the many words, terminology, and idioms in the medical field which you can't find in the standard translation dictionary, or you may simply need a specific translation. This is why Document Translation provides support for glossaries. + +#### **How To Create a Glossary File** + +Document Translation supports glossaries in the following formats: + +|**File Type**|**Extension**|**Description**|**Samples**| +|---------------|---------------|---------------|---------------| +|Tab-Separated Values/TAB|.tsv, .tab|Read more on [Wikipedia][tsv_files_wikipedia]|[glossary_sample.tsv][sample_tsv_file]| +|Comma-Separated Values|.csv|Read more on [Wikipedia][csv_files_wikipedia]|[glossary_sample.csv][sample_csv_file]| +|Localization Interchange File Format|.xlf, .xliff|Read more on [Wikipedia][xlf_files_wikipedia]|[glossary_sample.xlf][sample_xlf_file]| + +View all supported formats [here][supported_glossary_formats]. 
+ +#### **How To Use Glossaries in Document Translation** +In order to use glossaries with Document Translation, you first need to upload your glossary file to a blob container, and then provide the SAS URL of this glossary file to Document Translation, as shown in the code sample [sample_translation_with_glossaries.py][sample_translation_with_glossaries]. + + +### **Custom Translation Models** +Instead of using Document Translation's engine for translation, you can use your own custom Azure machine/deep learning model. + +#### **How To Create a Custom Translation Model** +For more info on how to create, provision, and deploy your own custom Azure translation model, please follow the instructions here: [Build, deploy, and use a custom model for translation][custom_translation_article] + +#### **How To Use a Custom Translation Model With Document Translation** +In order to use a custom translation model with Document Translation, you first +need to create and deploy your model, then follow the code sample [sample_translation_with_custom_model.py][sample_translation_with_custom_model] to use it with Document Translation. 
+ + ## Troubleshooting ### General @@ -436,6 +470,17 @@ This project has adopted the [Microsoft Open Source Code of Conduct][code_of_con [sample_translation_with_glossaries_async]: https://github.com/Azure/azure-sdk-for-python/tree/main/sdk/translation/azure-ai-translation-document/samples/async_samples/sample_translation_with_glossaries_async.py [sample_translation_with_azure_blob]: https://github.com/Azure/azure-sdk-for-python/tree/main/sdk/translation/azure-ai-translation-document/samples/sample_translation_with_azure_blob.py [sample_translation_with_azure_blob_async]: https://github.com/Azure/azure-sdk-for-python/tree/main/sdk/translation/azure-ai-translation-document/samples/async_samples/sample_translation_with_azure_blob_async.py +[sample_translation_with_custom_model]: https://github.com/Azure/azure-sdk-for-python/tree/main/sdk/translation/azure-ai-translation-document/samples/sample_translation_with_custom_model.py +[sample_translation_with_custom_model_async]: https://github.com/Azure/azure-sdk-for-python/tree/main/sdk/translation/azure-ai-translation-document/samples/async_samples/sample_translation_with_custom_model_async.py + +[supported_glossary_formats]: https://docs.microsoft.com/azure/cognitive-services/translator/document-translation/overview#supported-glossary-formats +[custom_translation_article]: https://docs.microsoft.com/azure/cognitive-services/translator/custom-translator/quickstart-build-deploy-custom-model +[tsv_files_wikipedia]: https://wikipedia.org/wiki/Tab-separated_values +[xlf_files_wikipedia]: https://wikipedia.org/wiki/XLIFF +[csv_files_wikipedia]: https://wikipedia.org/wiki/Comma-separated_values +[sample_tsv_file]: https://github.com/Azure/azure-sdk-for-python/tree/main/sdk/translation/azure-ai-translation-document/samples/assets/glossary_sample.tsv +[sample_csv_file]: https://github.com/Azure/azure-sdk-for-python/tree/main/sdk/translation/azure-ai-translation-document/samples/assets/glossary_sample.csv +[sample_xlf_file]: 
https://github.com/Azure/azure-sdk-for-python/tree/main/sdk/translation/azure-ai-translation-document/samples/assets/glossary_sample.xlf [cla]: https://cla.microsoft.com [code_of_conduct]: https://opensource.microsoft.com/codeofconduct/ diff --git a/sdk/translation/azure-ai-translation-document/samples/assets/glossary_sample.csv b/sdk/translation/azure-ai-translation-document/samples/assets/glossary_sample.csv new file mode 100644 index 000000000000..6883ab5d2d6d --- /dev/null +++ b/sdk/translation/azure-ai-translation-document/samples/assets/glossary_sample.csv @@ -0,0 +1,4 @@ +skull,le crâne +body,corps +heart,cœur +lungs,poumons diff --git a/sdk/translation/azure-ai-translation-document/samples/assets/glossary_sample.tsv b/sdk/translation/azure-ai-translation-document/samples/assets/glossary_sample.tsv new file mode 100644 index 000000000000..91ba49dd2374 --- /dev/null +++ b/sdk/translation/azure-ai-translation-document/samples/assets/glossary_sample.tsv @@ -0,0 +1,4 @@ +skull le crâne +body corps +heart cœur +lungs poumons diff --git a/sdk/translation/azure-ai-translation-document/samples/assets/glossary_sample.xlf b/sdk/translation/azure-ai-translation-document/samples/assets/glossary_sample.xlf new file mode 100644 index 000000000000..ef8fa9f37bdc --- /dev/null +++ b/sdk/translation/azure-ai-translation-document/samples/assets/glossary_sample.xlf @@ -0,0 +1,23 @@ + + + + + + skull + le crâne + + + body + corps + + + heart + cœur + + + lungs + poumons + + + + \ No newline at end of file diff --git a/sdk/translation/azure-ai-translation-document/samples/async_samples/sample_translation_with_custom_model_async.py b/sdk/translation/azure-ai-translation-document/samples/async_samples/sample_translation_with_custom_model_async.py new file mode 100644 index 000000000000..7456aaff0e72 --- /dev/null +++ b/sdk/translation/azure-ai-translation-document/samples/async_samples/sample_translation_with_custom_model_async.py @@ -0,0 +1,82 @@ +# coding=utf-8 +# 
------------------------------------ +# Copyright (c) Microsoft Corporation. +# Licensed under the MIT License. +# ------------------------------------ + +""" +FILE: sample_translation_with_custom_model_async.py + +DESCRIPTION: + This sample demonstrates how to create a translation operation and apply a custom Azure translation model when performing the translation. + + To set up your containers for translation and generate SAS tokens to your containers (or files) + with the appropriate permissions, see the README. + +USAGE: + python sample_translation_with_custom_model_async.py + + Set the environment variables with your own values before running the sample: + 1) AZURE_DOCUMENT_TRANSLATION_ENDPOINT - the endpoint to your Document Translation resource. + 2) AZURE_DOCUMENT_TRANSLATION_KEY - your Document Translation API key. + 3) AZURE_SOURCE_CONTAINER_URL - the container SAS URL to your source container which has the documents + to be translated. + 4) AZURE_TARGET_CONTAINER_URL - the container SAS URL to your target container where the translated documents + will be written. + 5) AZURE_CUSTOM_MODEL_ID - the ID of your Azure custom translation model (passed as the category_id). 
+"""
+
+import asyncio
+
+
+async def sample_translation_with_custom_model_async():
+    """Start a translation to "es" using a custom model, then print the results.
+
+    Settings come from the environment variables listed in the module docstring.
+    """
+    import os
+    from azure.core.credentials import AzureKeyCredential
+    from azure.ai.translation.document.aio import DocumentTranslationClient
+
+    endpoint = os.environ["AZURE_DOCUMENT_TRANSLATION_ENDPOINT"]
+    key = os.environ["AZURE_DOCUMENT_TRANSLATION_KEY"]
+    source_container_url = os.environ["AZURE_SOURCE_CONTAINER_URL"]
+    target_container_url = os.environ["AZURE_TARGET_CONTAINER_URL"]
+    custom_model_id = os.environ["AZURE_CUSTOM_MODEL_ID"]
+
+    client = DocumentTranslationClient(endpoint, AzureKeyCredential(key))
+    async with client:
+        poller = await client.begin_translation(
+            source_container_url, target_container_url, "es", category_id=custom_model_id
+        )
+        result = await poller.result()
+
+        print("Operation status: {}".format(result.status))
+        print("Operation created on: {}".format(result.created_on))
+        print("Operation last updated on: {}".format(result.last_updated_on))
+        print("Total number of translations on documents: {}".format(result.documents_total_count))
+
+        print("\nOf total documents...")
+        print("{} failed".format(result.documents_failed_count))
+        print("{} succeeded".format(result.documents_succeeded_count))
+
+        doc_results = client.list_all_document_statuses(result.id)
+        async for document in doc_results:
+            print("Document ID: {}".format(document.id))
+            print("Document status: {}".format(document.status))
+            if document.status == "Succeeded":
+                print("Source document location: {}".format(document.source_document_url))
+                print("Translated document location: {}".format(document.translated_document_url))
+                print("Translated to language: {}\n".format(document.translate_to))
+            # NOTE(review): `error` is presumably unset unless the document failed; guard the dereference.
+            elif document.error:
+                print("Error Code: {}, Message: {}\n".format(document.error.code, document.error.message))
+
+
+async def main():
+    """Run the sample coroutine."""
+    await sample_translation_with_custom_model_async()
+
+if __name__ == '__main__':
+    # asyncio.run() creates and closes the loop; get_event_loop() is deprecated for this use.
+    asyncio.run(main())
diff 
--git a/sdk/translation/azure-ai-translation-document/samples/sample_translation_with_custom_model.py b/sdk/translation/azure-ai-translation-document/samples/sample_translation_with_custom_model.py new file mode 100644 index 000000000000..cf73444961b2 --- /dev/null +++ b/sdk/translation/azure-ai-translation-document/samples/sample_translation_with_custom_model.py @@ -0,0 +1,75 @@ +# coding=utf-8 +# ------------------------------------ +# Copyright (c) Microsoft Corporation. +# Licensed under the MIT License. +# ------------------------------------ + +""" +FILE: sample_translation_with_custom_model.py + +DESCRIPTION: + This sample demonstrates how to create a translation operation and apply a custom Azure translation model when performing the translation. + + To set up your containers for translation and generate SAS tokens to your containers (or files) + with the appropriate permissions, see the README. + +USAGE: + python sample_translation_with_custom_model.py + + Set the environment variables with your own values before running the sample: + 1) AZURE_DOCUMENT_TRANSLATION_ENDPOINT - the endpoint to your Document Translation resource. + 2) AZURE_DOCUMENT_TRANSLATION_KEY - your Document Translation API key. + 3) AZURE_SOURCE_CONTAINER_URL - the container SAS URL to your source container which has the documents + to be translated. + 4) AZURE_TARGET_CONTAINER_URL - the container SAS URL to your target container where the translated documents + will be written. + 5) AZURE_CUSTOM_MODEL_ID - the ID of your Azure custom translation model (passed as the category_id). 
+"""
+
+
+def sample_translation_with_custom_model():
+    """Start a translation to "es" using a custom model, then print the results.
+
+    Settings come from the environment variables listed in the module docstring.
+    """
+    import os
+    from azure.core.credentials import AzureKeyCredential
+    from azure.ai.translation.document import DocumentTranslationClient
+
+    endpoint = os.environ["AZURE_DOCUMENT_TRANSLATION_ENDPOINT"]
+    key = os.environ["AZURE_DOCUMENT_TRANSLATION_KEY"]
+    source_container_url = os.environ["AZURE_SOURCE_CONTAINER_URL"]
+    target_container_url = os.environ["AZURE_TARGET_CONTAINER_URL"]
+    custom_model_id = os.environ["AZURE_CUSTOM_MODEL_ID"]
+
+    client = DocumentTranslationClient(endpoint, AzureKeyCredential(key))
+
+    poller = client.begin_translation(
+        source_container_url, target_container_url, "es", category_id=custom_model_id
+    )
+    result = poller.result()
+
+    print("Operation status: {}".format(result.status))
+    print("Operation created on: {}".format(result.created_on))
+    print("Operation last updated on: {}".format(result.last_updated_on))
+    print("Total number of translations on documents: {}".format(result.documents_total_count))
+
+    print("\nOf total documents...")
+    print("{} failed".format(result.documents_failed_count))
+    print("{} succeeded".format(result.documents_succeeded_count))
+
+    doc_results = client.list_all_document_statuses(result.id)
+    for document in doc_results:
+        print("Document ID: {}".format(document.id))
+        print("Document status: {}".format(document.status))
+        if document.status == "Succeeded":
+            print("Source document location: {}".format(document.source_document_url))
+            print("Translated document location: {}".format(document.translated_document_url))
+            print("Translated to language: {}\n".format(document.translate_to))
+        # NOTE(review): `error` is presumably unset unless the document failed; guard the dereference.
+        elif document.error:
+            print("Error Code: {}, Message: {}\n".format(document.error.code, document.error.message))
+
+
+if __name__ == '__main__':
+    sample_translation_with_custom_model()