From 595c724d172bd61fd5a9c61d14265076c1118562 Mon Sep 17 00:00:00 2001 From: Laurent Mazuel Date: Fri, 12 Jan 2018 10:51:02 -0800 Subject: [PATCH] Add TextAnalytics --- README.md | 4 +- requirements.txt | 1 + samples/language/text_analytics_samples.py | 125 +++++++++++++++++++++ 3 files changed, 129 insertions(+), 1 deletion(-) create mode 100644 samples/language/text_analytics_samples.py diff --git a/README.md b/README.md index 2e59799..a2f394a 100644 --- a/README.md +++ b/README.md @@ -9,6 +9,7 @@ This project framework provides examples for the following services: ### Language * Using the **Bing Spell Check SDK** [azure-cognititiveservices-language-spellcheck](http://pypi.python.org/pypi/azure-cognititiveservices-language-spellcheck) for the [Bing Spell Check API](https://azure.microsoft.com/services/cognitive-services/spell-check/) +* Using the **Text Analytics SDK** [azure-cognitiveservices-language-textanalytics](http://pypi.python.org/pypi/azure-cognitiveservices-language-textanalytics) for the [Text Analytics API](https://azure.microsoft.com/services/cognitive-services/text-analytics/) ### Search @@ -63,6 +64,7 @@ We provide several meta-packages to help you install several packages at a time. ``` 4. Set up the environment variable `SPELLCHECK_SUBSCRIPTION_KEY` with your key if you want to execute SpellCheck tests. +4. Set up the environment variable `TEXTANALYTICS_SUBSCRIPTION_KEY` with your key if you want to execute TextAnalytics tests. You may also override `TEXTANALYTICS_LOCATION` (`westcentralus` by default). 3. Set up the environment variable `CUSTOMSEARCH_SUBSCRIPTION_KEY` with your key if you want to execute CustomSearch tests. 3. Set up the environment variable `ENTITYSEARCH_SUBSCRIPTION_KEY` with your key if you want to execute EntitySearch tests. 4. Set up the environment variable `IMAGESEARCH_SUBSCRIPTION_KEY` with your key if you want to execute ImageSearch tests. 
@@ -78,7 +80,7 @@ To run the complete demo, execute `python example.py` To run each individual demo, point directly to the file. For example (i.e. not complete list): -2. `python samples/language/spellchack_samples.py` +2. `python samples/language/spellcheck_samples.py` 1. `python samples/search/entity_search_samples.py` 2. `python samples/search/video_search_samples.py` 
diff --git a/requirements.txt b/requirements.txt
index ad5a535..867889b 100644
--- a/requirements.txt
+++ b/requirements.txt
@@ -1,4 +1,5 @@
 azure-cognitiveservices-language-spellcheck
+azure-cognitiveservices-language-textanalytics
 azure-cognitiveservices-search-customsearch
 azure-cognitiveservices-search-entitysearch
 azure-cognitiveservices-search-imagesearch
diff --git a/samples/language/text_analytics_samples.py b/samples/language/text_analytics_samples.py
new file mode 100644
index 0000000..866a9f0
--- /dev/null
+++ b/samples/language/text_analytics_samples.py
@@ -0,0 +1,125 @@
+import os
+
+from azure.cognitiveservices.language.textanalytics import TextAnalyticsAPI
+from msrest.authentication import CognitiveServicesCredentials
+
+SUBSCRIPTION_KEY_ENV_NAME = "TEXTANALYTICS_SUBSCRIPTION_KEY"
+TEXTANALYTICS_LOCATION = os.environ.get("TEXTANALYTICS_LOCATION", "westcentralus")
+
+def language_extraction(subscription_key):
+    """Detect the language of three sample documents and print each result.
+
+    ``subscription_key`` is the Text Analytics key; errors are printed, not raised, and nothing is returned.
+    """
+    client = TextAnalyticsAPI(TEXTANALYTICS_LOCATION, CognitiveServicesCredentials(subscription_key))
+
+    try:
+        documents = [{
+            'id': 1,
+            'text': 'This is a document written in English.'
+        }, {
+            'id': 2,
+            'text': 'Este es un document escrito en Español.'
+        }, {
+            'id': 3,
+            'text': '这是一个用中文写的文件'
+        }]
+        for document in documents:
+            print("Asking language detection on '{}' (id: {})".format(document['text'], document['id']))
+        response = client.detect_language(
+            documents=documents
+        )
+
+        for document in response.documents:
+            print("Found out that {} is {}".format(document.id, document.detected_languages[0].name))
+
+    except Exception as err:
+        print("Encountered exception. {}".format(err))
+
+def key_phrases(subscription_key):
+    """Extract and print the key phrases of four sample documents.
+
+    Key phrases are strings denoting the main talking points of a text; errors are printed, not raised.
+    """
+    client = TextAnalyticsAPI(TEXTANALYTICS_LOCATION, CognitiveServicesCredentials(subscription_key))
+
+    try:
+        documents = [{
+            'language': 'ja',
+            'id': 1,
+            'text': "猫は幸せ"
+        }, {
+            'language': 'de',
+            'id': 2,
+            'text': "Fahrt nach Stuttgart und dann zum Hotel zu Fu."
+        }, {
+            'language': 'en',
+            'id': 3,
+            'text': "My cat is stiff as a rock."
+        }, {
+            'language': 'es',
+            'id': 4,
+            'text': "A mi me encanta el fútbol!"
+        }]
+
+        for document in documents:
+            print("Asking key-phrases on '{}' (id: {})".format(document['text'], document['id']))
+
+        response = client.key_phrases(
+            documents=documents
+        )
+
+        for document in response.documents:
+            print("Found out that in document {}, key-phrases are:".format(document.id))
+            for phrase in document.key_phrases:
+                print("- {}".format(phrase))
+
+    except Exception as err:
+        print("Encountered exception. {}".format(err))
+
+
+def sentiment(subscription_key):
+    """Score and print the sentiment of four sample documents.
+
+    Scores close to 1 indicate positive sentiment, scores close to 0 negative; errors are printed, not raised.
+    """
+    client = TextAnalyticsAPI(TEXTANALYTICS_LOCATION, CognitiveServicesCredentials(subscription_key))
+
+    try:
+        documents = [{
+            'language': 'en',
+            'id': 0,
+            'text': "I had the best day of my life."
+        }, {
+            'language': 'en',
+            'id': 1,
+            'text': "This was a waste of my time. The speaker put me to sleep."
+        }, {
+            'language': 'es',
+            'id': 2,
+            'text': "No tengo dinero ni nada que dar..."
+        }, {
+            'language': 'it',
+            'id': 3,
+            'text': "L'hotel veneziano era meraviglioso. È un bellissimo pezzo di architettura."
+        }]
+
+        for document in documents:
+            print("Asking sentiment on '{}' (id: {})".format(document['text'], document['id']))
+
+        response = client.sentiment(
+            documents=documents
+        )
+
+        for document in response.documents:
+            print("Found out that in document {}, sentiment score is {}:".format(document.id, document.score))
+
+    except Exception as err:
+        print("Encountered exception. {}".format(err))
+
+
+if __name__ == "__main__":
+    import sys, os.path
+    sys.path.append(os.path.abspath(os.path.join(__file__, "..", "..")))  # parent of this file's directory; presumably where tools.py lives
+    from tools import execute_samples
+    execute_samples(globals(), SUBSCRIPTION_KEY_ENV_NAME)