diff --git a/docs/index.rst b/docs/index.rst
index a8401af0aff4..595fd4703b8d 100644
--- a/docs/index.rst
+++ b/docs/index.rst
@@ -178,6 +178,7 @@ Client
   speech-encoding
   speech-operation
+  speech-result
   speech-sample
   speech-alternative
diff --git a/docs/speech-result.rst b/docs/speech-result.rst
new file mode 100644
index 000000000000..d4759b704199
--- /dev/null
+++ b/docs/speech-result.rst
@@ -0,0 +1,7 @@
+Speech Result
+=============
+
+.. automodule:: google.cloud.speech.result
+  :members:
+  :undoc-members:
+  :show-inheritance:
diff --git a/docs/speech-usage.rst b/docs/speech-usage.rst
index f73475ae57ea..aedd5fa9990c 100644
--- a/docs/speech-usage.rst
+++ b/docs/speech-usage.rst
@@ -171,10 +171,10 @@ speech data to possible text alternatives on the fly.
     ...     sample = client.sample(content=stream,
     ...                            encoding=speech.Encoding.LINEAR16,
     ...                            sample_rate=16000)
-    ...     alternatives = list(client.streaming_recognize(sample))
-    >>> print(alternatives[0].transcript)
+    ...     results = list(client.streaming_recognize(sample))
+    >>> print(results[0].alternatives[0].transcript)
     'hello'
-    >>> print(alternatives[0].confidence)
+    >>> print(results[0].alternatives[0].confidence)
     0.973458576
@@ -196,10 +196,10 @@ See: `Single Utterance`_
     ...                            sample_rate=16000)
     ...     responses = client.streaming_recognize(sample,
     ...                                            single_utterance=True)
-    ...     alternatives = list(responses)
-    >>> print(alternatives[0].transcript)
+    ...     results = list(responses)
+    >>> print(results[0].alternatives[0].transcript)
     hello
-    >>> print(alternatives[0].confidence)
+    >>> print(results[0].alternatives[0].confidence)
     0.96523453546
@@ -214,20 +214,28 @@ If ``interim_results`` is set to :data:`True`, interim results
     ...     sample = client.sample(content=stream,
     ...                            encoding=speech.Encoding.LINEAR16,
     ...                            sample_rate=16000)
-    ...     for alternatives in client.streaming_recognize(sample,
-    ...                                                     interim_results=True):
+    ...     for results in client.streaming_recognize(sample,
+    ...                                               interim_results=True):
     ...         print('=' * 20)
-    ...         print(alternatives[0].transcript)
-    ...         print(alternatives[0].confidence)
+    ...         print(results.alternatives[0].transcript)
+    ...         print(results.alternatives[0].confidence)
+    ...         print(results.is_final)
+    ...         print(results.stability)
     ====================
     'he'
     None
+    False
+    0.113245
     ====================
     'hell'
     None
+    False
+    0.132454
     ====================
     'hello'
     0.973458576
+    True
+    0.982345

 .. _Single Utterance: https://cloud.google.com/speech/reference/rpc/google.cloud.speech.v1beta1#streamingrecognitionconfig
diff --git a/speech/google/cloud/speech/client.py b/speech/google/cloud/speech/client.py
index 94716086b3a2..ce93ad785880 100644
--- a/speech/google/cloud/speech/client.py
+++ b/speech/google/cloud/speech/client.py
@@ -27,6 +27,7 @@
 from google.cloud.speech.connection import Connection
 from google.cloud.speech.encoding import Encoding
 from google.cloud.speech.operation import Operation
+from google.cloud.speech.result import StreamingSpeechResult
 from google.cloud.speech.sample import Sample
@@ -170,7 +171,8 @@ def streaming_recognize(self, sample, language_code=None,
         Streaming recognition requests are limited to 1 minute of audio.
         See: https://cloud.google.com/speech/limits#content

-        Yields: list of :class:`~google.cloud.speech.alternative.Alternatives`
+        Yields: Instance of
+            :class:`~google.cloud.speech.result.StreamingSpeechResult`
                 containing results and metadata from the streaming request.

         :type sample: :class:`~google.cloud.speech.sample.Sample`
@@ -242,8 +244,7 @@ def streaming_recognize(self, sample, language_code=None,
         for response in responses:
             for result in response.results:
                 if result.is_final or interim_results:
-                    yield [Alternative.from_pb(alternative)
-                           for alternative in result.alternatives]
+                    yield StreamingSpeechResult.from_pb(result)

     def sync_recognize(self, sample, language_code=None,
                        max_alternatives=None, profanity_filter=None,
diff --git a/speech/google/cloud/speech/result.py b/speech/google/cloud/speech/result.py
new file mode 100644
index 000000000000..11efb93626b6
--- /dev/null
+++ b/speech/google/cloud/speech/result.py
@@ -0,0 +1,54 @@
+# Copyright 2016 Google Inc.
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+#     http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+
+"""Speech result representations."""
+
+from google.cloud.speech.alternative import Alternative
+
+
+class StreamingSpeechResult(object):
+    """Streaming speech result representation.
+
+    :type alternatives: list
+    :param alternatives: List of
+        :class:`~google.cloud.speech.alternative.Alternative`.
+
+    :type is_final: bool
+    :param is_final: Boolean indicator of results finality.
+
+    :type stability: float
+    :param stability: 0.0-1.0 stability score for the results returned.
+    """
+    def __init__(self, alternatives, is_final=False, stability=0.0):
+        self.alternatives = alternatives
+        self.is_final = is_final
+        self.stability = stability
+
+    @classmethod
+    def from_pb(cls, response):
+        """Factory: construct instance of ``StreamingSpeechResult``.
+
+        :type response: :class:`~google.cloud.grpc.speech.v1beta1\
+            .cloud_speech_pb2.StreamingRecognizeResult`
+        :param response: Instance of ``StreamingRecognizeResult`` protobuf.
+
+        :rtype: :class:`~google.cloud.speech.result.StreamingSpeechResult`
+        :returns: Instance of ``StreamingSpeechResult``.
+        """
+        alternatives = [Alternative.from_pb(alternative)
+                        for alternative in response.alternatives]
+        is_final = response.is_final
+        stability = response.stability
+        return cls(alternatives=alternatives, is_final=is_final,
+                   stability=stability)
diff --git a/speech/unit_tests/test_client.py b/speech/unit_tests/test_client.py
index b108fcaab579..96eb82e729c4 100644
--- a/speech/unit_tests/test_client.py
+++ b/speech/unit_tests/test_client.py
@@ -28,7 +28,7 @@ def _make_result(alternatives=()):
     )


-def _make_streaming_result(alternatives=(), is_final=True):
+def _make_streaming_result(alternatives=(), is_final=True, stability=1.0):
     from google.cloud.grpc.speech.v1beta1 import cloud_speech_pb2

     return cloud_speech_pb2.StreamingRecognitionResult(
@@ -39,6 +39,7 @@ def _make_streaming_result(alternatives=(), is_final=True):
             ) for alternative in alternatives
         ],
         is_final=is_final,
+        stability=stability,
     )
@@ -476,6 +477,7 @@ def test_stream_recognize_interim_results(self):
         from google.cloud.speech import _gax
         from google.cloud.speech.encoding import Encoding
+        from google.cloud.speech.client import StreamingSpeechResult

         stream = BytesIO(b'Some audio data...')
         credentials = _Credentials()
@@ -491,11 +493,13 @@
             'confidence': 0.0123456,
         }]
         first_response = _make_streaming_response(
-            _make_streaming_result([], is_final=False))
+            _make_streaming_result([], is_final=False, stability=0.122435))
         second_response = _make_streaming_response(
-            _make_streaming_result(alternatives, is_final=False))
+            _make_streaming_result(alternatives, is_final=False,
+                                   stability=0.1432343))
         last_response = _make_streaming_response(
-            _make_streaming_result(alternatives, is_final=True))
+            _make_streaming_result(alternatives, is_final=True,
+                                   stability=0.9834534))
         responses = [first_response, second_response, last_response]

         channel_args = []
@@ -521,15 +525,28 @@ def speech_api(channel=None):

         results = list(client.streaming_recognize(sample,
                                                   interim_results=True))
-        self.assertEqual(results[0], [])
-        self.assertEqual(results[1][0].transcript,
+
+        self.assertEqual(len(results), 3)
+        self.assertIsInstance(results[0], StreamingSpeechResult)
+        self.assertEqual(results[0].alternatives, [])
+        self.assertFalse(results[0].is_final)
+        self.assertEqual(results[0].stability, 0.122435)
+        self.assertEqual(results[1].stability, 0.1432343)
+        self.assertFalse(results[1].is_final)
+        self.assertEqual(results[1].alternatives[0].transcript,
                          alternatives[0]['transcript'])
-        self.assertEqual(results[1][0].confidence,
+        self.assertEqual(results[1].alternatives[0].confidence,
                          alternatives[0]['confidence'])
-        self.assertEqual(results[1][1].transcript,
+        self.assertEqual(results[1].alternatives[1].transcript,
                          alternatives[1]['transcript'])
-        self.assertEqual(results[1][1].confidence,
+        self.assertEqual(results[1].alternatives[1].confidence,
                          alternatives[1]['confidence'])
+        self.assertTrue(results[2].is_final)
+        self.assertEqual(results[2].stability, 0.9834534)
+        self.assertEqual(results[2].alternatives[0].transcript,
+                         alternatives[0]['transcript'])
+        self.assertEqual(results[2].alternatives[0].confidence,
+                         alternatives[0]['confidence'])

     def test_stream_recognize(self):
         from io import BytesIO
@@ -582,9 +599,9 @@ def speech_api(channel=None):

         results = list(client.streaming_recognize(sample))
         self.assertEqual(len(results), 1)
-        self.assertEqual(results[0][0].transcript,
+        self.assertEqual(results[0].alternatives[0].transcript,
                          alternatives[0]['transcript'])
-        self.assertEqual(results[0][0].confidence,
+        self.assertEqual(results[0].alternatives[0].confidence,
                          alternatives[0]['confidence'])

     def test_stream_recognize_no_results(self):
diff --git a/system_tests/speech.py b/system_tests/speech.py
index 175674dbc96b..25db94cf98e1 100644
--- a/system_tests/speech.py
+++ b/system_tests/speech.py
@@ -127,15 +127,15 @@ def _make_streaming_request(self, file_obj, single_utterance=True,
                                           single_utterance=single_utterance,
                                           interim_results=interim_results)

-    def _check_results(self, results, num_results=1):
-        self.assertEqual(len(results), num_results)
-        top_result = results[0]
+    def _check_results(self, alternatives, num_results=1):
+        self.assertEqual(len(alternatives), num_results)
+        top_result = alternatives[0]
         self.assertIsInstance(top_result, Alternative)
         self.assertEqual(top_result.transcript,
                          'hello ' + self.ASSERT_TEXT)
         self.assertGreater(top_result.confidence, 0.90)
         if num_results == 2:
-            second_alternative = results[1]
+            second_alternative = alternatives[1]
             self.assertIsInstance(second_alternative, Alternative)
             self.assertEqual(second_alternative.transcript, self.ASSERT_TEXT)
             self.assertIsNone(second_alternative.confidence)
@@ -192,7 +192,7 @@ def test_stream_recognize(self):

         with open(AUDIO_FILE, 'rb') as file_obj:
             for results in self._make_streaming_request(file_obj):
-                self._check_results(results)
+                self._check_results(results.alternatives)

     def test_stream_recognize_interim_results(self):
         if not Config.USE_GAX:
@@ -207,12 +207,12 @@
                                                      interim_results=True)
             responses = list(recognize)
             for response in responses:
-                if response[0].transcript:
-                    self.assertIn(response[0].transcript,
+                if response.alternatives[0].transcript:
+                    self.assertIn(response.alternatives[0].transcript,
                                   extras + self.ASSERT_TEXT)

             self.assertGreater(len(responses), 5)
-            self._check_results(responses[-1])
+            self._check_results(responses[-1].alternatives)

     def test_stream_recognize_single_utterance(self):
         if not Config.USE_GAX:
@@ -221,4 +221,4 @@
         with open(AUDIO_FILE, 'rb') as file_obj:
             for results in self._make_streaming_request(
                     file_obj, single_utterance=False):
-                self._check_results(results)
+                self._check_results(results.alternatives)
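
A minimal consumption sketch (separate from the patch above): it shows how the StreamingSpeechResult objects now yielded by Client.streaming_recognize would be consumed, assuming a local './hello.wav' LINEAR16 file sampled at 16000 Hz and the v1beta1 surface used in the diff.

    # Hypothetical consumption sketch; it only uses the surface shown in the
    # patch above (client.sample, streaming_recognize, StreamingSpeechResult).
    import io

    from google.cloud import speech

    client = speech.Client()
    with io.open('./hello.wav', 'rb') as stream:
        sample = client.sample(content=stream,
                               encoding=speech.Encoding.LINEAR16,
                               sample_rate=16000)
        # Each yielded item is now a StreamingSpeechResult rather than a bare
        # list of alternatives.
        for result in client.streaming_recognize(sample, interim_results=True):
            print(result.is_final, result.stability)
            for alternative in result.alternatives:
                print(alternative.transcript, alternative.confidence)

As in the doctests above, interim results arrive with is_final=False and a stability estimate, and a confidence value is only populated on the final result's alternatives.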