Skip to content

Commit

Permalink
Merge pull request #2644 from daspecster/speech-streaming-part-3
Browse files Browse the repository at this point in the history
Add _stream_requests() for managing speech streaming configuration
  • Loading branch information
daspecster authored Oct 31, 2016
2 parents 8b22482 + 702f0bd commit 3b3c9ce
Show file tree
Hide file tree
Showing 3 changed files with 146 additions and 2 deletions.
76 changes: 76 additions & 0 deletions packages/google-cloud-speech/google/cloud/speech/_gax.py
Original file line number Diff line number Diff line change
Expand Up @@ -145,6 +145,82 @@ def sync_recognize(self, sample, language_code=None, max_alternatives=None,
raise ValueError('More than one result or none returned from API.')


def _stream_requests(sample, language_code=None, max_alternatives=None,
                     profanity_filter=None, speech_context=None,
                     single_utterance=None, interim_results=None):
    """Yield the sequence of requests for one streaming recognition call.

    The first item yielded is the configuration request built by
    ``_make_streaming_request``.  Every following item is a
    ``StreamingRecognizeRequest`` wrapping one chunk of audio read from
    ``sample.content``, ``sample.chunk_size`` at a time.  The generator
    finishes as soon as a read returns nothing.

    :type sample: :class:`~google.cloud.speech.sample.Sample`
    :param sample: Instance of ``Sample`` containing audio information.
                   Its ``content`` must expose a ``read(size)`` method.

    :type language_code: str
    :param language_code: (Optional) The language of the supplied audio as
                          BCP-47 language tag. Example: ``'en-GB'``.
                          If omitted, defaults to ``'en-US'``.

    :type max_alternatives: int
    :param max_alternatives: (Optional) Maximum number of recognition
                             hypotheses to be returned. The server may
                             return fewer than maxAlternatives.
                             Valid values are 0-30. A value of 0 or 1
                             will return a maximum of 1. Defaults to 1.

    :type profanity_filter: bool
    :param profanity_filter: (Optional) If True, the server will attempt to
                             filter out profanities, replacing all but the
                             initial character in each filtered word with
                             asterisks, e.g. ``'f***'``. If False or
                             omitted, profanities won't be filtered out.

    :type speech_context: list
    :param speech_context: (Optional) A list of strings (max 50) containing
                           words and phrases "hints" so that the speech
                           recognition is more likely to recognize them.
                           This can be used to improve the accuracy for
                           specific words and phrases. This can also be
                           used to add new words to the vocabulary of the
                           recognizer.

    :type single_utterance: bool
    :param single_utterance: (Optional) If false or omitted, the recognizer
                             will perform continuous recognition
                             (continuing to process audio even if the user
                             pauses speaking) until the client closes the
                             output stream (gRPC API) or when the maximum
                             time limit has been reached. Multiple
                             SpeechRecognitionResults with the is_final
                             flag set to true may be returned.
                             If true, the recognizer will detect a single
                             spoken utterance. When it detects that the
                             user has paused or stopped speaking, it will
                             return an END_OF_UTTERANCE event and cease
                             recognition. It will return no more than one
                             SpeechRecognitionResult with the is_final flag
                             set to true.

    :type interim_results: bool
    :param interim_results: (Optional) If true, interim results (tentative
                            hypotheses) may be returned as they become
                            available (these interim results are indicated
                            with the is_final=false flag). If false or
                            omitted, only is_final=true result(s) are
                            returned.

    :rtype: generator
    :returns: Generator yielding the configuration request followed by
              one audio request per chunk read from ``sample.content``.
    """
    # The configuration request has to lead the stream and must not carry
    # any audio data, so it is emitted before anything is read.
    yield _make_streaming_request(
        sample, language_code=language_code,
        max_alternatives=max_alternatives,
        profanity_filter=profanity_filter, speech_context=speech_context,
        single_utterance=single_utterance, interim_results=interim_results)

    # Read lazily: each chunk is only pulled from the source once the
    # consumer asks for the next request.
    audio_chunk = sample.content.read(sample.chunk_size)
    while audio_chunk:
        yield StreamingRecognizeRequest(audio_content=audio_chunk)
        audio_chunk = sample.content.read(sample.chunk_size)


def _make_streaming_request(sample, language_code,
max_alternatives, profanity_filter,
speech_context, single_utterance,
Expand Down
9 changes: 9 additions & 0 deletions packages/google-cloud-speech/google/cloud/speech/sample.py
Original file line number Diff line number Diff line change
Expand Up @@ -68,6 +68,15 @@ def __init__(self, content=None, source_uri=None,
else:
raise ValueError('Invalid encoding: %s' % (encoding,))

@property
def chunk_size(self):
    """Size of each chunk sent over gRPC while streaming.

    Computed as one tenth of ``sample_rate``, truncated to an integer
    (intended to correspond to ~100ms of audio).

    :rtype: int
    :returns: Optimized chunk size.
    """
    tenth_of_rate = self.sample_rate / 10.0
    return int(tenth_of_rate)

@property
def source_uri(self):
"""Google Cloud Storage URI of audio source.
Expand Down
63 changes: 61 additions & 2 deletions packages/google-cloud-speech/unit_tests/test__gax.py
Original file line number Diff line number Diff line change
Expand Up @@ -15,10 +15,10 @@
import unittest


class TestSpeechGAX(unittest.TestCase):
class TestSpeechGAXMakeRequests(unittest.TestCase):
SAMPLE_RATE = 16000
HINTS = ['hi']
AUDIO_CONTENT = '/9j/4QNURXhpZgAASUkq'
AUDIO_CONTENT = b'/9j/4QNURXhpZgAASUkq'

def _callFUT(self, sample, language_code, max_alternatives,
profanity_filter, speech_context, single_utterance,
Expand Down Expand Up @@ -78,3 +78,62 @@ def test_ctor(self):
self.assertEqual(config.max_alternatives, max_alternatives)
self.assertTrue(config.profanity_filter)
self.assertEqual(config.speech_context.phrases, self.HINTS)


class TestSpeechGAXMakeRequestsStream(unittest.TestCase):
    """Unit tests for the ``_stream_requests`` request generator."""

    SAMPLE_RATE = 16000
    HINTS = ['hi']
    AUDIO_CONTENT = b'/9j/4QNURXhpZgAASUkq'

    def _callFUT(self, sample, language_code, max_alternatives,
                 profanity_filter, speech_context, single_utterance,
                 interim_results):
        """Invoke ``_stream_requests`` with every option passed by keyword.

        The import is done lazily so that importing this test module does
        not require the function under test to be importable.
        """
        from google.cloud.speech._gax import _stream_requests

        options = {
            'language_code': language_code,
            'max_alternatives': max_alternatives,
            'profanity_filter': profanity_filter,
            'speech_context': speech_context,
            'single_utterance': single_utterance,
            'interim_results': interim_results,
        }
        return _stream_requests(sample=sample, **options)

    def test_stream_requests(self):
        """A short sample yields one config request and one audio request."""
        from io import BytesIO
        from google.cloud import speech
        from google.cloud.speech.sample import Sample
        from google.cloud.grpc.speech.v1beta1.cloud_speech_pb2 import (
            SpeechContext, StreamingRecognitionConfig,
            StreamingRecognizeRequest)

        audio_stream = BytesIO(self.AUDIO_CONTENT)
        sample = Sample(content=audio_stream,
                        encoding=speech.Encoding.FLAC,
                        sample_rate=self.SAMPLE_RATE)
        hints = SpeechContext(phrases=self.HINTS)

        # Positional order: sample, language_code, max_alternatives,
        # profanity_filter, speech_context, single_utterance,
        # interim_results.
        requests = list(
            self._callFUT(sample, 'US-en', 2, True, hints, True, False))

        for request in requests:
            self.assertIsInstance(request, StreamingRecognizeRequest)

        # AUDIO_CONTENT is far smaller than one chunk, so the whole payload
        # arrives in a single audio request after the config request.
        self.assertEqual(len(requests), 2)
        config_request, audio_request = requests

        # This isn't set by _make_streaming_request().
        # The first request can only have `streaming_config` set.
        # The following requests can only have `audio_content` set.
        self.assertEqual(config_request.audio_content, b'')
        self.assertEqual(audio_request.audio_content, self.AUDIO_CONTENT)
        self.assertIsInstance(config_request.streaming_config,
                              StreamingRecognitionConfig)

0 comments on commit 3b3c9ce

Please sign in to comment.