-
Notifications
You must be signed in to change notification settings - Fork 17
/
Assistant.cs
284 lines (233 loc) · 10.3 KB
/
Assistant.cs
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
257
258
259
260
261
262
263
264
265
266
267
268
269
270
271
272
273
274
275
276
277
278
279
280
281
282
283
284
using System;
using System.Collections.Generic;
using System.Threading.Tasks;
using Google.Assistant.Embedded.V1Alpha1;
using Google.Protobuf;
using Grpc.Core;
using NAudio.Wave;
namespace GoogleAssistantWindows
{
/// <summary>
/// Drives voice conversations with the Google Assistant gRPC service
/// (embedded v1alpha1): records microphone audio with NAudio, streams it
/// upstream as <see cref="ConverseRequest"/>s and plays back the AudioOut
/// bytes returned in <see cref="ConverseResponse"/>s.
/// If this documentation was a flow chart it would have been much better:
/// https://developers.google.com/assistant/sdk/reference/rpc/google.assistant.embedded.v1alpha1#google.assistant.embedded.v1alpha1.EmbeddedAssistant
/// </summary>
public class Assistant
{
    /// <summary>Sink for debug text; consoleOnly lets subscribers suppress noisy per-buffer output from the UI.</summary>
    public delegate void DebugOutputDelegate(string debug, bool consoleOnly = false);
    public event DebugOutputDelegate OnDebug;

    /// <summary>Raised whenever the assistant transitions between states (Listening/Processing/Speaking/Inactive).</summary>
    public delegate void AssistantWorkingDelegate(AssistantState state);
    public event AssistantWorkingDelegate OnAssistantStateChanged;

    private Channel _channel;
    private EmbeddedAssistant.EmbeddedAssistantClient _assistant;

    // Duplex gRPC streams for the conversation currently in flight.
    private IClientStreamWriter<ConverseRequest> _requestStream;
    private IAsyncStreamReader<ConverseResponse> _responseStream;

    // gRPC allows only one pending WriteAsync per request stream, so audio
    // captured while a write is in flight is queued in _writeBuffer and
    // drained by WriteAudioData.
    private bool _writing;
    private readonly List<byte[]> _writeBuffer = new List<byte[]>();

    private WaveIn _waveIn;
    private readonly AudioOut _audioOut = new AudioOut();

    // todo tidy this mess of flags up
    private bool _requestStreamAvailable = false;    // true between the opening config request and CompleteAsync()
    private bool _assistantResponseReceived = false; // true once the assistant recognised any spoken text
    private bool _sendSpeech = false;                // true while captured mic audio should be forwarded upstream
    private bool _followOn = false;                  // assistant expects a follow-up; restart conversation after playback

    public Assistant()
    {
        _audioOut.OnAudioPlaybackStateChanged += OnAudioPlaybackStateChanged;
    }

    /// <summary>
    /// Creates the gRPC channel and client for the signed-in user.
    /// Must be called (IsInitialised() becomes true) before NewConversation().
    /// </summary>
    public void InitAssistantForUser(ChannelCredentials channelCreds)
    {
        _channel = new Channel(Const.AssistantEndpoint, channelCreds);
        _assistant = new EmbeddedAssistant.EmbeddedAssistantClient(_channel);
    }

    /// <summary>
    /// Opens a new Converse stream, sends the opening config request and
    /// starts recording the microphone. async void is deliberate here: the
    /// method is invoked fire-and-forget from event contexts, so failures
    /// are reported via OnDebug instead of propagating.
    /// </summary>
    public async void NewConversation()
    {
        try
        {
            OnAssistantStateChanged?.Invoke(AssistantState.Listening);

            _followOn = false;
            _assistantResponseReceived = false;

            AsyncDuplexStreamingCall<ConverseRequest, ConverseResponse> converse = _assistant.Converse();
            _requestStream = converse.RequestStream;
            _responseStream = converse.ResponseStream;

            OnDebug?.Invoke("New Conversation - New Config Request");

            // Once this opening request is issued if its not followed by audio an error of
            // 'code: 14, message: Service Unavaible.' comes back, really not helpful Google!
            await _requestStream.WriteAsync(CreateNewRequest());
            _requestStreamAvailable = true;
            ResetSendingAudio(true);

            // note recreating the WaveIn each time otherwise the recording just stops on follow ups
            _waveIn = new WaveIn { WaveFormat = new WaveFormat(Const.SampleRateHz, 1) };
            _waveIn.DataAvailable += ProcessInAudio;
            _waveIn.StartRecording();

            await WaitForResponse();
        }
        catch (Exception ex)
        {
            Console.WriteLine(ex.Message);
            OnDebug?.Invoke($"Error {ex.Message}");
            StopRecording();
        }
    }

    /// <summary>
    /// Builds the opening request of a conversation: the audio in/out config.
    /// All subsequent requests on the stream carry only AudioIn bytes.
    /// </summary>
    private ConverseRequest CreateNewRequest()
    {
        var converseRequest = new ConverseRequest();

        var audioIn = new AudioInConfig()
        {
            Encoding = AudioInConfig.Types.Encoding.Linear16,
            SampleRateHertz = Const.SampleRateHz
        };
        var audioOut = new AudioOutConfig()
        {
            Encoding = AudioOutConfig.Types.Encoding.Linear16,
            SampleRateHertz = Const.SampleRateHz,
            VolumePercentage = 75
        };

        // Empty ConversationState starts a brand-new conversation rather than resuming one.
        ConverseState state = new ConverseState() { ConversationState = ByteString.Empty };
        converseRequest.Config = new ConverseConfig() { AudioInConfig = audioIn, AudioOutConfig = audioOut, ConverseState = state };

        return converseRequest;
    }

    /// <summary>
    /// Stops and disposes the microphone capture and completes the gRPC
    /// request stream. Safe to call when recording is already stopped.
    /// </summary>
    private void StopRecording()
    {
        if (_waveIn != null)
        {
            OnDebug?.Invoke("Stop Recording");
            _waveIn.StopRecording();
            _waveIn.Dispose();
            _waveIn = null;

            OnAssistantStateChanged?.Invoke(AssistantState.Processing);

            OnDebug?.Invoke("Send Request Complete");
            // Flag first so in-flight audio writers stop before the stream is completed.
            _requestStreamAvailable = false;
            _requestStream.CompleteAsync();
        }
    }

    /// <summary>
    /// NAudio DataAvailable handler: forwards captured audio to the request stream.
    /// </summary>
    private void ProcessInAudio(object sender, WaveInEventArgs e)
    {
        OnDebug?.Invoke($"Process Audio {e.Buffer.Length} SendSpeech={_sendSpeech} Writing={_writing}", true);
        if (_sendSpeech)
        {
            // BUGFIX: NAudio reuses e.Buffer between callbacks and only the first
            // e.BytesRecorded bytes are valid, so take a private copy before
            // queuing/sending. Passing the raw buffer reference risks streaming
            // stale or partially-filled audio to the assistant.
            var audio = new byte[e.BytesRecorded];
            Buffer.BlockCopy(e.Buffer, 0, audio, 0, e.BytesRecorded);

            // cannot do more than one write at a time so if its writing already add the new data to the queue
            if (_writing)
                _writeBuffer.Add(audio);
            else
                _ = WriteAudioData(audio); // fire-and-forget; completion tracked via _writing
        }
    }

    /// <summary>
    /// Writes one audio buffer and then drains anything queued while the
    /// write was in flight. Every write is guarded so nothing is sent after
    /// RequestComplete (which would raise a gRPC error).
    /// </summary>
    private async Task WriteAudioData(byte[] bytes)
    {
        _writing = true;

        if (_requestStreamAvailable && _sendSpeech)
            await WriteAudioIn(bytes);

        while (_writeBuffer.Count > 0)
        {
            var buffer = _writeBuffer[0];
            _writeBuffer.RemoveAt(0);

            if (_requestStreamAvailable && _sendSpeech)
            {
                // don't write after the RequestComplete is sent or get an gRPC error.
                await WriteAudioIn(buffer);
            }
        }

        _writing = false;
    }

    /// <summary>Sends one AudioIn chunk on the open request stream.</summary>
    private async Task WriteAudioIn(byte[] buffer)
    {
        OnDebug?.Invoke("Write Audio " + buffer.Length, true);
        var request = new ConverseRequest() { AudioIn = ByteString.CopyFrom(buffer) };
        await _requestStream.WriteAsync(request);
    }

    /// <summary>
    /// Pumps the response stream until the server closes it. Each response
    /// element carries one of Result, AudioOut or EventType.
    /// </summary>
    private async Task WaitForResponse()
    {
        while (await _responseStream.MoveNext())
        {
            ConverseResponse currentResponse = _responseStream.Current;

            // Debug output the whole response, useful for.. debugging.
            OnDebug?.Invoke(ResponseToOutput(currentResponse));

            // EndOfUtterance: the assistant has recognised something, stop sending audio.
            if (currentResponse.EventType == ConverseResponse.Types.EventType.EndOfUtterance)
                ResetSendingAudio(false);

            if (currentResponse.AudioOut != null)
                _audioOut.AddBytesToPlay(currentResponse.AudioOut.AudioData.ToByteArray());

            if (currentResponse.Result != null)
            {
                // if the assistant has recognised something, flag this so the failure notification isn't played
                if (!String.IsNullOrEmpty(currentResponse.Result.SpokenRequestText))
                    _assistantResponseReceived = true;

                switch (currentResponse.Result.MicrophoneMode)
                {
                    // this is the end of the current conversation
                    case ConverseResult.Types.MicrophoneMode.CloseMicrophone:
                        StopRecording();

                        // play failure notification if nothing recognised.
                        if (!_assistantResponseReceived)
                        {
                            _audioOut.PlayNegativeNotification();
                            OnAssistantStateChanged?.Invoke(AssistantState.Inactive);
                        }
                        break;

                    case ConverseResult.Types.MicrophoneMode.DialogFollowOn:
                        // stop recording as the follow on is in a whole new conversation, so may as well restart the same flow
                        StopRecording();
                        _followOn = true;
                        break;
                }
            }
        }

        OnDebug?.Invoke("Response End");
        // if we've received any audio... play it.
        _audioOut.Play();
    }

    /// <summary>Resets the audio write pipeline and sets whether mic audio should be forwarded.</summary>
    private void ResetSendingAudio(bool send)
    {
        _writing = false;
        _writeBuffer.Clear();
        _sendSpeech = send;
    }

    /// <summary>
    /// Playback state callback: reports Speaking while audio plays; on stop,
    /// either restarts the conversation (follow-on) or goes Inactive.
    /// </summary>
    private void OnAudioPlaybackStateChanged(bool started)
    {
        if (started)
        {
            OnAssistantStateChanged?.Invoke(AssistantState.Speaking);
        }
        else
        {
            // stopped
            if (_followOn)
                NewConversation();
            else
                OnAssistantStateChanged?.Invoke(AssistantState.Inactive);
        }
    }

    /// <summary>Shuts the gRPC channel down and drops all conversation state.</summary>
    public void Shutdown()
    {
        if (_channel != null)
        {
            _channel.ShutdownAsync();
            _requestStream = null;
            _responseStream = null;
            _assistant = null;
        }
    }

    /// <summary>Formats a response element for debug output (one of AudioOut/Error/Result/EventType).</summary>
    private string ResponseToOutput(ConverseResponse currentResponse)
    {
        if (currentResponse.AudioOut != null)
            return $"Response - AudioOut {currentResponse.AudioOut.AudioData.Length}";
        if (currentResponse.Error != null)
            return $"Response - Error:{currentResponse.Error}";
        if (currentResponse.Result != null)
            return $"Response - Result:{currentResponse.Result}";
        if (currentResponse.EventType != ConverseResponse.Types.EventType.Unspecified)
            return $"Response - EventType:{currentResponse.EventType}";
        return "Response Empty?";
    }

    /// <summary>True once InitAssistantForUser has created the gRPC client.</summary>
    public bool IsInitialised() => _assistant != null;
}
/// <summary>
/// States reported through Assistant.OnAssistantStateChanged, describing
/// where the assistant is in a conversation.
/// </summary>
public enum AssistantState
{
    Inactive,   // no conversation in progress
    Processing, // recording stopped; waiting on the assistant's response
    Listening,  // microphone open, streaming the user's speech
    Speaking,   // playing back the assistant's audio response
}
}