From 863e59ed0c30790eb7172054b98e188e11da7eea Mon Sep 17 00:00:00 2001 From: Senthil Nathan Date: Wed, 24 Feb 2021 09:52:38 -0500 Subject: [PATCH] Changes done for v2.2.9. --- README.md | 3 +- com.ibm.streamsx.sttgateway/CHANGELOG.md | 5 + .../IBMVoiceGatewaySource.xml | 45 +- .../IBMVoiceGatewaySource_cpp.cgt | 117 ++-- com.ibm.streamsx.sttgateway/info.xml | 2 +- .../STTGatewayUtils.spl | 20 +- samples/STTGatewayUtils/info.xml | 2 +- .../VgwDataRouter.spl | 380 ++++++------ samples/VgwDataRouter/data/.gitignore | 8 + samples/VgwDataRouter/info.xml | 6 +- .../VgwDataRouterToWatsonS2T.spl | 547 +++++++++-------- .../VgwDataRouterToWatsonS2T/data/.gitignore | 8 + samples/VgwDataRouterToWatsonS2T/info.xml | 4 +- .../VgwDataRouterToWatsonSTT.spl | 560 ++++++++--------- .../VgwDataRouterToWatsonSTT/data/.gitignore | 8 + samples/VgwDataRouterToWatsonSTT/info.xml | 6 +- .../VoiceGatewayToStreamsToWatsonS2T.spl | 561 ++++++++--------- .../VoiceGatewayToStreamsToWatsonS2T/info.xml | 4 +- .../VoiceGatewayToStreamsToWatsonSTT.spl | 571 +++++++++--------- .../VoiceGatewayToStreamsToWatsonSTT/info.xml | 4 +- sttgateway-tech-brief.txt | 2 +- 21 files changed, 1472 insertions(+), 1391 deletions(-) create mode 100644 samples/VgwDataRouter/data/.gitignore create mode 100644 samples/VgwDataRouterToWatsonS2T/data/.gitignore create mode 100644 samples/VgwDataRouterToWatsonSTT/data/.gitignore diff --git a/README.md b/README.md index 7fe2d0a..d4104b7 100644 --- a/README.md +++ b/README.md @@ -142,8 +142,7 @@ st submitjob -d -i output/co If you are planning to ingest the speech data from live voice calls, then you can invoke the **IBMVoiceGatewaySource** operator as shown below. 
``` -(stream BinarySpeechData as BSD; - stream EndOfCallSignal as EOCS) as VoiceGatewayInferface = +(stream BinarySpeechData as BSD) as VoiceGatewayInferface = IBMVoiceGatewaySource() { logic state: { diff --git a/com.ibm.streamsx.sttgateway/CHANGELOG.md b/com.ibm.streamsx.sttgateway/CHANGELOG.md index 6b2652b..11fd27b 100644 --- a/com.ibm.streamsx.sttgateway/CHANGELOG.md +++ b/com.ibm.streamsx.sttgateway/CHANGELOG.md @@ -1,5 +1,10 @@ # Changes +## v2.2.9 +* Feb/11/2021 +* Removed the EndOfCallSignal (EOCS) output stream completely to avoid port locks and out of order processing between the binary speech data (BSD) and the EOCS tuples. Now, a single output stream will deliver both the BSD and EOCS tuples in the correct sequence for downstream processing. +* The change described above triggered foundational changes in the IBMVoiceGatewaySource operator and in the examples that invoke that operator. + ## v2.2.8 * Feb/07/2021 * Modified the IBMVoiceGatewaySource operator to handle the exception thrown when a given websocket connection handle can't be found in the connection metadata map. diff --git a/com.ibm.streamsx.sttgateway/com.ibm.streamsx.sttgateway.watson/IBMVoiceGatewaySource/IBMVoiceGatewaySource.xml b/com.ibm.streamsx.sttgateway/com.ibm.streamsx.sttgateway.watson/IBMVoiceGatewaySource/IBMVoiceGatewaySource.xml index 410d8f2..cc5b4b4 100644 --- a/com.ibm.streamsx.sttgateway/com.ibm.streamsx.sttgateway.watson/IBMVoiceGatewaySource/IBMVoiceGatewaySource.xml +++ b/com.ibm.streamsx.sttgateway/com.ibm.streamsx.sttgateway.watson/IBMVoiceGatewaySource/IBMVoiceGatewaySource.xml @@ -304,6 +304,25 @@ information and assign that meta data values to other optional attributes in this output port. + In addition to sending the binary speech data on this port, this operator will + also send End Of Call Signal (EOCS) on this port whenever a particular + voice channel of an ongoing call closes its WebSocket connection. 
So, this operator + produces periodic output tuples to give an indication about the end of a + specific speaker (i.e. channel) in a voice call that was in progress moments ago for + the given IBM Voice Gateway session id. When it sends EOCS, it only sets values to + certain attributes of the output stream as shown here. + rstring vgwSessionId, boolean isCustomerSpeechData, int32 vgwVoiceChannelNumber, boolean endOfCallSignal + This source operator will set the appropriate values for these attributes to + indicate which particular speaker (i.e. voice channel number) of a given voice call + (i.e. session id) just ended the conversation. This tuple also has an attribute + (i.e. isCustomerSpeechData) to tell whether that recently ended voice channel + carried the speech data of a customer or an agent. More importantly, it will set + a value of true for the endOfCallSignal attribute to indicate that it is an EOCS message and not a + binary speech message. It was decided to use the same output port to send both of these + messages in order to avoid any port locks and/or tuple ordering issues that may happen if we choose to + do it using two different output ports. Downstream operators can make use of this + "End Of Voice Call" signal as they see fit. + **There are multiple available output functions**, and output attributes can also be assigned values with any SPL expression that evaluates to the proper type. @@ -319,31 +338,7 @@ false 1 false - - - - - This port produces periodic output tuples to give an indication about the end of a - specific speaker (i.e. channel) in a voice call that was in progress moments ago for - the given IBM Voice Gateway session id. The schema for this port must have these - three attributes with their correct data types as shown here. - rstring vgwSessionId, boolean isCustomerSpeechData, int32 vgwVoiceChannelNumber - This source operator will set the appropriate values for these attributes to - indicate which particular speaker (i.e. 
voice channel number) of a given voice call - (i.e. session id) just ended the conversation. This tuple also has an attribute - (i.e. isCustomerSpeechData) to tell whether that recently ended voice channel - carried the speech data of a customer or an agent. Downstream operators can make - use of this "End Of Voice Call" signal as they see fit. - - Expression - false - false - true - Free - false - 1 - false - + diff --git a/com.ibm.streamsx.sttgateway/com.ibm.streamsx.sttgateway.watson/IBMVoiceGatewaySource/IBMVoiceGatewaySource_cpp.cgt b/com.ibm.streamsx.sttgateway/com.ibm.streamsx.sttgateway.watson/IBMVoiceGatewaySource/IBMVoiceGatewaySource_cpp.cgt index e5123b9..c80e214 100644 --- a/com.ibm.streamsx.sttgateway/com.ibm.streamsx.sttgateway.watson/IBMVoiceGatewaySource/IBMVoiceGatewaySource_cpp.cgt +++ b/com.ibm.streamsx.sttgateway/com.ibm.streamsx.sttgateway.watson/IBMVoiceGatewaySource/IBMVoiceGatewaySource_cpp.cgt @@ -8,7 +8,7 @@ /* ============================================================ First created on: Sep/20/2019 -Last modified on: Feb/07/2021 +Last modified on: Feb/09/2021 Please refer to the sttgateway-tech-brief.txt file in the top-level directory of this toolkit to read about @@ -87,6 +87,15 @@ using websocketpp::lib::bind; my $audioOutputAsBlob = undef; my $outputAttrs1 = $outputPort1->getAttributes(); my $speechAttributeFound = 0; + my $vgwSessionIdAsString = undef; + my $vgwSessionIdAttributeFound = 0; + my $isCustomerSpeechDataAsBoolean = undef; + my $isCustomerSpeechDataAttributeFound = 0; + my $vgwVoiceChannelNumberAsInt32 = undef; + my $vgwVoiceChannelNumberAttributeFound = 0; + my $endOfCallSignalAsBoolean = undef; + my $endOfCallSignalAttributeFound = 0; + foreach my $outputAttr (@$outputAttrs1) { my $outAttrName = $outputAttr->getName(); @@ -100,46 +109,20 @@ using websocketpp::lib::bind; $audioOutputAsBlob = 1; } } - } - - if ($speechAttributeFound == 0 ) { - 
SPL::CodeGen::exitln(SttGatewayResource::STTGW_OUT_ATTRIBUTE_CHECK1("IBMVoiceGatewaySource", "speech"), - $model->getContext()->getSourceLocation()); - } - - if (!(defined($audioOutputAsBlob))) { - SPL::CodeGen::exitln(SttGatewayResource::STTGW_OUT_ATTRIBUTE_TYPE_CHECK1("IBMVoiceGatewaySource", "speech", "blob"), - $model->getContext()->getSourceLocation()); - } - - # Check the output port number 1 i.e. the second output port. - my $outputPort2 = $model->getOutputPortAt(1); - my $outputTupleName2 = $outputPort2->getCppTupleName(); - my $vgwSessionIdAsString = undef; - my $outputAttrs2 = $outputPort2->getAttributes(); - my $vgwSessionIdAttributeFound = 0; - my $isCustomerSpeechDataAsBoolean = undef; - my $isCustomerSpeechDataAttributeFound = 0; - my $vgwVoiceChannelNumberAsInt32 = undef; - my $vgwVoiceChannelNumberAttributeFound = 0; - - foreach my $outputAttr2 (@$outputAttrs2) { - my $outAttrName2 = $outputAttr2->getName(); - my $outAttrType2 = $outputAttr2->getSPLType(); - if ($outAttrName2 eq "vgwSessionId") { + if ($outAttrName eq "vgwSessionId") { $vgwSessionIdAttributeFound = 1; - if ($outAttrType2 eq "rstring") { + if ($outAttrType eq "rstring") { # This tuple attribute will carry the Voice Gateway Session Id. $vgwSessionIdAsString = 1; } } - if ($outAttrName2 eq "isCustomerSpeechData") { + if ($outAttrName eq "isCustomerSpeechData") { $isCustomerSpeechDataAttributeFound = 1; - if ($outAttrType2 eq "boolean") { + if ($outAttrType eq "boolean") { # This tuple attribute will indicate whether the # given channel of a given voice call carried the # speech data of a customer or an agent. @@ -147,17 +130,38 @@ using websocketpp::lib::bind; } } - if ($outAttrName2 eq "vgwVoiceChannelNumber") { + if ($outAttrName eq "vgwVoiceChannelNumber") { $vgwVoiceChannelNumberAttributeFound = 1; - if ($outAttrType2 eq "int32") { + if ($outAttrType eq "int32") { # This tuple attribute will indicate the # channel number of given voice call. 
$vgwVoiceChannelNumberAsInt32 = 1; } } + + if ($outAttrName eq "endOfCallSignal") { + $endOfCallSignalAttributeFound = 1; + + if ($outAttrType eq "boolean") { + # This tuple attribute will indicate whether the + # given channel of a given voice call has ended + # sending speech data by closing its WebSocket connection. + $endOfCallSignalAsBoolean = 1; + } + } + } + + if ($speechAttributeFound == 0 ) { + SPL::CodeGen::exitln(SttGatewayResource::STTGW_OUT_ATTRIBUTE_CHECK1("IBMVoiceGatewaySource", "speech"), + $model->getContext()->getSourceLocation()); } + if (!(defined($audioOutputAsBlob))) { + SPL::CodeGen::exitln(SttGatewayResource::STTGW_OUT_ATTRIBUTE_TYPE_CHECK1("IBMVoiceGatewaySource", "speech", "blob"), + $model->getContext()->getSourceLocation()); + } + if ($vgwSessionIdAttributeFound == 0 ) { SPL::CodeGen::exitln(SttGatewayResource::STTGW_OUT_ATTRIBUTE_CHECK2("IBMVoiceGatewaySource", "vgwSessionId"), $model->getContext()->getSourceLocation()); @@ -187,6 +191,16 @@ using websocketpp::lib::bind; SPL::CodeGen::exitln(SttGatewayResource::STTGW_OUT_ATTRIBUTE_TYPE_CHECK2("IBMVoiceGatewaySource", "vgwVoiceChannelNumber", "int32"), $model->getContext()->getSourceLocation()); } + + if ($endOfCallSignalAttributeFound == 0 ) { + SPL::CodeGen::exitln(SttGatewayResource::STTGW_OUT_ATTRIBUTE_CHECK2("IBMVoiceGatewaySource", "endOfCallSignal"), + $model->getContext()->getSourceLocation()); + } + + if (!(defined($endOfCallSignalAsBoolean))) { + SPL::CodeGen::exitln(SttGatewayResource::STTGW_OUT_ATTRIBUTE_TYPE_CHECK2("IBMVoiceGatewaySource", "endOfCallSignal", "boolean"), + $model->getContext()->getSourceLocation()); + } # Following are the operator parameters. my $tlsPort = $model->getParameterByName("tlsPort"); @@ -1035,11 +1049,12 @@ void MY_OPERATOR::on_message(EndpointType* s, websocketpp::connection_hdl hdl, if (vgwSessionIdFoundInMap == true) { // Send the "End of Voice Call" signal now for this // vgwSessionId_vgwVoiceChannelNumber combo. 
- OPort1Type oTuple; + OPort0Type oTuple; oTuple.set_vgwSessionId(con_metadata.vgwSessionId); oTuple.set_isCustomerSpeechData(con_metadata.vgwIsCaller); oTuple.set_vgwVoiceChannelNumber(con_metadata.vgwVoiceChannelNumber); - submit(oTuple, 1); + oTuple.set_endOfCallSignal(true); + submit(oTuple, 0); if (vgwSessionLoggingNeeded == true) { SPLAPPTRC(L_ERROR, "Operator " << operatorPhysicalName << @@ -1135,17 +1150,19 @@ void MY_OPERATOR::on_message(EndpointType* s, websocketpp::connection_hdl hdl, // vgwSessionId_vgwVoiceChannelNumber combo. // Send it for voice channel 1 which is an // agent channel most of the time. - OPort1Type oTuple; + OPort0Type oTuple; oTuple.set_vgwSessionId(*it); oTuple.set_isCustomerSpeechData(false); oTuple.set_vgwVoiceChannelNumber(1); - submit(oTuple, 1); + oTuple.set_endOfCallSignal(true); + submit(oTuple, 0); // Do the same for voice channel 2 which is a // customer channel most of the time. oTuple.set_vgwSessionId(*it); oTuple.set_isCustomerSpeechData(true); oTuple.set_vgwVoiceChannelNumber(2); - submit(oTuple, 1); + oTuple.set_endOfCallSignal(true); + submit(oTuple, 0); // We have a map where the agent and caller phone numbers of a given // call session id are stored. Since this call has gone stale, @@ -1205,11 +1222,12 @@ void MY_OPERATOR::on_message(EndpointType* s, websocketpp::connection_hdl hdl, // do its own clean-up and release of the STT engines. // Send the "End of Voice Call" signal now for this // vgwSessionId_vgwVoiceChannelNumber combo. - OPort1Type oTuple; + OPort0Type oTuple; oTuple.set_vgwSessionId(cmd.vgwSessionId); oTuple.set_isCustomerSpeechData(cmd.vgwIsCaller); oTuple.set_vgwVoiceChannelNumber(cmd.vgwVoiceChannelNumber); - submit(oTuple, 1); + oTuple.set_endOfCallSignal(true); + submit(oTuple, 0); // Added this logic on Sep/04/2020. 
// We have a map where the agent and caller phone numbers of a given @@ -1368,6 +1386,7 @@ void MY_OPERATOR::on_message(EndpointType* s, websocketpp::connection_hdl hdl, speechBlob.setData((unsigned char*)payloadBuffer, (uint64_t)payloadSize); OPort0Type oTuple; oTuple.set_speech(speechBlob); + oTuple.set_endOfCallSignal(false); // Now let us set any attributes that the caller of this operator is trying to // assign through this operator's output functions. @@ -1565,11 +1584,12 @@ void MY_OPERATOR::on_close(websocketpp::connection_hdl hdl) { if (vgwSessionIdFoundInMap == true && con_metadata.vgwVoiceChannelNumber > 0) { // Send the "End of Voice Call" signal now for this // vgwSessionId_vgwVoiceChannelNumber combo. - OPort1Type oTuple; + OPort0Type oTuple; oTuple.set_vgwSessionId(con_metadata.vgwSessionId); oTuple.set_isCustomerSpeechData(con_metadata.vgwIsCaller); oTuple.set_vgwVoiceChannelNumber(con_metadata.vgwVoiceChannelNumber); - submit(oTuple, 1); + oTuple.set_endOfCallSignal(true); + submit(oTuple, 0); if (vgwSessionLoggingNeeded == true) { SPLAPPTRC(L_ERROR, "Operator " << operatorPhysicalName << @@ -1688,17 +1708,19 @@ void MY_OPERATOR::on_close(websocketpp::connection_hdl hdl) { // vgwSessionId_vgwVoiceChannelNumber combo. // Send it for voice channel 1 which is an // agent channel most of the time. - OPort1Type oTuple; + OPort0Type oTuple; oTuple.set_vgwSessionId(*it); oTuple.set_isCustomerSpeechData(false); oTuple.set_vgwVoiceChannelNumber(1); - submit(oTuple, 1); + oTuple.set_endOfCallSignal(true); + submit(oTuple, 0); // Do the same for voice channel 2 which is a // customer channel most of the time. oTuple.set_vgwSessionId(*it); oTuple.set_isCustomerSpeechData(true); oTuple.set_vgwVoiceChannelNumber(2); - submit(oTuple, 1); + oTuple.set_endOfCallSignal(true); + submit(oTuple, 0); // We have a map where the agent and caller phone numbers of a given // call session id are stored. 
Since this call has gone stale, @@ -1758,11 +1780,12 @@ void MY_OPERATOR::on_close(websocketpp::connection_hdl hdl) { // do its own clean-up and release of the STT engines. // Send the "End of Voice Call" signal now for this // vgwSessionId_vgwVoiceChannelNumber combo. - OPort1Type oTuple; + OPort0Type oTuple; oTuple.set_vgwSessionId(cmd.vgwSessionId); oTuple.set_isCustomerSpeechData(cmd.vgwIsCaller); oTuple.set_vgwVoiceChannelNumber(cmd.vgwVoiceChannelNumber); - submit(oTuple, 1); + oTuple.set_endOfCallSignal(true); + submit(oTuple, 0); // Added this logic on Sep/04/2020. // We have a map where the agent and caller phone numbers of a given diff --git a/com.ibm.streamsx.sttgateway/info.xml b/com.ibm.streamsx.sttgateway/info.xml index ef843f9..ff6cd22 100644 --- a/com.ibm.streamsx.sttgateway/info.xml +++ b/com.ibm.streamsx.sttgateway/info.xml @@ -14,7 +14,7 @@ **Note:** This toolkit requires c++11 support. - 2.2.8 + 2.2.9 4.2.1.6 diff --git a/samples/STTGatewayUtils/com.ibm.streamsx.sttgateway.utils/STTGatewayUtils.spl b/samples/STTGatewayUtils/com.ibm.streamsx.sttgateway.utils/STTGatewayUtils.spl index 461566a..8381ee1 100644 --- a/samples/STTGatewayUtils/com.ibm.streamsx.sttgateway.utils/STTGatewayUtils.spl +++ b/samples/STTGatewayUtils/com.ibm.streamsx.sttgateway.utils/STTGatewayUtils.spl @@ -1,14 +1,14 @@ /* ============================================== # Licensed Materials - Property of IBM -# Copyright IBM Corp. 2018, 2020 +# Copyright IBM Corp. 2018, 2021 ============================================== */ /* ============================================== First created on: Nov/24/2020 -Last modified on: Nov/26/2020 +Last modified on: Feb/09/2021 This is a utility composite that will get used in the following applications. @@ -25,7 +25,7 @@ namespace com.ibm.streamsx.sttgateway.utils; // Code for the C++ native functions can be found in the impl/include directory of this project. 
// // The following is the schema of the first output stream for the -// IBMVoiceGatewaySource operator. The first four attributes are +// IBMVoiceGatewaySource operator. The first five attributes are // very important and the other ones are purely optional if some // scenarios really require them. // blob speech --> Speech fragments of a live conversation as captured and sent by the IBM Voice Gateway. @@ -36,6 +36,9 @@ namespace com.ibm.streamsx.sttgateway.utils; // Whoever (caller or agent) sends the first round of // speech data bytes will get assigned a voice channel of 1. // The next one to follow will get assigned a voice channel of 2. +// boolean endOfCallSignal --> This attribute will be set to true by the IBMVoiceGatewaySource +// operator when it sends an EOCS for a voice channel. It will be +// set to false by that operator when it sends binary speech data. // rstring id --> This attribute is needed by the WatsonS2T operator. // It is set to vgwSessionId_vgwVoiceChannelNumber // rstring callStartDateTime --> Call start date time i.e. system clock time. @@ -47,18 +50,11 @@ namespace com.ibm.streamsx.sttgateway.utils; // int32 speechEngineId --> This attribute will be set in the speech processor job. (Please, read the comments there.) // int32 speechResultProcessorId --> This attribute will be set in the speech processor job. (Please, read the comments there.) type BinarySpeech_t = blob speech, rstring vgwSessionId, boolean isCustomerSpeechData, - int32 vgwVoiceChannelNumber, rstring id, rstring callStartDateTime, + int32 vgwVoiceChannelNumber, boolean endOfCallSignal, + rstring id, rstring callStartDateTime, rstring callerPhoneNumber, rstring agentPhoneNumber, int32 speechDataFragmentCnt, int32 totalSpeechDataBytesReceived, int32 speechProcessorId, int32 speechEngineId, int32 speechResultProcessorId; -// The following schema is for the second output stream of the -// IBMVoiceGatewaySource operator. 
It has three attributes indicating -// the speaker channel (vgwVoiceChannelNumber) of a given voice call (vgwSessionId) who -// got completed with the call as well as an indicator (isCustomerSpeechData) to -// denote whether the speech data we received on this channel belonged -// to a caller or an agent. -type EndOfCallSignal_t = rstring vgwSessionId, - boolean isCustomerSpeechData, int32 vgwVoiceChannelNumber; // The following schema will be for the data being sent here by the // VgwDataRouter application. It sends us raw binary data which diff --git a/samples/STTGatewayUtils/info.xml b/samples/STTGatewayUtils/info.xml index 8e3fb5f..142e903 100644 --- a/samples/STTGatewayUtils/info.xml +++ b/samples/STTGatewayUtils/info.xml @@ -4,7 +4,7 @@ STTGatewayUtils A utility composite used by the VgwDataRouter and the speech processor applications. - 1.0.0 + 1.0.1 4.2.1.6 diff --git a/samples/VgwDataRouter/com.ibm.streamsx.sttgateway.sample/VgwDataRouter.spl b/samples/VgwDataRouter/com.ibm.streamsx.sttgateway.sample/VgwDataRouter.spl index 8ae86db..d70db5e 100644 --- a/samples/VgwDataRouter/com.ibm.streamsx.sttgateway.sample/VgwDataRouter.spl +++ b/samples/VgwDataRouter/com.ibm.streamsx.sttgateway.sample/VgwDataRouter.spl @@ -8,7 +8,7 @@ /* ============================================== First created on: Nov/24/2020 -Last modified on: Feb/02/2021 +Last modified on: Feb/10/2021 A) What does this example application do? -------------------------------------- @@ -431,8 +431,7 @@ public composite VgwDataRouter { // your own needs of further analytics on the Speech results as well as // specific ways of delivering the Speech results to other // downstream systems rather than only writing to files as this example does below. 
- (stream BinarySpeechData as BSD; - stream EndOfCallSignal as EOCS) as VgwDataRouterSource = + (stream BinarySpeechData as BSD) as VgwDataRouterSource = IBMVoiceGatewaySource() { logic state: { @@ -504,15 +503,13 @@ public composite VgwDataRouter { // real-time voice calls. // @parallel(width=$numberOfCallReplayEngines) - (stream PreRecordedBinarySpeechData; - stream PreRecordedEndOfCallSignal) as + (stream PreRecordedBinarySpeechData) as VoiceCallReplayer = CallRecordingReplay(CallReplaySignalFileName) { param callRecordingReadDirectory: $callRecordingReadDirectory; // Pass these stream types as composite operator parameters. callMetaData_t: CallMetaData_t; binarySpeech_t: BinarySpeech_t; - endOfCallSignal_t: EndOfCallSignal_t; } // We have to always route the speech data bytes (fragments) coming from @@ -536,8 +533,7 @@ public composite VgwDataRouter { // That special speech data distribution logic happens inside this operator. (stream CallDataForSpeechProcessor as CDFSP) as VoiceCallDataRouter = Custom( - BinarySpeechData, PreRecordedBinarySpeechData as BSD; - EndOfCallSignal, PreRecordedEndOfCallSignal as EOCS) { + BinarySpeechData, PreRecordedBinarySpeechData as BSD) { logic state: { // This variable tells us how many total concurrent calls can be @@ -569,96 +565,59 @@ public composite VgwDataRouter { // Get the sessionId + channelNumber combo string. _key = BSD.vgwSessionId + "_" + (rstring)BSD.vgwVoiceChannelNumber; - // We have to first check if this speech data belongs to a - // brand new voice call or an already ongoing voice call. - if(has(_vgwSessionIdToSpeechProcessorMap, BSD.vgwSessionId) == false) { - // This is a brand new voice call. Get a speech processor id to - // send the speech data belonging to this call. - // Store this VGW session id to change the status of this - // voice call from "brand new" to "ongoing". 
- int32 speechProcessorId = - getSpeechProcessorIdForNewCallProcessing( - _numberOfConcurrentCallsAllowedPerSpeechProcessor, - _speechProcessorStatusList); - - if(speechProcessorId == -1) { - // This condition should not happen as long as there are enough - // number of speech processors with more than sufficient number of - // speech engines configured to run in them. - appTrc(Trace.error, - "_XXXXX No speech processor job is available at this time for the " + - "vgwSessionId_vgwVoiceChannelNumber: " + _key + - " We are not going to process the currently received speech data bytes" + - " of this speaker in this voice call." + - " Please start sufficient number of speech processor jobs " + - " next time to handle your maximum expected concurrent calls." + - " A rule of thumb is to have two S2T engines to process" + - " two speakers in every given concurrent voice call."); - return; + // We will get the regular binary speech data and the End Of Call Signal (EOCS) in + // the same input stream. This design change was done on Feb/09/2021 to avoid + // any port locks and/or tuple ordering issues that may happen if we choose to + // do it using two different output ports. The incoming tuple has an attribute + // that is set to true or false by the IBMVoiceGatewaySource operator to indicate + // whether it is sending binary speech data or an EOCS. + if(BSD.endOfCallSignal == false) { + // The incoming tuple contains binary speech data. + // + // We have to first check if this speech data belongs to a + // brand new voice call or an already ongoing voice call. + if(has(_vgwSessionIdToSpeechProcessorMap, BSD.vgwSessionId) == false) { + // This is a brand new voice call. Get a speech processor id to + // send the speech data belonging to this call. + // Store this VGW session id to change the status of this + // voice call from "brand new" to "ongoing". 
+ int32 speechProcessorId = + getSpeechProcessorIdForNewCallProcessing( + _numberOfConcurrentCallsAllowedPerSpeechProcessor, + _speechProcessorStatusList); + + if(speechProcessorId == -1) { + // This condition should not happen as long as there are enough + // number of speech processors with more than sufficient number of + // speech engines configured to run in them. + appTrc(Trace.error, + "_XXXXX No speech processor job is available at this time for the " + + "vgwSessionId_vgwVoiceChannelNumber: " + _key + + " We are not going to process the currently received speech data bytes" + + " of this speaker in this voice call." + + " Please start sufficient number of speech processor jobs " + + " next time to handle your maximum expected concurrent calls." + + " A rule of thumb is to have two S2T engines to process" + + " two speakers in every given concurrent voice call."); + return; + } + + // Let us store the speech processor id of this call for + // future use as the speech data keeps coming. + insertM(_vgwSessionIdToSpeechProcessorMap, BSD.vgwSessionId, speechProcessorId); + appTrc(Trace.error, "A new call with vgwSessionId=" + BSD.vgwSessionId + + " is being assigned to speech processor id " + (rstring)speechProcessorId); } - // Let us store the speech processor id of this call for - // future use as the speech data keeps coming. - insertM(_vgwSessionIdToSpeechProcessorMap, BSD.vgwSessionId, speechProcessorId); - appTrc(Trace.error, "A new call with vgwSessionId=" + BSD.vgwSessionId + - " is being assigned to speech processor id " + (rstring)speechProcessorId); - } - - // We can prepare this speech data to be sent to the chosen speech processor id. - // We will do double serialization of the speech data as shown below. - // - // 1) Let us serialize the received speech data tuple. - // msgType = 1 means that it carries the binary speech data tuple. 
- _serializedTuple.msgType = 1; - clearM(_serializedTuple.payload); - // Call a native function to do the Tuple-->Blob conversion. - serializeTuple(BSD, _serializedTuple.payload); - - // 2) Let us now serialize it one more time to be sent as the final tuple. - _oTuple.strData = ""; - // We can tell the downstream WebSocketSink operator about to which - // speech processor id our serialized data should be sent. - // That is done by specifying the URL context path of the WebSocketSink - // operator where a selected speech processor has connected to. - // As shown in the param section of the downstream sink operator, - // we have configured it to allow URL context path such as "1","2","3" and so on. - // With this arrangement, remote speech processor jobs can connect to our - // WebSocketSink via a distinct URL that carries their respective speech processor ids. - _oTuple.sendToUrlContextPaths = - [(rstring)_vgwSessionIdToSpeechProcessorMap[BSD.vgwSessionId]]; - serializeTuple(_serializedTuple, _oTuple.blobData); - submit(_oTuple, CDFSP); - } // End of onTuple BSD - - // Process the end of voice call signal. - // Since there are two channels in every voice call, - // those two channels will carry their own "End STT session" - // message from the Voice Gateway. The logic below takes care of - // handling two End of Call Signals for every voice call. - onTuple EOCS: { - _key = EOCS.vgwSessionId + "_" + (rstring)EOCS.vgwVoiceChannelNumber; - - // Get the allocated speech processor id for a given vgwSessionId. - // We should always have a speech processor id. If not, that is a - // case where the user didn't provision sufficient number of - // Speech engines and there was no idle speech processor available for that given vgwSessionId. - // This situation can be avoided by starting the application with a - // sufficient number of speech processors along with sufficient - // speech engines needed for the anticipated maximum concurrent voice calls. 
- // A rule of thumb is to have two speech engines to process - // two speakers in every given concurrent voice call. - // - if (has(_vgwSessionIdToSpeechProcessorMap, EOCS.vgwSessionId) == true) { - int32 speechProcessorId = - _vgwSessionIdToSpeechProcessorMap[EOCS.vgwSessionId]; - // Let us send the EOCS tuple to the chosen speech processor id. - // We will do double serialization of the data as shown below. + // We can prepare this speech data to be sent to the chosen speech processor id. + // We will do double serialization of the speech data as shown below. + // // 1) Let us serialize the received speech data tuple. - // msgType = 2 means that it carries the EOCS tuple. - _serializedTuple.msgType = 2; + // msgType = 1 means that it carries the binary speech data tuple. + _serializedTuple.msgType = 1; clearM(_serializedTuple.payload); // Call a native function to do the Tuple-->Blob conversion. - serializeTuple(EOCS, _serializedTuple.payload); + serializeTuple(BSD, _serializedTuple.payload); // 2) Let us now serialize it one more time to be sent as the final tuple. _oTuple.strData = ""; @@ -670,94 +629,140 @@ public composite VgwDataRouter { // we have configured it to allow URL context path such as "1","2","3" and so on. // With this arrangement, remote speech processor jobs can connect to our // WebSocketSink via a distinct URL that carries their respective speech processor ids. - _oTuple.sendToUrlContextPaths = [(rstring)speechProcessorId]; + _oTuple.sendToUrlContextPaths = + [(rstring)_vgwSessionIdToSpeechProcessorMap[BSD.vgwSessionId]]; serializeTuple(_serializedTuple, _oTuple.blobData); - submit(_oTuple, CDFSP); - // Add the _key to the call completed list for the speech processor id to be - // released later in the following if block only after receiving EOCS for - // both the voice channels of this call. 
- insertM(_vgwSessionVgwVoiceChannelNumberCompletedMap, _key, true); - - rstring key1 = EOCS.vgwSessionId + "_" + "1"; - rstring key2 = EOCS.vgwSessionId + "_" + "2"; - // Since this voice call is ending, let us release the speech processor id - // that was allocated above for this voice call. - // Remove the speech processor id only if the EOCS signal - // was sent for both of the voice channels. That must first - // happen before we can release the speech processor id. - boolean key1Exists = has(_vgwSessionVgwVoiceChannelNumberCompletedMap, key1); - boolean key2Exists = has(_vgwSessionVgwVoiceChannelNumberCompletedMap, key2); - - if ($numberOfEocsNeededForVoiceCallCompletion == 2 && - (key1Exists == true && key2Exists == true)) { - // Since the voice call for this VGW session id has ended completely, - // we can also release the speech processor id assigned for this call so that - // it can be repurposed for handling any new future calls. - // We can go ahead and release the speech processor id by adding it back to - // the speech processor status list. - // Let us decrement the given speech processor's current call handling count. - // It is a zero based indexed array. Hence, we have to subtract by 1 to get the - // current index in that array. - _speechProcessorStatusList[speechProcessorId-1] = - _speechProcessorStatusList[speechProcessorId-1] - 1; + submit(_oTuple, CDFSP); + } else { + //The incoming tuple contains an End of Call Signal (EOCS). + // + // Process the end of voice call signal. + // Since there are two channels in every voice call, + // those two channels will carry their own "End STT session" + // message from the Voice Gateway. The logic below takes care of + // handling two End of Call Signals for every voice call. + // + // Get the allocated speech processor id for a given vgwSessionId. + // We should always have a speech processor id. 
If not, that is a + // case where the user didn't provision sufficient number of + // Speech engines and there was no idle speech processor available for that given vgwSessionId. + // This situation can be avoided by starting the application with a + // sufficient number of speech processors along with sufficient + // speech engines needed for the anticipated maximum concurrent voice calls. + // A rule of thumb is to have two speech engines to process + // two speakers in every given concurrent voice call. + // + if (has(_vgwSessionIdToSpeechProcessorMap, BSD.vgwSessionId) == true) { + int32 speechProcessorId = + _vgwSessionIdToSpeechProcessorMap[BSD.vgwSessionId]; + // Let us send the EOCS to the chosen speech processor id. + // We will do double serialization of the data as shown below. + // 1) Let us serialize the received speech data tuple. + // msgType = 2 means that it carries the EOCS tuple. + _serializedTuple.msgType = 2; + clearM(_serializedTuple.payload); + // Call a native function to do the Tuple-->Blob conversion. + serializeTuple(BSD, _serializedTuple.payload); - // We can now do the clean-up in our state variables. - removeM(_vgwSessionIdToSpeechProcessorMap, EOCS.vgwSessionId); - removeM(_vgwSessionVgwVoiceChannelNumberCompletedMap, key1); - removeM(_vgwSessionVgwVoiceChannelNumberCompletedMap, key2); - appTrc(Trace.error, "i) A call with vgwSessionId=" + EOCS.vgwSessionId + - " ended and its speech processor id " + (rstring)speechProcessorId + - " got released."); - } else if ($numberOfEocsNeededForVoiceCallCompletion == 1 && - (key1Exists == true || key2Exists == true)) { - // If the user configured this application to handle - // a single EOCS as sufficient to consider a voice call - // completed for a given VGW session id, we will use this - // block of code. Please refer to the constant i.e. expression - // declaration section above to read the commentary about this idea. 
- // - // Since the voice call for this VGW session id has ended completely, - // we can also release the speech processor id assigned for this call so that - // it can be repurposed for handling any new future calls. - // We can go ahead and release the speech processor id by adding it back to - // the speech processor status list. - // Let us decrement the given speech processor's current call handling count. - // It is a zero based indexed array. Hence, we have to subtract by 1 to get the - // current index in that array. - _speechProcessorStatusList[speechProcessorId-1] = - _speechProcessorStatusList[speechProcessorId-1] - 1; + // 2) Let us now serialize it one more time to be sent as the final tuple. + _oTuple.strData = ""; + // We can tell the downstream WebSocketSink operator about to which + // speech processor id our serialized data should be sent. + // That is done by specifying the URL context path of the WebSocketSink + // operator where a selected speech processor has connected to. + // As shown in the param section of the downstream sink operator, + // we have configured it to allow URL context path such as "1","2","3" and so on. + // With this arrangement, remote speech processor jobs can connect to our + // WebSocketSink via a distinct URL that carries their respective speech processor ids. + _oTuple.sendToUrlContextPaths = [(rstring)speechProcessorId]; + serializeTuple(_serializedTuple, _oTuple.blobData); + submit(_oTuple, CDFSP); + // Add the _key to the call completed list for the speech processor id to be + // released later in the following if block only after receiving EOCS for + // both the voice channels of this call. + insertM(_vgwSessionVgwVoiceChannelNumberCompletedMap, _key, true); - // We can now do the clean-up in our state variables. 
- removeM(_vgwSessionIdToSpeechProcessorMap, EOCS.vgwSessionId); - - if(key1Exists == true) { - removeM(_vgwSessionVgwVoiceChannelNumberCompletedMap, key1); - } + rstring key1 = BSD.vgwSessionId + "_" + "1"; + rstring key2 = BSD.vgwSessionId + "_" + "2"; + // Since this voice call is ending, let us release the speech processor id + // that was allocated above for this voice call. + // Remove the speech processor id only if the EOCS signal + // was sent for both of the voice channels. That must first + // happen before we can release the speech processor id. + boolean key1Exists = has(_vgwSessionVgwVoiceChannelNumberCompletedMap, key1); + boolean key2Exists = has(_vgwSessionVgwVoiceChannelNumberCompletedMap, key2); - if(key2Exists == true) { + if ($numberOfEocsNeededForVoiceCallCompletion == 2 && + (key1Exists == true && key2Exists == true)) { + // Since the voice call for this VGW session id has ended completely, + // we can also release the speech processor id assigned for this call so that + // it can be repurposed for handling any new future calls. + // We can go ahead and release the speech processor id by adding it back to + // the speech processor status list. + // Let us decrement the given speech processor's current call handling count. + // It is a zero based indexed array. Hence, we have to subtract by 1 to get the + // current index in that array. + _speechProcessorStatusList[speechProcessorId-1] = + _speechProcessorStatusList[speechProcessorId-1] - 1; + + // We can now do the clean-up in our state variables. 
+ removeM(_vgwSessionIdToSpeechProcessorMap, BSD.vgwSessionId); + removeM(_vgwSessionVgwVoiceChannelNumberCompletedMap, key1); removeM(_vgwSessionVgwVoiceChannelNumberCompletedMap, key2); + appTrc(Trace.error, "i) A call with vgwSessionId=" + BSD.vgwSessionId + + " ended and its speech processor id " + (rstring)speechProcessorId + + " got released."); + } else if ($numberOfEocsNeededForVoiceCallCompletion == 1 && + (key1Exists == true || key2Exists == true)) { + // If the user configured this application to handle + // a single EOCS as sufficient to consider a voice call + // completed for a given VGW session id, we will use this + // block of code. Please refer to the constant i.e. expression + // declaration section above to read the commentary about this idea. + // + // Since the voice call for this VGW session id has ended completely, + // we can also release the speech processor id assigned for this call so that + // it can be repurposed for handling any new future calls. + // We can go ahead and release the speech processor id by adding it back to + // the speech processor status list. + // Let us decrement the given speech processor's current call handling count. + // It is a zero based indexed array. Hence, we have to subtract by 1 to get the + // current index in that array. + _speechProcessorStatusList[speechProcessorId-1] = + _speechProcessorStatusList[speechProcessorId-1] - 1; + + // We can now do the clean-up in our state variables. 
+ removeM(_vgwSessionIdToSpeechProcessorMap, BSD.vgwSessionId); + + if(key1Exists == true) { + removeM(_vgwSessionVgwVoiceChannelNumberCompletedMap, key1); + } + + if(key2Exists == true) { + removeM(_vgwSessionVgwVoiceChannelNumberCompletedMap, key2); + } + + appTrc(Trace.error, "ii) A call with vgwSessionId=" + BSD.vgwSessionId + + " ended and its speech processor id " + (rstring)speechProcessorId + + " got released."); } - - appTrc(Trace.error, "ii) A call with vgwSessionId=" + EOCS.vgwSessionId + - " ended and its speech processor id " + (rstring)speechProcessorId + - " got released."); - } - } else { - // Flag an error only when the user configured for two - // EOCS tuples to be received for considering a voice call - // as complted. - if ($numberOfEocsNeededForVoiceCallCompletion == 2) { - appTrc(Trace.error, - "_YYYYY No speech processor id is available at this time for the " + - "vgwSessionId_vgwVoiceChannelNumber: " + _key + - " We are not going to process the currently received EOCS " + - " of this speaker in this voice call. This is a serious error."); - } - } - } // End of onTuple EOCS. - + } else { + // Flag an error only when the user configured for two + // EOCS tuples to be received for considering a voice call + // as completed. + if ($numberOfEocsNeededForVoiceCallCompletion == 2) { + appTrc(Trace.error, + "_YYYYY No speech processor id is available at this time for the " + + "vgwSessionId_vgwVoiceChannelNumber: " + _key + + " We are not going to process the currently received EOCS " + + " of this speaker in this voice call. This is a serious error."); + } + } + } // End of if(BSD.endOfCallSignal == false) + } // End of onTuple BSD + config - threadedPort: queue(BSD, Sys.Wait), queue(EOCS, Sys.Wait); + threadedPort: queue(BSD, Sys.Wait); } // End of Custom operator. // Invoke one or more instances of the WebSocketSink operator. 
@@ -828,14 +833,13 @@ public composite VgwDataRouter { // parallel region for the purpose of load testing by // replaying many pre-recorded voice calls at the same time. public composite CallRecordingReplay(input CallReplaySignalFileNameIn; - output PreRecordedBinarySpeechData, PreRecordedEndOfCallSignal) { + output PreRecordedBinarySpeechData) { param expression $callRecordingReadDirectory; // This composite operator receives externally // defined stream types via operator parameters. type $callMetaData_t; type $binarySpeech_t; - type $endOfCallSignal_t; // Replaying the pre-recorded voice calls. // The graph below will perform the logic necessary to @@ -1068,7 +1072,6 @@ public composite CallRecordingReplay(input CallReplaySignalFileNameIn; // mix both of them and send out a tuple for transcription by // downstream operators. (stream<$binarySpeech_t> PreRecordedBinarySpeechData as PRBSD; - stream<$endOfCallSignal_t> PreRecordedEndOfCallSignal as PREOCS; stream Acknowledgement as Ack) as PreRecordedCallReplayer = Custom(CallMetaDataVC1, CallMetaDataVC2 as CMD; CallSpeechDataVC1, CallSpeechDataVC2 as CSD; TimerSignal as TS) { @@ -1094,7 +1097,6 @@ public composite CallRecordingReplay(input CallReplaySignalFileNameIn; // waiting for data to be read from the pre-recorded files. mutable rstring lastObservedReplayMapKey = "abcxyz"; mutable $binarySpeech_t _oTuple1 = {}; - mutable $endOfCallSignal_t _oTuple2 = {}; } onTuple CMD: { @@ -1186,8 +1188,10 @@ public composite CallRecordingReplay(input CallReplaySignalFileNameIn; // We have the call meta data. We can create a new // binary speech data tuple now and send it out for transcription. // Copy all the call meta data attributes to the outgoing tuple. 
+ _oTuple1 = ($binarySpeech_t){}; assignFrom(_oTuple1, _callMetaDataMap[key]); _oTuple1.speech = CSD.speech; + _oTuple1.endOfCallSignal = false; _oTuple1.speechDataFragmentCnt = _speechDataFragmentCount[key]; _oTuple1.totalSpeechDataBytesReceived = _speechDataBytesCount[key]; submit(_oTuple1, PRBSD); @@ -1219,12 +1223,14 @@ public composite CallRecordingReplay(input CallReplaySignalFileNameIn; // Send two EOCS signals one for each voice channel in the given call. for (rstring str in _callMetaDataMap) { - _oTuple2.vgwSessionId = _callMetaDataMap[str].vgwSessionId; - _oTuple2.isCustomerSpeechData = + _oTuple1 = ($binarySpeech_t){}; + _oTuple1.vgwSessionId = _callMetaDataMap[str].vgwSessionId; + _oTuple1.isCustomerSpeechData = _callMetaDataMap[str].isCustomerSpeechData; - _oTuple2.vgwVoiceChannelNumber = + _oTuple1.vgwVoiceChannelNumber = _callMetaDataMap[str].vgwVoiceChannelNumber; - submit(_oTuple2, PREOCS); + _oTuple1.endOfCallSignal = true; + submit(_oTuple1, PRBSD); } // Clear all the state maps to be ready for the @@ -1288,12 +1294,14 @@ public composite CallRecordingReplay(input CallReplaySignalFileNameIn; // we are required to send an EOCS for that voice channel. 
for (rstring str in _callMetaDataMap) { if (_speechDataTuplesSentCount[str] > 0) { - _oTuple2.vgwSessionId = _callMetaDataMap[str].vgwSessionId; - _oTuple2.isCustomerSpeechData = + _oTuple1 = ($binarySpeech_t){}; + _oTuple1.vgwSessionId = _callMetaDataMap[str].vgwSessionId; + _oTuple1.isCustomerSpeechData = _callMetaDataMap[str].isCustomerSpeechData; - _oTuple2.vgwVoiceChannelNumber = + _oTuple1.vgwVoiceChannelNumber = _callMetaDataMap[str].vgwVoiceChannelNumber; - submit(_oTuple2, PREOCS); + _oTuple1.endOfCallSignal = true; + submit(_oTuple1, PRBSD); } } diff --git a/samples/VgwDataRouter/data/.gitignore b/samples/VgwDataRouter/data/.gitignore new file mode 100644 index 0000000..4cf1d1c --- /dev/null +++ b/samples/VgwDataRouter/data/.gitignore @@ -0,0 +1,8 @@ +# This directory is empty with no files at the time of committing it to the GitHub. + +# When the users clone this toolkit repository or download a release package from this repository, they will create files into their copy of this folder for compiling, testing and application bundle packaging purposes. So, they will need this empty directory to be present as part of this toolkit. + +# Having this .gitignore file here will force the git add, git commit and git push commands to keep this empty folder in the remote repository instead of simply ignoring it. 
+ +# ignore all content of this directory +/* diff --git a/samples/VgwDataRouter/info.xml b/samples/VgwDataRouter/info.xml index 7fc9775..5f607be 100644 --- a/samples/VgwDataRouter/info.xml +++ b/samples/VgwDataRouter/info.xml @@ -4,13 +4,13 @@ VgwDataRouter Example that shows how to route VGW speech data to different Speech processor jobs - 1.0.1 + 1.0.2 4.2.1.6 com.ibm.streamsx.sttgateway - [2.2.5,7.0.0] + [2.2.9,7.0.0] com.ibm.streamsx.websocket @@ -18,7 +18,7 @@ STTGatewayUtils - [1.0.0,7.0.0] + [1.0.1,7.0.0] \ No newline at end of file diff --git a/samples/VgwDataRouterToWatsonS2T/com.ibm.streamsx.sttgateway.sample.watsons2t/VgwDataRouterToWatsonS2T.spl b/samples/VgwDataRouterToWatsonS2T/com.ibm.streamsx.sttgateway.sample.watsons2t/VgwDataRouterToWatsonS2T.spl index 0f107e4..659b01d 100644 --- a/samples/VgwDataRouterToWatsonS2T/com.ibm.streamsx.sttgateway.sample.watsons2t/VgwDataRouterToWatsonS2T.spl +++ b/samples/VgwDataRouterToWatsonS2T/com.ibm.streamsx.sttgateway.sample.watsons2t/VgwDataRouterToWatsonS2T.spl @@ -8,7 +8,7 @@ /* ============================================== First created on: Nov/24/2020 -Last modified on: Feb/02/2021 +Last modified on: Feb/10/2021 IMPORTANT NOTE -------------- @@ -657,8 +657,7 @@ public composite VgwDataRouterToWatsonS2T { // We can now (twice) deserialize the data received from the VGW data router application to // get the actual speech data or the EOCS (End Of Call Signal). - (stream BinarySpeechData as BSD; - stream EndOfCallSignal as EOCS) as VgwDataParser = + (stream BinarySpeechData as BSD) as VgwDataParser = Custom(ReceivedData as RD) { logic onTuple RD: { @@ -680,9 +679,9 @@ public composite VgwDataRouterToWatsonS2T { submit(speechData, BSD); } else if(outerTuple.msgType == 2) { // This is End Of Call Signal (EOCS) sent by the Voice Gateway product. 
- mutable EndOfCallSignal_t eocs = (EndOfCallSignal_t){}; + mutable BinarySpeech_t eocs = (BinarySpeech_t){}; deserializeTuple(eocs, outerTuple.payload); - submit(eocs, EOCS); + submit(eocs, BSD); } else { // Unsupported message type received. appTrc(Trace.error, "Unsupported message type " + @@ -725,15 +724,13 @@ public composite VgwDataRouterToWatsonS2T { // real-time voice calls. // @parallel(width=$numberOfCallReplayEngines) - (stream PreRecordedBinarySpeechData; - stream PreRecordedEndOfCallSignal) as + (stream PreRecordedBinarySpeechData) as VoiceCallReplayer = CallRecordingReplay(CallReplaySignalFileName) { param callRecordingReadDirectory: $callRecordingReadDirectory; // Pass these stream types as composite operator parameters. callMetaData_t: CallMetaData_t; binarySpeech_t: BinarySpeech_t; - endOfCallSignal_t: EndOfCallSignal_t; } // We have to always route the speech data bytes (fragments) coming from @@ -757,8 +754,7 @@ public composite VgwDataRouterToWatsonS2T { // That special logic happens inside this operator. (stream BinarySpeechDataFragment as BSDF) as BinarySpeechDataRouter = Custom( - BinarySpeechData, PreRecordedBinarySpeechData as BSD; - EndOfCallSignal, PreRecordedEndOfCallSignal as EOCS) { + BinarySpeechData, PreRecordedBinarySpeechData as BSD) { logic state: { // This map tells us which UDP channel is processing a @@ -787,208 +783,173 @@ public composite VgwDataRouterToWatsonS2T { // Get the sessionId + channelNumber combo string. _key = BSD.vgwSessionId + "_" + (rstring)BSD.vgwVoiceChannelNumber; - // Check if this vgwSessionId_vgwVoiceChannelNumber combo already - // has an S2T engine allocated for it via an UDP channel. - if (has(_vgwSessionIdToUdpChannelMap, _key) == true) { - // This is a speaker of an ongoing voice call who has - // already been assigned to an S2T engine. - // Always send this speaker's speech data fragment to - // that same S2T engine. 
- BSD.speechEngineId = _vgwSessionIdToUdpChannelMap[_key]; - // We can always assume that there is a preselected - // S2T result processor UDP channel available for this - // voice call (i.e. vgwSessionId). Because, it is already - // done in the else block below when this voice call's - // first speaker's speech data arrives here. - // Let us fetch and assign it here. - if (has(_vgwSessionToResultProcessorChannelMap, - BSD.vgwSessionId) == true) { - BSD.speechResultProcessorId = - _vgwSessionToResultProcessorChannelMap[BSD.vgwSessionId]; - } else { - // This should never happen since the call will end - // for both the speakers almost at the same time after - // which there will be no speech data from any of the - // speakers participating in a given voice call. - // This else block is just part of defensive coding. - appTrc(Trace.error, - "_XXXXX No S2T result processor engine available at this time for the " + - "vgwSessionId_vgwVoiceChannelNumber: " + _key + - ". This should be a rare occurrence towards the very end of the call." + - " We are not going to process the speech data bytes" + - " of this speaker in this voice call."); - return; - } - } else { - // If we are here, that means this is a brand new speaker of a - // voice call for whom we must find an idle UDP channel a.k.a - // an idle S2T engine that can process this speaker's speech data. - int32 mySpeechEngineId = getAnIdleUdpChannel(_idleUdpChannelsList); - - if (mySpeechEngineId == -1) { - // This is not good and we should never end up in this situation. - // This means we have not provisioned sufficient number of S2T engines to - // handle the maximum planned concurrent calls. We have to ignore this - // speech data fragment and hope that an idle UDP channel number will - // become available by the time the next speech data fragment for this - // particular vgwSessionId_vgwVoiceChannelNumber combo arrives here. 
- if (BSD.speechDataFragmentCnt == 1) { - // Display this alert only for the very first data fragment of a - // given speaker of a given voice call. - appTrc(Trace.error, "No idle S2T engine available at this time for the " + + // We will get the regular binary speech data and the End Of Call Signal (EOCS) in + // the same input stream. This design change was done on Feb/09/2021 to avoid + // any port locks and/or tuple ordering issues that may happen if we choose to + // do it using two different output ports. The incoming tuple has an attribute + // that is set to true or false by the IBMVoiceGatewaySource operator to indicate + // whether it is sending binary speech data or an EOCS. + if(BSD.endOfCallSignal == false) { + // The incoming tuple contains binary speech data. + // + // Check if this vgwSessionId_vgwVoiceChannelNumber combo already + // has an S2T engine allocated for it via an UDP channel. + if (has(_vgwSessionIdToUdpChannelMap, _key) == true) { + // This is a speaker of an ongoing voice call who has + // already been assigned to an S2T engine. + // Always send this speaker's speech data fragment to + // that same S2T engine. + BSD.speechEngineId = _vgwSessionIdToUdpChannelMap[_key]; + // We can always assume that there is a preselected + // S2T result processor UDP channel available for this + // voice call (i.e. vgwSessionId). Because, it is already + // done in the else block below when this voice call's + // first speaker's speech data arrives here. + // Let us fetch and assign it here. + if (has(_vgwSessionToResultProcessorChannelMap, + BSD.vgwSessionId) == true) { + BSD.speechResultProcessorId = + _vgwSessionToResultProcessorChannelMap[BSD.vgwSessionId]; + } else { + // This should never happen since the call will end + // for both the speakers almost at the same time after + // which there will be no speech data from any of the + // speakers participating in a given voice call. + // This else block is just part of defensive coding. 
+ appTrc(Trace.error, + "_XXXXX No S2T result processor engine available at this time for the " + "vgwSessionId_vgwVoiceChannelNumber: " + _key + - ". There are " + (rstring)$numberOfS2TEngines + - " S2T engines configured and they are all processing other" + - " voice calls at this time. Please start sufficient number of S2T engines" + - " next time to handle your maximum expected concurrent calls." + - " A rule of thumb is to have two S2T engines to process" + - " two speakers in every given concurrent voice call."); + ". This should be a rare occurrence towards the very end of the call." + + " We are not going to process the speech data bytes" + + " of this speaker in this voice call."); + return; } - - return; } else { - // We got an idle S2T engine. - BSD.speechEngineId = mySpeechEngineId; - - // If this call is just beginning, then we will create a - // tiny text file to indicate that we started receiving - // speech data from the IBM Voice Gateway for this new call. - rstring key1 = BSD.vgwSessionId + "_" + "1"; - rstring key2 = BSD.vgwSessionId + "_" + "2"; + // If we are here, that means this is a brand new speaker of a + // voice call for whom we must find an idle UDP channel a.k.a + // an idle S2T engine that can process this speaker's speech data. + int32 mySpeechEngineId = getAnIdleUdpChannel(_idleUdpChannelsList); - // If we have not yet created any entry in our state map for this call, - // then we can be sure that it is the start of this call. - if (has(_vgwSessionIdToUdpChannelMap, key1) == false && - has(_vgwSessionIdToUdpChannelMap, key2) == false) { - // We can now write a "Start of Call" indicator file in the - // application's data directory. e-g: 5362954-call-started.txt - mutable int32 err = 0ul; - rstring socsFileName = dataDirectory() + "/" + - BSD.vgwSessionId + "-call-started.txt"; - uint64 fileHandle = fopen (socsFileName, "w+", err); + if (mySpeechEngineId == -1) { + // This is not good and we should never end up in this situation. 
+ // This means we have not provisioned sufficient number of S2T engines to + // handle the maximum planned concurrent calls. We have to ignore this + // speech data fragment and hope that an idle UDP channel number will + // become available by the time the next speech data fragment for this + // particular vgwSessionId_vgwVoiceChannelNumber combo arrives here. + if (BSD.speechDataFragmentCnt == 1) { + // Display this alert only for the very first data fragment of a + // given speaker of a given voice call. + appTrc(Trace.error, "No idle S2T engine available at this time for the " + + "vgwSessionId_vgwVoiceChannelNumber: " + _key + + ". There are " + (rstring)$numberOfS2TEngines + + " S2T engines configured and they are all processing other" + + " voice calls at this time. Please start sufficient number of S2T engines" + + " next time to handle your maximum expected concurrent calls." + + " A rule of thumb is to have two S2T engines to process" + + " two speakers in every given concurrent voice call."); + } + + return; + } else { + // We got an idle S2T engine. + BSD.speechEngineId = mySpeechEngineId; + + // If this call is just beginning, then we will create a + // tiny text file to indicate that we started receiving + // speech data from the IBM Voice Gateway for this new call. + rstring key1 = BSD.vgwSessionId + "_" + "1"; + rstring key2 = BSD.vgwSessionId + "_" + "2"; - if(err == 0) { - fwriteString ("VGW call session id " + BSD.vgwSessionId + - " started at " + ctime(getTimestamp()) + ".", fileHandle, err); - fclose(fileHandle, err); + // If we have not yet created any entry in our state map for this call, + // then we can be sure that it is the start of this call. + if (has(_vgwSessionIdToUdpChannelMap, key1) == false && + has(_vgwSessionIdToUdpChannelMap, key2) == false) { + // We can now write a "Start of Call" indicator file in the + // application's data directory. 
e-g: 5362954-call-started.txt + mutable int32 err = 0ul; + rstring socsFileName = dataDirectory() + "/" + + BSD.vgwSessionId + "-call-started.txt"; + uint64 fileHandle = fopen (socsFileName, "w+", err); + + if(err == 0) { + fwriteString ("VGW call session id " + BSD.vgwSessionId + + " started at " + ctime(getTimestamp()) + ".", fileHandle, err); + fclose(fileHandle, err); + } + + appTrc(Trace.error, "A new voice call has started. vgwSessionId=" + BSD.vgwSessionId); } - appTrc(Trace.error, "A new voice call has started. vgwSessionId=" + BSD.vgwSessionId); - } - - // Insert into the state map for future reference. - insertM(_vgwSessionIdToUdpChannelMap, _key, mySpeechEngineId); + // Insert into the state map for future reference. + insertM(_vgwSessionIdToUdpChannelMap, _key, mySpeechEngineId); + + // For this voice call (i.e. vgwSessionId), select a + // single result processor UDP channel. Both speakers in this + // same voice call will use that same result processor instance. + // This will ensure that the S2T results for both the speakers + // will reach the same result processor. + if (has(_vgwSessionToResultProcessorChannelMap, + BSD.vgwSessionId) == false) { + insertM(_vgwSessionToResultProcessorChannelMap, + BSD.vgwSessionId, mySpeechEngineId); + } - // For this voice call (i.e. vgwSessionId), select a - // single result processor UDP channel. Both speakers in this - // same voice call will use that same result processor instance. - // This will ensure that the S2T results for both the speakers - // will reach the same result processor. - if (has(_vgwSessionToResultProcessorChannelMap, - BSD.vgwSessionId) == false) { - insertM(_vgwSessionToResultProcessorChannelMap, - BSD.vgwSessionId, mySpeechEngineId); - } - - // Set the S2T result processor id. 
- BSD.speechResultProcessorId = - _vgwSessionToResultProcessorChannelMap[BSD.vgwSessionId]; - } // End of if (mySpeechEngineId == -1) - } // End of if (has(_vgwSessionIdToUdpChannelMap, _key) - - appTrc(Trace.debug, "vgwSessionId=" + BSD.vgwSessionId + - ", isCustomerSpeechData=" + (rstring)BSD.isCustomerSpeechData + - ", vgwVoiceChannelNumber=" + (rstring)BSD.vgwVoiceChannelNumber + - ", speechDataFragmentCnt=" + (rstring)BSD.speechDataFragmentCnt + - ", totalSpeechDataBytesReceived=" + - (rstring)BSD.totalSpeechDataBytesReceived + - ", speechEngineId=" + (rstring)BSD.speechEngineId + - ", speechResultProcessorId=" + (rstring)BSD.speechResultProcessorId); - // Submit this tuple. - submit(BSD, BSDF); - } // End of onTuple BSD - - // Process the end of voice call signal. - // Since there are two channels in every voice call, - // those two channels will carry their own "End S2T session" - // message from the Voice Gateway. The logic below takes care of - // handling two End of Call Signals for every voice call. - onTuple EOCS: { - // Get the allocated S2T engine id for a given - // vgwSessionId_vgwVoiceChannelNumber combo. - // We should always have an S2T engine id. If not, that is a - // case where the user didn't provision sufficient number of - // S2T engines and there was no idle S2T engine available for that - // given vgwSessionId_vgwVoiceChannelNumber combo. - // This situation can be avoided by starting the application with a - // sufficient number of S2T engines needed for the anticipated - // maximum concurrent voice calls. A rule of thumb is to have - // two S2T engines to process two speakers in every given - // concurrent voice call. - // - // Get the sessionId + channelNumber combo string. - _key = EOCS.vgwSessionId + "_" + (rstring)EOCS.vgwVoiceChannelNumber; - - if (has(_vgwSessionIdToUdpChannelMap, _key) == true) { - // Let us send an empty blob to the WatsonS2T operator to indicate that - // this speaker of a given voice call is done. 
- _oTuple = (BinarySpeech_t){}; - // Copy the three input tuple attributes that must - // match with that of the outgoing tuple. - assignFrom(_oTuple, EOCS); - // Assign the S2T engine id where this voice channel was - // getting processed until now. - _oTuple.speechEngineId = _vgwSessionIdToUdpChannelMap[_key]; - // We have to send this tuple to the result processor as well for - // the call recording logic to work correctly. - _oTuple.speechResultProcessorId = - _vgwSessionToResultProcessorChannelMap[EOCS.vgwSessionId]; - submit(_oTuple, BSDF); - // We are now done with this vgwSessionId_vgwVoiceChannelNumber combo. - removeM(_vgwSessionIdToUdpChannelMap, _key); - // Add the S2T engine id to this call completed map to be released later in the - // following if block only after receiving EOCS for both the voice channels of this call. - insertM(_vgwSessionToCompletedUdpChannelMap, _key, _oTuple.speechEngineId); - } - - // Senthil added this if block on Feb/01/2020. - if($numberOfEocsNeededForVoiceCallCompletion == 1) { - // If the user configured this application to handle - // only one EOCS to treat a voice call as completed, then we - // will try to clean-up the other voice channel if it exists. - mutable int32 otherVgwVoiceChannelNumber = 1; - - if(EOCS.vgwVoiceChannelNumber == 1) { - otherVgwVoiceChannelNumber = 2; - } - - // Get the sessionId + channelNumber combo string. - _key = EOCS.vgwSessionId + "_" + (rstring)otherVgwVoiceChannelNumber; - + // Set the S2T result processor id. 
+ BSD.speechResultProcessorId = + _vgwSessionToResultProcessorChannelMap[BSD.vgwSessionId]; + } // End of if (mySpeechEngineId == -1) + } // End of if (has(_vgwSessionIdToUdpChannelMap, _key) + + appTrc(Trace.debug, "vgwSessionId=" + BSD.vgwSessionId + + ", isCustomerSpeechData=" + (rstring)BSD.isCustomerSpeechData + + ", vgwVoiceChannelNumber=" + (rstring)BSD.vgwVoiceChannelNumber + + ", speechDataFragmentCnt=" + (rstring)BSD.speechDataFragmentCnt + + ", totalSpeechDataBytesReceived=" + + (rstring)BSD.totalSpeechDataBytesReceived + + ", speechEngineId=" + (rstring)BSD.speechEngineId + + ", speechResultProcessorId=" + (rstring)BSD.speechResultProcessorId); + // Submit this tuple. + submit(BSD, BSDF); + } else { + // The incoming tuple contains an End of Call Signal (EOCS). + appTrc(Trace.error, "Received an EOCS at the speech processor id " + + (rstring)$idOfThisSpeechProcessor + + ". vgwSessionId=" + BSD.vgwSessionId + + ", voiceChannelNumber=" + (rstring)BSD.vgwVoiceChannelNumber); + // + // Process the end of voice call signal. + // Since there are two channels in every voice call, + // those two channels will carry their own "End S2T session" + // message from the Voice Gateway. The logic below takes care of + // handling two End of Call Signals for every voice call. + // + // Get the allocated S2T engine id for a given + // vgwSessionId_vgwVoiceChannelNumber combo. + // We should always have an S2T engine id. If not, that is a + // case where the user didn't provision sufficient number of + // S2T engines and there was no idle S2T engine available for that + // given vgwSessionId_vgwVoiceChannelNumber combo. + // This situation can be avoided by starting the application with a + // sufficient number of S2T engines needed for the anticipated + // maximum concurrent voice calls. A rule of thumb is to have + // two S2T engines to process two speakers in every given + // concurrent voice call. 
+ // if (has(_vgwSessionIdToUdpChannelMap, _key) == true) { // Let us send an empty blob to the WatsonS2T operator to indicate that // this speaker of a given voice call is done. _oTuple = (BinarySpeech_t){}; // Copy the three input tuple attributes that must // match with that of the outgoing tuple. - assignFrom(_oTuple, EOCS); - // Override the following two attributes to reflect the other voice channel. - // Flip this attribute value. - if(_oTuple.isCustomerSpeechData == true) { - _oTuple.isCustomerSpeechData = false; - } else { - _oTuple.isCustomerSpeechData = true; - } - - _oTuple.vgwVoiceChannelNumber = otherVgwVoiceChannelNumber; - + assignFrom(_oTuple, BSD); // Assign the S2T engine id where this voice channel was // getting processed until now. _oTuple.speechEngineId = _vgwSessionIdToUdpChannelMap[_key]; // We have to send this tuple to the result processor as well for // the call recording logic to work correctly. _oTuple.speechResultProcessorId = - _vgwSessionToResultProcessorChannelMap[EOCS.vgwSessionId]; + _vgwSessionToResultProcessorChannelMap[BSD.vgwSessionId]; submit(_oTuple, BSDF); // We are now done with this vgwSessionId_vgwVoiceChannelNumber combo. removeM(_vgwSessionIdToUdpChannelMap, _key); @@ -996,80 +957,127 @@ public composite VgwDataRouterToWatsonS2T { // following if block only after receiving EOCS for both the voice channels of this call. insertM(_vgwSessionToCompletedUdpChannelMap, _key, _oTuple.speechEngineId); } - } - - // Since this voice call is ending, let us release the S2T result processor - // instance that was allocated above for this voice call. - if (has(_vgwSessionToResultProcessorChannelMap, - EOCS.vgwSessionId) == true) { - // Let us remove the result processor id only after the logic - // in the previous if-block took care of sending the EOCS for - // both the voice channels in a given voice call. 
- // Checking for this condition is important for the - // call recording logic inside the S2T result processor - // composite to work correctly. - rstring key1 = EOCS.vgwSessionId + "_" + "1"; - rstring key2 = EOCS.vgwSessionId + "_" + "2"; - // Remove the result processor id only if the EOCS signal - // was sent for both of the voice channels. That must first - // happen before we can release the result processor id. - // - // This if condition was changed by Senthil on - // Feb/01/2021 for the following reason. - // If the user configured this application to handle - // a single EOCS as sufficient to consider a voice call - // completed for a given VGW session id, we will use the - // second || i.e. OR condition. Please refer to the - // constant i.e. expression declaration section above to - // read the commentary about this idea. - // - if (($numberOfEocsNeededForVoiceCallCompletion == 2 && - (has(_vgwSessionIdToUdpChannelMap, key1) == false && - has(_vgwSessionIdToUdpChannelMap, key2) == false)) || - ($numberOfEocsNeededForVoiceCallCompletion == 1 && - (has(_vgwSessionIdToUdpChannelMap, key1) == false || - has(_vgwSessionIdToUdpChannelMap, key2) == false))) { - removeM(_vgwSessionToResultProcessorChannelMap, EOCS.vgwSessionId); + // Senthil added this if block on Feb/01/2020. + if($numberOfEocsNeededForVoiceCallCompletion == 1) { + // If the user configured this application to handle + // only one EOCS to treat a voice call as completed, then we + // will try to clean-up the other voice channel if it exists. + mutable int32 otherVgwVoiceChannelNumber = 1; - // Since the voice call for this VGW session id has ended completely, - // we can also release the S2T engine(s) assigned for this call so that - // they can be repurposed for handling any new future calls. - // We can go ahead and release the S2T engine by adding it back to - // the idle UDP channels list. 
- if(has(_vgwSessionToCompletedUdpChannelMap, key1) == true) { - appendM(_idleUdpChannelsList, _vgwSessionToCompletedUdpChannelMap[key1]); - // We are done. Remove it from the map as well. - removeM(_vgwSessionToCompletedUdpChannelMap, key1); - } - - if(has(_vgwSessionToCompletedUdpChannelMap, key2) == true) { - appendM(_idleUdpChannelsList, _vgwSessionToCompletedUdpChannelMap[key2]); - // We are done. Remove it from the map as well. - removeM(_vgwSessionToCompletedUdpChannelMap, key2); + if(BSD.vgwVoiceChannelNumber == 1) { + otherVgwVoiceChannelNumber = 2; } - // At this time, the voice call for this VGW session id has ended. - // We can now write an "End of Call" indicator file in the - // application's data directory. e-g: 5362954-call-completed.txt - mutable int32 err = 0ul; - rstring eocsFileName = dataDirectory() + "/" + - EOCS.vgwSessionId + "-call-completed.txt"; - uint64 fileHandle = fopen (eocsFileName, "w+", err); + // Get the sessionId + channelNumber combo string. + _key = BSD.vgwSessionId + "_" + (rstring)otherVgwVoiceChannelNumber; - if(err == 0) { - fwriteString ("VGW call session id " + EOCS.vgwSessionId + - " ended at " + ctime(getTimestamp()) + ".", fileHandle, err); - fclose(fileHandle, err); + if (has(_vgwSessionIdToUdpChannelMap, _key) == true) { + // Let us send an empty blob to the WatsonS2T operator to indicate that + // this speaker of a given voice call is done. + _oTuple = (BinarySpeech_t){}; + // Copy the three input tuple attributes that must + // match with that of the outgoing tuple. + assignFrom(_oTuple, BSD); + // Override the following two attributes to reflect the other voice channel. + // Flip this attribute value. + if(_oTuple.isCustomerSpeechData == true) { + _oTuple.isCustomerSpeechData = false; + } else { + _oTuple.isCustomerSpeechData = true; + } + + _oTuple.vgwVoiceChannelNumber = otherVgwVoiceChannelNumber; + + // Assign the S2T engine id where this voice channel was + // getting processed until now. 
+ _oTuple.speechEngineId = _vgwSessionIdToUdpChannelMap[_key]; + // We have to send this tuple to the result processor as well for + // the call recording logic to work correctly. + _oTuple.speechResultProcessorId = + _vgwSessionToResultProcessorChannelMap[BSD.vgwSessionId]; + submit(_oTuple, BSDF); + // We are now done with this vgwSessionId_vgwVoiceChannelNumber combo. + removeM(_vgwSessionIdToUdpChannelMap, _key); + // Add the S2T engine id to this call completed map to be released later in the + // following if block only after receiving EOCS for both the voice channels of this call. + insertM(_vgwSessionToCompletedUdpChannelMap, _key, _oTuple.speechEngineId); } + } + + // Since this voice call is ending, let us release the S2T result processor + // instance that was allocated above for this voice call. + if (has(_vgwSessionToResultProcessorChannelMap, + BSD.vgwSessionId) == true) { + // Let us remove the result processor id only after the logic + // in the previous if-block took care of sending the EOCS for + // both the voice channels in a given voice call. + // Checking for this condition is important for the + // call recording logic inside the S2T result processor + // composite to work correctly. + rstring key1 = BSD.vgwSessionId + "_" + "1"; + rstring key2 = BSD.vgwSessionId + "_" + "2"; - appTrc(Trace.error, "An ongoing voice call has completed. vgwSessionId=" + EOCS.vgwSessionId); + // Remove the result processor id only if the EOCS signal + // was sent for both of the voice channels. That must first + // happen before we can release the result processor id. + // + // This if condition was changed by Senthil on + // Feb/01/2021 for the following reason. + // If the user configured this application to handle + // a single EOCS as sufficient to consider a voice call + // completed for a given VGW session id, we will use the + // second || i.e. OR condition. Please refer to the + // constant i.e. 
expression declaration section above to + // read the commentary about this idea. + // + if (($numberOfEocsNeededForVoiceCallCompletion == 2 && + (has(_vgwSessionIdToUdpChannelMap, key1) == false && + has(_vgwSessionIdToUdpChannelMap, key2) == false)) || + ($numberOfEocsNeededForVoiceCallCompletion == 1 && + (has(_vgwSessionIdToUdpChannelMap, key1) == false || + has(_vgwSessionIdToUdpChannelMap, key2) == false))) { + removeM(_vgwSessionToResultProcessorChannelMap, BSD.vgwSessionId); + + // Since the voice call for this VGW session id has ended completely, + // we can also release the S2T engine(s) assigned for this call so that + // they can be repurposed for handling any new future calls. + // We can go ahead and release the S2T engine by adding it back to + // the idle UDP channels list. + if(has(_vgwSessionToCompletedUdpChannelMap, key1) == true) { + appendM(_idleUdpChannelsList, _vgwSessionToCompletedUdpChannelMap[key1]); + // We are done. Remove it from the map as well. + removeM(_vgwSessionToCompletedUdpChannelMap, key1); + } + + if(has(_vgwSessionToCompletedUdpChannelMap, key2) == true) { + appendM(_idleUdpChannelsList, _vgwSessionToCompletedUdpChannelMap[key2]); + // We are done. Remove it from the map as well. + removeM(_vgwSessionToCompletedUdpChannelMap, key2); + } + + // At this time, the voice call for this VGW session id has ended. + // We can now write an "End of Call" indicator file in the + // application's data directory. e-g: 5362954-call-completed.txt + mutable int32 err = 0ul; + rstring eocsFileName = dataDirectory() + "/" + + BSD.vgwSessionId + "-call-completed.txt"; + uint64 fileHandle = fopen (eocsFileName, "w+", err); + + if(err == 0) { + fwriteString ("VGW call session id " + BSD.vgwSessionId + + " ended at " + ctime(getTimestamp()) + ".", fileHandle, err); + fclose(fileHandle, err); + } + + appTrc(Trace.error, "An ongoing voice call has completed. 
vgwSessionId=" + BSD.vgwSessionId); + } } - } - } + } // End of if(BSD.endOfCallSignal == false) + } // End of onTuple BSD config - threadedPort: queue(BSD, Sys.Wait), queue(EOCS, Sys.Wait); + threadedPort: queue(BSD, Sys.Wait); } // End of Custom operator. // Invoke one or more instances of the IBMWatsonSpeech2Text composite operator. @@ -1926,14 +1934,13 @@ public composite CallRecordingWriteCoordinator(input SpeechFragment; // parallel region for the purpose of load testing by // replaying many pre-recorded voice calls at the same time. public composite CallRecordingReplay(input CallReplaySignalFileNameIn; - output PreRecordedBinarySpeechData, PreRecordedEndOfCallSignal) { + output PreRecordedBinarySpeechData) { param expression $callRecordingReadDirectory; // This composite operator receives externally // defined stream types via operator parameters. type $callMetaData_t; type $binarySpeech_t; - type $endOfCallSignal_t; // Replaying the pre-recorded voice calls. // The graph below will perform the logic necessary to @@ -2166,7 +2173,6 @@ public composite CallRecordingReplay(input CallReplaySignalFileNameIn; // mix both of them and send out a tuple for transcription by // downstream operators. (stream<$binarySpeech_t> PreRecordedBinarySpeechData as PRBSD; - stream<$endOfCallSignal_t> PreRecordedEndOfCallSignal as PREOCS; stream Acknowledgement as Ack) as PreRecordedCallReplayer = Custom(CallMetaDataVC1, CallMetaDataVC2 as CMD; CallSpeechDataVC1, CallSpeechDataVC2 as CSD; TimerSignal as TS) { @@ -2192,7 +2198,6 @@ public composite CallRecordingReplay(input CallReplaySignalFileNameIn; // waiting for data to be read from the pre-recorded files. mutable rstring lastObservedReplayMapKey = "abcxyz"; mutable $binarySpeech_t _oTuple1 = {}; - mutable $endOfCallSignal_t _oTuple2 = {}; } onTuple CMD: { @@ -2284,8 +2289,10 @@ public composite CallRecordingReplay(input CallReplaySignalFileNameIn; // We have the call meta data. 
We can create a new // binary speech data tuple now and send it out for transcription. // Copy all the call meta data attributes to the outgoing tuple. + _oTuple1 = ($binarySpeech_t){}; assignFrom(_oTuple1, _callMetaDataMap[key]); _oTuple1.speech = CSD.speech; + _oTuple1.endOfCallSignal = false; _oTuple1.speechDataFragmentCnt = _speechDataFragmentCount[key]; _oTuple1.totalSpeechDataBytesReceived = _speechDataBytesCount[key]; submit(_oTuple1, PRBSD); @@ -2317,12 +2324,14 @@ public composite CallRecordingReplay(input CallReplaySignalFileNameIn; // Send two EOCS signals one for each voice channel in the given call. for (rstring str in _callMetaDataMap) { - _oTuple2.vgwSessionId = _callMetaDataMap[str].vgwSessionId; - _oTuple2.isCustomerSpeechData = + _oTuple1 = ($binarySpeech_t){}; + _oTuple1.vgwSessionId = _callMetaDataMap[str].vgwSessionId; + _oTuple1.isCustomerSpeechData = _callMetaDataMap[str].isCustomerSpeechData; - _oTuple2.vgwVoiceChannelNumber = + _oTuple1.vgwVoiceChannelNumber = _callMetaDataMap[str].vgwVoiceChannelNumber; - submit(_oTuple2, PREOCS); + _oTuple1.endOfCallSignal = true; + submit(_oTuple1, PRBSD); } // Clear all the state maps to be ready for the @@ -2386,12 +2395,14 @@ public composite CallRecordingReplay(input CallReplaySignalFileNameIn; // we are required to send an EOCS for that voice channel. 
for (rstring str in _callMetaDataMap) { if (_speechDataTuplesSentCount[str] > 0) { - _oTuple2.vgwSessionId = _callMetaDataMap[str].vgwSessionId; - _oTuple2.isCustomerSpeechData = + _oTuple1 = ($binarySpeech_t){}; + _oTuple1.vgwSessionId = _callMetaDataMap[str].vgwSessionId; + _oTuple1.isCustomerSpeechData = _callMetaDataMap[str].isCustomerSpeechData; - _oTuple2.vgwVoiceChannelNumber = + _oTuple1.vgwVoiceChannelNumber = _callMetaDataMap[str].vgwVoiceChannelNumber; - submit(_oTuple2, PREOCS); + _oTuple1.endOfCallSignal = true; + submit(_oTuple1, PRBSD); } } diff --git a/samples/VgwDataRouterToWatsonS2T/data/.gitignore b/samples/VgwDataRouterToWatsonS2T/data/.gitignore new file mode 100644 index 0000000..4cf1d1c --- /dev/null +++ b/samples/VgwDataRouterToWatsonS2T/data/.gitignore @@ -0,0 +1,8 @@ +# This directory is empty with no files at the time of committing it to the GitHub. + +# When the users clone this toolkit repository or download a release package from this repository, they will create files into their copy of this folder for compiling, testing and application bundle packaging purposes. So, they will need this empty directory to be present as part of this toolkit. + +# Having this .gitignore file here will force the git add, git commit and git push commands to keep this empty folder in the remote repository instead of simply ignoring it. 
+ +# ignore all content of this directory +/* diff --git a/samples/VgwDataRouterToWatsonS2T/info.xml b/samples/VgwDataRouterToWatsonS2T/info.xml index 88c9ef9..b64594f 100644 --- a/samples/VgwDataRouterToWatsonS2T/info.xml +++ b/samples/VgwDataRouterToWatsonS2T/info.xml @@ -4,7 +4,7 @@ VgwDataRouterToWatsonS2T Example that showcases embedded S2T in IBM Streams - 1.0.1 + 1.0.2 4.2.1.6 @@ -26,7 +26,7 @@ STTGatewayUtils - [1.0.0,5.0.0) + [1.0.1,5.0.0) \ No newline at end of file diff --git a/samples/VgwDataRouterToWatsonSTT/com.ibm.streamsx.sttgateway.sample.watsonstt/VgwDataRouterToWatsonSTT.spl b/samples/VgwDataRouterToWatsonSTT/com.ibm.streamsx.sttgateway.sample.watsonstt/VgwDataRouterToWatsonSTT.spl index 743d472..53b4155 100644 --- a/samples/VgwDataRouterToWatsonSTT/com.ibm.streamsx.sttgateway.sample.watsonstt/VgwDataRouterToWatsonSTT.spl +++ b/samples/VgwDataRouterToWatsonSTT/com.ibm.streamsx.sttgateway.sample.watsonstt/VgwDataRouterToWatsonSTT.spl @@ -8,7 +8,7 @@ /* ============================================== First created on: Nov/27/2020 -Last modified on: Feb/02/2021 +Last modified on: Feb/10/2021 IMPORTANT NOTE -------------- @@ -697,8 +697,7 @@ public composite VgwDataRouterToWatsonSTT { // We can now (twice) deserialize the data received from the VGW data router application to // get the actual speech data or the EOCS (End Of Call Signal). - (stream BinarySpeechData as BSD; - stream EndOfCallSignal as EOCS) as VgwDataParser = + (stream BinarySpeechData as BSD) as VgwDataParser = Custom(ReceivedData as RD) { logic onTuple RD: { @@ -720,9 +719,9 @@ public composite VgwDataRouterToWatsonSTT { submit(speechData, BSD); } else if(outerTuple.msgType == 2) { // This is End Of Call Signal (EOCS) sent by the Voice Gateway product. 
- mutable EndOfCallSignal_t eocs = (EndOfCallSignal_t){}; + mutable BinarySpeech_t eocs = (BinarySpeech_t){}; deserializeTuple(eocs, outerTuple.payload); - submit(eocs, EOCS); + submit(eocs, BSD); } else { // Unsupported message type received. appTrc(Trace.error, "Unsupported message type " + @@ -772,15 +771,13 @@ public composite VgwDataRouterToWatsonSTT { // high number of replay engines before deciding on a suitable parallel width. // @parallel(width=$numberOfCallReplayEngines) - (stream PreRecordedBinarySpeechData; - stream PreRecordedEndOfCallSignal) as + (stream PreRecordedBinarySpeechData) as VoiceCallReplayer = CallRecordingReplay(CallReplaySignalFileName) { param callRecordingReadDirectory: $callRecordingReadDirectory; // Pass these stream types as composite operator parameters. callMetaData_t: CallMetaData_t; binarySpeech_t: BinarySpeech_t; - endOfCallSignal_t: EndOfCallSignal_t; } // We have to always route the speech data bytes (fragments) coming from @@ -804,8 +801,7 @@ public composite VgwDataRouterToWatsonSTT { // That special logic happens inside this operator. (stream BinarySpeechDataFragment as BSDF) as BinarySpeechDataRouter = Custom( - BinarySpeechData, PreRecordedBinarySpeechData as BSD; - EndOfCallSignal, PreRecordedEndOfCallSignal as EOCS) { + BinarySpeechData, PreRecordedBinarySpeechData as BSD) { logic state: { // This map tells us which UDP channel is processing a @@ -833,291 +829,302 @@ public composite VgwDataRouterToWatsonSTT { onTuple BSD: { // Get the sessionId + channelNumber combo string. _key = BSD.vgwSessionId + "_" + (rstring)BSD.vgwVoiceChannelNumber; - - // Check if this vgwSessionId_vgwVoiceChannelNumber combo already - // has an STT engine allocated for it via an UDP channel. - if (has(_vgwSessionIdToUdpChannelMap, _key) == true) { - // This is a speaker of an ongoing voice call who has - // already been assigned to an STT engine. - // Always send this speaker's speech data fragment to - // that same STT engine. 
- BSD.speechEngineId = _vgwSessionIdToUdpChannelMap[_key]; - // We can always assume that there is a preselected - // STT result processor UDP channel available for this - // voice call (i.e. vgwSessionId). Because, it is already - // done in the else block below when this voice call's - // first speaker's speech data arrives here. - // Let us fetch and assign it here. - if (has(_vgwSessionToResultProcessorChannelMap, - BSD.vgwSessionId) == true) { - BSD.speechResultProcessorId = - _vgwSessionToResultProcessorChannelMap[BSD.vgwSessionId]; - } else { - // This should never happen since the call will end - // for both the speakers almost at the same time after - // which there will be no speech data from any of the - // speakers participating in a given voice call. - // This else block is just part of defensive coding. - appTrc(Trace.error, - "_XXXXX No STT result processor engine available at this time for the " + - "vgwSessionId_vgwVoiceChannelNumber: " + _key + - ". This should be a rare occurrence towards the very end of the call." + - " We are not going to process the speech data bytes" + - " of this speaker in this voice call."); - return; - } - } else { - // If we are here, that means this is a brand new speaker of a - // voice call for whom we must find an idle UDP channel a.k.a - // an idle STT engine that can process this speaker's speech data. - int32 mySpeechEngineId = getAnIdleUdpChannel(_idleUdpChannelsList); - - if (mySpeechEngineId == -1) { - // This is not good and we should never end up in this situation. - // This means we have not provisioned sufficient number of STT engines to - // handle the maximum planned concurrent calls. We have to ignore this - // speech data fragment and hope that an idle UDP channel number will - // become available by the time the next speech data fragment for this - // particular vgwSessionId_vgwVoiceChannelNumber combo arrives here. 
- if (BSD.speechDataFragmentCnt == 1) { - // Display this alert only for the very first data fragment of a - // given speaker of a given voice call. - appTrc(Trace.error, "No idle STT engine available at this time for the " + + + // We will get the regular binary speech data and the End Of Call Signal (EOCS) in + // the same input stream. This design change was done on Feb/09/2021 to avoid + // any port locks and/or tuple ordering issues that may happen if we choose to + // do it using two different output ports. The incoming tuple has an attribute + // that is set to true or false by the IBMVoiceGatewaySource operator to indicate + // whether it is sending binary speech data or an EOCS. + if(BSD.endOfCallSignal == false) { + // The incoming tuple contains binary speech data. + // + // Check if this vgwSessionId_vgwVoiceChannelNumber combo already + // has an STT engine allocated for it via an UDP channel. + if (has(_vgwSessionIdToUdpChannelMap, _key) == true) { + // This is a speaker of an ongoing voice call who has + // already been assigned to an STT engine. + // Always send this speaker's speech data fragment to + // that same STT engine. + BSD.speechEngineId = _vgwSessionIdToUdpChannelMap[_key]; + // We can always assume that there is a preselected + // STT result processor UDP channel available for this + // voice call (i.e. vgwSessionId). Because, it is already + // done in the else block below when this voice call's + // first speaker's speech data arrives here. + // Let us fetch and assign it here. + if (has(_vgwSessionToResultProcessorChannelMap, + BSD.vgwSessionId) == true) { + BSD.speechResultProcessorId = + _vgwSessionToResultProcessorChannelMap[BSD.vgwSessionId]; + } else { + // This should never happen since the call will end + // for both the speakers almost at the same time after + // which there will be no speech data from any of the + // speakers participating in a given voice call. + // This else block is just part of defensive coding. 
+ appTrc(Trace.error, + "_XXXXX No STT result processor engine available at this time for the " + "vgwSessionId_vgwVoiceChannelNumber: " + _key + - ". There are " + (rstring)$numberOfSTTEngines + - " STT engines configured and they are all processing other" + - " voice calls at this time. Please start sufficient number of STT engines" + - " next time to handle your maximum expected concurrent calls." + - " A rule of thumb is to have two STT engines to process" + - " two speakers in every given concurrent voice call."); + ". This should be a rare occurrence towards the very end of the call." + + " We are not going to process the speech data bytes" + + " of this speaker in this voice call."); + return; } - - return; } else { - // We got an idle STT engine. - BSD.speechEngineId = mySpeechEngineId; - - // If this call is just beginning, then we will create a - // tiny text file to indicate that we started receiving - // speech data from the IBM Voice Gateway for this new call. - rstring key1 = BSD.vgwSessionId + "_" + "1"; - rstring key2 = BSD.vgwSessionId + "_" + "2"; + // If we are here, that means this is a brand new speaker of a + // voice call for whom we must find an idle UDP channel a.k.a + // an idle STT engine that can process this speaker's speech data. + int32 mySpeechEngineId = getAnIdleUdpChannel(_idleUdpChannelsList); - // If we have not yet created any entry in our state map for this call, - // then we can be sure that it is the start of this call. - if (has(_vgwSessionIdToUdpChannelMap, key1) == false && - has(_vgwSessionIdToUdpChannelMap, key2) == false) { - // We can now write a "Start of Call" indicator file in the - // application's data directory. e-g: 5362954-call-started.txt - mutable int32 err = 0ul; - rstring socsFileName = dataDirectory() + "/" + - BSD.vgwSessionId + "-call-started.txt"; - uint64 fileHandle = fopen (socsFileName, "w+", err); + if (mySpeechEngineId == -1) { + // This is not good and we should never end up in this situation. 
+ // This means we have not provisioned sufficient number of STT engines to + // handle the maximum planned concurrent calls. We have to ignore this + // speech data fragment and hope that an idle UDP channel number will + // become available by the time the next speech data fragment for this + // particular vgwSessionId_vgwVoiceChannelNumber combo arrives here. + if (BSD.speechDataFragmentCnt == 1) { + // Display this alert only for the very first data fragment of a + // given speaker of a given voice call. + appTrc(Trace.error, "No idle STT engine available at this time for the " + + "vgwSessionId_vgwVoiceChannelNumber: " + _key + + ". There are " + (rstring)$numberOfSTTEngines + + " STT engines configured and they are all processing other" + + " voice calls at this time. Please start sufficient number of STT engines" + + " next time to handle your maximum expected concurrent calls." + + " A rule of thumb is to have two STT engines to process" + + " two speakers in every given concurrent voice call."); + } + + return; + } else { + // We got an idle STT engine. + BSD.speechEngineId = mySpeechEngineId; + + // If this call is just beginning, then we will create a + // tiny text file to indicate that we started receiving + // speech data from the IBM Voice Gateway for this new call. + rstring key1 = BSD.vgwSessionId + "_" + "1"; + rstring key2 = BSD.vgwSessionId + "_" + "2"; - if(err == 0) { - fwriteString ("VGW call session id " + BSD.vgwSessionId + - " started at " + ctime(getTimestamp()) + ".", fileHandle, err); - fclose(fileHandle, err); + // If we have not yet created any entry in our state map for this call, + // then we can be sure that it is the start of this call. + if (has(_vgwSessionIdToUdpChannelMap, key1) == false && + has(_vgwSessionIdToUdpChannelMap, key2) == false) { + // We can now write a "Start of Call" indicator file in the + // application's data directory. 
e-g: 5362954-call-started.txt + mutable int32 err = 0ul; + rstring socsFileName = dataDirectory() + "/" + + BSD.vgwSessionId + "-call-started.txt"; + uint64 fileHandle = fopen (socsFileName, "w+", err); + + if(err == 0) { + fwriteString ("VGW call session id " + BSD.vgwSessionId + + " started at " + ctime(getTimestamp()) + ".", fileHandle, err); + fclose(fileHandle, err); + } + + appTrc(Trace.error, "A new voice call has started. vgwSessionId=" + BSD.vgwSessionId); } - appTrc(Trace.error, "A new voice call has started. vgwSessionId=" + BSD.vgwSessionId); - } - - // Insert into the state map for future reference. - insertM(_vgwSessionIdToUdpChannelMap, - _key, mySpeechEngineId); + // Insert into the state map for future reference. + insertM(_vgwSessionIdToUdpChannelMap, + _key, mySpeechEngineId); + + // For this voice call (i.e. vgwSessionId), select a + // single result processor UDP channel. Both speakers in this + // same voice call will use that same result processor instance. + // This will ensure that the STT results for both the speakers + // will reach the same result processor. + if (has(_vgwSessionToResultProcessorChannelMap, + BSD.vgwSessionId) == false) { + insertM(_vgwSessionToResultProcessorChannelMap, + BSD.vgwSessionId, mySpeechEngineId); + } - // For this voice call (i.e. vgwSessionId), select a - // single result processor UDP channel. Both speakers in this - // same voice call will use that same result processor instance. - // This will ensure that the STT results for both the speakers - // will reach the same result processor. - if (has(_vgwSessionToResultProcessorChannelMap, - BSD.vgwSessionId) == false) { - insertM(_vgwSessionToResultProcessorChannelMap, - BSD.vgwSessionId, mySpeechEngineId); - } - - // Set the STT result processor id. 
- BSD.speechResultProcessorId = - _vgwSessionToResultProcessorChannelMap[BSD.vgwSessionId]; - } // End of if (mySpeechEngineId == -1) - } // End of if (has(_vgwSessionIdToUdpChannelMap, _key) - - appTrc(Trace.debug, "vgwSessionId=" + BSD.vgwSessionId + - ", isCustomerSpeechData=" + (rstring)BSD.isCustomerSpeechData + - ", vgwVoiceChannelNumber=" + (rstring)BSD.vgwVoiceChannelNumber + - ", speechDataFragmentCnt=" + (rstring)BSD.speechDataFragmentCnt + - ", totalSpeechDataBytesReceived=" + - (rstring)BSD.totalSpeechDataBytesReceived + - ", speechEngineId=" + (rstring)BSD.speechEngineId + - ", speechResultProcessorId=" + (rstring)BSD.speechResultProcessorId); - // Submit this tuple. - submit(BSD, BSDF); - } // End of onTuple BSD - - // Process the end of voice call signal. - // Since there are two channels in every voice call, - // those two channels will carry their own "End STT session" - // message from the Voice Gateway. The logic below takes care of - // handling two End of Call Signals for every voice call. - onTuple EOCS: { - // Get the allocated STT engine id for a given - // vgwSessionId_vgwVoiceChannelNumber combo. - // We should always have an STT engine id. If not, that is a - // case where the user didn't provision sufficient number of - // STT engines and there was no idle STT engine available for that - // given vgwSessionId_vgwVoiceChannelNumber combo. - // This situation can be avoided by starting the application with a - // sufficient number of STT engines needed for the anticipated - // maximum concurrent voice calls. A rule of thumb is to have - // two STT engines to process two speakers in every given - // concurrent voice call. - // - // Get the sessionId + channelNumber combo string. - _key = EOCS.vgwSessionId + "_" + (rstring)EOCS.vgwVoiceChannelNumber; - - if (has(_vgwSessionIdToUdpChannelMap, _key) == true) { - // Let us send an empty blob to the WatsonSTT operator to indicate that - // this speaker of a given voice call is done. 
- _oTuple = (BinarySpeech_t){}; - // Copy the three input tuple attributes that must - // match with that of the outgoing tuple. - assignFrom(_oTuple, EOCS); - // Assign the STT engine id where this voice channel was - // getting processed until now. - _oTuple.speechEngineId = _vgwSessionIdToUdpChannelMap[_key]; - // We have to send this tuple to the result processor as well for - // the call recording logic to work correctly. - _oTuple.speechResultProcessorId = - _vgwSessionToResultProcessorChannelMap[EOCS.vgwSessionId]; - submit(_oTuple, BSDF); - // We are now done with this vgwSessionId_vgwVoiceChannelNumber combo. - removeM(_vgwSessionIdToUdpChannelMap, _key); - // Add the STT engine id to this call completed map to be released later in the - // following if block only after receiving EOCS for both the voice channels of this call. - insertM(_vgwSessionToCompletedUdpChannelMap, _key, _oTuple.speechEngineId); - } - - // Senthil added this if block on Feb/01/2020. - if($numberOfEocsNeededForVoiceCallCompletion == 1) { - // If the user configured this application to handle - // only one EOCS to treat a voice call as completed, then we - // will try to clean-up the other voice channel if it exists. - mutable int32 otherVgwVoiceChannelNumber = 1; - - if(EOCS.vgwVoiceChannelNumber == 1) { - otherVgwVoiceChannelNumber = 2; - } - - // Get the sessionId + channelNumber combo string. - _key = EOCS.vgwSessionId + "_" + (rstring)otherVgwVoiceChannelNumber; - + // Set the STT result processor id. 
+ BSD.speechResultProcessorId = + _vgwSessionToResultProcessorChannelMap[BSD.vgwSessionId]; + } // End of if (mySpeechEngineId == -1) + } // End of if (has(_vgwSessionIdToUdpChannelMap, _key) + + appTrc(Trace.debug, "vgwSessionId=" + BSD.vgwSessionId + + ", isCustomerSpeechData=" + (rstring)BSD.isCustomerSpeechData + + ", vgwVoiceChannelNumber=" + (rstring)BSD.vgwVoiceChannelNumber + + ", speechDataFragmentCnt=" + (rstring)BSD.speechDataFragmentCnt + + ", totalSpeechDataBytesReceived=" + + (rstring)BSD.totalSpeechDataBytesReceived + + ", speechEngineId=" + (rstring)BSD.speechEngineId + + ", speechResultProcessorId=" + (rstring)BSD.speechResultProcessorId); + // Submit this tuple. + submit(BSD, BSDF); + } else { + // The incoming tuple contains an End of Call Signal (EOCS). + appTrc(Trace.error, "Received an EOCS at the speech processor id " + + (rstring)$idOfThisSpeechProcessor + + ". vgwSessionId=" + BSD.vgwSessionId + + ", voiceChannelNumber=" + (rstring)BSD.vgwVoiceChannelNumber); + // + // Process the end of voice call signal. + // Since there are two channels in every voice call, + // those two channels will carry their own "End STT session" + // message from the Voice Gateway. The logic below takes care of + // handling two End of Call Signals for every voice call. + // + // Get the allocated STT engine id for a given + // vgwSessionId_vgwVoiceChannelNumber combo. + // We should always have an STT engine id. If not, that is a + // case where the user didn't provision sufficient number of + // STT engines and there was no idle STT engine available for that + // given vgwSessionId_vgwVoiceChannelNumber combo. + // This situation can be avoided by starting the application with a + // sufficient number of STT engines needed for the anticipated + // maximum concurrent voice calls. A rule of thumb is to have + // two STT engines to process two speakers in every given + // concurrent voice call. 
if (has(_vgwSessionIdToUdpChannelMap, _key) == true) { - // Let us send an empty blob to the WatsonS2T operator to indicate that + // Let us send an empty blob to the WatsonSTT operator to indicate that // this speaker of a given voice call is done. _oTuple = (BinarySpeech_t){}; // Copy the three input tuple attributes that must // match with that of the outgoing tuple. - assignFrom(_oTuple, EOCS); - // Override the following two attributes to reflect the other voice channel. - // Flip this attribute value. - if(_oTuple.isCustomerSpeechData == true) { - _oTuple.isCustomerSpeechData = false; - } else { - _oTuple.isCustomerSpeechData = true; - } - - _oTuple.vgwVoiceChannelNumber = otherVgwVoiceChannelNumber; - - // Assign the S2T engine id where this voice channel was + assignFrom(_oTuple, BSD); + // Assign the STT engine id where this voice channel was // getting processed until now. _oTuple.speechEngineId = _vgwSessionIdToUdpChannelMap[_key]; // We have to send this tuple to the result processor as well for // the call recording logic to work correctly. _oTuple.speechResultProcessorId = - _vgwSessionToResultProcessorChannelMap[EOCS.vgwSessionId]; + _vgwSessionToResultProcessorChannelMap[BSD.vgwSessionId]; submit(_oTuple, BSDF); // We are now done with this vgwSessionId_vgwVoiceChannelNumber combo. removeM(_vgwSessionIdToUdpChannelMap, _key); - // Add the S2T engine id to this call completed map to be released later in the + // Add the STT engine id to this call completed map to be released later in the // following if block only after receiving EOCS for both the voice channels of this call. insertM(_vgwSessionToCompletedUdpChannelMap, _key, _oTuple.speechEngineId); } - } - - // Since this voice call is ending, let us release the STT result processor - // instance that was allocated above for this voice call. 
- if (has(_vgwSessionToResultProcessorChannelMap, - EOCS.vgwSessionId) == true) { - // Let us remove the result processor id only after the logic - // in the previous if-block took care of sending the EOCS for - // both the voice channels in a given voice call. - // Checking for this condition is important for the - // call recording logic inside the STT result processor - // composite to work correctly. - rstring key1 = EOCS.vgwSessionId + "_" + "1"; - rstring key2 = EOCS.vgwSessionId + "_" + "2"; - - // Remove the result processor id only if the EOCS signal - // was sent for both of the voice channels. That must first - // happen before we can release the result processor id. - // - // This if condition was changed by Senthil on - // Feb/01/2021 for the following reason. - // If the user configured this application to handle - // a single EOCS as sufficient to consider a voice call - // completed for a given VGW session id, we will use the - // second || i.e. OR condition. Please refer to the - // constant i.e. expression declaration section above to - // read the commentary about this idea. - // - if (($numberOfEocsNeededForVoiceCallCompletion == 2 && - (has(_vgwSessionIdToUdpChannelMap, key1) == false && - has(_vgwSessionIdToUdpChannelMap, key2) == false)) || - ($numberOfEocsNeededForVoiceCallCompletion == 1 && - (has(_vgwSessionIdToUdpChannelMap, key1) == false || - has(_vgwSessionIdToUdpChannelMap, key2) == false))) { - removeM(_vgwSessionToResultProcessorChannelMap, EOCS.vgwSessionId); + + // Senthil added this if block on Feb/01/2020. + if($numberOfEocsNeededForVoiceCallCompletion == 1) { + // If the user configured this application to handle + // only one EOCS to treat a voice call as completed, then we + // will try to clean-up the other voice channel if it exists. 
+ mutable int32 otherVgwVoiceChannelNumber = 1; - // Since the voice call for this VGW session id has ended completely, - // we can also release the STT engine(s) assigned for this call so that - // they can be repurposed for handling any new future calls. - // We can go ahead and release the STT engine by adding it back to - // the idle UDP channels list. - if(has(_vgwSessionToCompletedUdpChannelMap, key1) == true) { - appendM(_idleUdpChannelsList, _vgwSessionToCompletedUdpChannelMap[key1]); - // We are done. Remove it from the map as well. - removeM(_vgwSessionToCompletedUdpChannelMap, key1); + if(BSD.vgwVoiceChannelNumber == 1) { + otherVgwVoiceChannelNumber = 2; } - - if(has(_vgwSessionToCompletedUdpChannelMap, key2) == true) { - appendM(_idleUdpChannelsList, _vgwSessionToCompletedUdpChannelMap[key2]); - // We are done. Remove it from the map as well. - removeM(_vgwSessionToCompletedUdpChannelMap, key2); - } - - // At this time, the voice call for this VGW session id has ended. - // We can now write an "End of Call" indicator file in the - // application's data directory. e-g: 5362954-call-completed.txt - mutable int32 err = 0ul; - rstring eocsFileName = dataDirectory() + "/" + - EOCS.vgwSessionId + "-call-completed.txt"; - uint64 fileHandle = fopen (eocsFileName, "w+", err); - if(err == 0) { - fwriteString ("VGW call session id " + EOCS.vgwSessionId + - " ended at " + ctime(getTimestamp()) + ".", fileHandle, err); - fclose(fileHandle, err); + // Get the sessionId + channelNumber combo string. + _key = BSD.vgwSessionId + "_" + (rstring)otherVgwVoiceChannelNumber; + + if (has(_vgwSessionIdToUdpChannelMap, _key) == true) { + // Let us send an empty blob to the WatsonSTT operator to indicate that + // this speaker of a given voice call is done. + _oTuple = (BinarySpeech_t){}; + // Copy the three input tuple attributes that must + // match with that of the outgoing tuple.
+ assignFrom(_oTuple, BSD); + // Override the following two attributes to reflect the other voice channel. + // Flip this attribute value. + if(_oTuple.isCustomerSpeechData == true) { + _oTuple.isCustomerSpeechData = false; + } else { + _oTuple.isCustomerSpeechData = true; + } + + _oTuple.vgwVoiceChannelNumber = otherVgwVoiceChannelNumber; + + // Assign the STT engine id where this voice channel was + // getting processed until now. + _oTuple.speechEngineId = _vgwSessionIdToUdpChannelMap[_key]; + // We have to send this tuple to the result processor as well for + // the call recording logic to work correctly. + _oTuple.speechResultProcessorId = + _vgwSessionToResultProcessorChannelMap[BSD.vgwSessionId]; + submit(_oTuple, BSDF); + // We are now done with this vgwSessionId_vgwVoiceChannelNumber combo. + removeM(_vgwSessionIdToUdpChannelMap, _key); + // Add the STT engine id to this call completed map to be released later in the + // following if block only after receiving EOCS for both the voice channels of this call. + insertM(_vgwSessionToCompletedUdpChannelMap, _key, _oTuple.speechEngineId); } + } + + // Since this voice call is ending, let us release the STT result processor + // instance that was allocated above for this voice call. + if (has(_vgwSessionToResultProcessorChannelMap, + BSD.vgwSessionId) == true) { + // Let us remove the result processor id only after the logic + // in the previous if-block took care of sending the EOCS for + // both the voice channels in a given voice call. + // Checking for this condition is important for the + // call recording logic inside the STT result processor + // composite to work correctly. + rstring key1 = BSD.vgwSessionId + "_" + "1"; + rstring key2 = BSD.vgwSessionId + "_" + "2"; - appTrc(Trace.error, "An ongoing voice call has completed. vgwSessionId=" + EOCS.vgwSessionId); + // Remove the result processor id only if the EOCS signal + // was sent for both of the voice channels.
That must first + // happen before we can release the result processor id. + // + // This if condition was changed by Senthil on + // Feb/01/2021 for the following reason. + // If the user configured this application to handle + // a single EOCS as sufficient to consider a voice call + // completed for a given VGW session id, we will use the + // second || i.e. OR condition. Please refer to the + // constant i.e. expression declaration section above to + // read the commentary about this idea. + // + if (($numberOfEocsNeededForVoiceCallCompletion == 2 && + (has(_vgwSessionIdToUdpChannelMap, key1) == false && + has(_vgwSessionIdToUdpChannelMap, key2) == false)) || + ($numberOfEocsNeededForVoiceCallCompletion == 1 && + (has(_vgwSessionIdToUdpChannelMap, key1) == false || + has(_vgwSessionIdToUdpChannelMap, key2) == false))) { + removeM(_vgwSessionToResultProcessorChannelMap, BSD.vgwSessionId); + + // Since the voice call for this VGW session id has ended completely, + // we can also release the STT engine(s) assigned for this call so that + // they can be repurposed for handling any new future calls. + // We can go ahead and release the STT engine by adding it back to + // the idle UDP channels list. + if(has(_vgwSessionToCompletedUdpChannelMap, key1) == true) { + appendM(_idleUdpChannelsList, _vgwSessionToCompletedUdpChannelMap[key1]); + // We are done. Remove it from the map as well. + removeM(_vgwSessionToCompletedUdpChannelMap, key1); + } + + if(has(_vgwSessionToCompletedUdpChannelMap, key2) == true) { + appendM(_idleUdpChannelsList, _vgwSessionToCompletedUdpChannelMap[key2]); + // We are done. Remove it from the map as well. + removeM(_vgwSessionToCompletedUdpChannelMap, key2); + } + + // At this time, the voice call for this VGW session id has ended. + // We can now write an "End of Call" indicator file in the + // application's data directory. 
e-g: 5362954-call-completed.txt + mutable int32 err = 0ul; + rstring eocsFileName = dataDirectory() + "/" + + BSD.vgwSessionId + "-call-completed.txt"; + uint64 fileHandle = fopen (eocsFileName, "w+", err); + + if(err == 0) { + fwriteString ("VGW call session id " + BSD.vgwSessionId + + " ended at " + ctime(getTimestamp()) + ".", fileHandle, err); + fclose(fileHandle, err); + } + + appTrc(Trace.error, "An ongoing voice call has completed. vgwSessionId=" + BSD.vgwSessionId); + } } - } - } + } // End of if(BSD.endOfCallSignal == false) + } // End of onTuple BSD config - threadedPort: queue(BSD, Sys.Wait), queue(EOCS, Sys.Wait); + threadedPort: queue(BSD, Sys.Wait); } // End of Custom operator. // IMPORTANT: IBM STT service on public cloud requires @@ -2014,14 +2021,13 @@ public composite CallRecordingWriteCoordinator(input SpeechFragment; // parallel region for the purpose of load testing by // replaying many pre-recorded voice calls at the same time. public composite CallRecordingReplay(input CallReplaySignalFileNameIn; - output PreRecordedBinarySpeechData, PreRecordedEndOfCallSignal) { + output PreRecordedBinarySpeechData) { param expression $callRecordingReadDirectory; // This composite operator receives externally // defined stream types via operator parameters. type $callMetaData_t; type $binarySpeech_t; - type $endOfCallSignal_t; // Replaying the pre-recorded voice calls. // The graph below will perform the logic necessary to @@ -2254,7 +2260,6 @@ public composite CallRecordingReplay(input CallReplaySignalFileNameIn; // mix both of them and send out a tuple for transcription by // downstream operators. 
(stream<$binarySpeech_t> PreRecordedBinarySpeechData as PRBSD; - stream<$endOfCallSignal_t> PreRecordedEndOfCallSignal as PREOCS; stream Acknowledgement as Ack) as PreRecordedCallReplayer = Custom(CallMetaDataVC1, CallMetaDataVC2 as CMD; CallSpeechDataVC1, CallSpeechDataVC2 as CSD; TimerSignal as TS) { @@ -2280,7 +2285,6 @@ public composite CallRecordingReplay(input CallReplaySignalFileNameIn; // waiting for data to be read from the pre-recorded files. mutable rstring lastObservedReplayMapKey = "abcxyz"; mutable $binarySpeech_t _oTuple1 = {}; - mutable $endOfCallSignal_t _oTuple2 = {}; } onTuple CMD: { @@ -2372,8 +2376,10 @@ public composite CallRecordingReplay(input CallReplaySignalFileNameIn; // We have the call meta data. We can create a new // binary speech data tuple now and send it out for transcription. // Copy all the call meta data attributes to the outgoing tuple. + _oTuple1 = ($binarySpeech_t){}; assignFrom(_oTuple1, _callMetaDataMap[key]); _oTuple1.speech = CSD.speech; + _oTuple1.endOfCallSignal = false; _oTuple1.speechDataFragmentCnt = _speechDataFragmentCount[key]; _oTuple1.totalSpeechDataBytesReceived = _speechDataBytesCount[key]; submit(_oTuple1, PRBSD); @@ -2405,12 +2411,14 @@ public composite CallRecordingReplay(input CallReplaySignalFileNameIn; // Send two EOCS signals one for each voice channel in the given call. 
for (rstring str in _callMetaDataMap) { - _oTuple2.vgwSessionId = _callMetaDataMap[str].vgwSessionId; - _oTuple2.isCustomerSpeechData = + _oTuple1 = ($binarySpeech_t){}; + _oTuple1.vgwSessionId = _callMetaDataMap[str].vgwSessionId; + _oTuple1.isCustomerSpeechData = _callMetaDataMap[str].isCustomerSpeechData; - _oTuple2.vgwVoiceChannelNumber = + _oTuple1.vgwVoiceChannelNumber = _callMetaDataMap[str].vgwVoiceChannelNumber; - submit(_oTuple2, PREOCS); + _oTuple1.endOfCallSignal = true; + submit(_oTuple1, PRBSD); } // Clear all the state maps to be ready for the @@ -2474,12 +2482,14 @@ public composite CallRecordingReplay(input CallReplaySignalFileNameIn; // we are required to send an EOCS for that voice channel. for (rstring str in _callMetaDataMap) { if (_speechDataTuplesSentCount[str] > 0) { - _oTuple2.vgwSessionId = _callMetaDataMap[str].vgwSessionId; - _oTuple2.isCustomerSpeechData = + _oTuple1 = ($binarySpeech_t){}; + _oTuple1.vgwSessionId = _callMetaDataMap[str].vgwSessionId; + _oTuple1.isCustomerSpeechData = _callMetaDataMap[str].isCustomerSpeechData; - _oTuple2.vgwVoiceChannelNumber = + _oTuple1.vgwVoiceChannelNumber = _callMetaDataMap[str].vgwVoiceChannelNumber; - submit(_oTuple2, PREOCS); + _oTuple1.endOfCallSignal = true; + submit(_oTuple1, PRBSD); } } diff --git a/samples/VgwDataRouterToWatsonSTT/data/.gitignore b/samples/VgwDataRouterToWatsonSTT/data/.gitignore new file mode 100644 index 0000000..4cf1d1c --- /dev/null +++ b/samples/VgwDataRouterToWatsonSTT/data/.gitignore @@ -0,0 +1,8 @@ +# This directory is empty with no files at the time of committing it to the GitHub. + +# When the users clone this toolkit repository or download a release package from this repository, they will create files into their copy of this folder for compiling, testing and application bundle packaging purposes. So, they will need this empty directory to be present as part of this toolkit. 
+ +# Having this .gitignore file here will force the git add, git commit and git push commands to keep this empty folder in the remote repository instead of simply ignoring it. + +# ignore all content of this directory +/* diff --git a/samples/VgwDataRouterToWatsonSTT/info.xml b/samples/VgwDataRouterToWatsonSTT/info.xml index 1a38535..54ce1f6 100644 --- a/samples/VgwDataRouterToWatsonSTT/info.xml +++ b/samples/VgwDataRouterToWatsonSTT/info.xml @@ -4,13 +4,13 @@ VgwDataRouterToWatsonSTT Example that showcases STT on Cloud and STT on CP4D - 1.0.1 + 1.0.2 4.2.1.6 com.ibm.streamsx.sttgateway - [2.2.5,7.0.0] + [2.2.9,7.0.0] com.ibm.streamsx.json @@ -22,7 +22,7 @@ STTGatewayUtils - [1.0.0,7.0.0] + [1.0.1,7.0.0] \ No newline at end of file diff --git a/samples/VoiceGatewayToStreamsToWatsonS2T/com.ibm.streamsx.sttgateway.sample.watsons2t/VoiceGatewayToStreamsToWatsonS2T.spl b/samples/VoiceGatewayToStreamsToWatsonS2T/com.ibm.streamsx.sttgateway.sample.watsons2t/VoiceGatewayToStreamsToWatsonS2T.spl index c1d4c94..c6f51b3 100644 --- a/samples/VoiceGatewayToStreamsToWatsonS2T/com.ibm.streamsx.sttgateway.sample.watsons2t/VoiceGatewayToStreamsToWatsonS2T.spl +++ b/samples/VoiceGatewayToStreamsToWatsonS2T/com.ibm.streamsx.sttgateway.sample.watsons2t/VoiceGatewayToStreamsToWatsonS2T.spl @@ -8,7 +8,7 @@ /* ============================================== First created on: Oct/28/2019 -Last modified on: Feb/02/2021 +Last modified on: Feb/10/2021 A) What does this example application do? -------------------------------------- @@ -428,6 +428,9 @@ public composite VoiceGatewayToStreamsToWatsonS2T { // Whoever (caller or agent) sends the first round of // speech data bytes will get assigned a voice channel of 1. // The next one to follow will get assigned a voice channel of 2. + // boolean endOfCallSignal --> This attribute will be set to true by the IBMVoiceGatewaySource + // operator when it sends an EOCS for a voice channel.
It will be + // set to false by that operator when it sends binary speech data. // rstring id --> This attribute is needed by the WatsonS2T operator. // It is set to vgwSessionId_vgwVoiceChannelNumber // rstring callStartDateTime --> Call start date time i.e. system clock time. @@ -438,18 +441,11 @@ public composite VoiceGatewayToStreamsToWatsonS2T { // int32 s2tEngineId --> This attribute will be set in the next operator. (Please, read the comments there.) // int32 s2tResultProcessorId --> This attribute will be set in the next operator. (Please, read the comments there.) BinarySpeech_t = blob speech, rstring vgwSessionId, boolean isCustomerSpeechData, - int32 vgwVoiceChannelNumber, rstring id, rstring callStartDateTime, + int32 vgwVoiceChannelNumber, boolean endOfCallSignal, + rstring id, rstring callStartDateTime, rstring callerPhoneNumber, rstring agentPhoneNumber, int32 speechDataFragmentCnt, int32 totalSpeechDataBytesReceived, int32 s2tEngineId, int32 s2tResultProcessorId; - // The following schema is for the second output stream of the - // IBMVoiceGatewaySource operator. It has three attributes indicating - // the speaker channel (vgwVoiceChannelNumber) of a given voice call (vgwSessionId) who - // got completed with the call as well as an indicator (isCustomerSpeechData) to - // denote whether the speech data we received on this channel belonged - // to a caller or an agent. - EndOfCallSignal_t = rstring vgwSessionId, - boolean isCustomerSpeechData, int32 vgwVoiceChannelNumber; // The following schema is for the call recording feature where we will // store the call metadata details for a specific voice channel of a given @@ -498,8 +494,7 @@ public composite VoiceGatewayToStreamsToWatsonS2T { // your own needs of further analytics on the S2T results as well as // specific ways of delivering the S2T results to other // downstream systems rather than only writing to files as this example does below. 
- (stream BinarySpeechData as BSD; - stream EndOfCallSignal as EOCS) as VoiceGatewayInterface = + (stream BinarySpeechData as BSD) as VoiceGatewayInterface = IBMVoiceGatewaySource() { logic state: { @@ -567,15 +562,13 @@ public composite VoiceGatewayToStreamsToWatsonS2T { // real-time voice calls. // @parallel(width=$numberOfCallReplayEngines) - (stream PreRecordedBinarySpeechData; - stream PreRecordedEndOfCallSignal) as + (stream PreRecordedBinarySpeechData) as VoiceCallReplayer = CallRecordingReplay(CallReplaySignalFileName) { param callRecordingReadDirectory: $callRecordingReadDirectory; // Pass these stream types as composite operator parameters. callMetaData_t: CallMetaData_t; binarySpeech_t: BinarySpeech_t; - endOfCallSignal_t: EndOfCallSignal_t; } // We have to always route the speech data bytes (fragments) coming from @@ -599,8 +592,7 @@ public composite VoiceGatewayToStreamsToWatsonS2T { // That special logic happens inside this operator. (stream BinarySpeechDataFragment as BSDF) as BinarySpeechDataRouter = Custom( - BinarySpeechData, PreRecordedBinarySpeechData as BSD; - EndOfCallSignal, PreRecordedEndOfCallSignal as EOCS) { + BinarySpeechData, PreRecordedBinarySpeechData as BSD) { logic state: { // This map tells us which UDP channel is processing a @@ -628,209 +620,172 @@ public composite VoiceGatewayToStreamsToWatsonS2T { onTuple BSD: { // Get the sessionId + channelNumber combo string. _key = BSD.vgwSessionId + "_" + (rstring)BSD.vgwVoiceChannelNumber; - - // Check if this vgwSessionId_vgwVoiceChannelNumber combo already - // has an S2T engine allocated for it via an UDP channel. - if (has(_vgwSessionIdToUdpChannelMap, _key) == true) { - // This is a speaker of an ongoing voice call who has - // already been assigned to an S2T engine. - // Always send this speaker's speech data fragment to - // that same S2T engine. 
- BSD.s2tEngineId = _vgwSessionIdToUdpChannelMap[_key]; - // We can always assume that there is a preselected - // S2T result processor UDP channel available for this - // voice call (i.e. vgwSessionId). Because, it is already - // done in the else block below when this voice call's - // first speaker's speech data arrives here. - // Let us fetch and assign it here. - if (has(_vgwSessionToResultProcessorChannelMap, - BSD.vgwSessionId) == true) { - BSD.s2tResultProcessorId = - _vgwSessionToResultProcessorChannelMap[BSD.vgwSessionId]; - } else { - // This should never happen since the call will end - // for both the speakers almost at the same time after - // which there will be no speech data from any of the - // speakers participating in a given voice call. - // This else block is just part of defensive coding. - appTrc(Trace.error, - "_XXXXX No S2T result processor engine available at this time for the " + - "vgwSessionId_vgwVoiceChannelNumber: " + _key + - ". This should be a rare occurrence towards the very end of the call." + - " We are not going to process the speech data bytes" + - " of this speaker in this voice call."); - return; - } - } else { - // If we are here, that means this is a brand new speaker of a - // voice call for whom we must find an idle UDP channel a.k.a - // an idle S2T engine that can process this speaker's speech data. - int32 myS2TEngineId = getAnIdleUdpChannel(_idleUdpChannelsList); - - if (myS2TEngineId == -1) { - // This is not good and we should never end up in this situation. - // This means we have not provisioned sufficient number of S2T engines to - // handle the maximum planned concurrent calls. We have to ignore this - // speech data fragment and hope that an idle UDP channel number will - // become available by the time the next speech data fragment for this - // particular vgwSessionId_vgwVoiceChannelNumber combo arrives here. 
- if (BSD.speechDataFragmentCnt == 1) { - // Display this alert only for the very first data fragment of a - // given speaker of a given voice call. - appTrc(Trace.error, "No idle S2T engine available at this time for the " + + + // We will get the regular binary speech data and the End Of Call Signal (EOCS) in + // the same input stream. This design change was done on Feb/09/2021 to avoid any + // port locks and/or tuple ordering issues that may happen if we choose to + // do it using two different output ports. The incoming tuple has an attribute + // that is set to true or false by the IBMVoiceGatewaySource operator to indicate + // whether it is sending binary speech data or an EOCS. + if(BSD.endOfCallSignal == false) { + // The incoming tuple contains binary speech data. + // + // Check if this vgwSessionId_vgwVoiceChannelNumber combo already + // has an S2T engine allocated for it via an UDP channel. + if (has(_vgwSessionIdToUdpChannelMap, _key) == true) { + // This is a speaker of an ongoing voice call who has + // already been assigned to an S2T engine. + // Always send this speaker's speech data fragment to + // that same S2T engine. + BSD.s2tEngineId = _vgwSessionIdToUdpChannelMap[_key]; + // We can always assume that there is a preselected + // S2T result processor UDP channel available for this + // voice call (i.e. vgwSessionId). Because, it is already + // done in the else block below when this voice call's + // first speaker's speech data arrives here. + // Let us fetch and assign it here. + if (has(_vgwSessionToResultProcessorChannelMap, + BSD.vgwSessionId) == true) { + BSD.s2tResultProcessorId = + _vgwSessionToResultProcessorChannelMap[BSD.vgwSessionId]; + } else { + // This should never happen since the call will end + // for both the speakers almost at the same time after + // which there will be no speech data from any of the + // speakers participating in a given voice call. + // This else block is just part of defensive coding.
+ appTrc(Trace.error, + "_XXXXX No S2T result processor engine available at this time for the " + "vgwSessionId_vgwVoiceChannelNumber: " + _key + - ". There are " + (rstring)$numberOfS2TEngines + - " S2T engines configured and they are all processing other" + - " voice calls at this time. Please start sufficient number of S2T engines" + - " next time to handle your maximum expected concurrent calls." + - " A rule of thumb is to have two S2T engines to process" + - " two speakers in every given concurrent voice call."); + ". This should be a rare occurrence towards the very end of the call." + + " We are not going to process the speech data bytes" + + " of this speaker in this voice call."); + return; } - - return; } else { - // We got an idle S2T engine. - BSD.s2tEngineId = myS2TEngineId; - - // If this call is just beginning, then we will create a - // tiny text file to indicate that we started receiving - // speech data from the IBM Voice Gateway for this new call. - rstring key1 = BSD.vgwSessionId + "_" + "1"; - rstring key2 = BSD.vgwSessionId + "_" + "2"; + // If we are here, that means this is a brand new speaker of a + // voice call for whom we must find an idle UDP channel a.k.a + // an idle S2T engine that can process this speaker's speech data. + int32 myS2TEngineId = getAnIdleUdpChannel(_idleUdpChannelsList); - // If we have not yet created any entry in our state map for this call, - // then we can be sure that it is the start of this call. - if (has(_vgwSessionIdToUdpChannelMap, key1) == false && - has(_vgwSessionIdToUdpChannelMap, key2) == false) { - // We can now write a "Start of Call" indicator file in the - // application's data directory. e-g: 5362954-call-started.txt - mutable int32 err = 0ul; - rstring socsFileName = dataDirectory() + "/" + - BSD.vgwSessionId + "-call-started.txt"; - uint64 fileHandle = fopen (socsFileName, "w+", err); + if (myS2TEngineId == -1) { + // This is not good and we should never end up in this situation. 
+ // This means we have not provisioned sufficient number of S2T engines to + // handle the maximum planned concurrent calls. We have to ignore this + // speech data fragment and hope that an idle UDP channel number will + // become available by the time the next speech data fragment for this + // particular vgwSessionId_vgwVoiceChannelNumber combo arrives here. + if (BSD.speechDataFragmentCnt == 1) { + // Display this alert only for the very first data fragment of a + // given speaker of a given voice call. + appTrc(Trace.error, "No idle S2T engine available at this time for the " + + "vgwSessionId_vgwVoiceChannelNumber: " + _key + + ". There are " + (rstring)$numberOfS2TEngines + + " S2T engines configured and they are all processing other" + + " voice calls at this time. Please start sufficient number of S2T engines" + + " next time to handle your maximum expected concurrent calls." + + " A rule of thumb is to have two S2T engines to process" + + " two speakers in every given concurrent voice call."); + } + + return; + } else { + // We got an idle S2T engine. + BSD.s2tEngineId = myS2TEngineId; + + // If this call is just beginning, then we will create a + // tiny text file to indicate that we started receiving + // speech data from the IBM Voice Gateway for this new call. + rstring key1 = BSD.vgwSessionId + "_" + "1"; + rstring key2 = BSD.vgwSessionId + "_" + "2"; - if(err == 0) { - fwriteString ("VGW call session id " + BSD.vgwSessionId + - " started at " + ctime(getTimestamp()) + ".", fileHandle, err); - fclose(fileHandle, err); + // If we have not yet created any entry in our state map for this call, + // then we can be sure that it is the start of this call. + if (has(_vgwSessionIdToUdpChannelMap, key1) == false && + has(_vgwSessionIdToUdpChannelMap, key2) == false) { + // We can now write a "Start of Call" indicator file in the + // application's data directory. 
e-g: 5362954-call-started.txt + mutable int32 err = 0ul; + rstring socsFileName = dataDirectory() + "/" + + BSD.vgwSessionId + "-call-started.txt"; + uint64 fileHandle = fopen (socsFileName, "w+", err); + + if(err == 0) { + fwriteString ("VGW call session id " + BSD.vgwSessionId + + " started at " + ctime(getTimestamp()) + ".", fileHandle, err); + fclose(fileHandle, err); + } + + appTrc(Trace.error, "A new voice call has started. vgwSessionId=" + BSD.vgwSessionId); } - appTrc(Trace.error, "A new voice call has started. vgwSessionId=" + BSD.vgwSessionId); - } - - // Insert into the state map for future reference. - insertM(_vgwSessionIdToUdpChannelMap, _key, myS2TEngineId); + // Insert into the state map for future reference. + insertM(_vgwSessionIdToUdpChannelMap, _key, myS2TEngineId); + + // For this voice call (i.e. vgwSessionId), select a + // single result processor UDP channel. Both speakers in this + // same voice call will use that same result processor instance. + // This will ensure that the S2T results for both the speakers + // will reach the same result processor. + if (has(_vgwSessionToResultProcessorChannelMap, + BSD.vgwSessionId) == false) { + insertM(_vgwSessionToResultProcessorChannelMap, + BSD.vgwSessionId, myS2TEngineId); + } - // For this voice call (i.e. vgwSessionId), select a - // single result processor UDP channel. Both speakers in this - // same voice call will use that same result processor instance. - // This will ensure that the S2T results for both the speakers - // will reach the same result processor. - if (has(_vgwSessionToResultProcessorChannelMap, - BSD.vgwSessionId) == false) { - insertM(_vgwSessionToResultProcessorChannelMap, - BSD.vgwSessionId, myS2TEngineId); - } - - // Set the S2T result processor id. 
- BSD.s2tResultProcessorId = - _vgwSessionToResultProcessorChannelMap[BSD.vgwSessionId]; - } // End of if (myS2TEngineId == -1) - } // End of if (has(_vgwSessionIdToUdpChannelMap, _key) - - appTrc(Trace.debug, "vgwSessionId=" + BSD.vgwSessionId + - ", isCustomerSpeechData=" + (rstring)BSD.isCustomerSpeechData + - ", vgwVoiceChannelNumber=" + (rstring)BSD.vgwVoiceChannelNumber + - ", speechDataFragmentCnt=" + (rstring)BSD.speechDataFragmentCnt + - ", totalSpeechDataBytesReceived=" + - (rstring)BSD.totalSpeechDataBytesReceived + - ", s2tEngineId=" + (rstring)BSD.s2tEngineId + - ", s2tResultProcessorId=" + (rstring)BSD.s2tResultProcessorId); - // Submit this tuple. - submit(BSD, BSDF); - } // End of onTuple BSD - - // Process the end of voice call signal. - // Since there are two channels in every voice call, - // those two channels will carry their own "End S2T session" - // message from the Voice Gateway. The logic below takes care of - // handling two End of Call Signals for every voice call. - onTuple EOCS: { - // Get the allocated S2T engine id for a given - // vgwSessionId_vgwVoiceChannelNumber combo. - // We should always have an S2T engine id. If not, that is a - // case where the user didn't provision sufficient number of - // S2T engines and there was no idle S2T engine available for that - // given vgwSessionId_vgwVoiceChannelNumber combo. - // This situation can be avoided by starting the application with a - // sufficient number of S2T engines needed for the anticipated - // maximum concurrent voice calls. A rule of thumb is to have - // two S2T engines to process two speakers in every given - // concurrent voice call. - // - // Get the sessionId + channelNumber combo string. - _key = EOCS.vgwSessionId + "_" + (rstring)EOCS.vgwVoiceChannelNumber; - - if (has(_vgwSessionIdToUdpChannelMap, _key) == true) { - // Let us send an empty blob to the WatsonS2T operator to indicate that - // this speaker of a given voice call is done. 
- _oTuple = (BinarySpeech_t){}; - // Copy the three input tuple attributes that must - // match with that of the outgoing tuple. - assignFrom(_oTuple, EOCS); - // Assign the S2T engine id where this voice channel was - // getting processed until now. - _oTuple.s2tEngineId = _vgwSessionIdToUdpChannelMap[_key]; - // We have to send this tuple to the result processor as well for - // the call recording logic to work correctly. - _oTuple.s2tResultProcessorId = - _vgwSessionToResultProcessorChannelMap[EOCS.vgwSessionId]; - submit(_oTuple, BSDF); - // We are now done with this vgwSessionId_vgwVoiceChannelNumber combo. - removeM(_vgwSessionIdToUdpChannelMap, _key); - // Add the S2T engine id to this call completed map to be released later in the - // following if block only after receiving EOCS for both the voice channels of this call. - insertM(_vgwSessionToCompletedUdpChannelMap, _key, _oTuple.s2tEngineId); - } - - // Senthil added this if block on Feb/01/2020. - if($numberOfEocsNeededForVoiceCallCompletion == 1) { - // If the user configured this application to handle - // only one EOCS to treat a voice call as completed, then we - // will try to clean-up the other voice channel if it exists. - mutable int32 otherVgwVoiceChannelNumber = 1; - - if(EOCS.vgwVoiceChannelNumber == 1) { - otherVgwVoiceChannelNumber = 2; - } - - // Get the sessionId + channelNumber combo string. - _key = EOCS.vgwSessionId + "_" + (rstring)otherVgwVoiceChannelNumber; - + // Set the S2T result processor id. 
+ BSD.s2tResultProcessorId = + _vgwSessionToResultProcessorChannelMap[BSD.vgwSessionId]; + } // End of if (myS2TEngineId == -1) + } // End of if (has(_vgwSessionIdToUdpChannelMap, _key) + + appTrc(Trace.debug, "vgwSessionId=" + BSD.vgwSessionId + + ", isCustomerSpeechData=" + (rstring)BSD.isCustomerSpeechData + + ", vgwVoiceChannelNumber=" + (rstring)BSD.vgwVoiceChannelNumber + + ", speechDataFragmentCnt=" + (rstring)BSD.speechDataFragmentCnt + + ", totalSpeechDataBytesReceived=" + + (rstring)BSD.totalSpeechDataBytesReceived + + ", s2tEngineId=" + (rstring)BSD.s2tEngineId + + ", s2tResultProcessorId=" + (rstring)BSD.s2tResultProcessorId); + // Submit this tuple. + submit(BSD, BSDF); + } else { + // The incoming tuple contains an End of Call Signal (EOCS). + appTrc(Trace.error, "Received an EOCS. vgwSessionId=" + BSD.vgwSessionId + + ", voiceChannelNumber=" + (rstring)BSD.vgwVoiceChannelNumber); + // + // Process the end of voice call signal. + // Since there are two channels in every voice call, + // those two channels will carry their own "End S2T session" + // message from the Voice Gateway. The logic below takes care of + // handling two End of Call Signals for every voice call. + // + // Get the allocated S2T engine id for a given + // vgwSessionId_vgwVoiceChannelNumber combo. + // We should always have an S2T engine id. If not, that is a + // case where the user didn't provision sufficient number of + // S2T engines and there was no idle S2T engine available for that + // given vgwSessionId_vgwVoiceChannelNumber combo. + // This situation can be avoided by starting the application with a + // sufficient number of S2T engines needed for the anticipated + // maximum concurrent voice calls. A rule of thumb is to have + // two S2T engines to process two speakers in every given + // concurrent voice call. 
+ // if (has(_vgwSessionIdToUdpChannelMap, _key) == true) { // Let us send an empty blob to the WatsonS2T operator to indicate that // this speaker of a given voice call is done. _oTuple = (BinarySpeech_t){}; // Copy the three input tuple attributes that must // match with that of the outgoing tuple. - assignFrom(_oTuple, EOCS); - // Override the following two attributes to reflect the other voice channel. - // Flip this attribute value. - if(_oTuple.isCustomerSpeechData == true) { - _oTuple.isCustomerSpeechData = false; - } else { - _oTuple.isCustomerSpeechData = true; - } - - _oTuple.vgwVoiceChannelNumber = otherVgwVoiceChannelNumber; - + assignFrom(_oTuple, BSD); // Assign the S2T engine id where this voice channel was // getting processed until now. _oTuple.s2tEngineId = _vgwSessionIdToUdpChannelMap[_key]; // We have to send this tuple to the result processor as well for // the call recording logic to work correctly. _oTuple.s2tResultProcessorId = - _vgwSessionToResultProcessorChannelMap[EOCS.vgwSessionId]; + _vgwSessionToResultProcessorChannelMap[BSD.vgwSessionId]; submit(_oTuple, BSDF); // We are now done with this vgwSessionId_vgwVoiceChannelNumber combo. removeM(_vgwSessionIdToUdpChannelMap, _key); @@ -838,80 +793,127 @@ public composite VoiceGatewayToStreamsToWatsonS2T { // following if block only after receiving EOCS for both the voice channels of this call. insertM(_vgwSessionToCompletedUdpChannelMap, _key, _oTuple.s2tEngineId); } - } - - // Since this voice call is ending, let us release the S2T result processor - // instance that was allocated above for this voice call. - if (has(_vgwSessionToResultProcessorChannelMap, - EOCS.vgwSessionId) == true) { - // Let us remove the result processor id only after the logic - // in the previous if-block took care of sending the EOCS for - // both the voice channels in a given voice call. 
- // Checking for this condition is important for the - // call recording logic inside the S2T result processor - // composite to work correctly. - rstring key1 = EOCS.vgwSessionId + "_" + "1"; - rstring key2 = EOCS.vgwSessionId + "_" + "2"; - - // Remove the result processor id only if the EOCS signal - // was sent for both of the voice channels. That must first - // happen before we can release the result processor id. - // - // This if condition was changed by Senthil on - // Feb/01/2021 for the following reason. - // If the user configured this application to handle - // a single EOCS as sufficient to consider a voice call - // completed for a given VGW session id, we will use the - // second || i.e. OR condition. Please refer to the - // constant i.e. expression declaration section above to - // read the commentary about this idea. - // - if (($numberOfEocsNeededForVoiceCallCompletion == 2 && - (has(_vgwSessionIdToUdpChannelMap, key1) == false && - has(_vgwSessionIdToUdpChannelMap, key2) == false)) || - ($numberOfEocsNeededForVoiceCallCompletion == 1 && - (has(_vgwSessionIdToUdpChannelMap, key1) == false || - has(_vgwSessionIdToUdpChannelMap, key2) == false))) { - removeM(_vgwSessionToResultProcessorChannelMap, EOCS.vgwSessionId); + + // Senthil added this if block on Feb/01/2020. + if($numberOfEocsNeededForVoiceCallCompletion == 1) { + // If the user configured this application to handle + // only one EOCS to treat a voice call as completed, then we + // will try to clean-up the other voice channel if it exists. + mutable int32 otherVgwVoiceChannelNumber = 1; - // Since the voice call for this VGW session id has ended completely, - // we can also release the S2T engine(s) assigned for this call so that - // they can be repurposed for handling any new future calls. - // We can go ahead and release the S2T engine by adding it back to - // the idle UDP channels list. 
- if(has(_vgwSessionToCompletedUdpChannelMap, key1) == true) { - appendM(_idleUdpChannelsList, _vgwSessionToCompletedUdpChannelMap[key1]); - // We are done. Remove it from the map as well. - removeM(_vgwSessionToCompletedUdpChannelMap, key1); - } - - if(has(_vgwSessionToCompletedUdpChannelMap, key2) == true) { - appendM(_idleUdpChannelsList, _vgwSessionToCompletedUdpChannelMap[key2]); - // We are done. Remove it from the map as well. - removeM(_vgwSessionToCompletedUdpChannelMap, key2); + if(BSD.vgwVoiceChannelNumber == 1) { + otherVgwVoiceChannelNumber = 2; } - // At this time, the voice call for this VGW session id has ended. - // We can now write an "End of Call" indicator file in the - // application's data directory. e-g: 5362954-call-completed.txt - mutable int32 err = 0ul; - rstring eocsFileName = dataDirectory() + "/" + - EOCS.vgwSessionId + "-call-completed.txt"; - uint64 fileHandle = fopen (eocsFileName, "w+", err); + // Get the sessionId + channelNumber combo string. + _key = BSD.vgwSessionId + "_" + (rstring)otherVgwVoiceChannelNumber; - if(err == 0) { - fwriteString ("VGW call session id " + EOCS.vgwSessionId + - " ended at " + ctime(getTimestamp()) + ".", fileHandle, err); - fclose(fileHandle, err); + if (has(_vgwSessionIdToUdpChannelMap, _key) == true) { + // Let us send an empty blob to the WatsonS2T operator to indicate that + // this speaker of a given voice call is done. + _oTuple = (BinarySpeech_t){}; + // Copy the three input tuple attributes that must + // match with that of the outgoing tuple. + assignFrom(_oTuple, BSD); + // Override the following two attributes to reflect the other voice channel. + // Flip this attribute value. + if(_oTuple.isCustomerSpeechData == true) { + _oTuple.isCustomerSpeechData = false; + } else { + _oTuple.isCustomerSpeechData = true; + } + + _oTuple.vgwVoiceChannelNumber = otherVgwVoiceChannelNumber; + + // Assign the S2T engine id where this voice channel was + // getting processed until now. 
+ _oTuple.s2tEngineId = _vgwSessionIdToUdpChannelMap[_key]; + // We have to send this tuple to the result processor as well for + // the call recording logic to work correctly. + _oTuple.s2tResultProcessorId = + _vgwSessionToResultProcessorChannelMap[BSD.vgwSessionId]; + submit(_oTuple, BSDF); + // We are now done with this vgwSessionId_vgwVoiceChannelNumber combo. + removeM(_vgwSessionIdToUdpChannelMap, _key); + // Add the S2T engine id to this call completed map to be released later in the + // following if block only after receiving EOCS for both the voice channels of this call. + insertM(_vgwSessionToCompletedUdpChannelMap, _key, _oTuple.s2tEngineId); } - - appTrc(Trace.error, "An ongoing voice call has completed. vgwSessionId=" + EOCS.vgwSessionId); } - } - } + + // Since this voice call is ending, let us release the S2T result processor + // instance that was allocated above for this voice call. + if (has(_vgwSessionToResultProcessorChannelMap, + BSD.vgwSessionId) == true) { + // Let us remove the result processor id only after the logic + // in the previous if-block took care of sending the EOCS for + // both the voice channels in a given voice call. + // Checking for this condition is important for the + // call recording logic inside the S2T result processor + // composite to work correctly. + rstring key1 = BSD.vgwSessionId + "_" + "1"; + rstring key2 = BSD.vgwSessionId + "_" + "2"; + + // Remove the result processor id only if the EOCS signal + // was sent for both of the voice channels. That must first + // happen before we can release the result processor id. + // + // This if condition was changed by Senthil on + // Feb/01/2021 for the following reason. + // If the user configured this application to handle + // a single EOCS as sufficient to consider a voice call + // completed for a given VGW session id, we will use the + // second || i.e. OR condition. Please refer to the + // constant i.e. 
expression declaration section above to + // read the commentary about this idea. + // + if (($numberOfEocsNeededForVoiceCallCompletion == 2 && + (has(_vgwSessionIdToUdpChannelMap, key1) == false && + has(_vgwSessionIdToUdpChannelMap, key2) == false)) || + ($numberOfEocsNeededForVoiceCallCompletion == 1 && + (has(_vgwSessionIdToUdpChannelMap, key1) == false || + has(_vgwSessionIdToUdpChannelMap, key2) == false))) { + removeM(_vgwSessionToResultProcessorChannelMap, BSD.vgwSessionId); + + // Since the voice call for this VGW session id has ended completely, + // we can also release the S2T engine(s) assigned for this call so that + // they can be repurposed for handling any new future calls. + // We can go ahead and release the S2T engine by adding it back to + // the idle UDP channels list. + if(has(_vgwSessionToCompletedUdpChannelMap, key1) == true) { + appendM(_idleUdpChannelsList, _vgwSessionToCompletedUdpChannelMap[key1]); + // We are done. Remove it from the map as well. + removeM(_vgwSessionToCompletedUdpChannelMap, key1); + } + + if(has(_vgwSessionToCompletedUdpChannelMap, key2) == true) { + appendM(_idleUdpChannelsList, _vgwSessionToCompletedUdpChannelMap[key2]); + // We are done. Remove it from the map as well. + removeM(_vgwSessionToCompletedUdpChannelMap, key2); + } + + // At this time, the voice call for this VGW session id has ended. + // We can now write an "End of Call" indicator file in the + // application's data directory. e-g: 5362954-call-completed.txt + mutable int32 err = 0ul; + rstring eocsFileName = dataDirectory() + "/" + + BSD.vgwSessionId + "-call-completed.txt"; + uint64 fileHandle = fopen (eocsFileName, "w+", err); + + if(err == 0) { + fwriteString ("VGW call session id " + BSD.vgwSessionId + + " ended at " + ctime(getTimestamp()) + ".", fileHandle, err); + fclose(fileHandle, err); + } + + appTrc(Trace.error, "An ongoing voice call has completed. 
vgwSessionId=" + BSD.vgwSessionId); + } + } + } // End of if(BSD.endOfCallSignal == false) + } // End of onTuple BSD config - threadedPort: queue(BSD, Sys.Wait), queue(EOCS, Sys.Wait); + threadedPort: queue(BSD, Sys.Wait); } // End of Custom operator. // Invoke one or more instances of the IBMWatsonSpeech2Text composite operator. @@ -1768,14 +1770,13 @@ public composite CallRecordingWriteCoordinator(input SpeechFragment; // parallel region for the purpose of load testing by // replaying many pre-recorded voice calls at the same time. public composite CallRecordingReplay(input CallReplaySignalFileNameIn; - output PreRecordedBinarySpeechData, PreRecordedEndOfCallSignal) { + output PreRecordedBinarySpeechData) { param expression $callRecordingReadDirectory; // This composite operator receives externally // defined stream types via operator parameters. type $callMetaData_t; type $binarySpeech_t; - type $endOfCallSignal_t; // Replaying the pre-recorded voice calls. // The graph below will perform the logic necessary to @@ -2008,7 +2009,6 @@ public composite CallRecordingReplay(input CallReplaySignalFileNameIn; // mix both of them and send out a tuple for transcription by // downstream operators. (stream<$binarySpeech_t> PreRecordedBinarySpeechData as PRBSD; - stream<$endOfCallSignal_t> PreRecordedEndOfCallSignal as PREOCS; stream Acknowledgement as Ack) as PreRecordedCallReplayer = Custom(CallMetaDataVC1, CallMetaDataVC2 as CMD; CallSpeechDataVC1, CallSpeechDataVC2 as CSD; TimerSignal as TS) { @@ -2034,7 +2034,6 @@ public composite CallRecordingReplay(input CallReplaySignalFileNameIn; // waiting for data to be read from the pre-recorded files. mutable rstring lastObservedReplayMapKey = "abcxyz"; mutable $binarySpeech_t _oTuple1 = {}; - mutable $endOfCallSignal_t _oTuple2 = {}; } onTuple CMD: { @@ -2126,8 +2125,10 @@ public composite CallRecordingReplay(input CallReplaySignalFileNameIn; // We have the call meta data. 
We can create a new // binary speech data tuple now and send it out for transcription. // Copy all the call meta data attributes to the outgoing tuple. + _oTuple1 = ($binarySpeech_t){}; assignFrom(_oTuple1, _callMetaDataMap[key]); _oTuple1.speech = CSD.speech; + _oTuple1.endOfCallSignal = false; _oTuple1.speechDataFragmentCnt = _speechDataFragmentCount[key]; _oTuple1.totalSpeechDataBytesReceived = _speechDataBytesCount[key]; submit(_oTuple1, PRBSD); @@ -2159,12 +2160,14 @@ public composite CallRecordingReplay(input CallReplaySignalFileNameIn; // Send two EOCS signals one for each voice channel in the given call. for (rstring str in _callMetaDataMap) { - _oTuple2.vgwSessionId = _callMetaDataMap[str].vgwSessionId; - _oTuple2.isCustomerSpeechData = + _oTuple1 = ($binarySpeech_t){}; + _oTuple1.vgwSessionId = _callMetaDataMap[str].vgwSessionId; + _oTuple1.isCustomerSpeechData = _callMetaDataMap[str].isCustomerSpeechData; - _oTuple2.vgwVoiceChannelNumber = + _oTuple1.vgwVoiceChannelNumber = _callMetaDataMap[str].vgwVoiceChannelNumber; - submit(_oTuple2, PREOCS); + _oTuple1.endOfCallSignal = true; + submit(_oTuple1, PRBSD); } // Clear all the state maps to be ready for the @@ -2228,12 +2231,14 @@ public composite CallRecordingReplay(input CallReplaySignalFileNameIn; // we are required to send an EOCS for that voice channel. 
for (rstring str in _callMetaDataMap) { if (_speechDataTuplesSentCount[str] > 0) { - _oTuple2.vgwSessionId = _callMetaDataMap[str].vgwSessionId; - _oTuple2.isCustomerSpeechData = + _oTuple1 = ($binarySpeech_t){}; + _oTuple1.vgwSessionId = _callMetaDataMap[str].vgwSessionId; + _oTuple1.isCustomerSpeechData = _callMetaDataMap[str].isCustomerSpeechData; - _oTuple2.vgwVoiceChannelNumber = + _oTuple1.vgwVoiceChannelNumber = _callMetaDataMap[str].vgwVoiceChannelNumber; - submit(_oTuple2, PREOCS); + _oTuple1.endOfCallSignal = true; + submit(_oTuple1, PRBSD); } } diff --git a/samples/VoiceGatewayToStreamsToWatsonS2T/info.xml b/samples/VoiceGatewayToStreamsToWatsonS2T/info.xml index c5955a5..e14af09 100644 --- a/samples/VoiceGatewayToStreamsToWatsonS2T/info.xml +++ b/samples/VoiceGatewayToStreamsToWatsonS2T/info.xml @@ -3,7 +3,7 @@ VoiceGatewayToStreamsToWatsonS2T Example that showcases embedded S2T in IBM Streams - 1.0.4 + 1.0.5 4.2.1.6 @@ -13,7 +13,7 @@ com.ibm.streamsx.sttgateway - [2.2.3,4.0.0) + [2.2.9,4.0.0) com.ibm.streamsx.websocket diff --git a/samples/VoiceGatewayToStreamsToWatsonSTT/com.ibm.streamsx.sttgateway.sample.watsonstt/VoiceGatewayToStreamsToWatsonSTT.spl b/samples/VoiceGatewayToStreamsToWatsonSTT/com.ibm.streamsx.sttgateway.sample.watsonstt/VoiceGatewayToStreamsToWatsonSTT.spl index b1c7b59..427baa7 100644 --- a/samples/VoiceGatewayToStreamsToWatsonSTT/com.ibm.streamsx.sttgateway.sample.watsonstt/VoiceGatewayToStreamsToWatsonSTT.spl +++ b/samples/VoiceGatewayToStreamsToWatsonSTT/com.ibm.streamsx.sttgateway.sample.watsonstt/VoiceGatewayToStreamsToWatsonSTT.spl @@ -8,7 +8,7 @@ /* ============================================== First created on: Sep/25/2019 -Last modified on: Feb/02/2021 +Last modified on: Feb/10/2021 A) What does this example application do? 
-------------------------------------- @@ -470,6 +470,9 @@ public composite VoiceGatewayToStreamsToWatsonSTT { // Whoever (caller or agent) sends the first round of // speech data bytes will get assigned a voice channel of 1. // The next one to follow will get assigned a voice channel of 2. + // boolean endOfCallSignal --> This attribute will be set to true by the IBMVoiceGatewaySource + // operator when it sends an EOCS for a voice channel. It will be + // set to false by that operator when it sends binary speech data. // rstring callStartDateTime --> Call start date time i.e. system clock time. // rstring callerPhoneNumber --> Details about the caller's phone number. // rstring agentPhoneNumber --> Details about the agent's phone number. @@ -478,18 +481,11 @@ public composite VoiceGatewayToStreamsToWatsonSTT { // int32 sttEngineId --> This attribute will be set in the next operator. (Please, read the comments there.) // int32 sttResultProcessorId --> This attribute will be set in the next operator. (Please, read the comments there.) BinarySpeech_t = blob speech, rstring vgwSessionId, boolean isCustomerSpeechData, - int32 vgwVoiceChannelNumber, rstring callStartDateTime, rstring callerPhoneNumber, + int32 vgwVoiceChannelNumber, boolean endOfCallSignal, + rstring callStartDateTime, rstring callerPhoneNumber, rstring agentPhoneNumber, int32 speechDataFragmentCnt, int32 totalSpeechDataBytesReceived, int32 sttEngineId, int32 sttResultProcessorId; - // The following schema is for the second output stream of the - // IBMVoiceGatewaySource operator. It has three attributes indicating - // the speaker channel (vgwVoiceChannelNumber) of a given voice call (vgwSessionId) who - // got completed with the call as well as an indicator (isCustomerSpeechData) to - // denote whether the speech data we received on this channel belonged - // to a caller or an agent. 
- EndOfCallSignal_t = rstring vgwSessionId, - boolean isCustomerSpeechData, int32 vgwVoiceChannelNumber; // The following schema is for the call recording feature where we will // store the call metadata details for a specific voice channel of a given @@ -538,8 +534,7 @@ public composite VoiceGatewayToStreamsToWatsonSTT { // your own needs of further analytics on the STT results as well as // specific ways of delivering the STT results to other // downstream systems rather than only writing to files as this example does below. - (stream BinarySpeechData as BSD; - stream EndOfCallSignal as EOCS) as VoiceGatewayInterface = + (stream BinarySpeechData as BSD) as VoiceGatewayInterface = IBMVoiceGatewaySource() { logic state: { @@ -614,15 +609,13 @@ public composite VoiceGatewayToStreamsToWatsonSTT { // high number of replay engines before deciding on a suitable parallel width. // @parallel(width=$numberOfCallReplayEngines) - (stream PreRecordedBinarySpeechData; - stream PreRecordedEndOfCallSignal) as + (stream PreRecordedBinarySpeechData) as VoiceCallReplayer = CallRecordingReplay(CallReplaySignalFileName) { param callRecordingReadDirectory: $callRecordingReadDirectory; // Pass these stream types as composite operator parameters. callMetaData_t: CallMetaData_t; binarySpeech_t: BinarySpeech_t; - endOfCallSignal_t: EndOfCallSignal_t; } // We have to always route the speech data bytes (fragments) coming from @@ -646,8 +639,7 @@ public composite VoiceGatewayToStreamsToWatsonSTT { // That special logic happens inside this operator. 
(stream BinarySpeechDataFragment as BSDF) as BinarySpeechDataRouter = Custom( - BinarySpeechData, PreRecordedBinarySpeechData as BSD; - EndOfCallSignal, PreRecordedEndOfCallSignal as EOCS) { + BinarySpeechData, PreRecordedBinarySpeechData as BSD) { logic state: { // This map tells us which UDP channel is processing a @@ -675,291 +667,301 @@ public composite VoiceGatewayToStreamsToWatsonSTT { onTuple BSD: { // Get the sessionId + channelNumber combo string. _key = BSD.vgwSessionId + "_" + (rstring)BSD.vgwVoiceChannelNumber; - - // Check if this vgwSessionId_vgwVoiceChannelNumber combo already - // has an STT engine allocated for it via an UDP channel. - if (has(_vgwSessionIdToUdpChannelMap, _key) == true) { - // This is a speaker of an ongoing voice call who has - // already been assigned to an STT engine. - // Always send this speaker's speech data fragment to - // that same STT engine. - BSD.sttEngineId = _vgwSessionIdToUdpChannelMap[_key]; - // We can always assume that there is a preselected - // STT result processor UDP channel available for this - // voice call (i.e. vgwSessionId). Because, it is already - // done in the else block below when this voice call's - // first speaker's speech data arrives here. - // Let us fetch and assign it here. - if (has(_vgwSessionToResultProcessorChannelMap, - BSD.vgwSessionId) == true) { - BSD.sttResultProcessorId = - _vgwSessionToResultProcessorChannelMap[BSD.vgwSessionId]; - } else { - // This should never happen since the call will end - // for both the speakers almost at the same time after - // which there will be no speech data from any of the - // speakers participating in a given voice call. - // This else block is just part of defensive coding. - appTrc(Trace.error, - "_XXXXX No STT result processor engine available at this time for the " + - "vgwSessionId_vgwVoiceChannelNumber: " + _key + - ". This should be a rare occurrence towards the very end of the call." 
+ - " We are not going to process the speech data bytes" + - " of this speaker in this voice call."); - return; - } - } else { - // If we are here, that means this is a brand new speaker of a - // voice call for whom we must find an idle UDP channel a.k.a - // an idle STT engine that can process this speaker's speech data. - int32 mySttEngineId = getAnIdleUdpChannel(_idleUdpChannelsList); - - if (mySttEngineId == -1) { - // This is not good and we should never end up in this situation. - // This means we have not provisioned sufficient number of STT engines to - // handle the maximum planned concurrent calls. We have to ignore this - // speech data fragment and hope that an idle UDP channel number will - // become available by the time the next speech data fragment for this - // particular vgwSessionId_vgwVoiceChannelNumber combo arrives here. - if (BSD.speechDataFragmentCnt == 1) { - // Display this alert only for the very first data fragment of a - // given speaker of a given voice call. - appTrc(Trace.error, "No idle STT engine available at this time for the " + + + // We will get the regular binary speech data and the End Of Call Signal (EOCS) in + // the same input stream. This design change was done on Feb/09/2021 to avoid + // any port locks and/or tuple ordering issues that may happen if we choose to + // do it using two different output ports. The incoming tuple has an attribute + // that is set to true or false by the IBMVoiceGatewaySource operator to indicate + // whether it is sending binary speech data or an EOCS. + if(BSD.endOfCallSignal == false) { + // The incoming tuple contains binary speech data. + // + // Check if this vgwSessionId_vgwVoiceChannelNumber combo already + // has an STT engine allocated for it via an UDP channel. + if (has(_vgwSessionIdToUdpChannelMap, _key) == true) { + // This is a speaker of an ongoing voice call who has + // already been assigned to an STT engine.
+ // Always send this speaker's speech data fragment to + // that same STT engine. + BSD.sttEngineId = _vgwSessionIdToUdpChannelMap[_key]; + // We can always assume that there is a preselected + // STT result processor UDP channel available for this + // voice call (i.e. vgwSessionId). Because, it is already + // done in the else block below when this voice call's + // first speaker's speech data arrives here. + // Let us fetch and assign it here. + if (has(_vgwSessionToResultProcessorChannelMap, + BSD.vgwSessionId) == true) { + BSD.sttResultProcessorId = + _vgwSessionToResultProcessorChannelMap[BSD.vgwSessionId]; + } else { + // This should never happen since the call will end + // for both the speakers almost at the same time after + // which there will be no speech data from any of the + // speakers participating in a given voice call. + // This else block is just part of defensive coding. + appTrc(Trace.error, + "_XXXXX No STT result processor engine available at this time for the " + "vgwSessionId_vgwVoiceChannelNumber: " + _key + - ". There are " + (rstring)$numberOfSTTEngines + - " STT engines configured and they are all processing other" + - " voice calls at this time. Please start sufficient number of STT engines" + - " next time to handle your maximum expected concurrent calls." + - " A rule of thumb is to have two STT engines to process" + - " two speakers in every given concurrent voice call."); + ". This should be a rare occurrence towards the very end of the call." + + " We are not going to process the speech data bytes" + + " of this speaker in this voice call."); + return; } - - return; } else { - // We got an idle STT engine. - BSD.sttEngineId = mySttEngineId; - - // If this call is just beginning, then we will create a - // tiny text file to indicate that we started receiving - // speech data from the IBM Voice Gateway for this new call. 
- rstring key1 = BSD.vgwSessionId + "_" + "1"; - rstring key2 = BSD.vgwSessionId + "_" + "2"; + // If we are here, that means this is a brand new speaker of a + // voice call for whom we must find an idle UDP channel a.k.a + // an idle STT engine that can process this speaker's speech data. + int32 mySttEngineId = getAnIdleUdpChannel(_idleUdpChannelsList); - // If we have not yet created any entry in our state map for this call, - // then we can be sure that it is the start of this call. - if (has(_vgwSessionIdToUdpChannelMap, key1) == false && - has(_vgwSessionIdToUdpChannelMap, key2) == false) { - // We can now write a "Start of Call" indicator file in the - // application's data directory. e-g: 5362954-call-started.txt - mutable int32 err = 0ul; - rstring socsFileName = dataDirectory() + "/" + - BSD.vgwSessionId + "-call-started.txt"; - uint64 fileHandle = fopen (socsFileName, "w+", err); + if (mySttEngineId == -1) { + // This is not good and we should never end up in this situation. + // This means we have not provisioned sufficient number of STT engines to + // handle the maximum planned concurrent calls. We have to ignore this + // speech data fragment and hope that an idle UDP channel number will + // become available by the time the next speech data fragment for this + // particular vgwSessionId_vgwVoiceChannelNumber combo arrives here. + if (BSD.speechDataFragmentCnt == 1) { + // Display this alert only for the very first data fragment of a + // given speaker of a given voice call. + appTrc(Trace.error, "No idle STT engine available at this time for the " + + "vgwSessionId_vgwVoiceChannelNumber: " + _key + + ". There are " + (rstring)$numberOfSTTEngines + + " STT engines configured and they are all processing other" + + " voice calls at this time. Please start sufficient number of STT engines" + + " next time to handle your maximum expected concurrent calls." 
+ + " A rule of thumb is to have two STT engines to process" + + " two speakers in every given concurrent voice call."); + } + + return; + } else { + // We got an idle STT engine. + BSD.sttEngineId = mySttEngineId; + + // If this call is just beginning, then we will create a + // tiny text file to indicate that we started receiving + // speech data from the IBM Voice Gateway for this new call. + rstring key1 = BSD.vgwSessionId + "_" + "1"; + rstring key2 = BSD.vgwSessionId + "_" + "2"; - if(err == 0) { - fwriteString ("VGW call session id " + BSD.vgwSessionId + - " started at " + ctime(getTimestamp()) + ".", fileHandle, err); - fclose(fileHandle, err); + // If we have not yet created any entry in our state map for this call, + // then we can be sure that it is the start of this call. + if (has(_vgwSessionIdToUdpChannelMap, key1) == false && + has(_vgwSessionIdToUdpChannelMap, key2) == false) { + // We can now write a "Start of Call" indicator file in the + // application's data directory. e-g: 5362954-call-started.txt + mutable int32 err = 0ul; + rstring socsFileName = dataDirectory() + "/" + + BSD.vgwSessionId + "-call-started.txt"; + uint64 fileHandle = fopen (socsFileName, "w+", err); + + if(err == 0) { + fwriteString ("VGW call session id " + BSD.vgwSessionId + + " started at " + ctime(getTimestamp()) + ".", fileHandle, err); + fclose(fileHandle, err); + } + + appTrc(Trace.error, "A new voice call has started. vgwSessionId=" + BSD.vgwSessionId); } - appTrc(Trace.error, "A new voice call has started. vgwSessionId=" + BSD.vgwSessionId); - } - - // Insert into the state map for future reference. - insertM(_vgwSessionIdToUdpChannelMap, - _key, mySttEngineId); + // Insert into the state map for future reference. + insertM(_vgwSessionIdToUdpChannelMap, + _key, mySttEngineId); + + // For this voice call (i.e. vgwSessionId), select a + // single result processor UDP channel. Both speakers in this + // same voice call will use that same result processor instance. 
+ // This will ensure that the STT results for both the speakers + // will reach the same result processor. + if (has(_vgwSessionToResultProcessorChannelMap, + BSD.vgwSessionId) == false) { + insertM(_vgwSessionToResultProcessorChannelMap, + BSD.vgwSessionId, mySttEngineId); + } - // For this voice call (i.e. vgwSessionId), select a - // single result processor UDP channel. Both speakers in this - // same voice call will use that same result processor instance. - // This will ensure that the STT results for both the speakers - // will reach the same result processor. - if (has(_vgwSessionToResultProcessorChannelMap, - BSD.vgwSessionId) == false) { - insertM(_vgwSessionToResultProcessorChannelMap, - BSD.vgwSessionId, mySttEngineId); - } - - // Set the STT result processor id. - BSD.sttResultProcessorId = - _vgwSessionToResultProcessorChannelMap[BSD.vgwSessionId]; - } // End of if (mySttEngineId == -1) - } // End of if (has(_vgwSessionIdToUdpChannelMap, _key) - - appTrc(Trace.debug, "vgwSessionId=" + BSD.vgwSessionId + - ", isCustomerSpeechData=" + (rstring)BSD.isCustomerSpeechData + - ", vgwVoiceChannelNumber=" + (rstring)BSD.vgwVoiceChannelNumber + - ", speechDataFragmentCnt=" + (rstring)BSD.speechDataFragmentCnt + - ", totalSpeechDataBytesReceived=" + - (rstring)BSD.totalSpeechDataBytesReceived + - ", sttEngineId=" + (rstring)BSD.sttEngineId + - ", sttResultProcessorId=" + (rstring)BSD.sttResultProcessorId); - // Submit this tuple. - submit(BSD, BSDF); - } // End of onTuple BSD - - // Process the end of voice call signal. - // Since there are two channels in every voice call, - // those two channels will carry their own "End STT session" - // message from the Voice Gateway. The logic below takes care of - // handling two End of Call Signals for every voice call. - onTuple EOCS: { - // Get the allocated STT engine id for a given - // vgwSessionId_vgwVoiceChannelNumber combo. - // We should always have an STT engine id. 
If not, that is a - // case where the user didn't provision sufficient number of - // STT engines and there was no idle STT engine available for that - // given vgwSessionId_vgwVoiceChannelNumber combo. - // This situation can be avoided by starting the application with a - // sufficient number of STT engines needed for the anticipated - // maximum concurrent voice calls. A rule of thumb is to have - // two STT engines to process two speakers in every given - // concurrent voice call. - // - // Get the sessionId + channelNumber combo string. - _key = EOCS.vgwSessionId + "_" + (rstring)EOCS.vgwVoiceChannelNumber; - - if (has(_vgwSessionIdToUdpChannelMap, _key) == true) { - // Let us send an empty blob to the WatsonSTT operator to indicate that - // this speaker of a given voice call is done. - _oTuple = (BinarySpeech_t){}; - // Copy the three input tuple attributes that must - // match with that of the outgoing tuple. - assignFrom(_oTuple, EOCS); - // Assign the STT engine id where this voice channel was - // getting processed until now. - _oTuple.sttEngineId = _vgwSessionIdToUdpChannelMap[_key]; - // We have to send this tuple to the result processor as well for - // the call recording logic to work correctly. - _oTuple.sttResultProcessorId = - _vgwSessionToResultProcessorChannelMap[EOCS.vgwSessionId]; - submit(_oTuple, BSDF); - // We are now done with this vgwSessionId_vgwVoiceChannelNumber combo. - removeM(_vgwSessionIdToUdpChannelMap, _key); - // Add the STT engine id to this call completed map to be released later in the - // following if block only after receiving EOCS for both the voice channels of this call. - insertM(_vgwSessionToCompletedUdpChannelMap, _key, _oTuple.sttEngineId); - } - - // Senthil added this if block on Feb/01/2020. 
- if($numberOfEocsNeededForVoiceCallCompletion == 1) { - // If the user configured this application to handle - // only one EOCS to treat a voice call as completed, then we - // will try to clean-up the other voice channel if it exists. - mutable int32 otherVgwVoiceChannelNumber = 1; - - if(EOCS.vgwVoiceChannelNumber == 1) { - otherVgwVoiceChannelNumber = 2; - } - - // Get the sessionId + channelNumber combo string. - _key = EOCS.vgwSessionId + "_" + (rstring)otherVgwVoiceChannelNumber; - + // Set the STT result processor id. + BSD.sttResultProcessorId = + _vgwSessionToResultProcessorChannelMap[BSD.vgwSessionId]; + } // End of if (mySttEngineId == -1) + } // End of if (has(_vgwSessionIdToUdpChannelMap, _key) + + appTrc(Trace.debug, "vgwSessionId=" + BSD.vgwSessionId + + ", isCustomerSpeechData=" + (rstring)BSD.isCustomerSpeechData + + ", vgwVoiceChannelNumber=" + (rstring)BSD.vgwVoiceChannelNumber + + ", speechDataFragmentCnt=" + (rstring)BSD.speechDataFragmentCnt + + ", totalSpeechDataBytesReceived=" + + (rstring)BSD.totalSpeechDataBytesReceived + + ", sttEngineId=" + (rstring)BSD.sttEngineId + + ", sttResultProcessorId=" + (rstring)BSD.sttResultProcessorId); + // Submit this tuple. + submit(BSD, BSDF); + } else { + // The incoming tuple contains an End of Call Signal (EOCS). + appTrc(Trace.error, "Received an EOCS. vgwSessionId=" + BSD.vgwSessionId + + ", voiceChannelNumber=" + (rstring)BSD.vgwVoiceChannelNumber); + // + // Process the end of voice call signal. + // Since there are two channels in every voice call, + // those two channels will carry their own "End STT session" + // message from the Voice Gateway. The logic below takes care of + // handling two End of Call Signals for every voice call. + // + // Get the allocated STT engine id for a given + // vgwSessionId_vgwVoiceChannelNumber combo. + // We should always have an STT engine id.
If not, that is a + // case where the user didn't provision sufficient number of + // STT engines and there was no idle STT engine available for that + // given vgwSessionId_vgwVoiceChannelNumber combo. + // This situation can be avoided by starting the application with a + // sufficient number of STT engines needed for the anticipated + // maximum concurrent voice calls. A rule of thumb is to have + // two STT engines to process two speakers in every given + // concurrent voice call. + // if (has(_vgwSessionIdToUdpChannelMap, _key) == true) { - // Let us send an empty blob to the WatsonS2T operator to indicate that + // Let us send an empty blob to the WatsonSTT operator to indicate that // this speaker of a given voice call is done. _oTuple = (BinarySpeech_t){}; // Copy the three input tuple attributes that must // match with that of the outgoing tuple. - assignFrom(_oTuple, EOCS); - // Override the following two attributes to reflect the other voice channel. - // Flip this attribute value. - if(_oTuple.isCustomerSpeechData == true) { - _oTuple.isCustomerSpeechData = false; - } else { - _oTuple.isCustomerSpeechData = true; - } - - _oTuple.vgwVoiceChannelNumber = otherVgwVoiceChannelNumber; - - // Assign the S2T engine id where this voice channel was + assignFrom(_oTuple, BSD); + // Assign the STT engine id where this voice channel was // getting processed until now. _oTuple.sttEngineId = _vgwSessionIdToUdpChannelMap[_key]; // We have to send this tuple to the result processor as well for // the call recording logic to work correctly. _oTuple.sttResultProcessorId = - _vgwSessionToResultProcessorChannelMap[EOCS.vgwSessionId]; + _vgwSessionToResultProcessorChannelMap[BSD.vgwSessionId]; submit(_oTuple, BSDF); // We are now done with this vgwSessionId_vgwVoiceChannelNumber combo. 
removeM(_vgwSessionIdToUdpChannelMap, _key); - // Add the S2T engine id to this call completed map to be released later in the + // Add the STT engine id to this call completed map to be released later in the // following if block only after receiving EOCS for both the voice channels of this call. insertM(_vgwSessionToCompletedUdpChannelMap, _key, _oTuple.sttEngineId); } - } - - // Since this voice call is ending, let us release the STT result processor - // instance that was allocated above for this voice call. - if (has(_vgwSessionToResultProcessorChannelMap, - EOCS.vgwSessionId) == true) { - // Let us remove the result processor id only after the logic - // in the previous if-block took care of sending the EOCS for - // both the voice channels in a given voice call. - // Checking for this condition is important for the - // call recording logic inside the STT result processor - // composite to work correctly. - rstring key1 = EOCS.vgwSessionId + "_" + "1"; - rstring key2 = EOCS.vgwSessionId + "_" + "2"; - - // Remove the result processor id only if the EOCS signal - // was sent for both of the voice channels. That must first - // happen before we can release the result processor id. - // - // This if condition was changed by Senthil on - // Feb/01/2021 for the following reason. - // If the user configured this application to handle - // a single EOCS as sufficient to consider a voice call - // completed for a given VGW session id, we will use the - // second || i.e. OR condition. Please refer to the - // constant i.e. expression declaration section above to - // read the commentary about this idea. 
- // - if (($numberOfEocsNeededForVoiceCallCompletion == 2 && - (has(_vgwSessionIdToUdpChannelMap, key1) == false && - has(_vgwSessionIdToUdpChannelMap, key2) == false)) || - ($numberOfEocsNeededForVoiceCallCompletion == 1 && - (has(_vgwSessionIdToUdpChannelMap, key1) == false || - has(_vgwSessionIdToUdpChannelMap, key2) == false))) { - removeM(_vgwSessionToResultProcessorChannelMap, EOCS.vgwSessionId); + + // Senthil added this if block on Feb/01/2021. + if($numberOfEocsNeededForVoiceCallCompletion == 1) { + // If the user configured this application to handle + // only one EOCS to treat a voice call as completed, then we + // will try to clean-up the other voice channel if it exists. + mutable int32 otherVgwVoiceChannelNumber = 1; - // Since the voice call for this VGW session id has ended completely, - // we can also release the STT engine(s) assigned for this call so that - // they can be repurposed for handling any new future calls. - // We can go ahead and release the STT engine by adding it back to - // the idle UDP channels list. - if(has(_vgwSessionToCompletedUdpChannelMap, key1) == true) { - appendM(_idleUdpChannelsList, _vgwSessionToCompletedUdpChannelMap[key1]); - // We are done. Remove it from the map as well. - removeM(_vgwSessionToCompletedUdpChannelMap, key1); + if(BSD.vgwVoiceChannelNumber == 1) { + otherVgwVoiceChannelNumber = 2; } - - if(has(_vgwSessionToCompletedUdpChannelMap, key2) == true) { - appendM(_idleUdpChannelsList, _vgwSessionToCompletedUdpChannelMap[key2]); - // We are done. Remove it from the map as well. - removeM(_vgwSessionToCompletedUdpChannelMap, key2); - } - - // At this time, the voice call for this VGW session id has ended. - // We can now write an "End of Call" indicator file in the - // application's data directory.
e-g: 5362954-call-completed.txt - mutable int32 err = 0ul; - rstring eocsFileName = dataDirectory() + "/" + - EOCS.vgwSessionId + "-call-completed.txt"; - uint64 fileHandle = fopen (eocsFileName, "w+", err); - if(err == 0) { - fwriteString ("VGW call session id " + EOCS.vgwSessionId + - " ended at " + ctime(getTimestamp()) + ".", fileHandle, err); - fclose(fileHandle, err); - } + // Get the sessionId + channelNumber combo string. + _key = BSD.vgwSessionId + "_" + (rstring)otherVgwVoiceChannelNumber; - appTrc(Trace.error, "An ongoing voice call has completed. vgwSessionId=" + EOCS.vgwSessionId); + if (has(_vgwSessionIdToUdpChannelMap, _key) == true) { + // Let us send an empty blob to the WatsonSTT operator to indicate that + // this speaker of a given voice call is done. + _oTuple = (BinarySpeech_t){}; + // Copy the three input tuple attributes that must + // match with that of the outgoing tuple. + assignFrom(_oTuple, BSD); + // Override the following two attributes to reflect the other voice channel. + // Flip this attribute value. + if(_oTuple.isCustomerSpeechData == true) { + _oTuple.isCustomerSpeechData = false; + } else { + _oTuple.isCustomerSpeechData = true; + } + + _oTuple.vgwVoiceChannelNumber = otherVgwVoiceChannelNumber; + + // Assign the STT engine id where this voice channel was + // getting processed until now. + _oTuple.sttEngineId = _vgwSessionIdToUdpChannelMap[_key]; + // We have to send this tuple to the result processor as well for + // the call recording logic to work correctly. + _oTuple.sttResultProcessorId = + _vgwSessionToResultProcessorChannelMap[BSD.vgwSessionId]; + submit(_oTuple, BSDF); + // We are now done with this vgwSessionId_vgwVoiceChannelNumber combo. + removeM(_vgwSessionIdToUdpChannelMap, _key); + // Add the STT engine id to this call completed map to be released later in the + // following if block only after receiving EOCS for both the voice channels of this call.
+ insertM(_vgwSessionToCompletedUdpChannelMap, _key, _oTuple.sttEngineId); + } } - } - } + + // Since this voice call is ending, let us release the STT result processor + // instance that was allocated above for this voice call. + if (has(_vgwSessionToResultProcessorChannelMap, + BSD.vgwSessionId) == true) { + // Let us remove the result processor id only after the logic + // in the previous if-block took care of sending the EOCS for + // both the voice channels in a given voice call. + // Checking for this condition is important for the + // call recording logic inside the STT result processor + // composite to work correctly. + rstring key1 = BSD.vgwSessionId + "_" + "1"; + rstring key2 = BSD.vgwSessionId + "_" + "2"; + + // Remove the result processor id only if the EOCS signal + // was sent for both of the voice channels. That must first + // happen before we can release the result processor id. + // + // This if condition was changed by Senthil on + // Feb/01/2021 for the following reason. + // If the user configured this application to handle + // a single EOCS as sufficient to consider a voice call + // completed for a given VGW session id, we will use the + // second || i.e. OR condition. Please refer to the + // constant i.e. expression declaration section above to + // read the commentary about this idea. + // + if (($numberOfEocsNeededForVoiceCallCompletion == 2 && + (has(_vgwSessionIdToUdpChannelMap, key1) == false && + has(_vgwSessionIdToUdpChannelMap, key2) == false)) || + ($numberOfEocsNeededForVoiceCallCompletion == 1 && + (has(_vgwSessionIdToUdpChannelMap, key1) == false || + has(_vgwSessionIdToUdpChannelMap, key2) == false))) { + removeM(_vgwSessionToResultProcessorChannelMap, BSD.vgwSessionId); + + // Since the voice call for this VGW session id has ended completely, + // we can also release the STT engine(s) assigned for this call so that + // they can be repurposed for handling any new future calls. 
+ // We can go ahead and release the STT engine by adding it back to + // the idle UDP channels list. + if(has(_vgwSessionToCompletedUdpChannelMap, key1) == true) { + appendM(_idleUdpChannelsList, _vgwSessionToCompletedUdpChannelMap[key1]); + // We are done. Remove it from the map as well. + removeM(_vgwSessionToCompletedUdpChannelMap, key1); + } + + if(has(_vgwSessionToCompletedUdpChannelMap, key2) == true) { + appendM(_idleUdpChannelsList, _vgwSessionToCompletedUdpChannelMap[key2]); + // We are done. Remove it from the map as well. + removeM(_vgwSessionToCompletedUdpChannelMap, key2); + } + + // At this time, the voice call for this VGW session id has ended. + // We can now write an "End of Call" indicator file in the + // application's data directory. e-g: 5362954-call-completed.txt + mutable int32 err = 0ul; + rstring eocsFileName = dataDirectory() + "/" + + BSD.vgwSessionId + "-call-completed.txt"; + uint64 fileHandle = fopen (eocsFileName, "w+", err); + + if(err == 0) { + fwriteString ("VGW call session id " + BSD.vgwSessionId + + " ended at " + ctime(getTimestamp()) + ".", fileHandle, err); + fclose(fileHandle, err); + } + + appTrc(Trace.error, "An ongoing voice call has completed. vgwSessionId=" + BSD.vgwSessionId); + } + } + } // End of if(BSD.endOfCallSignal == false) + } // End of onTuple BSD config - threadedPort: queue(BSD, Sys.Wait), queue(EOCS, Sys.Wait); + threadedPort: queue(BSD, Sys.Wait); } // End of Custom operator. // IMPORTANT: IBM STT service on public cloud requires @@ -1856,14 +1858,13 @@ public composite CallRecordingWriteCoordinator(input SpeechFragment; // parallel region for the purpose of load testing by // replaying many pre-recorded voice calls at the same time. 
public composite CallRecordingReplay(input CallReplaySignalFileNameIn; - output PreRecordedBinarySpeechData, PreRecordedEndOfCallSignal) { + output PreRecordedBinarySpeechData) { param expression $callRecordingReadDirectory; // This composite operator receives externally // defined stream types via operator parameters. type $callMetaData_t; type $binarySpeech_t; - type $endOfCallSignal_t; // Replaying the pre-recorded voice calls. // The graph below will perform the logic necessary to @@ -2096,7 +2097,6 @@ public composite CallRecordingReplay(input CallReplaySignalFileNameIn; // mix both of them and send out a tuple for transcription by // downstream operators. (stream<$binarySpeech_t> PreRecordedBinarySpeechData as PRBSD; - stream<$endOfCallSignal_t> PreRecordedEndOfCallSignal as PREOCS; stream Acknowledgement as Ack) as PreRecordedCallReplayer = Custom(CallMetaDataVC1, CallMetaDataVC2 as CMD; CallSpeechDataVC1, CallSpeechDataVC2 as CSD; TimerSignal as TS) { @@ -2122,7 +2122,6 @@ public composite CallRecordingReplay(input CallReplaySignalFileNameIn; // waiting for data to be read from the pre-recorded files. mutable rstring lastObservedReplayMapKey = "abcxyz"; mutable $binarySpeech_t _oTuple1 = {}; - mutable $endOfCallSignal_t _oTuple2 = {}; } onTuple CMD: { @@ -2214,8 +2213,10 @@ public composite CallRecordingReplay(input CallReplaySignalFileNameIn; // We have the call meta data. We can create a new // binary speech data tuple now and send it out for transcription. // Copy all the call meta data attributes to the outgoing tuple. 
+ _oTuple1 = ($binarySpeech_t){}; assignFrom(_oTuple1, _callMetaDataMap[key]); _oTuple1.speech = CSD.speech; + _oTuple1.endOfCallSignal = false; _oTuple1.speechDataFragmentCnt = _speechDataFragmentCount[key]; _oTuple1.totalSpeechDataBytesReceived = _speechDataBytesCount[key]; submit(_oTuple1, PRBSD); @@ -2247,12 +2248,14 @@ public composite CallRecordingReplay(input CallReplaySignalFileNameIn; // Send two EOCS signals one for each voice channel in the given call. for (rstring str in _callMetaDataMap) { - _oTuple2.vgwSessionId = _callMetaDataMap[str].vgwSessionId; - _oTuple2.isCustomerSpeechData = + _oTuple1 = ($binarySpeech_t){}; + _oTuple1.vgwSessionId = _callMetaDataMap[str].vgwSessionId; + _oTuple1.isCustomerSpeechData = _callMetaDataMap[str].isCustomerSpeechData; - _oTuple2.vgwVoiceChannelNumber = + _oTuple1.vgwVoiceChannelNumber = _callMetaDataMap[str].vgwVoiceChannelNumber; - submit(_oTuple2, PREOCS); + _oTuple1.endOfCallSignal = true; + submit(_oTuple1, PRBSD); } // Clear all the state maps to be ready for the @@ -2316,12 +2319,14 @@ public composite CallRecordingReplay(input CallReplaySignalFileNameIn; // we are required to send an EOCS for that voice channel. 
for (rstring str in _callMetaDataMap) { if (_speechDataTuplesSentCount[str] > 0) { - _oTuple2.vgwSessionId = _callMetaDataMap[str].vgwSessionId; - _oTuple2.isCustomerSpeechData = + _oTuple1 = ($binarySpeech_t){}; + _oTuple1.vgwSessionId = _callMetaDataMap[str].vgwSessionId; + _oTuple1.isCustomerSpeechData = _callMetaDataMap[str].isCustomerSpeechData; - _oTuple2.vgwVoiceChannelNumber = + _oTuple1.vgwVoiceChannelNumber = _callMetaDataMap[str].vgwVoiceChannelNumber; - submit(_oTuple2, PREOCS); + _oTuple1.endOfCallSignal = true; + submit(_oTuple1, PRBSD); } } diff --git a/samples/VoiceGatewayToStreamsToWatsonSTT/info.xml b/samples/VoiceGatewayToStreamsToWatsonSTT/info.xml index ea825b5..07097ad 100644 --- a/samples/VoiceGatewayToStreamsToWatsonSTT/info.xml +++ b/samples/VoiceGatewayToStreamsToWatsonSTT/info.xml @@ -3,13 +3,13 @@ VoiceGatewayToStreamsToWatsonSTT Example that showcases STT on Cloud and STT on CP4D - 1.0.4 + 1.0.5 4.2.1.6 com.ibm.streamsx.sttgateway - [2.2.3,4.0.0) + [2.2.9,4.0.0) com.ibm.streamsx.json diff --git a/sttgateway-tech-brief.txt b/sttgateway-tech-brief.txt index 8a94f4a..70ecc4d 100644 --- a/sttgateway-tech-brief.txt +++ b/sttgateway-tech-brief.txt @@ -1,6 +1,6 @@ ============================================================ First created on: July/01/2018 -Last modified on: February/07/2021 +Last modified on: February/11/2021 Purpose of this toolkit -----------------------