Skip to content

Commit

Permalink
Expose sendData (#9)
Browse files Browse the repository at this point in the history
Also inform server of client_version (and api_version), use new
transcript messages, expose all data messages, and add tests in CI.

This matches
fixie-ai/ultravox-client-sdk-flutter#11
  • Loading branch information
mdepinet authored Nov 28, 2024
1 parent 14eb7ac commit 7ad1f5d
Show file tree
Hide file tree
Showing 6 changed files with 171 additions and 78 deletions.
23 changes: 23 additions & 0 deletions .github/workflows/test.yaml
Original file line number Diff line number Diff line change
@@ -0,0 +1,23 @@
# Runs the Gradle test task for every push to main and every pull request targeting main.
on:
  push:
    branches: [ main ]
  pull_request:
    branches: [ main ]

jobs:
  test:
    runs-on: ubuntu-latest
    steps:
      - uses: actions/checkout@v4
      - uses: actions/setup-java@v4
        with:
          java-version: '17'
          distribution: 'temurin'

      - name: Setup Gradle
        uses: gradle/actions/setup-gradle@v3

      - name: Test
        # The wrapper script is made executable before it is invoked.
        run: |
          chmod +x gradlew
          ./gradlew test --stacktrace
15 changes: 9 additions & 6 deletions README.md
Original file line number Diff line number Diff line change
Expand Up @@ -21,9 +21,12 @@ integrate your server with the [Ultravox REST API](https://fixie-ai.github.io/ul

## Publishing

1. Bump the version in ultravox_client's build.gradle.kts
2. Uncomment the "LocalForCentralUpload" maven repository block in the same gradle file
3. Uncomment and properly populate the three signing-related values in gradle.properties
4. Run publishToMaven
5. Compress the output "ai" directory into a zip file
6. Upload that zip to https://central.sonatype.com/publishing and work through the UI to publish
1. Bump the version in ultravox_client's build.gradle.kts *and in UltravoxSession.kt*
2. Open a PR in GitHub and get the changes merged. (This also runs tests, so please only publish
from main!)
3. Uncomment the "LocalForCentralUpload" maven repository block in the same gradle file
4. Uncomment and properly populate the three signing-related values in gradle.properties
5. Run publishToMaven (from within Android Studio)
6. Compress the output "ai" directory into a zip file
7. Upload that zip to https://central.sonatype.com/publishing and work through the UI to publish
8. Create a new tag/release in GitHub please!
11 changes: 1 addition & 10 deletions demoapp/src/main/java/ai/ultravox/demoapp/MainActivity.kt
Original file line number Diff line number Diff line change
Expand Up @@ -101,7 +101,7 @@ class MainActivity : AppCompatActivity() {
}

private fun joinCall() {
session.listen("transcript") {
session.listen("transcripts") {
run {
val last = session.lastTranscript
if (last != null && last.isFinal) {
Expand All @@ -111,15 +111,6 @@ class MainActivity : AppCompatActivity() {
}
}
}
session.listen("status") {
run {
Toast.makeText(
this.applicationContext,
session.status.name,
Toast.LENGTH_SHORT
).show()
}
}
session.joinCall(joinText.text.toString())
joinButton.text = "Leave"
joinButton.setOnClickListener { onLeaveClicked() }
Expand Down
2 changes: 1 addition & 1 deletion ultravox_client/build.gradle.kts
Original file line number Diff line number Diff line change
Expand Up @@ -49,7 +49,7 @@ publishing {
register<MavenPublication>("release") {
groupId = "ai.fixie"
artifactId = "ultravox-client-sdk"
version = "0.1.4"
version = "0.1.5"

pom {
name = "Ultravox Client"
Expand Down
163 changes: 102 additions & 61 deletions ultravox_client/src/main/java/ai/ultravox/UltravoxSession.kt
Original file line number Diff line number Diff line change
Expand Up @@ -28,9 +28,13 @@ typealias AsyncClientToolImplementation = suspend (JSONObject) -> ClientToolResu
*
* - "status": Fired when the session status changes.
* - "transcripts": Fired when a transcript is added or updated.
* - "experimental_message": Fired when an experimental message is received. The message is available via lastExperimentalMessage.
* - "experimental_message": Fired when an experimental message is received. The message is
* available via lastExperimentalMessage.
* - "mic_muted": Fired when the user's microphone is muted or unmuted.
* - "speaker_muted": Fired when the user's speaker (agent output audio) is muted or unmuted.
* - "data_message": Fired when any data message is received, including those typically handled
* by this SDK. The message is available via lastDataMessage. See
* https://docs.ultravox.ai/datamessages for message types.
*/
@Suppress("unused", "MemberVisibilityCanBePrivate")
class UltravoxSession(
Expand All @@ -39,14 +43,18 @@ class UltravoxSession(
private val client: OkHttpClient = OkHttpClient(),
private val experimentalMessages: Set<String> = HashSet(),
) {
companion object {
/**
 * Version string of this SDK.
 *
 * joinCall sends it to the server inside the "clientVersion" query parameter, prefixed
 * with "android_". It must be kept equal to the publication version in build.gradle.kts;
 * ClientVersionCheckTest compares the two values.
 */
const val ULTRAVOX_SDK_VERSION = "0.1.5"
}

private var socket: WebSocket? = null
private var room: Room = LiveKit.create(ctx)

private val _transcripts = ArrayList<Transcript>()
private val _transcripts = ArrayList<Transcript?>()

/** An immutable copy of all the session's transcripts. */
val transcripts
get() = _transcripts.toImmutableList()
get() = _transcripts.filterNotNull().toImmutableList()

/** The most recent transcript for the session. */
val lastTranscript
Expand All @@ -69,6 +77,13 @@ class UltravoxSession(
fireListeners("experimental_message")
}

/**
 * The most recently received data message, or null when none has been stored yet.
 *
 * The setter is private. Every assignment stores the value and then fires the
 * "data_message" listeners.
 */
var lastDataMessage: JSONObject? = null
private set(value) {
field = value
fireListeners("data_message")
}

/** Whether the user's microphone is muted. (This does not inspect hardware state.) */
var micMuted: Boolean = false
set(value) {
Expand Down Expand Up @@ -145,24 +160,37 @@ class UltravoxSession(
}

/** Connects to a call using the given joinUrl. */
fun joinCall(joinUrl: String) {
fun joinCall(
joinUrl: String,
@Suppress("FORBIDDEN_VARARG_PARAMETER_TYPE", "UNUSED_PARAMETER")
vararg forceNamedParams: Nothing,
clientVersion: String? = null
) {
if (status != UltravoxSessionStatus.DISCONNECTED) {
throw RuntimeException("Cannot join a new call while already in a call")
}
status = UltravoxSessionStatus.CONNECTING
var httpUrl = if (joinUrl.startsWith("wss://") or joinUrl.startsWith("ws://")) {
val httpUrl = if (joinUrl.startsWith("wss://") or joinUrl.startsWith("ws://")) {
// This is the expected case, but OkHttp expects http(s) protocol even
// for WebSocket requests for some reason.
joinUrl.replaceFirst("ws", "http").toHttpUrl()
} else {
joinUrl.toHttpUrl()
}
val urlBuilder = httpUrl.newBuilder()
var uvClientVersion = "android_$ULTRAVOX_SDK_VERSION"
if (clientVersion != null) {
uvClientVersion += ":$clientVersion"
}
urlBuilder.addQueryParameter("clientVersion", uvClientVersion)
urlBuilder.addQueryParameter("apiVersion", "1")
if (experimentalMessages.isNotEmpty()) {
httpUrl = httpUrl.newBuilder()
.addQueryParameter("experimentalMessages", experimentalMessages.joinToString(","))
.build()
urlBuilder.addQueryParameter(
"experimentalMessages",
experimentalMessages.joinToString(",")
)
}
val req = Request.Builder().url(httpUrl).build()
val req = Request.Builder().url(urlBuilder.build()).build()
socket = client.newWebSocket(req, object : WebSocketListener() {
override fun onMessage(webSocket: WebSocket, text: String) {
val message = JSONObject(text)
Expand Down Expand Up @@ -204,7 +232,9 @@ class UltravoxSession(
*/
fun setOutputMedium(medium: Transcript.Medium) {
if (!status.live) {
throw RuntimeException("Cannot set output medium while not connected. Current status is $status.")
throw RuntimeException(
"Cannot set output medium while not connected. Current status is $status."
)
}
val message = JSONObject()
message.put("type", "set_output_medium")
Expand All @@ -215,14 +245,29 @@ class UltravoxSession(
/** Sends a message via text. */
fun sendText(text: String) {
if (!status.live) {
throw RuntimeException("Cannot send text while not connected. Current status is $status.")
throw RuntimeException(
"Cannot send text while not connected. Current status is $status."
)
}
val message = JSONObject()
message.put("type", "input_text_message")
message.put("text", text)
sendData(message)
}

/**
 * Publishes an arbitrary data message to the server.
 *
 * The message is serialized with `toString()`, encoded to bytes and published through the
 * room's local participant from a coroutine launched on `coroScope`.
 *
 * See https://docs.ultravox.ai/datamessages for message types.
 *
 * @param message the JSON payload to send; it must contain a "type" entry.
 * @throws RuntimeException if [message] has no "type" entry.
 */
fun sendData(message: JSONObject) {
    if (message.has("type").not()) {
        throw RuntimeException("Cannot send a data message without a type.")
    }
    coroScope.launch {
        // Serialization stays inside the coroutine, as before, so it happens when the job runs.
        val payload = message.toString().encodeToByteArray()
        room.localParticipant.publishData(payload)
    }
}

private fun disconnect() {
if (status == UltravoxSessionStatus.DISCONNECTED) {
return
Expand All @@ -235,61 +280,51 @@ class UltravoxSession(

private fun onDataReceived(event: RoomEvent.DataReceived) {
val message = JSONObject(event.data.decodeToString())
when (message["type"]) {
lastDataMessage = message
when (message.getString("type")) {
"state" -> {
when (message["state"]) {
when (message.optString("state")) {
"listening" -> status = UltravoxSessionStatus.LISTENING
"thinking" -> status = UltravoxSessionStatus.THINKING
"speaking" -> status = UltravoxSessionStatus.SPEAKING
}
}

"transcript" -> {
val transcript = message["transcript"] as JSONObject
val medium =
if (transcript.has("medium") && transcript["medium"] == "text") Transcript.Medium.TEXT else Transcript.Medium.VOICE
addOrUpdateTranscript(
Transcript(
transcript["text"] as String,
transcript["final"] as Boolean,
Transcript.Role.USER,
medium
if (message.optString("medium") == "voice")
Transcript.Medium.VOICE
else Transcript.Medium.TEXT
val role =
if (message.optString("role") == "agent")
Transcript.Role.AGENT
else Transcript.Role.USER
val ordinal = message.getInt("ordinal")
val isFinal = message.optBoolean("final", false)
if (!message.isNull("text")) {
addOrUpdateTranscript(
ordinal,
medium,
role,
isFinal,
text = message.getString("text")
)
)
}

"voice_synced_transcript", "agent_text_transcript" -> {
val medium =
if (message["type"] == "agent_text_transcript") Transcript.Medium.TEXT else Transcript.Medium.VOICE
if (message.has("text") && message["text"] != JSONObject.NULL) {
} else if (!message.isNull("delta")) {
addOrUpdateTranscript(
Transcript(
message["text"] as String,
message["final"] as Boolean,
Transcript.Role.AGENT,
medium
)
ordinal,
medium,
role,
isFinal,
delta = message.getString("delta")
)
} else if (message.has("delta") && message["delta"] != JSONObject.NULL) {
val last = lastTranscript
if (last != null && last.speaker == Transcript.Role.AGENT) {
addOrUpdateTranscript(
Transcript(
last.text + message["delta"] as String,
message["final"] as Boolean,
Transcript.Role.AGENT,
medium
)
)
}
}
}

"client_tool_invocation" -> {
invokeClientTool(
message["toolName"] as String,
message["invocationId"] as String,
message["parameters"] as JSONObject
message.getString("toolName"),
message.getString("invocationId"),
message.getJSONObject("parameters")
)
}

Expand All @@ -301,12 +336,24 @@ class UltravoxSession(
}
}

private fun addOrUpdateTranscript(transcript: Transcript) {
val last = lastTranscript
if (last != null && !last.isFinal && last.speaker == transcript.speaker) {
_transcripts.removeLast()
private fun addOrUpdateTranscript(
ordinal: Int,
medium: Transcript.Medium,
speaker: Transcript.Role,
isFinal: Boolean,
text: String? = null,
delta: String? = null
) {
while (_transcripts.size < ordinal) {
_transcripts.add(null)
}
if (_transcripts.size == ordinal) {
_transcripts.add(Transcript(text ?: delta ?: "", isFinal, speaker, medium))
} else {
val priorText = _transcripts[ordinal]?.text ?: ""
_transcripts[ordinal] =
Transcript(text ?: (priorText + (delta ?: "")), isFinal, speaker, medium)
}
_transcripts.add(transcript)
fireListeners("transcripts")
}

Expand Down Expand Up @@ -351,12 +398,6 @@ class UltravoxSession(
sendData(message)
}

private fun sendData(message: JSONObject) {
coroScope.launch {
room.localParticipant.publishData(message.toString().encodeToByteArray())
}
}

private fun fireListeners(event: String) {
if (!listeners.containsKey(event)) {
return
Expand All @@ -369,4 +410,4 @@ class UltravoxSession(
}
}
}
}
}
Original file line number Diff line number Diff line change
@@ -0,0 +1,35 @@
package ai.ultravox;

import static org.junit.Assert.assertEquals;
import static org.junit.Assert.assertNotNull;

import org.junit.Test;
import org.junit.runner.RunWith;
import org.junit.runners.JUnit4;

import java.nio.file.FileSystems;
import java.nio.file.Files;
import java.nio.file.Path;
import java.util.regex.Matcher;
import java.util.regex.Pattern;

/**
 * Guards against the SDK version constant drifting away from the version declared in the
 * module's Gradle build script.
 */
@RunWith(JUnit4.class)
public class ClientVersionCheckTest {
    /** Matches an indented {@code version = "x.y.z"} line and captures the version number. */
    private static final Pattern VERSION_LINE_PATTERN =
            Pattern.compile("^\\s+version = \"(?<version>[0-9.]+)\"\\s*$");

    /**
     * Reads build.gradle.kts, takes the first line matching {@link #VERSION_LINE_PATTERN},
     * and checks that its version equals {@code UltravoxSession.ULTRAVOX_SDK_VERSION}.
     */
    @Test
    public void checkSdkVersion_matchesGradle() throws Exception {
        // The empty path has no parent, so resolveSibling yields the bare relative file name.
        Path emptyPath = FileSystems.getDefault().getPath("");
        Path gradleFile = emptyPath.resolveSibling("build.gradle.kts");

        String gradleSdkVersion = Files.readAllLines(gradleFile).stream()
                .map(VERSION_LINE_PATTERN::matcher)
                .filter(Matcher::matches)
                .map(matcher -> matcher.group("version"))
                .findFirst()
                .orElse(null);

        assertNotNull("Failed to find SDK version from Gradle", gradleSdkVersion);
        assertEquals(gradleSdkVersion, UltravoxSession.ULTRAVOX_SDK_VERSION);
    }
}

0 comments on commit 7ad1f5d

Please sign in to comment.