From 3b25b92a81a05ebaf1c6dbabf675fbfbe6c9f418 Mon Sep 17 00:00:00 2001 From: Slavi Pantaleev Date: Sat, 14 Sep 2024 10:39:20 +0300 Subject: [PATCH] Implement more fine-grained typing notices sending MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit The previous approach (implemented in dd1dd78312e3db7) was simple (send typing notices for as long as the "controller" is running), but this proved to be overly simplistic and unable to handle edge-cases: - in multi-user rooms (or rooms with a prefix requirement), the bot used to send a typing notice while "working", but its work consisted of ignoring the message. So it then sent a "not typing" notice. This is wasteful and otherwise problematic - certain clients (like nheko) do not handle this "race" well. - certain reactions (anything other than 🗣️ right now) are meant to be ignored. There's no point in doing the same "typing / not typing" dance - there are other instances where the bot may do work, but doesn't (due to configuration or lack of capabilities) This new more fine-grained implementation of typing notices aims to: - only send a typing notice if actual "slow work" will be done - avoid stopping & restarting typing notices (wasteful) if a chain of work is to be performed (processing voice messages and doing speech-to-text + text-generation + ...). 
Rather, typing notice sending is maintained throughout the whole chain of work. --- Cargo.lock | 4 ++-- Cargo.toml | 2 +- src/bot/implementation.rs | 9 +++++++++ src/bot/messaging.rs | 7 ------- src/bot/reacting.rs | 7 ------- src/controller/chat_completion/mod.rs | 14 ++++++++++++++ src/controller/image/generation.rs | 2 ++ src/controller/reaction/text_to_speech.rs | 2 ++ 8 files changed, 30 insertions(+), 17 deletions(-) diff --git a/Cargo.lock b/Cargo.lock index 6b54ba8..c2ab163 100644 --- a/Cargo.lock +++ b/Cargo.lock @@ -2122,9 +2122,9 @@ dependencies = [ [[package]] name = "mxlink" -version = "1.2.0" +version = "1.2.1" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "195f0bd9764068de3e3e446bbac81a54489ff51674d80e387057b805c7dab96c" +checksum = "f1aa513435471af3e1131dc18a1f4eb2e6fb42c11308f1be9b4f0316b3809e32" dependencies = [ "base64 0.22.1", "chacha20poly1305", diff --git a/Cargo.toml b/Cargo.toml index d144a48..204a42c 100644 --- a/Cargo.toml +++ b/Cargo.toml @@ -22,7 +22,7 @@ base64 = "0.22.*" # We'd rather not depend on this, but we cannot use the ruma-events EventContent macro without it. 
matrix-sdk = { version = "0.7.1", default-features = false } mxidwc = "1.0.*" -mxlink = "1.2.*" +mxlink = ">=1.2.1" etke_openai_api_rust = "0.1.*" quick_cache = "0.6.*" regex = "1.10.*" diff --git a/src/bot/implementation.rs b/src/bot/implementation.rs index 29e077c..311f728 100644 --- a/src/bot/implementation.rs +++ b/src/bot/implementation.rs @@ -9,6 +9,7 @@ use mxlink::matrix_sdk::Room; use mxlink::{ InitConfig, LoginConfig, LoginCredentials, LoginEncryption, MatrixLink, PersistenceConfig, + TypingNoticeGuard, }; use mxlink::helpers::account_data_config::{ @@ -210,6 +211,14 @@ impl Bot { .await } + pub(crate) async fn start_typing_notice(&self, room: &Room) -> TypingNoticeGuard { + self.inner + .matrix_link + .rooms() + .start_typing_notice(room) + .await + } + pub async fn start(&self) -> anyhow::Result<()> { self.rooms().attach_event_handlers().await; self.messaging().attach_event_handlers().await; diff --git a/src/bot/messaging.rs b/src/bot/messaging.rs index 61498bd..11967be 100644 --- a/src/bot/messaging.rs +++ b/src/bot/messaging.rs @@ -317,13 +317,6 @@ impl Messaging { let start_time = std::time::Instant::now(); - let _typing_notice_guard = self - .bot - .matrix_link() - .rooms() - .start_typing_notice(message_context.room()) - .await; - let event_span = tracing::error_span!("message_controller", ?controller_type); crate::controller::dispatch_controller(&controller_type, &message_context, &self.bot) diff --git a/src/bot/reacting.rs b/src/bot/reacting.rs index 0c4b4bd..019a377 100644 --- a/src/bot/reacting.rs +++ b/src/bot/reacting.rs @@ -244,13 +244,6 @@ impl Reacting { tracing::info!("Handling reaction via reaction controller"); - let _typing_notice_guard = self - .bot - .matrix_link() - .rooms() - .start_typing_notice(message_context.room()) - .await; - let event_span = tracing::error_span!("reaction_controller"); crate::controller::reaction::handle( diff --git a/src/controller/chat_completion/mod.rs b/src/controller/chat_completion/mod.rs index 
7386373..c72f55f 100644 --- a/src/controller/chat_completion/mod.rs +++ b/src/controller/chat_completion/mod.rs @@ -43,6 +43,8 @@ pub async fn handle( ) -> anyhow::Result<()> { let mut original_message_is_audio = false; + let mut _typing_notice_guard: Option<mxlink::TypingNoticeGuard> = None; + let speech_to_text_flow_type = message_context .room_config_context() .speech_to_text_flow_type(); @@ -71,6 +73,10 @@ pub async fn handle( } }; + if _typing_notice_guard.is_none() { + _typing_notice_guard = Some(bot.start_typing_notice(message_context.room()).await); + } + let Some(speech_to_text_created_event_id_result) = handle_stage_speech_to_text(bot, message_context, audio_content, response_type).await else { @@ -96,6 +102,10 @@ pub async fn handle( .room_config_context() .should_auto_text_generate(original_message_is_audio) { + if _typing_notice_guard.is_none() { + _typing_notice_guard = Some(bot.start_typing_notice(message_context.room()).await); + } + let speech_to_text_created_event_id_reaction_event_id = if let Some(speech_to_text_created_event_id) = speech_to_text_created_event_id { let reaction_event_response = bot @@ -213,6 +223,10 @@ pub async fn handle( match text_to_speech_stage_params { Some(TextToSpeechParams::Perform(text_to_speech_eligible_payload, response_type)) => { + if _typing_notice_guard.is_none() { + _typing_notice_guard = Some(bot.start_typing_notice(message_context.room()).await); + } + let _tts_result = generate_and_send_tts_for_message( bot, matrix_link.clone(), diff --git a/src/controller/image/generation.rs b/src/controller/image/generation.rs index e0924a9..4cf5843 100644 --- a/src/controller/image/generation.rs +++ b/src/controller/image/generation.rs @@ -137,6 +137,8 @@ pub async fn handle_sticker( return Ok(()); }; + let _typing_notice_guard = bot.start_typing_notice(message_context.room()).await; + let span = tracing::debug_span!( "sticker_generation", agent_id = agent.identifier().as_string() diff --git a/src/controller/reaction/text_to_speech.rs 
b/src/controller/reaction/text_to_speech.rs index eaac21a..b9c3205 100644 --- a/src/controller/reaction/text_to_speech.rs +++ b/src/controller/reaction/text_to_speech.rs @@ -52,6 +52,8 @@ pub(super) async fn handle( return Ok(()); }; + let _typing_notice_guard = bot.start_typing_notice(message_context.room()).await; + crate::controller::utils::text_to_speech::generate_and_send_tts_for_message( bot, matrix_link,