godotengine · akien-mga · Aug 5, 2022 · May 23, 2022
@@ -59,7 +59,8 @@ jobs:
           # The actual dependencies
           sudo apt-get install build-essential pkg-config libx11-dev libxcursor-dev \
               libxinerama-dev libgl1-mesa-dev libglu-dev libasound2-dev libpulse-dev \
-              libdbus-1-dev libudev-dev libxi-dev libxrandr-dev yasm xvfb wget unzip
+              libdbus-1-dev libudev-dev libxi-dev libxrandr-dev yasm xvfb wget unzip \
+              libspeechd-dev speech-dispatcher
 
       - name: Setup Godot build cache
         uses: ./.github/actions/godot-cache

@@ -190,6 +190,42 @@ void _OS::global_menu_clear(const String &p_menu) {
 	OS::get_singleton()->global_menu_clear(p_menu);
 }
 
+bool _OS::tts_is_speaking() const { // Bound to scripts as "tts_is_speaking"; forwards to the OS singleton.
+	return OS::get_singleton()->tts_is_speaking();
+}
+
+bool _OS::tts_is_paused() const { // Bound to scripts as "tts_is_paused"; forwards to the OS singleton.
+	return OS::get_singleton()->tts_is_paused();
+}
+
+Array _OS::tts_get_voices() const { // Returns the OS singleton's voice list unchanged.
+	return OS::get_singleton()->tts_get_voices();
+}
+
+PoolStringArray _OS::tts_get_voices_for_language(const String &p_language) const { // Forwards p_language to the OS singleton and returns its result unchanged.
+	return OS::get_singleton()->tts_get_voices_for_language(p_language);
+}
+
+void _OS::tts_speak(const String &p_text, const String &p_voice, int p_volume, float p_pitch, float p_rate, int p_utterance_id, bool p_interrupt) { // Passes every argument through to the OS singleton, in the same order.
+	OS::get_singleton()->tts_speak(p_text, p_voice, p_volume, p_pitch, p_rate, p_utterance_id, p_interrupt);
+}
+
+void _OS::tts_pause() { // Bound to scripts as "tts_pause"; forwards to the OS singleton.
+	OS::get_singleton()->tts_pause();
+}
+
+void _OS::tts_resume() { // Bound to scripts as "tts_resume"; forwards to the OS singleton.
+	OS::get_singleton()->tts_resume();
+}
+
+void _OS::tts_stop() { // Bound to scripts as "tts_stop"; forwards to the OS singleton.
+	OS::get_singleton()->tts_stop();
+}
+
+void _OS::tts_set_utterance_callback(TTSUtteranceEvent p_event, Object *p_object, String p_callback) { // Converts the _OS enum value to the OS enum (same enumerators, same order) and forwards.
+	OS::get_singleton()->tts_set_utterance_callback(static_cast<OS::TTSUtteranceEvent>(p_event), p_object, p_callback);
+}
+
 Point2 _OS::get_mouse_position() const {
 	return OS::get_singleton()->get_mouse_position();
 }
@@ -1260,6 +1296,18 @@ void _OS::_bind_methods() {
 	//ClassDB::bind_method(D_METHOD("is_video_mode_resizable","screen"),&_OS::is_video_mode_resizable,DEFVAL(0));
 	//ClassDB::bind_method(D_METHOD("get_fullscreen_mode_list","screen"),&_OS::get_fullscreen_mode_list,DEFVAL(0));
 
+	ClassDB::bind_method(D_METHOD("tts_is_speaking"), &_OS::tts_is_speaking);
+	ClassDB::bind_method(D_METHOD("tts_is_paused"), &_OS::tts_is_paused);
+	ClassDB::bind_method(D_METHOD("tts_get_voices"), &_OS::tts_get_voices);
+	ClassDB::bind_method(D_METHOD("tts_get_voices_for_language", "language"), &_OS::tts_get_voices_for_language);
+
+	ClassDB::bind_method(D_METHOD("tts_speak", "text", "voice", "volume", "pitch", "rate", "utterance_id", "interrupt"), &_OS::tts_speak, DEFVAL(50), DEFVAL(1.f), DEFVAL(1.f), DEFVAL(0), DEFVAL(false));
+	ClassDB::bind_method(D_METHOD("tts_pause"), &_OS::tts_pause);
+	ClassDB::bind_method(D_METHOD("tts_resume"), &_OS::tts_resume);
+	ClassDB::bind_method(D_METHOD("tts_stop"), &_OS::tts_stop);
+
+	ClassDB::bind_method(D_METHOD("tts_set_utterance_callback", "event", "object", "callback"), &_OS::tts_set_utterance_callback);
+
 	ClassDB::bind_method(D_METHOD("global_menu_add_item", "menu", "label", "id", "meta"), &_OS::global_menu_add_item);
 	ClassDB::bind_method(D_METHOD("global_menu_add_separator", "menu"), &_OS::global_menu_add_separator);
 	ClassDB::bind_method(D_METHOD("global_menu_remove_item", "menu", "idx"), &_OS::global_menu_remove_item);
@@ -1568,6 +1616,11 @@ void _OS::_bind_methods() {
 	BIND_ENUM_CONSTANT(POWERSTATE_NO_BATTERY);
 	BIND_ENUM_CONSTANT(POWERSTATE_CHARGING);
 	BIND_ENUM_CONSTANT(POWERSTATE_CHARGED);
+
+	BIND_ENUM_CONSTANT(TTS_UTTERANCE_STARTED);
+	BIND_ENUM_CONSTANT(TTS_UTTERANCE_ENDED);
+	BIND_ENUM_CONSTANT(TTS_UTTERANCE_CANCELED);
+	BIND_ENUM_CONSTANT(TTS_UTTERANCE_BOUNDARY);
 }
 
 _OS::_OS() {

@@ -152,11 +152,31 @@ class _OS : public Object {
 		OPENGL_CONTEXT, // HGLRC, X11::GLXContext, NSOpenGLContext*, EGLContext* ...
 	};
 
+	enum TTSUtteranceEvent { // Script-facing copy of OS::TTSUtteranceEvent; enumerators are declared in the same order.
+		TTS_UTTERANCE_STARTED, // Utterance has begun to be spoken.
+		TTS_UTTERANCE_ENDED, // Utterance was successfully finished.
+		TTS_UTTERANCE_CANCELED, // Utterance was canceled, or the TTS service was unable to process it.
+		TTS_UTTERANCE_BOUNDARY, // Utterance reached a word or sentence boundary.
+		TTS_UTTERANCE_MAX, // Sentinel after the last real event; not among the constants bound with BIND_ENUM_CONSTANT.
+	};
+
 	void global_menu_add_item(const String &p_menu, const String &p_label, const Variant &p_signal, const Variant &p_meta);
 	void global_menu_add_separator(const String &p_menu);
 	void global_menu_remove_item(const String &p_menu, int p_idx);
 	void global_menu_clear(const String &p_menu);
 
+	bool tts_is_speaking() const;
+	bool tts_is_paused() const;
+	Array tts_get_voices() const;
+	PoolStringArray tts_get_voices_for_language(const String &p_language) const;
+
+	void tts_speak(const String &p_text, const String &p_voice, int p_volume = 50, float p_pitch = 1.f, float p_rate = 1.f, int p_utterance_id = 0, bool p_interrupt = false);
+	void tts_pause();
+	void tts_resume();
+	void tts_stop();
+
+	void tts_set_utterance_callback(TTSUtteranceEvent p_event, Object *p_object, String p_callback);
+
 	Point2 get_mouse_position() const;
 	void set_window_title(const String &p_title);
 	void set_window_mouse_passthrough(const PoolVector2Array &p_region);
@@ -413,6 +433,7 @@ VARIANT_ENUM_CAST(_OS::Month);
 VARIANT_ENUM_CAST(_OS::SystemDir);
 VARIANT_ENUM_CAST(_OS::ScreenOrientation);
 VARIANT_ENUM_CAST(_OS::HandleType);
+VARIANT_ENUM_CAST(_OS::TTSUtteranceEvent);
 
 class _Geometry : public Object {
 	GDCLASS(_Geometry, Object);

@@ -557,6 +557,75 @@ bool OS::can_use_threads() const {
 #endif
 }
 
+bool OS::tts_is_speaking() const { // Default for platforms without TTS: warns and reports "not speaking". Declared virtual in the header.
+	WARN_PRINT("TTS is not supported by this platform.");
+	return false;
+}
+
+bool OS::tts_is_paused() const { // Default for platforms without TTS: warns and reports "not paused". Declared virtual in the header.
+	WARN_PRINT("TTS is not supported by this platform.");
+	return false;
+}
+
+void OS::tts_pause() { // Default for platforms without TTS: only emits a warning. Declared virtual in the header.
+	WARN_PRINT("TTS is not supported by this platform.");
+}
+
+void OS::tts_resume() { // Default for platforms without TTS: only emits a warning. Declared virtual in the header.
+	WARN_PRINT("TTS is not supported by this platform.");
+}
+
+Array OS::tts_get_voices() const { // Default for platforms without TTS: warns and returns an empty Array. Declared virtual in the header.
+	WARN_PRINT("TTS is not supported by this platform.");
+	return Array();
+}
+
+PoolStringArray OS::tts_get_voices_for_language(const String &p_language) const { // Collects the "id" of every voice whose "language" starts with p_language.
+	Array all_voices = tts_get_voices(); // Virtual call: resolves to the platform override when there is one.
+	PoolStringArray matching_ids;
+	for (int idx = 0; idx < all_voices.size(); idx++) {
+		const Dictionary &entry = all_voices[idx];
+		if (entry.has("id") && entry.has("language") && entry["language"].operator String().begins_with(p_language)) { // Entries missing either key are skipped.
+			matching_ids.push_back(entry["id"]);
+		}
+	}
+	return matching_ids;
+}
+
+void OS::tts_speak(const String &p_text, const String &p_voice, int p_volume, float p_pitch, float p_rate, int p_utterance_id, bool p_interrupt) { // Default for platforms without TTS: ignores all arguments and only warns.
+	WARN_PRINT("TTS is not supported by this platform.");
+}
+
+void OS::tts_stop() { // Default for platforms without TTS: only emits a warning. Declared virtual in the header.
+	WARN_PRINT("TTS is not supported by this platform.");
+}
+
+void OS::tts_set_utterance_callback(TTSUtteranceEvent p_event, Object *p_object, const StringName &p_callback) { // Stores the callback for p_event; there is one slot per event, so this overwrites any earlier registration.
+	ERR_FAIL_INDEX(p_event, OS::TTS_UTTERANCE_MAX); // Guard against out-of-range event values before indexing utterance_callback[].
+	utterance_callback[p_event].object = p_object; // NOTE(review): a raw Object pointer is kept; nothing visible here clears it if the object is freed - confirm lifetime handling.
+	utterance_callback[p_event].cb_name = p_callback;
+}
+
+void OS::tts_post_utterance_event(TTSUtteranceEvent p_event, int p_id, int p_pos) { // Queues a deferred call to the callback registered for p_event, if any.
+	ERR_FAIL_INDEX(p_event, OS::TTS_UTTERANCE_MAX);
+	const Callback &target = utterance_callback[p_event];
+	if (target.object == nullptr) {
+		return; // No object registered for this event, so there is nothing to call.
+	}
+	switch (p_event) {
+		case OS::TTS_UTTERANCE_BOUNDARY: {
+			target.object->call_deferred(target.cb_name, p_pos, p_id); // Boundary callbacks get the position first, then the utterance id.
+		} break;
+		case OS::TTS_UTTERANCE_STARTED:
+		case OS::TTS_UTTERANCE_ENDED:
+		case OS::TTS_UTTERANCE_CANCELED: {
+			target.object->call_deferred(target.cb_name, p_id); // These three events pass only the utterance id.
+		} break;
+		default:
+			break;
+	}
+}
+
 OS::MouseMode OS::get_mouse_mode() const {
 	return MOUSE_MODE_VISIBLE;
 }

@@ -123,6 +123,31 @@ class OS {
 		}
 	};
 
+	struct TTSUtterance { // Plain data describing one utterance. NOTE(review): not referenced in the visible hunks; presumably consumed by platform back ends - confirm.
+		String text;
+		String voice;
+		int volume = 50; // Same default as tts_speak()'s p_volume.
+		float pitch = 1.f; // Same default as tts_speak()'s p_pitch.
+		float rate = 1.f; // Same default as tts_speak()'s p_rate.
+		int id = 0; // Same default as tts_speak()'s p_utterance_id.
+	};
+
+	enum TTSUtteranceEvent { // Event kinds dispatched by tts_post_utterance_event().
+		TTS_UTTERANCE_STARTED, // Callback is invoked with the utterance id.
+		TTS_UTTERANCE_ENDED, // Callback is invoked with the utterance id.
+		TTS_UTTERANCE_CANCELED, // Callback is invoked with the utterance id.
+		TTS_UTTERANCE_BOUNDARY, // Callback is invoked with the position and the utterance id.
+		TTS_UTTERANCE_MAX, // Number of real events: sizes utterance_callback[] and is the bound used to validate p_event.
+	};
+
+private:
+	struct Callback { // One registered utterance callback: a target object plus the method name to call on it.
+		Object *object = nullptr; // nullptr means nothing is registered; tts_post_utterance_event() checks this before calling.
+		StringName cb_name; // Method name passed to call_deferred() on `object`.
+	};
+
+	Callback utterance_callback[TTS_UTTERANCE_MAX];
+
 protected:
 	friend class Main;
 
@@ -172,6 +197,20 @@ class OS {
 	virtual void set_mouse_mode(MouseMode p_mode);
 	virtual MouseMode get_mouse_mode() const;
 
+	virtual bool tts_is_speaking() const;
+	virtual bool tts_is_paused() const;
+	virtual Array tts_get_voices() const;
+
+	virtual PoolStringArray tts_get_voices_for_language(const String &p_language) const;
+
+	virtual void tts_speak(const String &p_text, const String &p_voice, int p_volume = 50, float p_pitch = 1.f, float p_rate = 1.f, int p_utterance_id = 0, bool p_interrupt = false);
+	virtual void tts_pause();
+	virtual void tts_resume();
+	virtual void tts_stop();
+
+	virtual void tts_set_utterance_callback(TTSUtteranceEvent p_event, Object *p_object, const StringName &p_callback);
+	virtual void tts_post_utterance_event(TTSUtteranceEvent p_event, int p_id, int p_pos = 0);
+
 	virtual void warp_mouse_position(const Point2 &p_to) {}
 	virtual Point2 get_mouse_position() const = 0;
 	virtual int get_mouse_button_state() const = 0;

@@ -1020,6 +1020,94 @@
 				[b]Note:[/b] This method is implemented on Android, iOS and UWP.
 			</description>
 		</method>
+		<method name="tts_get_voices" qualifiers="const">
+			<return type="Array" />
+			<description>
+				Returns an [Array] of voice information dictionaries.
+				Each [Dictionary] contains three [String] entries:
+				- [code]name[/code] is voice name.
+				- [code]id[/code] is voice identifier.
+				- [code]language[/code] is language code in [code]lang_Variant[/code] format. The [code]lang[/code] part is a 2 or 3-letter code based on the ISO-639 standard, in lowercase. The [code]Variant[/code] part is an engine-dependent string describing country, region, and/or dialect.
+				[b]Note:[/b] This method is implemented on Android, iOS, HTML5, Linux, macOS, and Windows.
+			</description>
+		</method>
+		<method name="tts_get_voices_for_language" qualifiers="const">
+			<return type="PoolStringArray" />
+			<argument index="0" name="language" type="String" />
+			<description>
+				Returns a [PoolStringArray] of voice identifiers for the [code]language[/code].
+				[b]Note:[/b] This method is implemented on Android, iOS, HTML5, Linux, macOS, and Windows.
+			</description>
+		</method>
+		<method name="tts_is_paused" qualifiers="const">
+			<return type="bool" />
+			<description>
+				Returns [code]true[/code] if the synthesizer is in a paused state.
+				[b]Note:[/b] This method is implemented on Android, iOS, HTML5, Linux, macOS, and Windows.
+			</description>
+		</method>
+		<method name="tts_is_speaking" qualifiers="const">
+			<return type="bool" />
+			<description>
+				Returns [code]true[/code] if the synthesizer is generating speech, or has utterances waiting in the queue.
+				[b]Note:[/b] This method is implemented on Android, iOS, HTML5, Linux, macOS, and Windows.
+			</description>
+		</method>
+		<method name="tts_pause">
+			<return type="void" />
+			<description>
+				Puts the synthesizer into a paused state.
+				[b]Note:[/b] This method is implemented on Android, iOS, HTML5, Linux, macOS, and Windows.
+			</description>
+		</method>
+		<method name="tts_resume">
+			<return type="void" />
+			<description>
+				Resumes the synthesizer if it was paused.
+				[b]Note:[/b] This method is implemented on Android, iOS, HTML5, Linux, macOS, and Windows.
+			</description>
+		</method>
+		<method name="tts_set_utterance_callback">
+			<return type="void" />
+			<argument index="0" name="event" type="int" enum="OS.TTSUtteranceEvent" />
+			<argument index="1" name="object" type="Object" />
+			<argument index="2" name="callback" type="String" />
+			<description>
+				Adds a callback, which is called when the utterance has started, finished, been canceled, or reached a text boundary.
+				- [code]TTS_UTTERANCE_STARTED[/code], [code]TTS_UTTERANCE_ENDED[/code], and [code]TTS_UTTERANCE_CANCELED[/code] callable's method should take one [int] parameter, the utterance id.
+				- [code]TTS_UTTERANCE_BOUNDARY[/code] callable's method should take two [int] parameters, the index of the character and the utterance id.
+				[b]Note:[/b] The granularity of the boundary callbacks is engine dependent.
+				[b]Note:[/b] This method is implemented on Android, iOS, HTML5, Linux, macOS, and Windows.
+			</description>
+		</method>
+		<method name="tts_speak">
+			<return type="void" />
+			<argument index="0" name="text" type="String" />
+			<argument index="1" name="voice" type="String" />
+			<argument index="2" name="volume" type="int" default="50" />
+			<argument index="3" name="pitch" type="float" default="1.0" />
+			<argument index="4" name="rate" type="float" default="1.0" />
+			<argument index="5" name="utterance_id" type="int" default="0" />
+			<argument index="6" name="interrupt" type="bool" default="false" />
+			<description>
+				Adds an utterance to the queue. If [code]interrupt[/code] is [code]true[/code], the queue is cleared first.
+				- [code]voice[/code] identifier is one of the [code]"id"[/code] values returned by [method tts_get_voices] or one of the values returned by [method tts_get_voices_for_language].
+				- [code]volume[/code] ranges from [code]0[/code] (lowest) to [code]100[/code] (highest).
+				- [code]pitch[/code] ranges from [code]0.0[/code] (lowest) to [code]2.0[/code] (highest), [code]1.0[/code] is default pitch for the current voice.
+				- [code]rate[/code] ranges from [code]0.1[/code] (lowest) to [code]10.0[/code] (highest), [code]1.0[/code] is a normal speaking rate. Other values are relative to the normal rate.
+				- [code]utterance_id[/code] is passed as a parameter to the callback functions.
+				[b]Note:[/b] On Windows and Linux, utterance [code]text[/code] can use SSML markup. SSML support is engine and voice dependent. If the engine does not support SSML, you should strip out all XML markup before calling [method tts_speak].
+				[b]Note:[/b] The granularity of pitch, rate, and volume is engine and voice dependent. Values may be truncated.
+				[b]Note:[/b] This method is implemented on Android, iOS, HTML5, Linux, macOS, and Windows.
+			</description>
+		</method>
+		<method name="tts_stop">
+			<return type="void" />
+			<description>
+				Stops synthesis in progress and removes all utterances from the queue.
+				[b]Note:[/b] This method is implemented on Android, iOS, HTML5, Linux, macOS, and Windows.
+			</description>
+		</method>
 	</methods>
 	<members>
 		<member name="clipboard" type="String" setter="set_clipboard" getter="get_clipboard" default="&quot;&quot;">
@@ -1246,5 +1334,17 @@
 		<constant name="POWERSTATE_CHARGED" value="4" enum="PowerState">
 			Plugged in, battery fully charged.
 		</constant>
+		<constant name="TTS_UTTERANCE_STARTED" value="0" enum="TTSUtteranceEvent">
+			Utterance has begun to be spoken.
+		</constant>
+		<constant name="TTS_UTTERANCE_ENDED" value="1" enum="TTSUtteranceEvent">
+			Utterance was successfully finished.
+		</constant>
+		<constant name="TTS_UTTERANCE_CANCELED" value="2" enum="TTSUtteranceEvent">
+			Utterance was canceled, or TTS service was unable to process it.
+		</constant>
+		<constant name="TTS_UTTERANCE_BOUNDARY" value="3" enum="TTSUtteranceEvent">
+			Utterance reached a word or sentence boundary.
+		</constant>
 	</constants>
 </class>
@@ -9,6 +9,7 @@ android_files = [
     "file_access_filesystem_jandroid.cpp",
     "audio_driver_opensl.cpp",
     "dir_access_jandroid.cpp",
+    "tts_android.cpp",
     "thread_jandroid.cpp",
     "net_socket_android.cpp",
     "java_godot_lib_jni.cpp",

@@ -38,6 +38,7 @@
 import org.godotengine.godot.io.file.FileAccessHandler;
 import org.godotengine.godot.plugin.GodotPlugin;
 import org.godotengine.godot.plugin.GodotPluginRegistry;
+import org.godotengine.godot.tts.GodotTTS;
 import org.godotengine.godot.utils.GodotNetUtils;
 import org.godotengine.godot.utils.PermissionsUtil;
 import org.godotengine.godot.xr.XRMode;
@@ -254,6 +255,7 @@ protected void instanceSingleton(SingletonBase s) {
 
 	public GodotIO io;
 	public GodotNetUtils netUtils;
+	public GodotTTS tts;
 
 	static SingletonBase[] singletons = new SingletonBase[MAX_SINGLETONS];
 	static int singleton_count = 0;
@@ -575,6 +577,7 @@ private void initializeGodot() {
 		final Activity activity = getActivity();
 		io = new GodotIO(activity);
 		netUtils = new GodotNetUtils(activity);
+		tts = new GodotTTS(activity);
 		Context context = getContext();
 		DirectoryAccessHandler directoryAccessHandler = new DirectoryAccessHandler(context);
 		FileAccessHandler fileAccessHandler = new FileAccessHandler(context);