From 68a9e191d7b00c424fe137390254a7a5c8e41db7 Mon Sep 17 00:00:00 2001 From: rany2 Date: Sun, 19 Jun 2022 21:06:55 +0300 Subject: [PATCH] drop custom SSML support --- README.md | 28 ++------------------ setup.cfg | 2 +- src/edge_tts/communicate.py | 53 +++++++++++++++---------------------- src/edge_tts/util.py | 7 ----- 4 files changed, 24 insertions(+), 66 deletions(-) diff --git a/README.md b/README.md index 59c2b38..4e20a61 100644 --- a/README.md +++ b/README.md @@ -61,35 +61,11 @@ You must first check the available voices with the `--list-voices` option: ### Custom SSML -It is possible to send Microsoft's text-to-speech servers a custom SSML document which would allow greater customization of the speech. - -Information about the SSML format can be found here on Microsoft's own website: https://docs.microsoft.com/en-us/azure/cognitive-services/speech-service/speech-synthesis-markup - -As a short example, if you want to apply the following SSML document and play it back using `edge-tts`. - -``` - - - - That'd be just amazing! - - - -``` - -It would be easiest to do the following: - -1. Create a file called `custom_ssml.xml` with the above content. -2. Run the following command: - - $ edge-tts --custom-ssml --file custom_ssml.xml --write-media amazing.mp3 - -3. Voila! +Support for custom SSML was removed in 5.0.0 because Microsoft has taken steps to prevent it from working. Custom SSML can no longer be used. ### Changing pitch, rate, volume, etc. -It is possible to make minor changes to the generated speech without resorting to custom SSML. However, you must note that you couldn't use the `--custom-ssml` option with the `--pitch`, `--rate`, `--volume`, etc. options. +It is possible to make minor changes to the generated speech. $ edge-tts --pitch=-10Hz --text "Hello, world!" --write-media hello_with_pitch_down.mp3 $ edge-tts --rate=0.5 --text "Hello, world!" 
--write-media hello_with_rate_halved.mp3 diff --git a/setup.cfg b/setup.cfg index 6107499..c1255d6 100644 --- a/setup.cfg +++ b/setup.cfg @@ -1,6 +1,6 @@ [metadata] name = edge-tts -version = 4.0.11 +version = 5.0.0 author = rany author_email = ranygh@riseup.net description = Microsoft Edge's TTS diff --git a/src/edge_tts/communicate.py b/src/edge_tts/communicate.py index 432ce39..e5ed6f7 100644 --- a/src/edge_tts/communicate.py +++ b/src/edge_tts/communicate.py @@ -213,7 +213,6 @@ async def run( pitch="+0Hz", rate="+0%", volume="+0%", - customspeak=False, proxy=None, ): """ @@ -223,11 +222,10 @@ async def run( messages (str or list): A list of SSML strings or a single text. boundery_type (int): The type of boundary to use. 0 for none, 1 for word_boundary, 2 for sentence_boundary. codec (str): The codec to use. - voice (str): The voice to use (only applicable to non-customspeak). - pitch (str): The pitch to use (only applicable to non-customspeak). - rate (str): The rate to use (only applicable to non-customspeak). - volume (str): The volume to use (only applicable to non-customspeak). - customspeak (bool): Whether to create the SSML or treat the messages as SSML. + voice (str): The voice to use. + pitch (str): The pitch to use. + rate (str): The rate to use. + volume (str): The volume to use. Yields: tuple: The subtitle offset, subtitle, and audio data. 
@@ -244,23 +242,19 @@ async def run( word_boundary = str(word_boundary).lower() - if not customspeak: - websocket_max_size = 2**16 - overhead_per_message = ( - len( - ssml_headers_plus_data( - connect_id(), self.date, mkssml("", voice, pitch, rate, volume) - ) + websocket_max_size = 2**16 + overhead_per_message = ( + len( + ssml_headers_plus_data( + connect_id(), self.date, mkssml("", voice, pitch, rate, volume) ) - + 50 - ) # margin of error - messages = split_text_by_byte_length( - escape(remove_incompatible_characters(messages)), - websocket_max_size - overhead_per_message, ) - else: - if isinstance(messages, str): - messages = [messages] + + 50 + ) # margin of error + messages = split_text_by_byte_length( + escape(remove_incompatible_characters(messages)), + websocket_max_size - overhead_per_message, + ) # Variables for the loop download = False @@ -307,18 +301,13 @@ async def run( # Send the request to the service. await websocket.send_str(request) # Send the message itself. - if not customspeak: - await websocket.send_str( - ssml_headers_plus_data( - connect_id(), - self.date, - mkssml(message, voice, pitch, rate, volume), - ) - ) - else: - await websocket.send_str( - ssml_headers_plus_data(connect_id(), self.date, message) + await websocket.send_str( + ssml_headers_plus_data( + connect_id(), + self.date, + mkssml(message, voice, pitch, rate, volume), ) + ) # Begin listening for the response. 
async for received in websocket: diff --git a/src/edge_tts/util.py b/src/edge_tts/util.py index 491e265..5581908 100644 --- a/src/edge_tts/util.py +++ b/src/edge_tts/util.py @@ -38,7 +38,6 @@ async def _tts(args): args.pitch, args.rate, args.volume, - customspeak=args.custom_ssml, proxy=args.proxy, ): if i[2] is not None: @@ -62,12 +61,6 @@ async def _main(): group = parser.add_mutually_exclusive_group(required=True) group.add_argument("-t", "--text", help="what TTS will say") group.add_argument("-f", "--file", help="same as --text but read from file") - parser.add_argument( - "-z", - "--custom-ssml", - help="treat text as ssml to send. For more info check https://bit.ly/3fIq13S", - action="store_true", - ) parser.add_argument( "-v", "--voice",