Skip to content

Commit

Permalink
Support bark for TTS (LazyAGI#100)
Browse files Browse the repository at this point in the history
  • Loading branch information
JingofXin authored Aug 1, 2024
1 parent aa9c115 commit 2249571
Show file tree
Hide file tree
Showing 7 changed files with 101 additions and 7 deletions.
2 changes: 1 addition & 1 deletion lazyllm/common/globals.py
Original file line number Diff line number Diff line change
Expand Up @@ -66,7 +66,7 @@ def __getattr__(self, __name: str) -> Any:
raise AttributeError(f'Attr {__name} not found in globals')

def clear(self):
self.__data.pop(self._sid)
self.__data.pop(self._sid, None)

def _clear_all(self):
self.__data.clear()
Expand Down
3 changes: 3 additions & 0 deletions lazyllm/components/auto/autodeploy.py
Original file line number Diff line number Diff line change
Expand Up @@ -5,6 +5,7 @@
from .auto_helper import model_map, get_model_name, check_requirements
from lazyllm.components.embedding.embed import EmbeddingDeploy
from lazyllm.components.stable_diffusion.stable_diffusion3 import StableDiffusionDeploy
from lazyllm.components.text_to_speech.bark import BarkDeploy
from ..utils.downloader import ModelManager

class AutoDeploy(LazyLLMDeployBase):
Expand All @@ -20,6 +21,8 @@ def __new__(cls, base_model, source=lazyllm.config['model_source'], trust_remote
return EmbeddingDeploy(trust_remote_code, launcher)
elif type == 'sd' or ModelManager.get_model_type(model_name) == 'sd':
return StableDiffusionDeploy(launcher)
elif type == 'tts' or ModelManager.get_model_type(model_name) == 'tts':
return BarkDeploy(launcher)
map_name = model_map(model_name)
candidates = get_configer().query_deploy(lazyllm.config['gpu_type'], launcher.ngpus,
map_name, max_token_num)
Expand Down
Empty file.
70 changes: 70 additions & 0 deletions lazyllm/components/text_to_speech/bark.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,70 @@
import os
import json

import lazyllm
from lazyllm import LOG
from lazyllm.thirdparty import torch
from lazyllm.thirdparty import transformers as tf
from ..utils.downloader import ModelManager

class Bark(object):

def __init__(self, base_sd, source=None, trust_remote_code=True, init=False):
source = lazyllm.config['model_source'] if not source else source
self.base_sd = ModelManager(source).download(base_sd)
self.trust_remote_code = trust_remote_code
self.processor, self.bark = None, None
self.init_flag = lazyllm.once_flag()
self.device = 'cpu'
if init:
lazyllm.call_once(self.init_flag, self.load_bark)

def load_bark(self):
self.device = "cuda" if torch.cuda.is_available() else "cpu"
self.processor = tf.AutoProcessor.from_pretrained(self.base_sd)
self.processor.speaker_embeddings['repo_or_path'] = self.base_sd
self.bark = tf.BarkModel.from_pretrained(self.base_sd,
torch_dtype=torch.float16,
attn_implementation="flash_attention_2").to(self.device)

def __call__(self, string):
lazyllm.call_once(self.init_flag, self.load_bark)
if isinstance(string, str):
query = string
voice_preset = "v2/zh_speaker_9"
elif isinstance(string, dict):
query = string['inputs']
voice_preset = string['voice_preset']
else:
raise TypeError(f"Not support input type:{type(string)}, requires str or dict.")
inputs = self.processor(query, voice_preset=voice_preset).to(self.device)
speech = self.bark.generate(**inputs) * 32767
res = {'sounds': (
self.bark.generation_config.sample_rate,
speech.cpu().numpy().squeeze().tolist()
)}
return json.dumps(res)


class BarkDeploy(object):
keys_name_handle = {
'inputs': 'inputs',
}
message_format = {
'inputs': 'Who are you ?',
'voice_preset': None,
}
default_headers = {'Content-Type': 'application/json'}

def __init__(self, launcher=None):
self.launcher = launcher

def __call__(self, finetuned_model=None, base_model=None):
if not os.path.exists(finetuned_model) or \
not any(filename.endswith('.bin') or filename.endswith('.safetensors')
for _, _, filename in os.walk(finetuned_model) if filename):
if not finetuned_model:
LOG.warning(f"Note! That finetuned_model({finetuned_model}) is an invalid path, "
f"base_model({base_model}) will be used")
finetuned_model = base_model
return lazyllm.deploy.RelayServer(func=Bark(finetuned_model), launcher=self.launcher)()
6 changes: 5 additions & 1 deletion lazyllm/components/utils/downloader/model_mapping.py
Original file line number Diff line number Diff line change
Expand Up @@ -285,7 +285,11 @@
"stable-diffusion-3-medium": {
"source": {"huggingface": "stabilityai/stable-diffusion-3-medium", "modelscope": "AI-ModelScope/stable-diffusion-3-medium-diffusers"},
"type": "sd",
},
},
"bark":{
"source": {"huggingface": "suno/bark", "modelscope": "mapjack/bark"},
"type": "tts",
},
"llava-1.5-7b": {
"source": {"huggingface": "llava-hf/llava-1.5-7b-hf", "modelscope": "huangjintao/llava-1.5-7b-hf"},
"type": "vlm",
Expand Down
21 changes: 16 additions & 5 deletions lazyllm/tools/webpages/webmodule.py
Original file line number Diff line number Diff line change
Expand Up @@ -12,6 +12,7 @@
from PIL import Image
from io import BytesIO
from types import GeneratorType
import numpy as np

import lazyllm
from lazyllm import LOG, globals
Expand Down Expand Up @@ -93,6 +94,9 @@ def init_web(self, component_descs):
for _, gname, name, ctype, value in component_descs:
if ctype in ('Checkbox', 'Text'):
components.append(getattr(gr, ctype)(interactive=True, value=value, label=f'{gname}.{name}'))
elif ctype == 'Dropdown':
components.append(getattr(gr, ctype)(interactive=True, choices=value,
label=f'{gname}.{name}'))
else:
raise KeyError(f'invalid component type: {ctype}')
with gr.Row():
Expand Down Expand Up @@ -262,7 +266,11 @@ def get_log_and_message(s):
elif 'images_base64' in r:
image_data = r.pop('images_base64')[0]
image = Image.open(BytesIO(base64.b64decode(image_data)))
return "The image is: ", "".join(log_history), image
return "The image is: ", "".join(log_history), {'img': image}
elif 'sounds' in r:
sound_data = r.pop('sounds')
sound_data = (sound_data[0], np.array(sound_data[1]).astype(np.int16))
return "The Audio is: ", "".join(log_history), {'audio': sound_data}
else:
s = s
except (ValueError, KeyError, TypeError):
Expand All @@ -272,11 +280,14 @@ def get_log_and_message(s):
return s, "".join(log_history), None

log_history = []
image = None
file = None
if isinstance(result, (str, dict)):
result, log, image = get_log_and_message(result)
if image:
chat_history[-1][1] = gr.Image(image)
result, log, file = get_log_and_message(result)
if file:
if 'img' in file:
chat_history[-1][1] = gr.Image(file['img'])
if 'audio' in file:
chat_history[-1][1] = gr.Audio(file['audio'])
elif isinstance(result, str):
chat_history[-1][1] = result
elif isinstance(result, GeneratorType):
Expand Down
6 changes: 6 additions & 0 deletions tests/advanced_tests/test_deploy.py
Original file line number Diff line number Diff line change
Expand Up @@ -96,6 +96,12 @@ def test_sd3(self):
res = m('a little cat')
assert "images_base64" in json.loads(res)

def test_bark(self):
m = lazyllm.TrainableModule('bark')
m.update_server()
res = m('你好啊,很高兴认识你。')
assert "sounds" in json.loads(res)

def test_vlm_and_lmdeploy(self):
chat = lazyllm.TrainableModule('internvl-chat-2b-v1-5').deploy_method(deploy.LMDeploy)
m = lazyllm.ServerModule(chat)
Expand Down

0 comments on commit 2249571

Please sign in to comment.