Upgrade data_path for query_with_files #330

Merged: 6 commits, Nov 4, 2024
Changes from 5 commits
examples/stt_sensevoice.py (5 additions, 2 deletions)

@@ -16,5 +16,8 @@
 chat = lazyllm.TrainableModule('SenseVoiceSmall')

 if __name__ == '__main__':
-    # Note that audio is enabled here
-    lazyllm.WebModule(chat, port=8847, audio=True).start().wait()
+    # Note:
+    # 1. Audio is enabled here.
+    # 2. If `files_target` is not set, all modules can access the input file;
+    #    if it is set, only the specified modules can.
+    lazyllm.WebModule(chat, port=8847, audio=True, files_target=chat).start().wait()
examples/tts_bark.py (1 addition, 1 deletion)

@@ -15,7 +15,7 @@
 m.name = "tts"

 if __name__ == '__main__':
-    m.WebModule(
+    lazyllm.WebModule(
         m,
         port=8847,
         components={
lazyllm/__init__.py (2 additions, 1 deletion)

@@ -7,7 +7,7 @@
 from .flow import *  # noqa F403
 from .components import (LazyLLMDataprocBase, LazyLLMFinetuneBase, LazyLLMDeployBase,
                          LazyLLMValidateBase, register as component_register, Prompter,
-                         AlpacaPrompter, ChatPrompter, FastapiApp, JsonFormatter)
+                         AlpacaPrompter, ChatPrompter, FastapiApp, JsonFormatter, FileFormatter)

 from .module import (ModuleBase, UrlModule, TrainableModule, ActionModule,
                      ServerModule, TrialModule, register as module_register,

@@ -37,6 +37,7 @@
     'ChatPrompter',
     'FastapiApp',
     'JsonFormatter',
+    'FileFormatter',

     # launcher
     'LazyLLMLaunchersBase',  # empty, slurm, sco
lazyllm/common/__init__.py (6 additions)

@@ -2,6 +2,7 @@
 from .common import package, kwargs, arguments, LazyLLMCMD, timeout, final, ReadOnlyWrapper, DynamicDescriptor
 from .common import FlatList, Identity, ResultCollector, ArgsDict, CaseInsensitiveDict
 from .common import ReprRule, make_repr, modify_repr
+from .common import encode_query_with_filepaths, decode_query_with_filepaths, lazyllm_merge_query
 from .common import once_flag, call_once, once_wrapper, singleton, reset_on_pickle
 from .option import Option, OptionIter
 from .threading import Thread, ThreadPoolExecutor

@@ -82,4 +83,9 @@

     # file-system queue
     'FileSystemQueue',
+
+    # query with file_path
+    'encode_query_with_filepaths',
+    'decode_query_with_filepaths',
+    'lazyllm_merge_query',
 ]
lazyllm/common/common.py (55 additions, 1 deletion)

@@ -1,7 +1,8 @@
 import re
 import builtins
 import json
 import typing
-from typing import Any, Callable
+from typing import Any, Callable, List, Union
 from contextlib import contextmanager
 import copy
 import threading

@@ -376,6 +377,59 @@ def get_instance(*args, **kwargs):
         return instances[cls]
     return get_instance

+LAZYLLM_QUERY_PREFIX = 'lazyllm-query'
+
+def encode_query_with_filepaths(query: str = None, files: List[str] = None) -> str:
+    query = query if query else ''
+    query_with_docs = {'query': query, 'files': files}
+    if files:
+        assert isinstance(files, list), "files must be a list."
+        assert all(isinstance(item, str) for item in files), "All items in files must be strings"
+        return LAZYLLM_QUERY_PREFIX + json.dumps(query_with_docs)
+    else:
+        return query
+
+def decode_query_with_filepaths(query_files: str) -> Union[dict, str]:
+    assert isinstance(query_files, str), "query_files must be a str."
+    query_files = query_files.strip()
+    if query_files.startswith(LAZYLLM_QUERY_PREFIX):
+        try:
+            obj = json.loads(query_files[len(LAZYLLM_QUERY_PREFIX):])
+            return obj
+        except json.JSONDecodeError as e:
+            raise ValueError(f"JSON parsing failed: {e}")
+    else:
+        return query_files
+
+def lazyllm_merge_query(*args: str) -> str:
+    if len(args) == 1:
+        return args[0]
+    for item in args:
+        assert isinstance(item, str), "Merge object must be str!"
+    querys = ''
+    files = []
+    for item in args:
+        decode = decode_query_with_filepaths(item)
+        if isinstance(decode, dict):
+            querys += decode['query']
+            files.extend(decode['files'])
+        else:
+            querys += decode
+    return encode_query_with_filepaths(querys, files)
+
+def _lazyllm_get_file_list(files: Any) -> list:
+    if isinstance(files, str):
+        decode = decode_query_with_filepaths(files)
+        if isinstance(decode, str):
+            return [decode]
+        if isinstance(decode, dict):
+            return decode['files']
+    elif isinstance(files, dict) and set(files.keys()) == {'query', 'files'}:
+        return files['files']
+    elif isinstance(files, list) and all(isinstance(item, str) for item in files):
+        return files
+    else:
+        raise TypeError(f'Not supported type: {type(files)}.')

 def reset_on_pickle(*fields):
     def decorator(cls):
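For orientation, a minimal usage sketch of the new helpers (assuming they are importable as exported above; _lazyllm_get_file_list is private to lazyllm.common.common):

from lazyllm.common import (encode_query_with_filepaths,
                            decode_query_with_filepaths, lazyllm_merge_query)
from lazyllm.common.common import _lazyllm_get_file_list

# Encode a query together with attached file paths into a single string.
packed = encode_query_with_filepaths('describe this image', ['/tmp/cat.png'])
# -> 'lazyllm-query{"query": "describe this image", "files": ["/tmp/cat.png"]}'

# Without files, the query passes through unchanged.
plain = encode_query_with_filepaths('hello')  # -> 'hello'

# Decoding recovers the dict form, or returns a plain string as-is.
assert decode_query_with_filepaths(packed) == {'query': 'describe this image',
                                               'files': ['/tmp/cat.png']}

# Merging concatenates queries (note: with no separator) and file lists.
merged = lazyllm_merge_query(packed, plain)
assert decode_query_with_filepaths(merged)['query'] == 'describe this imagehello'
assert decode_query_with_filepaths(merged)['files'] == ['/tmp/cat.png']

# _lazyllm_get_file_list normalizes str / dict / list inputs to a list of paths.
assert _lazyllm_get_file_list(packed) == ['/tmp/cat.png']
assert _lazyllm_get_file_list(['/tmp/a.png']) == ['/tmp/a.png']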
lazyllm/common/globals.py (2 additions, 1 deletion)

@@ -122,7 +122,8 @@ def __reduce__(self):

 class Globals(object):
     __global_attrs__ = ThreadSafeDict(chat_history={}, global_parameters={},
-                                      bind_args={}, tool_delimiter="<|tool_calls|>")
+                                      bind_args={}, tool_delimiter="<|tool_calls|>",
+                                      lazyllm_files={})

     def __init__(self):
         self.__data = ThreadSafeDict()
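The new lazyllm_files slot gives sessions a place to stash uploaded file paths. A hypothetical access pattern, assuming lazyllm_files behaves like the existing chat_history and global_parameters slots:

import lazyllm

# Each session sees its own copy of the declared global slots (assumed).
lazyllm.globals['lazyllm_files']['upload-1'] = ['/tmp/upload.wav']
print(lazyllm.globals['lazyllm_files'])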
lazyllm/components/__init__.py (2 additions, 1 deletion)

@@ -8,7 +8,7 @@
 from .validate import LazyLLMValidateBase
 from .auto import AutoDeploy, AutoFinetune
 from .utils import ModelManager
-from .formatter import FormatterBase, EmptyFormatter, JsonFormatter
+from .formatter import FormatterBase, EmptyFormatter, JsonFormatter, FileFormatter
 from .stable_diffusion import StableDiffusionDeploy
 from .text_to_speech import TTSDeploy, BarkDeploy, ChatTTSDeploy, MusicGenDeploy
 from .speech_to_text import SenseVoiceDeploy

@@ -29,6 +29,7 @@
     'FormatterBase',
     'EmptyFormatter',
     'JsonFormatter',
+    'FileFormatter',
     'StableDiffusionDeploy',
     'TTSDeploy',
     'BarkDeploy',
lazyllm/components/finetune/llamafactory.py (1 addition)

@@ -145,6 +145,7 @@ def cmd(self, trainset, valset=None) -> str:
         self.temp_yaml_file = self.build_temp_yaml(updated_template_str)

         cmds = f'llamafactory-cli train {self.temp_yaml_file}'
+        cmds += f' 2>&1 | tee {self.target_path}/llm_$(date +"%Y-%m-%d_%H-%M-%S").log'
         if self.temp_export_yaml_file:
             cmds += f' && llamafactory-cli export {self.temp_export_yaml_file}'
         return cmds
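For illustration, the assembled command now tees training output into a timestamped log under target_path. With hypothetical paths, and noting the export clause is appended only when an export YAML is configured, it expands to roughly:

llamafactory-cli train /tmp/train_abc.yaml 2>&1 | tee /path/to/target/llm_2024-11-04_12-00-00.log && llamafactory-cli export /tmp/export_abc.yaml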
lazyllm/components/formatter/__init__.py (3 additions, 1 deletion)

@@ -1,4 +1,5 @@
-from .formatterbase import LazyLLMFormatterBase, LazyLLMFormatterBase as FormatterBase, EmptyFormatter
+from .formatterbase import LazyLLMFormatterBase, LazyLLMFormatterBase as FormatterBase, \
+    EmptyFormatter, FileFormatter
 from .jsonformatter import JsonFormatter
 from .yamlformatter import YamlFormatter

@@ -9,4 +10,5 @@
     'EmptyFormatter',
     'JsonFormatter',
     'YamlFormatter',
+    'FileFormatter',
 ]
lazyllm/components/formatter/formatterbase.py (31 additions, 1 deletion)

@@ -1,4 +1,5 @@
-from ...common import LazyLLMRegisterMetaClass, package
+from ...common import LazyLLMRegisterMetaClass, package, \
+    decode_query_with_filepaths, encode_query_with_filepaths
 from typing import Optional

 def is_number(s: str):

@@ -108,3 +109,32 @@ class PythonFormatter(JsonLikeFormatter): pass
 class EmptyFormatter(LazyLLMFormatterBase):
     def _parse_py_data_by_formatter(self, msg: str):
         return msg
+
+class FileFormatter(LazyLLMFormatterBase):
+
+    def __init__(self, formatter: str = 'decode'):
+        self._mode = formatter.strip().lower()
+        assert self._mode in ('decode', 'encode')
+
+    def _parse_py_data_by_formatter(self, py_data):
+        if isinstance(py_data, package):
+            res = []
+            for i_data in py_data:
+                res.append(self._parse_py_data_by_formatter(i_data))
+            return package(res)
+        elif isinstance(py_data, (str, dict)):
+            return self._decode_one_data(py_data)
+        else:
+            return py_data
+
+    def _decode_one_data(self, py_data):
+        if self._mode == 'decode':
+            if isinstance(py_data, str):
+                return decode_query_with_filepaths(py_data)
+            else:
+                return py_data
+        else:
+            if isinstance(py_data, dict) and 'query' in py_data and 'files' in py_data:
+                return encode_query_with_filepaths(**py_data)
+            else:
+                return py_data
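A small usage sketch of the new formatter (assuming the base class's format() entry point forwards raw strings to _parse_py_data_by_formatter, as the other formatters rely on):

from lazyllm.components import FileFormatter
from lazyllm.common import encode_query_with_filepaths

encoded = encode_query_with_filepaths('caption these', ['/tmp/a.png', '/tmp/b.png'])

# 'decode' unpacks the prefixed string into {'query': ..., 'files': [...]};
# plain strings and other types pass through untouched.
print(FileFormatter('decode').format(encoded))
# {'query': 'caption these', 'files': ['/tmp/a.png', '/tmp/b.png']}

# 'encode' packs a {'query', 'files'} dict back into the prefixed string form.
print(FileFormatter('encode').format(
    {'query': 'caption these', 'files': ['/tmp/a.png', '/tmp/b.png']}) == encoded)  # True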
lazyllm/components/speech_to_text/sense_voice.py (1 addition, 1 deletion)

@@ -39,7 +39,7 @@ def __call__(self, string):
         lazyllm.call_once(self.init_flag, self.load_stt)
         if isinstance(string, dict):
             if string['audio']:
-                string = string['audio'][0] if isinstance(string['audio'], list) else string['audio']
+                string = string['audio'][-1] if isinstance(string['audio'], list) else string['audio']
             else:
                 string = string['inputs']
         assert isinstance(string, str)
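Note on the change above: indexing with [-1] instead of [0] means that when several audio files have accumulated in the input, the most recently uploaded one is transcribed.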
lazyllm/components/stable_diffusion/stable_diffusion3.py (30 additions, 8 deletions)

@@ -1,6 +1,6 @@
 import os
 import json
-import base64
+import uuid
 from PIL import Image
 import numpy as np
 from io import BytesIO

@@ -11,13 +11,14 @@

 class StableDiffusion3(object):
-    def __init__(self, base_sd, source=None, embed_batch_size=30, trust_remote_code=True, init=False):
+    def __init__(self, base_sd, source=None, embed_batch_size=30, trust_remote_code=True, save_path=None, init=False):
         source = lazyllm.config['model_source'] if not source else source
         self.base_sd = ModelManager(source).download(base_sd)
         self.embed_batch_size = embed_batch_size
         self.trust_remote_code = trust_remote_code
         self.sd = None
         self.init_flag = lazyllm.once_flag()
+        self.save_path = save_path if save_path else os.path.join(os.getcwd(), '.temp/sd3')
         if init:
             lazyllm.call_once(self.init_flag, self.load_sd)

@@ -45,6 +46,28 @@ def image_to_base64(image):
     def images_to_base64(images):
         return [StableDiffusion3.image_to_base64(img) for img in images]

+    @staticmethod
+    def image_to_file(image, file_path):
+        if isinstance(image, Image.Image):
+            image.save(file_path, format="PNG")
+        elif isinstance(image, np.ndarray):
+            image = Image.fromarray(image)
+            image.save(file_path, format="PNG")
+        else:
+            raise ValueError("Unsupported image type")
+
+    @staticmethod
+    def images_to_files(images, directory):
+        if not os.path.exists(directory):
+            os.makedirs(directory)
+        unique_id = uuid.uuid4()
+        path_list = []
+        for i, img in enumerate(images):
+            file_path = os.path.join(directory, f'image_{unique_id}_{i}.png')
+            StableDiffusion3.image_to_file(img, file_path)
+            path_list.append(file_path)
+        return path_list
+
     def __call__(self, string):
         lazyllm.call_once(self.init_flag, self.load_sd)
         imgs = self.sd(

@@ -54,17 +77,16 @@ def __call__(self, string):
             guidance_scale=7.0,
             max_sequence_length=512,
         ).images
-        img_base64_list = StableDiffusion3.images_to_base64(imgs)
-        res = {"lazyllm_images": img_base64_list}
-        return json.dumps(res)
+        img_path_list = StableDiffusion3.images_to_files(imgs, self.save_path)
+        return lazyllm.encode_query_with_filepaths(files=img_path_list)

     @classmethod
-    def rebuild(cls, base_sd, embed_batch_size, init):
-        return cls(base_sd, embed_batch_size=embed_batch_size, init=init)
+    def rebuild(cls, base_sd, embed_batch_size, init, save_path):
+        return cls(base_sd, embed_batch_size=embed_batch_size, init=init, save_path=save_path)

     def __reduce__(self):
         init = bool(os.getenv('LAZYLLM_ON_CLOUDPICKLE', None) == 'ON' or self.init_flag)
-        return StableDiffusion3.rebuild, (self.base_sd, self.embed_batch_size, init)
+        return StableDiffusion3.rebuild, (self.base_sd, self.embed_batch_size, init, self.save_path)

 class StableDiffusionDeploy(object):
     message_format = None
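A hypothetical end-to-end sketch of the new return format (the model name, prompt, and save_path are placeholders):

from lazyllm.common import decode_query_with_filepaths

sd = StableDiffusion3('stable-diffusion-3-medium', save_path='/tmp/sd3_out')
result = sd('a corgi wearing sunglasses')

# The module now returns an encoded query string instead of base64 JSON;
# its 'files' field lists the PNGs written under save_path.
print(decode_query_with_filepaths(result)['files'])
# e.g. ['/tmp/sd3_out/image_<uuid>_0.png']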
lazyllm/components/text_to_speech/bark.py (9 additions, 11 deletions)

@@ -1,21 +1,22 @@
 import os
-import json

 import lazyllm
 from lazyllm import LOG
 from lazyllm.thirdparty import torch
 from lazyllm.thirdparty import transformers as tf
 from ..utils.downloader import ModelManager
+from .utils import sounds_to_files

 class Bark(object):

-    def __init__(self, base_path, source=None, trust_remote_code=True, init=False):
+    def __init__(self, base_path, source=None, trust_remote_code=True, save_path=None, init=False):
         source = lazyllm.config['model_source'] if not source else source
         self.base_path = ModelManager(source).download(base_path)
         self.trust_remote_code = trust_remote_code
         self.processor, self.bark = None, None
         self.init_flag = lazyllm.once_flag()
         self.device = 'cpu'
+        self.save_path = save_path if save_path else os.path.join(os.getcwd(), '.temp/bark')
         if init:
             lazyllm.call_once(self.init_flag, self.load_bark)

@@ -38,20 +39,17 @@ def __call__(self, string):
         else:
             raise TypeError(f"Not support input type:{type(string)}, requires str or dict.")
         inputs = self.processor(query, voice_preset=voice_preset).to(self.device)
-        speech = self.bark.generate(**inputs) * 32767
-        res = {'lazyllm_sounds': (
-            self.bark.generation_config.sample_rate,
-            speech.cpu().numpy().squeeze().tolist()
-        )}
-        return json.dumps(res)
+        speech = self.bark.generate(**inputs).cpu().numpy().squeeze()
+        file_path = sounds_to_files([speech], self.save_path, self.bark.generation_config.sample_rate)
+        return lazyllm.encode_query_with_filepaths(files=file_path)

     @classmethod
-    def rebuild(cls, base_path, init):
-        return cls(base_path, init=init)
+    def rebuild(cls, base_path, init, save_path):
+        return cls(base_path, init=init, save_path=save_path)

     def __reduce__(self):
         init = bool(os.getenv('LAZYLLM_ON_CLOUDPICKLE', None) == 'ON' or self.init_flag)
-        return Bark.rebuild, (self.base_path, init)
+        return Bark.rebuild, (self.base_path, init, self.save_path)

 class BarkDeploy(object):
     keys_name_handle = {
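The sounds_to_files helper imported from .utils is added elsewhere in this PR and not shown in this diff. A minimal sketch of its plausible shape, inferred from the two call sites (the 3-argument call here and the 2-argument call in chattts.py below); the PCM scaling, default sample rate, and WAV format are assumptions:

import os
import uuid

import numpy as np
import scipy.io.wavfile as wavfile

def sounds_to_files(sounds, directory, sample_rate=24000):
    # Write each waveform to a uniquely named WAV file; return the paths.
    os.makedirs(directory, exist_ok=True)
    unique_id = uuid.uuid4()
    paths = []
    for i, sound in enumerate(sounds):
        # Assumed: scale a [-1, 1] float waveform to 16-bit PCM before writing.
        pcm = (np.asarray(sound) * 32767).astype(np.int16)
        path = os.path.join(directory, f'sound_{unique_id}_{i}.wav')
        wavfile.write(path, sample_rate, pcm)
        paths.append(path)
    return paths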
lazyllm/components/text_to_speech/chattts.py (8 additions, 7 deletions)

@@ -1,21 +1,22 @@
 import os
-import json

 import lazyllm
 from lazyllm import LOG
 from lazyllm.thirdparty import torch, ChatTTS
 from ..utils.downloader import ModelManager
+from .utils import sounds_to_files


 class ChatTTSModule(object):

-    def __init__(self, base_path, source=None, init=False):
+    def __init__(self, base_path, source=None, save_path=None, init=False):
         source = lazyllm.config['model_source'] if not source else source
         self.base_path = ModelManager(source).download(base_path)
         self.model, self.spk = None, None
         self.init_flag = lazyllm.once_flag()
         self.device = 'cpu'
         self.seed = 1024
+        self.save_path = save_path if save_path else os.path.join(os.getcwd(), '.temp/chattts')
         if init:
             lazyllm.call_once(self.init_flag, self.load_tts)

@@ -56,16 +57,16 @@ def __call__(self, string):
             params_refine_text=params_refine_text,
             params_infer_code=params_infer_code,
         )
-        res = {'lazyllm_sounds': (24000, (speech[0].squeeze() * 32767).tolist())}
-        return json.dumps(res)
+        file_path = sounds_to_files(speech[0], self.save_path)
+        return lazyllm.encode_query_with_filepaths(files=file_path)

     @classmethod
-    def rebuild(cls, base_path, init):
-        return cls(base_path, init=init)
+    def rebuild(cls, base_path, init, save_path):
+        return cls(base_path, init=init, save_path=save_path)

     def __reduce__(self):
         init = bool(os.getenv('LAZYLLM_ON_CLOUDPICKLE', None) == 'ON' or self.init_flag)
-        return ChatTTSModule.rebuild, (self.base_path, init)
+        return ChatTTSModule.rebuild, (self.base_path, init, self.save_path)

 class ChatTTSDeploy(object):
     keys_name_handle = {