Upgrade data_path for query_with_files #330

Merged: 6 commits, Nov 4, 2024
Changes from 5 commits
examples/stt_sensevoice.py (5 additions, 2 deletions)

@@ -16,5 +16,8 @@
 chat = lazyllm.TrainableModule('SenseVoiceSmall')

 if __name__ == '__main__':
-    # Note that audio is enabled here
-    lazyllm.WebModule(chat, port=8847, audio=True).start().wait()
+    # Note:
+    # 1. Audio is enabled here.
+    # 2. If `files_target` is not set, all modules can access the input file;
+    #    if it is set, only the specified modules can.
+    lazyllm.WebModule(chat, port=8847, audio=True, files_target=chat).start().wait()
examples/tts_bark.py (1 addition, 1 deletion)

@@ -15,7 +15,7 @@
 m.name = "tts"

 if __name__ == '__main__':
-    m.WebModule(
+    lazyllm.WebModule(
         m,
         port=8847,
         components={
lazyllm/__init__.py (2 additions, 1 deletion)

@@ -7,7 +7,7 @@
 from .flow import *  # noqa F403
 from .components import (LazyLLMDataprocBase, LazyLLMFinetuneBase, LazyLLMDeployBase,
                          LazyLLMValidateBase, register as component_register, Prompter,
-                         AlpacaPrompter, ChatPrompter, FastapiApp, JsonFormatter)
+                         AlpacaPrompter, ChatPrompter, FastapiApp, JsonFormatter, FileFormatter)

 from .module import (ModuleBase, UrlModule, TrainableModule, ActionModule,
                      ServerModule, TrialModule, register as module_register,

@@ -37,6 +37,7 @@
     'ChatPrompter',
     'FastapiApp',
     'JsonFormatter',
+    'FileFormatter',

     # launcher
     'LazyLLMLaunchersBase',  # empty, slurm, sco
lazyllm/common/__init__.py (6 additions)

@@ -2,6 +2,7 @@
 from .common import package, kwargs, arguments, LazyLLMCMD, timeout, final, ReadOnlyWrapper, DynamicDescriptor
 from .common import FlatList, Identity, ResultCollector, ArgsDict, CaseInsensitiveDict
 from .common import ReprRule, make_repr, modify_repr
+from .common import encode_query_with_filepaths, decode_query_with_filepaths, lazyllm_merge_query
 from .common import once_flag, call_once, once_wrapper, singleton, reset_on_pickle
 from .option import Option, OptionIter
 from .threading import Thread, ThreadPoolExecutor

@@ -82,4 +83,9 @@

     # file-system queue
     'FileSystemQueue',
+
+    # query with file_path
+    'encode_query_with_filepaths',
+    'decode_query_with_filepaths',
+    'lazyllm_merge_query',
 ]
lazyllm/common/common.py (55 additions, 1 deletion)

@@ -1,7 +1,8 @@
 import re
 import builtins
 import json
 import typing
-from typing import Any, Callable
+from typing import Any, Callable, List, Union
 from contextlib import contextmanager
 import copy
 import threading

@@ -376,6 +377,59 @@ def get_instance(*args, **kwargs):
         return instances[cls]
     return get_instance

+LAZYLLM_QUERY_PREFIX = 'lazyllm-query'
+
+def encode_query_with_filepaths(query: str = None, files: List[str] = None) -> str:
+    query = query if query else ''
+    query_with_docs = {'query': query, 'files': files}
+    if files:
+        assert isinstance(files, list), "files must be a list."
+        assert all(isinstance(item, str) for item in files), "All items in files must be strings"
+        return LAZYLLM_QUERY_PREFIX + json.dumps(query_with_docs)
+    else:
+        return query
+
+def decode_query_with_filepaths(query_files: str) -> Union[dict, str]:
+    assert isinstance(query_files, str), "query_files must be a str."
+    query_files = query_files.strip()
+    if query_files.startswith(LAZYLLM_QUERY_PREFIX):
+        try:
+            obj = json.loads(query_files[len(LAZYLLM_QUERY_PREFIX):])
+            return obj
+        except json.JSONDecodeError as e:
+            raise ValueError(f"JSON parsing failed: {e}")
+    else:
+        return query_files
+
+def lazyllm_merge_query(*args: str) -> str:
+    if len(args) == 1:
+        return args[0]
+    for item in args:
+        assert isinstance(item, str), "Merge object must be str!"
+    querys = ''
+    files = []
+    for item in args:
+        decode = decode_query_with_filepaths(item)
+        if isinstance(decode, dict):
+            querys += decode['query']
+            files.extend(decode['files'])
+        else:
+            querys += decode
+    return encode_query_with_filepaths(querys, files)
+
+def _lazyllm_get_file_list(files: Any) -> list:
+    if isinstance(files, str):
+        decode = decode_query_with_filepaths(files)
+        if isinstance(decode, str):
+            return [decode]
+        if isinstance(decode, dict):
+            return decode['files']
+    elif isinstance(files, dict) and set(files.keys()) == {'query', 'files'}:
+        return files['files']
+    elif isinstance(files, list) and all(isinstance(item, str) for item in files):
+        return files
+    else:
+        raise TypeError(f'Not supported type: {type(files)}.')

 def reset_on_pickle(*fields):
     def decorator(cls):
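For orientation, a minimal usage sketch of the new helpers (assuming they are importable as exported above; _lazyllm_get_file_list is private to lazyllm.common.common):

from lazyllm.common import (encode_query_with_filepaths,
                            decode_query_with_filepaths, lazyllm_merge_query)
from lazyllm.common.common import _lazyllm_get_file_list

# Encode a query together with attached file paths into a single string.
packed = encode_query_with_filepaths('describe this image', ['/tmp/cat.png'])
# -> 'lazyllm-query{"query": "describe this image", "files": ["/tmp/cat.png"]}'

# Without files, the query passes through unchanged.
plain = encode_query_with_filepaths('hello')  # -> 'hello'

# Decoding recovers the dict form, or returns a plain string as-is.
assert decode_query_with_filepaths(packed) == {'query': 'describe this image',
                                               'files': ['/tmp/cat.png']}

# Merging concatenates queries (note: with no separator) and file lists.
merged = lazyllm_merge_query(packed, plain)
assert decode_query_with_filepaths(merged)['query'] == 'describe this imagehello'
assert decode_query_with_filepaths(merged)['files'] == ['/tmp/cat.png']

# _lazyllm_get_file_list normalizes str / dict / list inputs to a list of paths.
assert _lazyllm_get_file_list(packed) == ['/tmp/cat.png']
assert _lazyllm_get_file_list(['/tmp/a.png']) == ['/tmp/a.png']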
lazyllm/common/globals.py (2 additions, 1 deletion)

@@ -122,7 +122,8 @@ def __reduce__(self):

 class Globals(object):
     __global_attrs__ = ThreadSafeDict(chat_history={}, global_parameters={},
-                                      bind_args={}, tool_delimiter="<|tool_calls|>")
+                                      bind_args={}, tool_delimiter="<|tool_calls|>",
+                                      lazyllm_files={})

     def __init__(self):
         self.__data = ThreadSafeDict()
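The new lazyllm_files slot gives sessions a place to stash uploaded file paths. A hypothetical access pattern, assuming lazyllm_files behaves like the existing chat_history and global_parameters slots:

import lazyllm

# Each session sees its own copy of the declared global slots (assumed).
lazyllm.globals['lazyllm_files']['upload-1'] = ['/tmp/upload.wav']
print(lazyllm.globals['lazyllm_files'])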
lazyllm/components/__init__.py (2 additions, 1 deletion)

@@ -8,7 +8,7 @@
 from .validate import LazyLLMValidateBase
 from .auto import AutoDeploy, AutoFinetune
 from .utils import ModelManager
-from .formatter import FormatterBase, EmptyFormatter, JsonFormatter
+from .formatter import FormatterBase, EmptyFormatter, JsonFormatter, FileFormatter
 from .stable_diffusion import StableDiffusionDeploy
 from .text_to_speech import TTSDeploy, BarkDeploy, ChatTTSDeploy, MusicGenDeploy
 from .speech_to_text import SenseVoiceDeploy

@@ -29,6 +29,7 @@
     'FormatterBase',
     'EmptyFormatter',
     'JsonFormatter',
+    'FileFormatter',
     'StableDiffusionDeploy',
     'TTSDeploy',
     'BarkDeploy',
lazyllm/components/finetune/llamafactory.py (1 addition)

@@ -145,6 +145,7 @@ def cmd(self, trainset, valset=None) -> str:
         self.temp_yaml_file = self.build_temp_yaml(updated_template_str)

         cmds = f'llamafactory-cli train {self.temp_yaml_file}'
+        cmds += f' 2>&1 | tee {self.target_path}/llm_$(date +"%Y-%m-%d_%H-%M-%S").log'
         if self.temp_export_yaml_file:
             cmds += f' && llamafactory-cli export {self.temp_export_yaml_file}'
         return cmds
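For illustration, the assembled command now tees training output into a timestamped log under target_path. With hypothetical paths, and noting the export clause is appended only when an export YAML is configured, it expands to roughly:

llamafactory-cli train /tmp/train_abc.yaml 2>&1 | tee /path/to/target/llm_2024-11-04_12-00-00.log && llamafactory-cli export /tmp/export_abc.yaml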
lazyllm/components/formatter/__init__.py (3 additions, 1 deletion)

@@ -1,4 +1,5 @@
-from .formatterbase import LazyLLMFormatterBase, LazyLLMFormatterBase as FormatterBase, EmptyFormatter
+from .formatterbase import LazyLLMFormatterBase, LazyLLMFormatterBase as FormatterBase, \
+    EmptyFormatter, FileFormatter
 from .jsonformatter import JsonFormatter
 from .yamlformatter import YamlFormatter

@@ -9,4 +10,5 @@
     'EmptyFormatter',
     'JsonFormatter',
     'YamlFormatter',
+    'FileFormatter',
 ]
lazyllm/components/formatter/formatterbase.py (31 additions, 1 deletion)

@@ -1,4 +1,5 @@
-from ...common import LazyLLMRegisterMetaClass, package
+from ...common import LazyLLMRegisterMetaClass, package, \
+    decode_query_with_filepaths, encode_query_with_filepaths
 from typing import Optional

 def is_number(s: str):

@@ -108,3 +109,32 @@ class PythonFormatter(JsonLikeFormatter): pass
 class EmptyFormatter(LazyLLMFormatterBase):
     def _parse_py_data_by_formatter(self, msg: str):
         return msg
+
+class FileFormatter(LazyLLMFormatterBase):
+
+    def __init__(self, formatter: str = 'decode'):
+        self._mode = formatter.strip().lower()
+        assert self._mode in ('decode', 'encode')
+
+    def _parse_py_data_by_formatter(self, py_data):
+        if isinstance(py_data, package):
+            res = []
+            for i_data in py_data:
+                res.append(self._parse_py_data_by_formatter(i_data))
+            return package(res)
+        elif isinstance(py_data, (str, dict)):
+            return self._decode_one_data(py_data)
+        else:
+            return py_data
+
+    def _decode_one_data(self, py_data):
+        if self._mode == 'decode':
+            if isinstance(py_data, str):
+                return decode_query_with_filepaths(py_data)
+            else:
+                return py_data
+        else:
+            if isinstance(py_data, dict) and 'query' in py_data and 'files' in py_data:
+                return encode_query_with_filepaths(**py_data)
+            else:
+                return py_data
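A small usage sketch of the new formatter (assuming the base class's format() entry point forwards raw strings to _parse_py_data_by_formatter, as the other formatters rely on):

from lazyllm.components import FileFormatter
from lazyllm.common import encode_query_with_filepaths

encoded = encode_query_with_filepaths('caption these', ['/tmp/a.png', '/tmp/b.png'])

# 'decode' unpacks the prefixed string into {'query': ..., 'files': [...]};
# plain strings and other types pass through untouched.
print(FileFormatter('decode').format(encoded))
# {'query': 'caption these', 'files': ['/tmp/a.png', '/tmp/b.png']}

# 'encode' packs a {'query', 'files'} dict back into the prefixed string form.
print(FileFormatter('encode').format(
    {'query': 'caption these', 'files': ['/tmp/a.png', '/tmp/b.png']}) == encoded)  # True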
lazyllm/components/speech_to_text/sense_voice.py (1 addition, 1 deletion)

@@ -39,7 +39,7 @@ def __call__(self, string):
         lazyllm.call_once(self.init_flag, self.load_stt)
         if isinstance(string, dict):
             if string['audio']:
-                string = string['audio'][0] if isinstance(string['audio'], list) else string['audio']
+                string = string['audio'][-1] if isinstance(string['audio'], list) else string['audio']
             else:
                 string = string['inputs']
         assert isinstance(string, str)
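Note on the change above: indexing with [-1] instead of [0] means that when several audio files have accumulated in the input, the most recently uploaded one is transcribed.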
lazyllm/components/stable_diffusion/stable_diffusion3.py (30 additions, 8 deletions)

@@ -1,6 +1,6 @@
 import os
 import json
-import base64
+import uuid
 from PIL import Image
 import numpy as np
 from io import BytesIO

@@ -11,13 +11,14 @@

 class StableDiffusion3(object):
-    def __init__(self, base_sd, source=None, embed_batch_size=30, trust_remote_code=True, init=False):
+    def __init__(self, base_sd, source=None, embed_batch_size=30, trust_remote_code=True, save_path=None, init=False):
         source = lazyllm.config['model_source'] if not source else source
         self.base_sd = ModelManager(source).download(base_sd)
         self.embed_batch_size = embed_batch_size
         self.trust_remote_code = trust_remote_code
         self.sd = None
         self.init_flag = lazyllm.once_flag()
+        self.save_path = save_path if save_path else os.path.join(os.getcwd(), '.temp/sd3')
         if init:
             lazyllm.call_once(self.init_flag, self.load_sd)

@@ -45,6 +46,28 @@ def image_to_base64(image):
     def images_to_base64(images):
         return [StableDiffusion3.image_to_base64(img) for img in images]

+    @staticmethod
+    def image_to_file(image, file_path):
+        if isinstance(image, Image.Image):
+            image.save(file_path, format="PNG")
+        elif isinstance(image, np.ndarray):
+            image = Image.fromarray(image)
+            image.save(file_path, format="PNG")
+        else:
+            raise ValueError("Unsupported image type")
+
+    @staticmethod
+    def images_to_files(images, directory):
+        if not os.path.exists(directory):
+            os.makedirs(directory)
+        unique_id = uuid.uuid4()
+        path_list = []
+        for i, img in enumerate(images):
+            file_path = os.path.join(directory, f'image_{unique_id}_{i}.png')
+            StableDiffusion3.image_to_file(img, file_path)
+            path_list.append(file_path)
+        return path_list
+
     def __call__(self, string):
         lazyllm.call_once(self.init_flag, self.load_sd)
         imgs = self.sd(

@@ -54,17 +77,16 @@ def __call__(self, string):
             guidance_scale=7.0,
             max_sequence_length=512,
         ).images
-        img_base64_list = StableDiffusion3.images_to_base64(imgs)
-        res = {"lazyllm_images": img_base64_list}
-        return json.dumps(res)
+        img_path_list = StableDiffusion3.images_to_files(imgs, self.save_path)
+        return lazyllm.encode_query_with_filepaths(files=img_path_list)

     @classmethod
-    def rebuild(cls, base_sd, embed_batch_size, init):
-        return cls(base_sd, embed_batch_size=embed_batch_size, init=init)
+    def rebuild(cls, base_sd, embed_batch_size, init, save_path):
+        return cls(base_sd, embed_batch_size=embed_batch_size, init=init, save_path=save_path)

     def __reduce__(self):
         init = bool(os.getenv('LAZYLLM_ON_CLOUDPICKLE', None) == 'ON' or self.init_flag)
-        return StableDiffusion3.rebuild, (self.base_sd, self.embed_batch_size, init)
+        return StableDiffusion3.rebuild, (self.base_sd, self.embed_batch_size, init, self.save_path)

 class StableDiffusionDeploy(object):
     message_format = None
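A hypothetical end-to-end sketch of the new return format (the model name, prompt, and save_path are placeholders):

from lazyllm.common import decode_query_with_filepaths

sd = StableDiffusion3('stable-diffusion-3-medium', save_path='/tmp/sd3_out')
result = sd('a corgi wearing sunglasses')

# The module now returns an encoded query string instead of base64 JSON;
# its 'files' field lists the PNGs written under save_path.
print(decode_query_with_filepaths(result)['files'])
# e.g. ['/tmp/sd3_out/image_<uuid>_0.png']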
lazyllm/components/text_to_speech/bark.py (9 additions, 11 deletions)

@@ -1,21 +1,22 @@
 import os
-import json

 import lazyllm
 from lazyllm import LOG
 from lazyllm.thirdparty import torch
 from lazyllm.thirdparty import transformers as tf
 from ..utils.downloader import ModelManager
+from .utils import sounds_to_files

 class Bark(object):

-    def __init__(self, base_path, source=None, trust_remote_code=True, init=False):
+    def __init__(self, base_path, source=None, trust_remote_code=True, save_path=None, init=False):
         source = lazyllm.config['model_source'] if not source else source
         self.base_path = ModelManager(source).download(base_path)
         self.trust_remote_code = trust_remote_code
         self.processor, self.bark = None, None
         self.init_flag = lazyllm.once_flag()
         self.device = 'cpu'
+        self.save_path = save_path if save_path else os.path.join(os.getcwd(), '.temp/bark')
         if init:
             lazyllm.call_once(self.init_flag, self.load_bark)

@@ -38,20 +39,17 @@ def __call__(self, string):
         else:
             raise TypeError(f"Not support input type:{type(string)}, requires str or dict.")
         inputs = self.processor(query, voice_preset=voice_preset).to(self.device)
-        speech = self.bark.generate(**inputs) * 32767
-        res = {'lazyllm_sounds': (
-            self.bark.generation_config.sample_rate,
-            speech.cpu().numpy().squeeze().tolist()
-        )}
-        return json.dumps(res)
+        speech = self.bark.generate(**inputs).cpu().numpy().squeeze()
+        file_path = sounds_to_files([speech], self.save_path, self.bark.generation_config.sample_rate)
+        return lazyllm.encode_query_with_filepaths(files=file_path)

     @classmethod
-    def rebuild(cls, base_path, init):
-        return cls(base_path, init=init)
+    def rebuild(cls, base_path, init, save_path):
+        return cls(base_path, init=init, save_path=save_path)

     def __reduce__(self):
         init = bool(os.getenv('LAZYLLM_ON_CLOUDPICKLE', None) == 'ON' or self.init_flag)
-        return Bark.rebuild, (self.base_path, init)
+        return Bark.rebuild, (self.base_path, init, self.save_path)

 class BarkDeploy(object):
     keys_name_handle = {
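The sounds_to_files helper imported from .utils is added elsewhere in this PR and not shown in this diff. A minimal sketch of its plausible shape, inferred from the two call sites (the 3-argument call here and the 2-argument call in chattts.py below); the PCM scaling, default sample rate, and WAV format are assumptions:

import os
import uuid

import numpy as np
import scipy.io.wavfile as wavfile

def sounds_to_files(sounds, directory, sample_rate=24000):
    # Write each waveform to a uniquely named WAV file; return the paths.
    os.makedirs(directory, exist_ok=True)
    unique_id = uuid.uuid4()
    paths = []
    for i, sound in enumerate(sounds):
        # Assumed: scale a [-1, 1] float waveform to 16-bit PCM before writing.
        pcm = (np.asarray(sound) * 32767).astype(np.int16)
        path = os.path.join(directory, f'sound_{unique_id}_{i}.wav')
        wavfile.write(path, sample_rate, pcm)
        paths.append(path)
    return paths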
lazyllm/components/text_to_speech/chattts.py (8 additions, 7 deletions)

@@ -1,21 +1,22 @@
 import os
-import json

 import lazyllm
 from lazyllm import LOG
 from lazyllm.thirdparty import torch, ChatTTS
 from ..utils.downloader import ModelManager
+from .utils import sounds_to_files


 class ChatTTSModule(object):

-    def __init__(self, base_path, source=None, init=False):
+    def __init__(self, base_path, source=None, save_path=None, init=False):
         source = lazyllm.config['model_source'] if not source else source
         self.base_path = ModelManager(source).download(base_path)
         self.model, self.spk = None, None
         self.init_flag = lazyllm.once_flag()
         self.device = 'cpu'
         self.seed = 1024
+        self.save_path = save_path if save_path else os.path.join(os.getcwd(), '.temp/chattts')
         if init:
             lazyllm.call_once(self.init_flag, self.load_tts)

@@ -56,16 +57,16 @@ def __call__(self, string):
             params_refine_text=params_refine_text,
             params_infer_code=params_infer_code,
         )
-        res = {'lazyllm_sounds': (24000, (speech[0].squeeze() * 32767).tolist())}
-        return json.dumps(res)
+        file_path = sounds_to_files(speech[0], self.save_path)
+        return lazyllm.encode_query_with_filepaths(files=file_path)

     @classmethod
-    def rebuild(cls, base_path, init):
-        return cls(base_path, init=init)
+    def rebuild(cls, base_path, init, save_path):
+        return cls(base_path, init=init, save_path=save_path)

     def __reduce__(self):
         init = bool(os.getenv('LAZYLLM_ON_CLOUDPICKLE', None) == 'ON' or self.init_flag)
-        return ChatTTSModule.rebuild, (self.base_path, init)
+        return ChatTTSModule.rebuild, (self.base_path, init, self.save_path)

 class ChatTTSDeploy(object):
     keys_name_handle = {