From b60db379d136cc5e10bef800fc9b510dcce35494 Mon Sep 17 00:00:00 2001
From: "suluyan.sly" <suluyan.sly@alibaba-inc.com>
Date: Thu, 4 Jan 2024 22:35:44 +0800
Subject: [PATCH 1/3] refactor image_gen tool: rebase TextToImageTool on BaseTool

---
 modelscope_agent/agent.py                    |  2 +-
 modelscope_agent/tools/text_to_image_tool.py | 94 ++++----------------
 tests/tools/test_image_gen.py                | 38 ++++++++
 tests/utils.py                               | 30 +------
 4 files changed, 60 insertions(+), 104 deletions(-)
 create mode 100644 tests/tools/test_image_gen.py

diff --git a/modelscope_agent/agent.py b/modelscope_agent/agent.py
index 7aec9845..ed538606 100644
--- a/modelscope_agent/agent.py
+++ b/modelscope_agent/agent.py
@@ -61,7 +61,7 @@ def run(self, *args, **kwargs) -> Union[str, Iterator[str]]:
 
     @abstractmethod
     def _run(self, *args, **kwargs) -> Union[str, Iterator[str]]:
-        raise NotImplementedError
+        pass
 
     def _call_llm(self,
                   prompt: Optional[str] = None,
diff --git a/modelscope_agent/tools/text_to_image_tool.py b/modelscope_agent/tools/text_to_image_tool.py
index d65fb13f..c50162a6 100644
--- a/modelscope_agent/tools/text_to_image_tool.py
+++ b/modelscope_agent/tools/text_to_image_tool.py
@@ -5,61 +5,46 @@
 import dashscope
 import json
 from dashscope import ImageSynthesis
-from modelscope_agent.output_wrapper import ImageWrapper
 
-from modelscope.utils.constant import Tasks
-from .pipeline_tool import ModelscopePipelineTool
+from modelscope_agent.tools.base import BaseTool, register_tool
 
 
-class TextToImageTool(ModelscopePipelineTool):
-    default_model = 'AI-ModelScope/stable-diffusion-xl-base-1.0'
+@register_tool('image_gen')
+class TextToImageTool(BaseTool):
     description = 'AI绘画（图像生成）服务，输入文本描述和图像分辨率，返回根据文本信息绘制的图片URL。'
     name = 'image_gen'
     parameters: list = [{
         'name': 'text',
         'description': '详细描述了希望生成的图像具有什么内容，例如人物、环境、动作等细节描述',
         'required': True,
-        'schema': {
-            'type': 'string'
-        }
+        'type': 'string'
     }, {
         'name': 'resolution',
         'description':
         '格式是 数字*数字，表示希望生成的图像的分辨率大小，选项有[1024*1024, 720*1280, 1280*720]',
         'required': True,
-        'schema': {
-            'type': 'string'
-        }
+        'type': 'string'
     }]
-    model_revision = 'v1.0.0'
-    task = Tasks.text_to_image_synthesis
 
-    # def _remote_parse_input(self, *args, **kwargs):
-    #     params = {
-    #         'input': {
-    #             'text': kwargs['text'],
-    #             'resolution': kwargs['resolution']
-    #         }
-    #     }
-    #     if kwargs.get('seed', None):
-    #         params['input']['seed'] = kwargs['seed']
-    #     return params
+    def call(self, params: str, **kwargs) -> str:
+        params = self._verify_args(params)
+        if isinstance(params, str):
+            return 'Parameter Error'
 
-    def _remote_call(self, *args, **kwargs):
-
-        if ('resolution' in kwargs) and (kwargs['resolution'] in [
+        if params['resolution'] in [
                 '1024*1024', '720*1280', '1280*720'
-        ]):
-            resolution = kwargs['resolution']
+        ]:
+            resolution = params['resolution']
         else:
             resolution = '1280*720'
 
-        prompt = kwargs['text']
-        seed = kwargs.get('seed', None)
+        prompt = params['text']
         if prompt is None:
             return None
-        dashscope.api_key = os.getenv('DASHSCOPE_API_KEY')
+        seed = kwargs.get('seed', None)
         model = kwargs.get('model', 'wanx-v1')
+        dashscope.api_key = os.getenv('DASHSCOPE_API_KEY')
+
         response = ImageSynthesis.call(
             model=model,
             prompt=prompt,
@@ -67,49 +52,6 @@ def _remote_call(self, *args, **kwargs):
             size=resolution,
             steps=10,
             seed=seed)
-        final_result = self._parse_output(response, remote=True)
-        return final_result
-
-    def _local_parse_input(self, *args, **kwargs):
-
-        text = kwargs.pop('text', '')
-
-        parsed_args = ({'text': text}, )
+        image_url = response.output['results'][0]['url']
+        return image_url
 
-        return parsed_args, {}
-
-    def _parse_output(self, origin_result, remote=True):
-        if not remote:
-            image = cv2.cvtColor(origin_result['output_imgs'][0],
-                                 cv2.COLOR_BGR2RGB)
-        else:
-            image = origin_result.output['results'][0]['url']
-
-        return {'result': ImageWrapper(image)}
-
-    def _handle_input_fallback(self, **kwargs):
-        """
-        an alternative method is to parse image is that get item between { and }
-        for last try
-
-        :param fallback_text:
-        :return: language, cocde
-        """
-
-        text = kwargs.get('text', None)
-        fallback = kwargs.get('fallback', None)
-
-        if text:
-            return text
-        elif fallback:
-            try:
-                text = fallback
-                json_block = re.search(r'\{([\s\S]+)\}', text)  # noqa W^05
-                if json_block:
-                    result = json_block.group(1)
-                    result_json = json.loads('{' + result + '}')
-                    return result_json['text']
-            except ValueError:
-                return text
-        else:
-            return text
diff --git a/tests/tools/test_image_gen.py b/tests/tools/test_image_gen.py
new file mode 100644
index 00000000..9c68b20a
--- /dev/null
+++ b/tests/tools/test_image_gen.py
@@ -0,0 +1,38 @@
+from modelscope_agent.tools.text_to_image_tool import TextToImageTool
+from modelscope_agent.prompts.role_play import RolePlay  # NOQA
+from modelscope_agent.agent import Agent
+
+
+def test_image_gen():
+    params = """{'text': '画一只小猫', 'resolution': '1024*1024'}"""
+
+    t2i = TextToImageTool()
+    res = t2i.call(params)
+    assert(res.startswith("http"))
+
+
+def test_image_gen_wrong_resolution():
+    params = """{'text': '画一只小猫', 'resolution': '1024'}"""
+
+    t2i = TextToImageTool()
+    res = t2i.call(params)
+    assert(res.startswith("http"))
+
+def test_image_gen_role():
+    role_template = '你扮演一个画家，用尽可能丰富的描述调用工具绘制图像。'
+    
+    llm_config = {'model': 'qwen-max', 'model_server': 'dashscope'}
+    
+    # input tool args
+    function_list = [{'name': 'image_gen'}]
+    
+    bot = RolePlay(
+        function_list=function_list, llm=llm_config, instruction=role_template)
+    
+    response = bot.run('朝阳区天气怎样？')
+    
+    text = ''
+    for chunk in response:
+        text += chunk
+    print(text)
+
diff --git a/tests/utils.py b/tests/utils.py
index 13c1ccf5..b45d1b19 100644
--- a/tests/utils.py
+++ b/tests/utils.py
@@ -1,10 +1,8 @@
-from modelscope_agent.action_parser import ActionParser
-from modelscope_agent.llm import LLM
-from modelscope_agent.prompts import PromptGenerator
-from modelscope_agent.tools import Tool
+from agent_scope.llm import BaseChatModel
+from agent_scope.tools import Tool
 
 
-class MockLLM(LLM):
+class MockLLM(BaseChatModel):
 
     def __init__(self, responses=['mock_llm_response']):
         super().__init__({})
@@ -21,28 +19,6 @@ def stream_generate(self, prompt: str, function_list=[], **kwargs) -> str:
         yield 'mock llm response'
 
 
-class MockPromptGenerator(PromptGenerator):
-
-    def __init__(self):
-        super().__init__()
-
-
-class MockOutParser(ActionParser):
-
-    def __init__(self, action, args, count=1):
-        super().__init__()
-        self.action = action
-        self.args = args
-        self.count = count
-
-    def parse_response(self, response: str):
-        if self.count > 0:
-            self.count -= 1
-            return self.action, self.args
-        else:
-            return None, None
-
-
 class MockTool(Tool):
 
     def __init__(self, name, func, description, parameters=[]):

From fd6537e9e1341366e87211886f265479f1f587a6 Mon Sep 17 00:00:00 2001
From: "suluyan.sly" <suluyan.sly@alibaba-inc.com>
Date: Thu, 4 Jan 2024 22:44:46 +0800
Subject: [PATCH 2/3] fix pre-commit lint errors (quotes, blank lines, trailing whitespace)

---
 modelscope_agent/tools/text_to_image_tool.py |  6 +-----
 tests/tools/test_image_gen.py                | 19 ++++++++++---------
 2 files changed, 11 insertions(+), 14 deletions(-)

diff --git a/modelscope_agent/tools/text_to_image_tool.py b/modelscope_agent/tools/text_to_image_tool.py
index c50162a6..f17b3fc6 100644
--- a/modelscope_agent/tools/text_to_image_tool.py
+++ b/modelscope_agent/tools/text_to_image_tool.py
@@ -5,7 +5,6 @@
 import dashscope
 import json
 from dashscope import ImageSynthesis
-
 from modelscope_agent.tools.base import BaseTool, register_tool
 
 
@@ -31,9 +30,7 @@ def call(self, params: str, **kwargs) -> str:
         if isinstance(params, str):
             return 'Parameter Error'
 
-        if params['resolution'] in [
-                '1024*1024', '720*1280', '1280*720'
-        ]:
+        if params['resolution'] in ['1024*1024', '720*1280', '1280*720']:
             resolution = params['resolution']
         else:
             resolution = '1280*720'
@@ -54,4 +51,3 @@ def call(self, params: str, **kwargs) -> str:
             seed=seed)
         image_url = response.output['results'][0]['url']
         return image_url
-
diff --git a/tests/tools/test_image_gen.py b/tests/tools/test_image_gen.py
index 9c68b20a..b76b401a 100644
--- a/tests/tools/test_image_gen.py
+++ b/tests/tools/test_image_gen.py
@@ -1,6 +1,7 @@
+from modelscope_agent.agent import Agent
 from modelscope_agent.tools.text_to_image_tool import TextToImageTool
+
 from modelscope_agent.prompts.role_play import RolePlay  # NOQA
-from modelscope_agent.agent import Agent
 
 
 def test_image_gen():
@@ -8,7 +9,7 @@ def test_image_gen():
 
     t2i = TextToImageTool()
     res = t2i.call(params)
-    assert(res.startswith("http"))
+    assert (res.startswith('http'))
 
 
 def test_image_gen_wrong_resolution():
@@ -16,23 +17,23 @@ def test_image_gen_wrong_resolution():
 
     t2i = TextToImageTool()
     res = t2i.call(params)
-    assert(res.startswith("http"))
+    assert (res.startswith('http'))
+
 
 def test_image_gen_role():
     role_template = '你扮演一个画家，用尽可能丰富的描述调用工具绘制图像。'
-    
+
     llm_config = {'model': 'qwen-max', 'model_server': 'dashscope'}
-    
+
     # input tool args
     function_list = [{'name': 'image_gen'}]
-    
+
     bot = RolePlay(
         function_list=function_list, llm=llm_config, instruction=role_template)
-    
+
     response = bot.run('朝阳区天气怎样？')
-    
+
     text = ''
     for chunk in response:
         text += chunk
     print(text)
-

From 4689856d6e34d007286cd0491017d356193be285 Mon Sep 17 00:00:00 2001
From: "suluyan.sly" <suluyan.sly@alibaba-inc.com>
Date: Fri, 5 Jan 2024 09:17:54 +0800
Subject: [PATCH 3/3] revert modelscope_agent/agent.py

---
 modelscope_agent/agent.py | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/modelscope_agent/agent.py b/modelscope_agent/agent.py
index ed538606..7aec9845 100644
--- a/modelscope_agent/agent.py
+++ b/modelscope_agent/agent.py
@@ -61,7 +61,7 @@ def run(self, *args, **kwargs) -> Union[str, Iterator[str]]:
 
     @abstractmethod
     def _run(self, *args, **kwargs) -> Union[str, Iterator[str]]:
-        pass
+        raise NotImplementedError
 
     def _call_llm(self,
                   prompt: Optional[str] = None,