From b60db379d136cc5e10bef800fc9b510dcce35494 Mon Sep 17 00:00:00 2001 From: "suluyan.sly" Date: Thu, 4 Jan 2024 22:35:44 +0800 Subject: [PATCH 1/3] refactor/tool/image_gen --- modelscope_agent/agent.py | 2 +- modelscope_agent/tools/text_to_image_tool.py | 94 ++++---------------- tests/tools/test_image_gen.py | 38 ++++++++ tests/utils.py | 30 +------ 4 files changed, 60 insertions(+), 104 deletions(-) create mode 100644 tests/tools/test_image_gen.py diff --git a/modelscope_agent/agent.py b/modelscope_agent/agent.py index 7aec9845..ed538606 100644 --- a/modelscope_agent/agent.py +++ b/modelscope_agent/agent.py @@ -61,7 +61,7 @@ def run(self, *args, **kwargs) -> Union[str, Iterator[str]]: @abstractmethod def _run(self, *args, **kwargs) -> Union[str, Iterator[str]]: - raise NotImplementedError + pass def _call_llm(self, prompt: Optional[str] = None, diff --git a/modelscope_agent/tools/text_to_image_tool.py b/modelscope_agent/tools/text_to_image_tool.py index d65fb13f..c50162a6 100644 --- a/modelscope_agent/tools/text_to_image_tool.py +++ b/modelscope_agent/tools/text_to_image_tool.py @@ -5,61 +5,46 @@ import dashscope import json from dashscope import ImageSynthesis -from modelscope_agent.output_wrapper import ImageWrapper -from modelscope.utils.constant import Tasks -from .pipeline_tool import ModelscopePipelineTool +from modelscope_agent.tools.base import BaseTool, register_tool -class TextToImageTool(ModelscopePipelineTool): - default_model = 'AI-ModelScope/stable-diffusion-xl-base-1.0' +@register_tool('image_gen') +class TextToImageTool(BaseTool): description = 'AI绘画(图像生成)服务,输入文本描述和图像分辨率,返回根据文本信息绘制的图片URL。' name = 'image_gen' parameters: list = [{ 'name': 'text', 'description': '详细描述了希望生成的图像具有什么内容,例如人物、环境、动作等细节描述', 'required': True, - 'schema': { - 'type': 'string' - } + 'type': 'string' }, { 'name': 'resolution', 'description': '格式是 数字*数字,表示希望生成的图像的分辨率大小,选项有[1024*1024, 720*1280, 1280*720]', 'required': True, - 'schema': { - 'type': 'string' - } + 'type': 'string' }] - model_revision = 'v1.0.0' - task = Tasks.text_to_image_synthesis - # def _remote_parse_input(self, *args, **kwargs): - # params = { - # 'input': { - # 'text': kwargs['text'], - # 'resolution': kwargs['resolution'] - # } - # } - # if kwargs.get('seed', None): - # params['input']['seed'] = kwargs['seed'] - # return params + def call(self, params: str, **kwargs) -> str: + params = self._verify_args(params) + if isinstance(params, str): + return 'Parameter Error' - def _remote_call(self, *args, **kwargs): - - if ('resolution' in kwargs) and (kwargs['resolution'] in [ + if params['resolution'] in [ '1024*1024', '720*1280', '1280*720' - ]): - resolution = kwargs['resolution'] + ]: + resolution = params['resolution'] else: resolution = '1280*720' - prompt = kwargs['text'] - seed = kwargs.get('seed', None) + prompt = params['text'] if prompt is None: return None - dashscope.api_key = os.getenv('DASHSCOPE_API_KEY') + seed = kwargs.get('seed', None) model = kwargs.get('model', 'wanx-v1') + dashscope.api_key = os.getenv('DASHSCOPE_API_KEY') + response = ImageSynthesis.call( model=model, prompt=prompt, @@ -67,49 +52,6 @@ def _remote_call(self, *args, **kwargs): size=resolution, steps=10, seed=seed) - final_result = self._parse_output(response, remote=True) - return final_result - - def _local_parse_input(self, *args, **kwargs): - - text = kwargs.pop('text', '') - - parsed_args = ({'text': text}, ) + image_url = response.output['results'][0]['url'] + return image_url - return parsed_args, {} - - def _parse_output(self, origin_result, remote=True): - if not remote: - image = cv2.cvtColor(origin_result['output_imgs'][0], - cv2.COLOR_BGR2RGB) - else: - image = origin_result.output['results'][0]['url'] - - return {'result': ImageWrapper(image)} - - def _handle_input_fallback(self, **kwargs): - """ - an alternative method is to parse image is that get item between { and } - for last try - - :param fallback_text: - :return: language, cocde - """ - - text = kwargs.get('text', None) - fallback = kwargs.get('fallback', None) - - if text: - return text - elif fallback: - try: - text = fallback - json_block = re.search(r'\{([\s\S]+)\}', text) # noqa W^05 - if json_block: - result = json_block.group(1) - result_json = json.loads('{' + result + '}') - return result_json['text'] - except ValueError: - return text - else: - return text diff --git a/tests/tools/test_image_gen.py b/tests/tools/test_image_gen.py new file mode 100644 index 00000000..9c68b20a --- /dev/null +++ b/tests/tools/test_image_gen.py @@ -0,0 +1,38 @@ +from modelscope_agent.tools.text_to_image_tool import TextToImageTool +from modelscope_agent.prompts.role_play import RolePlay # NOQA +from modelscope_agent.agent import Agent + + +def test_image_gen(): + params = """{'text': '画一只小猫', 'resolution': '1024*1024'}""" + + t2i = TextToImageTool() + res = t2i.call(params) + assert(res.startswith("http")) + + +def test_image_gen_wrong_resolution(): + params = """{'text': '画一只小猫', 'resolution': '1024'}""" + + t2i = TextToImageTool() + res = t2i.call(params) + assert(res.startswith("http")) + +def test_image_gen_role(): + role_template = '你扮演一个画家,用尽可能丰富的描述调用工具绘制图像。' + + llm_config = {'model': 'qwen-max', 'model_server': 'dashscope'} + + # input tool args + function_list = [{'name': 'image_gen'}] + + bot = RolePlay( + function_list=function_list, llm=llm_config, instruction=role_template) + + response = bot.run('朝阳区天气怎样?') + + text = '' + for chunk in response: + text += chunk + print(text) + diff --git a/tests/utils.py b/tests/utils.py index 13c1ccf5..b45d1b19 100644 --- a/tests/utils.py +++ b/tests/utils.py @@ -1,10 +1,8 @@ -from modelscope_agent.action_parser import ActionParser -from modelscope_agent.llm import LLM -from modelscope_agent.prompts import PromptGenerator -from modelscope_agent.tools import Tool +from agent_scope.llm import BaseChatModel +from agent_scope.tools import Tool -class MockLLM(LLM): +class MockLLM(BaseChatModel): def __init__(self, responses=['mock_llm_response']): super().__init__({}) @@ -21,28 +19,6 @@ def stream_generate(self, prompt: str, function_list=[], **kwargs) -> str: yield 'mock llm response' -class MockPromptGenerator(PromptGenerator): - - def __init__(self): - super().__init__() - - -class MockOutParser(ActionParser): - - def __init__(self, action, args, count=1): - super().__init__() - self.action = action - self.args = args - self.count = count - - def parse_response(self, response: str): - if self.count > 0: - self.count -= 1 - return self.action, self.args - else: - return None, None - - class MockTool(Tool): def __init__(self, name, func, description, parameters=[]): From fd6537e9e1341366e87211886f265479f1f587a6 Mon Sep 17 00:00:00 2001 From: "suluyan.sly" Date: Thu, 4 Jan 2024 22:44:46 +0800 Subject: [PATCH 2/3] fix pre-commit --- modelscope_agent/tools/text_to_image_tool.py | 6 +----- tests/tools/test_image_gen.py | 19 ++++++++++--------- 2 files changed, 11 insertions(+), 14 deletions(-) diff --git a/modelscope_agent/tools/text_to_image_tool.py b/modelscope_agent/tools/text_to_image_tool.py index c50162a6..f17b3fc6 100644 --- a/modelscope_agent/tools/text_to_image_tool.py +++ b/modelscope_agent/tools/text_to_image_tool.py @@ -5,7 +5,6 @@ import dashscope import json from dashscope import ImageSynthesis - from modelscope_agent.tools.base import BaseTool, register_tool @@ -31,9 +30,7 @@ def call(self, params: str, **kwargs) -> str: if isinstance(params, str): return 'Parameter Error' - if params['resolution'] in [ - '1024*1024', '720*1280', '1280*720' - ]: + if params['resolution'] in ['1024*1024', '720*1280', '1280*720']: resolution = params['resolution'] else: resolution = '1280*720' @@ -54,4 +51,3 @@ def call(self, params: str, **kwargs) -> str: seed=seed) image_url = response.output['results'][0]['url'] return image_url - diff --git a/tests/tools/test_image_gen.py b/tests/tools/test_image_gen.py index 9c68b20a..b76b401a 100644 --- a/tests/tools/test_image_gen.py +++ b/tests/tools/test_image_gen.py @@ -1,6 +1,7 @@ +from modelscope_agent.agent import Agent from modelscope_agent.tools.text_to_image_tool import TextToImageTool + from modelscope_agent.prompts.role_play import RolePlay # NOQA -from modelscope_agent.agent import Agent def test_image_gen(): @@ -8,7 +9,7 @@ def test_image_gen(): t2i = TextToImageTool() res = t2i.call(params) - assert(res.startswith("http")) + assert (res.startswith('http')) def test_image_gen_wrong_resolution(): @@ -16,23 +17,23 @@ def test_image_gen_wrong_resolution(): t2i = TextToImageTool() res = t2i.call(params) - assert(res.startswith("http")) + assert (res.startswith('http')) + def test_image_gen_role(): role_template = '你扮演一个画家,用尽可能丰富的描述调用工具绘制图像。' - + llm_config = {'model': 'qwen-max', 'model_server': 'dashscope'} - + # input tool args function_list = [{'name': 'image_gen'}] - + bot = RolePlay( function_list=function_list, llm=llm_config, instruction=role_template) - + response = bot.run('朝阳区天气怎样?') - + text = '' for chunk in response: text += chunk print(text) - From 4689856d6e34d007286cd0491017d356193be285 Mon Sep 17 00:00:00 2001 From: "suluyan.sly" Date: Fri, 5 Jan 2024 09:17:54 +0800 Subject: [PATCH 3/3] revert modelscope_agent/agent.py --- modelscope_agent/agent.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/modelscope_agent/agent.py b/modelscope_agent/agent.py index ed538606..7aec9845 100644 --- a/modelscope_agent/agent.py +++ b/modelscope_agent/agent.py @@ -61,7 +61,7 @@ def run(self, *args, **kwargs) -> Union[str, Iterator[str]]: @abstractmethod def _run(self, *args, **kwargs) -> Union[str, Iterator[str]]: - pass + raise NotImplementedError def _call_llm(self, prompt: Optional[str] = None,