diff --git a/generate/__init__.py b/generate/__init__.py
index 462e45d..0a18270 100644
--- a/generate/__init__.py
+++ b/generate/__init__.py
@@ -19,6 +19,8 @@
     HunyuanChatParameters,
     MinimaxChat,
     MinimaxChatParameters,
+    MinimaxLegacyChat,
+    MinimaxLegacyChatParameters,
     MinimaxProChat,
     MinimaxProChatParameters,
     MoonshotChat,
@@ -93,10 +95,12 @@
     'AzureChat',
     'OpenAIChat',
     'OpenAIChatParameters',
-    'MinimaxProChat',
-    'MinimaxProChatParameters',
     'MinimaxChat',
     'MinimaxChatParameters',
+    'MinimaxProChat',
+    'MinimaxProChatParameters',
+    'MinimaxLegacyChat',
+    'MinimaxLegacyChatParameters',
     'ZhipuChat',
     'ZhipuChatParameters',
     'ZhipuCharacterChat',
diff --git a/generate/chat_completion/__init__.py b/generate/chat_completion/__init__.py
index ba1cd6e..c179414 100644
--- a/generate/chat_completion/__init__.py
+++ b/generate/chat_completion/__init__.py
@@ -32,6 +32,8 @@
     HunyuanChatParameters,
     MinimaxChat,
     MinimaxChatParameters,
+    MinimaxLegacyChat,
+    MinimaxLegacyChatParameters,
     MinimaxProChat,
     MinimaxProChatParameters,
     MoonshotChat,
@@ -55,8 +57,9 @@
     (AzureChat, OpenAIChatParameters),
     (AnthropicChat, AnthropicChatParameters),
     (OpenAIChat, OpenAIChatParameters),
+    (MinimaxChat, MinimaxChatParameters),
     (MinimaxProChat, MinimaxProChatParameters),
-    (MinimaxChat, MinimaxProChatParameters),
+    (MinimaxLegacyChat, MinimaxLegacyChatParameters),
     (ZhipuChat, ZhipuChatParameters),
     (ZhipuCharacterChat, ZhipuCharacterChatParameters),
     (WenxinChat, WenxinChatParameters),
@@ -82,10 +85,12 @@
     'ChatCompletionStreamOutput',
     'ModelParameters',
     'AzureChat',
-    'MinimaxProChat',
-    'MinimaxProChatParameters',
     'MinimaxChat',
     'MinimaxChatParameters',
+    'MinimaxProChat',
+    'MinimaxProChatParameters',
+    'MinimaxLegacyChat',
+    'MinimaxLegacyChatParameters',
     'OpenAIChat',
     'OpenAIChatParameters',
     'ZhipuChat',
diff --git a/generate/chat_completion/base.py b/generate/chat_completion/base.py
index 94488db..812f732 100644
--- a/generate/chat_completion/base.py
+++ b/generate/chat_completion/base.py
@@ -69,7 +69,7 @@ def hook(self, **kwargs: Unpack['HookModelKwargs']) -> 'HookChatCompletionModel'
 class RemoteChatCompletionModel(ChatCompletionModel, ABC):
     settings: PlatformSettings
     http_client: HttpClient
-    avaliable_models: ClassVar[List[str]] = []
+    available_models: ClassVar[List[str]] = []
 
     def __init__(
         self,
diff --git a/generate/chat_completion/cost_caculator.py b/generate/chat_completion/cost_caculator.py
index 6f04fe5..d7e0c58 100644
--- a/generate/chat_completion/cost_caculator.py
+++ b/generate/chat_completion/cost_caculator.py
@@ -7,6 +7,11 @@
         'moonshot-v1-32k': (0.024, 0.024),
         'moonshot-v1-128k': (0.06, 0.06),
     },
+    'minimax': {
+        'abab5.5-chat': (0.015, 0.015),
+        'abab5.5s-chat': (0.005, 0.005),
+        'abab6-chat': (0.1, 0.1),
+    },
 }
 
 
diff --git a/generate/chat_completion/models/__init__.py b/generate/chat_completion/models/__init__.py
index 50c7b0f..246e6f1 100644
--- a/generate/chat_completion/models/__init__.py
+++ b/generate/chat_completion/models/__init__.py
@@ -16,6 +16,7 @@
 from generate.chat_completion.models.deepseek import DeepSeekChat, DeepSeekChatParameters
 from generate.chat_completion.models.hunyuan import HunyuanChat, HunyuanChatParameters
 from generate.chat_completion.models.minimax import MinimaxChat, MinimaxChatParameters
+from generate.chat_completion.models.minimax_legacy import MinimaxLegacyChat, MinimaxLegacyChatParameters
 from generate.chat_completion.models.minimax_pro import MinimaxProChat, MinimaxProChatParameters
 from generate.chat_completion.models.moonshot import MoonshotChat, MoonshotChatParameters
 from generate.chat_completion.models.openai import OpenAIChat, OpenAIChatParameters
@@ -38,10 +39,12 @@
     'BailianChatParameters',
     'HunyuanChat',
     'HunyuanChatParameters',
-    'MinimaxChat',
-    'MinimaxChatParameters',
+    'MinimaxLegacyChat',
+    'MinimaxLegacyChatParameters',
     'MinimaxProChat',
     'MinimaxProChatParameters',
+    'MinimaxChat',
+    'MinimaxChatParameters',
     'OpenAIChat',
     'OpenAIChatParameters',
     'WenxinChat',
diff --git a/generate/chat_completion/models/anthropic.py b/generate/chat_completion/models/anthropic.py
index db37ba0..b5d623e 100644
--- a/generate/chat_completion/models/anthropic.py
+++ b/generate/chat_completion/models/anthropic.py
@@ -56,7 +56,7 @@ class AnthropicParametersDict(RemoteModelParametersDict, total=False):
 
 class AnthropicChat(RemoteChatCompletionModel):
     model_type: ClassVar[str] = 'anthropic'
-    avaliable_models: ClassVar[List[str]] = ['claude-2.1', 'claude-2.0', 'claude-instant-1.2']
+    available_models: ClassVar[List[str]] = ['claude-2.1', 'claude-2.0', 'claude-instant-1.2']
 
     parameters: AnthropicChatParameters
     settings: AnthropicSettings
diff --git a/generate/chat_completion/models/baichuan.py b/generate/chat_completion/models/baichuan.py
index 7e5c149..d2d15ee 100644
--- a/generate/chat_completion/models/baichuan.py
+++ b/generate/chat_completion/models/baichuan.py
@@ -52,7 +52,7 @@ class BaichuanChatParametersDict(RemoteModelParametersDict, total=False):
 
 class BaichuanChat(RemoteChatCompletionModel):
     model_type: ClassVar[str] = 'baichuan'
-    avaliable_models: ClassVar[List[str]] = ['Baichuan2-Turbo', 'Baichuan2-53B', 'Baichuan2-Turbo-192k']
+    available_models: ClassVar[List[str]] = ['Baichuan2-Turbo', 'Baichuan2-53B', 'Baichuan2-Turbo-192k']
 
     parameters: BaichuanChatParameters
     settings: BaichuanSettings
diff --git a/generate/chat_completion/models/dashscope.py b/generate/chat_completion/models/dashscope.py
index 23ae8b0..43971d0 100644
--- a/generate/chat_completion/models/dashscope.py
+++ b/generate/chat_completion/models/dashscope.py
@@ -57,7 +57,7 @@ class DashScopeChatParametersDict(RemoteModelParametersDict, total=False):
 
 class DashScopeChat(RemoteChatCompletionModel):
     model_type: ClassVar[str] = 'dashscope'
-    avaliable_models: ClassVar[List[str]] = ['qwen-turbo', 'qwen-plus', 'qwen-max', 'qwen-max-1201', 'qwen-max-longcontext']
+    available_models: ClassVar[List[str]] = ['qwen-turbo', 'qwen-plus', 'qwen-max', 'qwen-max-1201', 'qwen-max-longcontext']
 
     parameters: DashScopeChatParameters
     settings: DashScopeSettings
diff --git a/generate/chat_completion/models/dashscope_multimodal.py b/generate/chat_completion/models/dashscope_multimodal.py
index 1bcfc99..327c6b1 100644
--- a/generate/chat_completion/models/dashscope_multimodal.py
+++ b/generate/chat_completion/models/dashscope_multimodal.py
@@ -54,7 +54,7 @@ class DashScopeMultiModalMessage(TypedDict):
 
 class DashScopeMultiModalChat(RemoteChatCompletionModel):
     model_type: ClassVar[str] = 'dashscope_multimodal'
-    avaliable_models: ClassVar[List[str]] = ['qwen-vl-max', 'qwen-vl-plus']
+    available_models: ClassVar[List[str]] = ['qwen-vl-max', 'qwen-vl-plus']
 
     parameters: DashScopeMultiModalChatParameters
     settings: DashScopeSettings
diff --git a/generate/chat_completion/models/deepseek.py b/generate/chat_completion/models/deepseek.py
index 34c9b1c..250b325 100644
--- a/generate/chat_completion/models/deepseek.py
+++ b/generate/chat_completion/models/deepseek.py
@@ -34,7 +34,7 @@ class DeepSeekParametersDict(RemoteModelParametersDict, total=False):
 
 class DeepSeekChat(OpenAILikeChat):
     model_type: ClassVar[str] = 'deepseek'
-    avaliable_models: ClassVar[List[str]] = ['deepseek-chat', 'deepseek-coder']
+    available_models: ClassVar[List[str]] = ['deepseek-chat', 'deepseek-coder']
 
     parameters: DeepSeekChatParameters
     settings: DeepSeekSettings
@@ -68,5 +68,5 @@ def stream_generate(self, prompt: Prompt, **kwargs: Unpack[DeepSeekParametersDic
     async def async_stream_generate(
         self, prompt: Prompt, **kwargs: Unpack[DeepSeekParametersDict]
     ) -> AsyncIterator[ChatCompletionStreamOutput]:
-        async for i in super().async_stream_generate(prompt, **kwargs):
-            yield i
+        async for stream_output in super().async_stream_generate(prompt, **kwargs):
+            yield stream_output
diff --git a/generate/chat_completion/models/minimax.py b/generate/chat_completion/models/minimax.py
index 7d72151..39eafc4 100644
--- a/generate/chat_completion/models/minimax.py
+++ b/generate/chat_completion/models/minimax.py
@@ -1,102 +1,54 @@
 from __future__ import annotations
 
 import json
-from typing import Any, AsyncIterator, ClassVar, Iterator, List, Literal, Optional
+import uuid
+from typing import Any, AsyncIterator, ClassVar, Dict, Iterator, List, Optional
 
-from pydantic import Field, PositiveInt
-from typing_extensions import Annotated, TypedDict, Unpack, override
+from pydantic import PositiveInt, field_validator
+from typing_extensions import Unpack, override
 
-from generate.chat_completion.base import RemoteChatCompletionModel
 from generate.chat_completion.message import (
-    AssistantMessage,
-    Message,
-    Messages,
-    MessageTypeError,
     Prompt,
-    SystemMessage,
-    UserMessage,
-    ensure_messages,
 )
+from generate.chat_completion.message.core import AssistantMessage, FunctionMessage, Messages, ToolCall, ToolMessage
 from generate.chat_completion.model_output import ChatCompletionOutput, ChatCompletionStreamOutput
-from generate.chat_completion.stream_manager import StreamManager
+from generate.chat_completion.models.openai_like import OpenAILikeChat, OpenAIMessage, OpenAITool
 from generate.http import (
     HttpClient,
     HttpxPostKwargs,
-    ResponseValue,
-    UnexpectedResponseError,
 )
 from generate.model import ModelParameters, RemoteModelParametersDict
 from generate.platforms.minimax import MinimaxSettings
 from generate.types import Probability, Temperature
 
 
-class MinimaxMessage(TypedDict):
-    sender_type: Literal['USER', 'BOT']
-    text: str
-
-
-class RoleMeta(TypedDict):
-    user_name: str
-    bot_name: str
-
-
-DEFAULT_MINIMAX_SYSTEM_PROMPT = 'MM智能助理是一款由MiniMax自研的,没有调用其他产品的接口的大型语言模型。MiniMax是一家中国科技公司,一直致力于进行大模型相关的研究。'
-
-
 class MinimaxChatParameters(ModelParameters):
-    system_prompt: str = Field(default=DEFAULT_MINIMAX_SYSTEM_PROMPT, serialization_alias='prompt')
-    role_meta: RoleMeta = {'user_name': '用户', 'bot_name': 'MM智能助理'}
-    beam_width: Optional[Annotated[int, Field(ge=1, le=4)]] = None
     temperature: Optional[Temperature] = None
     top_p: Optional[Probability] = None
-    max_tokens: Optional[Annotated[PositiveInt, Field(serialization_alias='tokens_to_generate')]] = None
-    skip_info_mask: Optional[bool] = None
-    continue_last_message: Optional[bool] = None
+    max_tokens: Optional[PositiveInt] = None
+    tool_choice: Optional[str] = None
+    tools: Optional[List[OpenAITool]] = None
 
-    def custom_model_dump(self) -> dict[str, Any]:
-        output = super().custom_model_dump()
-        if 'temperature' in output:
-            output['temperature'] = max(0.01, output['temperature'])
-        if 'top_p' in output:
-            output['top_p'] = max(0.01, output['top_p'])
-        return output
+    @field_validator('temperature', 'top_p')
+    @classmethod
+    def can_not_equal_zero(cls, value: Optional[Temperature]) -> Optional[Temperature]:
+        if value == 0:
+            return 0.01
+        return value
 
 
 class MinimaxChatParametersDict(RemoteModelParametersDict, total=False):
-    system_prompt: str
-    role_meta: RoleMeta
-    beam_width: Optional[int]
     temperature: Optional[Temperature]
     top_p: Optional[Probability]
-    max_tokens: Optional[int]
-    skip_info_mask: Optional[bool]
-    continue_last_message: Optional[bool]
+    max_tokens: Optional[PositiveInt]
+    tool_choice: Optional[str]
+    tools: Optional[List[OpenAITool]]
 
 
-def _convert_message_to_minimax_message(message: Message) -> MinimaxMessage:
-    if isinstance(message, UserMessage):
-        return {
-            'sender_type': 'USER',
-            'text': message.content,
-        }
-    if isinstance(message, AssistantMessage):
-        return {
-            'sender_type': 'BOT',
-            'text': message.content,
-        }
-    raise MessageTypeError(message, (UserMessage, AssistantMessage))
-
-
-def _convert_messages(messages: Messages) -> list[MinimaxMessage]:
-    if isinstance(system_message := messages[0], SystemMessage):
-        prepend_messages = [UserMessage(content=system_message.content), AssistantMessage(content='好的')]
-        messages = prepend_messages + messages[1:]
-    return [_convert_message_to_minimax_message(message) for message in messages]
-
-
-class MinimaxChat(RemoteChatCompletionModel):
+class MinimaxChat(OpenAILikeChat):
     model_type: ClassVar[str] = 'minimax'
-    avaliable_models: ClassVar[List[str]] = ['abab5.5-chat', 'abab5.5s-chat']
+    available_models: ClassVar[List[str]] = ['abab5.5-chat', 'abab5.5s-chat', 'abab6-chat']
+    CHAT_COMPLETION_ENDPOINT: ClassVar[str] = '/text/chatcompletion_v2'
 
     parameters: MinimaxChatParameters
     settings: MinimaxSettings
@@ -104,8 +56,8 @@ class MinimaxChat(RemoteChatCompletionModel):
     def __init__(
         self,
         model: str = 'abab5.5-chat',
-        settings: MinimaxSettings | None = None,
         parameters: MinimaxChatParameters | None = None,
+        settings: MinimaxSettings | None = None,
         http_client: HttpClient | None = None,
     ) -> None:
         parameters = parameters or MinimaxChatParameters()
@@ -113,6 +65,52 @@
         http_client = http_client or HttpClient()
         super().__init__(model=model, parameters=parameters, settings=settings, http_client=http_client)
 
+    @override
+    def _get_request_parameters(self, prompt: Prompt, stream: bool = False, **kwargs: Any) -> HttpxPostKwargs:
+        http_kwargs = super()._get_request_parameters(prompt, stream, **kwargs)
+        http_kwargs['url'] = self.settings.api_base + self.CHAT_COMPLETION_ENDPOINT
+        if 'tools' in http_kwargs['json']:
+            # Serialize jsonschema dict to JSON string for Minimax compatibility
+            for tool in http_kwargs['json']['tools']:
+                if 'function' in tool:
+                    tool['function']['parameters'] = json.dumps(tool['function']['parameters'])
+        if http_kwargs['json'].get('tool_choice', None):
+            http_kwargs['json']['tool_choice'] = 'auto'
+        return http_kwargs
+
+    @override
+    def _determine_finish_reason(self, response: Dict[str, Any]) -> str | None:
+        choice = response['choices'][0]
+        if 'finish_reason' in choice and 'delta' not in choice:
+            return choice['finish_reason']
+        return None
+
+    @override
+    def _convert_to_openai_messages(self, messages: Messages) -> List[OpenAIMessage]:
+        converted_messages = []
+        temp_tool_call_id = self.generate_tool_call_id()
+        for message in messages:
+            # Copy first so the caller's message objects are never mutated.
+            message = message.model_copy(deep=True)
+            if isinstance(message, AssistantMessage) and message.function_call is not None:
+                # Wrap the legacy function_call in a tool call with a self-generated id.
+                tool_call = ToolCall(
+                    id=temp_tool_call_id,
+                    function=message.function_call,
+                )
+                message.tool_calls = [tool_call]
+                message.function_call = None
+            elif isinstance(message, FunctionMessage):
+                # Convert FunctionMessage to ToolMessage, reusing the id generated above.
+                message = ToolMessage(
+                    name=message.name,
+                    content=message.content,
+                    tool_call_id=temp_tool_call_id,
+                )
+                temp_tool_call_id = self.generate_tool_call_id()
+            converted_messages.append(message)
+        return super()._convert_to_openai_messages(converted_messages)
+
     @override
     def generate(self, prompt: Prompt, **kwargs: Unpack[MinimaxChatParametersDict]) -> ChatCompletionOutput:
         return super().generate(prompt, **kwargs)
@@ -131,74 +129,5 @@ def stream_generate(
         self, prompt: Prompt, **kwargs: Unpack[MinimaxChatParametersDict]
     ) -> Iterator[ChatCompletionStreamOutput]:
         yield from super().stream_generate(prompt, **kwargs)
 
     @override
     async def async_stream_generate(
         self, prompt: Prompt, **kwargs: Unpack[MinimaxChatParametersDict]
     ) -> AsyncIterator[ChatCompletionStreamOutput]:
-        async for output in super().async_stream_generate(prompt, **kwargs):
-            yield output
-
-    @override
-    def _get_request_parameters(
-        self, prompt: Prompt, stream: bool = False, **kwargs: Unpack[MinimaxChatParametersDict]
-    ) -> HttpxPostKwargs:
-        messages = ensure_messages(prompt)
-        parameters = self.parameters.clone_with_changes(**kwargs)
-        minimax_messages = _convert_messages(messages)
-        parameters_dict = parameters.custom_model_dump()
-        json_data = {
-            'model': self.model,
-            'messages': minimax_messages,
-            **parameters_dict,
-        }
-        if stream:
-            json_data['stream'] = True
-            json_data['use_standard_sse'] = True
-
-        headers = {
-            'Authorization': f'Bearer {self.settings.api_key.get_secret_value()}',
-            'Content-Type': 'application/json',
-        }
-        return {
-            'url': self.settings.api_base + 'text/chatcompletion',
-            'json': json_data,
-            'headers': headers,
-            'params': {'GroupId': self.settings.group_id},
-        }
-
-    @override
-    def _process_reponse(self, response: ResponseValue) -> ChatCompletionOutput:
-        try:
-            return ChatCompletionOutput(
-                model_info=self.model_info,
-                message=AssistantMessage(content=response['choices'][0]['text']),
-                finish_reason=response['choices'][0]['finish_reason'],
-                cost=self._calculate_cost(response['usage']),
-                extra={
-                    'logprobes': response['choices'][0]['logprobes'],
-                    'input_sensitive': False,
-                    'output_sensitive': False,
-                    'usage': response['usage'],
-                },
-            )
-        except (KeyError, IndexError, TypeError) as e:
-            raise UnexpectedResponseError(response) from e
-
-    @override
-    def _process_stream_line(self, line: str, stream_manager: StreamManager) -> ChatCompletionStreamOutput | None:
-        try:
-            data = json.loads(line)
-        except json.JSONDecodeError:
-            return None
-        stream_manager.delta = data['choices'][0]['delta']
-
-        if data['reply']:
-            stream_manager.finish_reason = data['choices'][0]['finish_reason']
-            extra = {
-                'logprobes': data['choices'][0]['logprobes'],
-                'input_sensitive': False,
-                'output_sensitive': False,
-                'usage': data['usage'],
-            }
-            stream_manager.extra.update(extra)
-            stream_manager.cost = self._calculate_cost(data['usage'])
-        return stream_manager.build_stream_output()
-
-    def _calculate_cost(self, usage: dict[str, int]) -> float:
-        return 0.015 * (usage['total_tokens'] / 1000)
+        async for stream_output in super().async_stream_generate(prompt, **kwargs):
+            yield stream_output
diff --git a/generate/chat_completion/models/minimax_legacy.py b/generate/chat_completion/models/minimax_legacy.py
new file mode 100644
index 0000000..47cc88f
--- /dev/null
+++ b/generate/chat_completion/models/minimax_legacy.py
@@ -0,0 +1,208 @@
+from __future__ import annotations
+
+import json
+from typing import Any, AsyncIterator, ClassVar, Iterator, List, Literal, Optional
+
+from pydantic import Field, PositiveInt
+from typing_extensions import Annotated, TypedDict, Unpack, override
+
+from generate.chat_completion.base import RemoteChatCompletionModel
+from generate.chat_completion.message import (
+    AssistantMessage,
+    Message,
+    Messages,
+    MessageTypeError,
+    Prompt,
+    SystemMessage,
+    UserMessage,
+    ensure_messages,
+)
+from generate.chat_completion.model_output import ChatCompletionOutput, ChatCompletionStreamOutput
+from generate.chat_completion.stream_manager import StreamManager
+from generate.http import (
+    HttpClient,
+    HttpxPostKwargs,
+    ResponseValue,
+    UnexpectedResponseError,
+)
+from generate.model import ModelParameters, RemoteModelParametersDict
+from generate.platforms.minimax import MinimaxSettings
+from generate.types import Probability, Temperature
+
+
+class MinimaxMessage(TypedDict):
+    sender_type: Literal['USER', 'BOT']
+    text: str
+
+
+class RoleMeta(TypedDict):
+    user_name: str
+    bot_name: str
+
+
+DEFAULT_MINIMAX_SYSTEM_PROMPT = 'MM智能助理是一款由MiniMax自研的,没有调用其他产品的接口的大型语言模型。MiniMax是一家中国科技公司,一直致力于进行大模型相关的研究。'
+
+
+class MinimaxLegacyChatParameters(ModelParameters):
+    system_prompt: str = Field(default=DEFAULT_MINIMAX_SYSTEM_PROMPT, serialization_alias='prompt')
+    role_meta: RoleMeta = {'user_name': '用户', 'bot_name': 'MM智能助理'}
+    beam_width: Optional[Annotated[int, Field(ge=1, le=4)]] = None
+    temperature: Optional[Temperature] = None
+    top_p: Optional[Probability] = None
+    max_tokens: Optional[Annotated[PositiveInt, Field(serialization_alias='tokens_to_generate')]] = None
+    skip_info_mask: Optional[bool] = None
+    continue_last_message: Optional[bool] = None
+
+    def custom_model_dump(self) -> dict[str, Any]:
+        output = super().custom_model_dump()
+        if 'temperature' in output:
+            output['temperature'] = max(0.01, output['temperature'])
+        if 'top_p' in output:
+            output['top_p'] = max(0.01, output['top_p'])
+        return output
+
+
+class MinimaxLegacyChatParametersDict(RemoteModelParametersDict, total=False):
+    system_prompt: str
+    role_meta: RoleMeta
+    beam_width: Optional[int]
+    temperature: Optional[Temperature]
+    top_p: Optional[Probability]
+    max_tokens: Optional[int]
+    skip_info_mask: Optional[bool]
+    continue_last_message: Optional[bool]
+
+
+def _convert_message_to_minimax_message(message: Message) -> MinimaxMessage:
+    if isinstance(message, UserMessage):
+        return {
+            'sender_type': 'USER',
+            'text': message.content,
+        }
+    if isinstance(message, AssistantMessage):
+        return {
+            'sender_type': 'BOT',
+            'text': message.content,
+        }
+    raise MessageTypeError(message, (UserMessage, AssistantMessage))
+
+
+def _convert_messages(messages: Messages) -> list[MinimaxMessage]:
+    if isinstance(system_message := messages[0], SystemMessage):
+        prepend_messages = [UserMessage(content=system_message.content), AssistantMessage(content='好的')]
+        messages = prepend_messages + messages[1:]
+    return [_convert_message_to_minimax_message(message) for message in messages]
+
+
+class MinimaxLegacyChat(RemoteChatCompletionModel):
+    model_type: ClassVar[str] = 'minimax_legacy'
+    available_models: ClassVar[List[str]] = ['abab5.5-chat', 'abab5.5s-chat']
+
+    parameters: MinimaxLegacyChatParameters
+    settings: MinimaxSettings
+
+    def __init__(
+        self,
+        model: str = 'abab5.5-chat',
+        settings: MinimaxSettings | None = None,
+        parameters: MinimaxLegacyChatParameters | None = None,
+        http_client: HttpClient | None = None,
+    ) -> None:
+        parameters = parameters or MinimaxLegacyChatParameters()
+        settings = settings or MinimaxSettings()  # type: ignore
+        http_client = http_client or HttpClient()
+        if not settings.group_id:
+            raise ValueError(
+                'group_id is required for MinimaxLegacyChat, you can set it in settings or environment variable MINIMAX_GROUP_ID'
+            )
+        super().__init__(model=model, parameters=parameters, settings=settings, http_client=http_client)
+
+    @override
+    def generate(self, prompt: Prompt, **kwargs: Unpack[MinimaxLegacyChatParametersDict]) -> ChatCompletionOutput:
+        return super().generate(prompt, **kwargs)
+
+    @override
+    async def async_generate(self, prompt: Prompt, **kwargs: Unpack[MinimaxLegacyChatParametersDict]) -> ChatCompletionOutput:
+        return await super().async_generate(prompt, **kwargs)
+
+    @override
+    def stream_generate(
+        self, prompt: Prompt, **kwargs: Unpack[MinimaxLegacyChatParametersDict]
+    ) -> Iterator[ChatCompletionStreamOutput]:
+        yield from super().stream_generate(prompt, **kwargs)
+
+    @override
+    async def async_stream_generate(
+        self, prompt: Prompt, **kwargs: Unpack[MinimaxLegacyChatParametersDict]
+    ) -> AsyncIterator[ChatCompletionStreamOutput]:
+        async for output in super().async_stream_generate(prompt, **kwargs):
+            yield output
+
+    @override
+    def _get_request_parameters(
+        self, prompt: Prompt, stream: bool = False, **kwargs: Unpack[MinimaxLegacyChatParametersDict]
+    ) -> HttpxPostKwargs:
+        messages = ensure_messages(prompt)
+        parameters = self.parameters.clone_with_changes(**kwargs)
+        minimax_messages = _convert_messages(messages)
+        parameters_dict = parameters.custom_model_dump()
+        json_data = {
+            'model': self.model,
+            'messages': minimax_messages,
+            **parameters_dict,
+        }
+        if stream:
+            json_data['stream'] = True
+            json_data['use_standard_sse'] = True
+
+        headers = {
+            'Authorization': f'Bearer {self.settings.api_key.get_secret_value()}',
+            'Content-Type': 'application/json',
+        }
+        return {
+            'url': self.settings.api_base + '/text/chatcompletion',
+            'json': json_data,
+            'headers': headers,
+            'params': {'GroupId': self.settings.group_id},
+        }
+
+    @override
+    def _process_reponse(self, response: ResponseValue) -> ChatCompletionOutput:
+        try:
+            return ChatCompletionOutput(
+                model_info=self.model_info,
+                message=AssistantMessage(content=response['choices'][0]['text']),
+                finish_reason=response['choices'][0]['finish_reason'],
+                cost=self._calculate_cost(response['usage']),
+                extra={
+                    'logprobes': response['choices'][0]['logprobes'],
+                    'input_sensitive': False,
+                    'output_sensitive': False,
+                    'usage': response['usage'],
+                },
+            )
+        except (KeyError, IndexError, TypeError) as e:
+            raise UnexpectedResponseError(response) from e
+
+    @override
+    def _process_stream_line(self, line: str, stream_manager: StreamManager) -> ChatCompletionStreamOutput | None:
+        try:
+            data = json.loads(line)
+        except json.JSONDecodeError:
+            return None
+        stream_manager.delta = data['choices'][0]['delta']
+
+        if data['reply']:
+            stream_manager.finish_reason = data['choices'][0]['finish_reason']
+            extra = {
+                'logprobes': data['choices'][0]['logprobes'],
+                'input_sensitive': False,
+                'output_sensitive': False,
+                'usage': data['usage'],
+            }
+            stream_manager.extra.update(extra)
+            stream_manager.cost = self._calculate_cost(data['usage'])
+        return stream_manager.build_stream_output()
+
+    def _calculate_cost(self, usage: dict[str, int]) -> float:
+        return 0.015 * (usage['total_tokens'] / 1000)
diff --git a/generate/chat_completion/models/minimax_pro.py b/generate/chat_completion/models/minimax_pro.py
index f9aa58d..c9145b4 100644
--- a/generate/chat_completion/models/minimax_pro.py
+++ b/generate/chat_completion/models/minimax_pro.py
@@ -259,7 +259,7 @@ def minimax_calculate_cost(model_name: str, usage: dict[str, int], num_web_searc
 
 class MinimaxProChat(RemoteChatCompletionModel, ToolCallMixin):
     model_type: ClassVar[str] = 'minimax_pro'
-    avaliable_models: ClassVar[List[str]] = ['abab5.5-chat', 'abab5.5s-chat', 'abab6-chat']
+    available_models: ClassVar[List[str]] = ['abab5.5-chat', 'abab5.5s-chat', 'abab6-chat']
 
     parameters: MinimaxProChatParameters
     settings: MinimaxSettings
@@ -274,6 +274,10 @@ def __init__(
         parameters = parameters or MinimaxProChatParameters()
         settings = settings or MinimaxSettings()  # type: ignore
         http_client = http_client or HttpClient()
+        if not settings.group_id:
+            raise ValueError(
+                'group_id is required for MinimaxProChat, you can set it in settings or environment variable MINIMAX_GROUP_ID'
+            )
         super().__init__(model=model, parameters=parameters, settings=settings, http_client=http_client)
         self.default_user_name = '用户'
 
@@ -360,7 +364,7 @@ def _get_request_parameters(
             'Content-Type': 'application/json',
         }
         return {
-            'url': self.settings.api_base + 'text/chatcompletion_pro',
+            'url': self.settings.api_base + '/text/chatcompletion_pro',
             'json': json_data,
             'headers': headers,
             'params': {'GroupId': self.settings.group_id},
diff --git a/generate/chat_completion/models/moonshot.py b/generate/chat_completion/models/moonshot.py
index 3b8647f..16cfe2c 100644
--- a/generate/chat_completion/models/moonshot.py
+++ b/generate/chat_completion/models/moonshot.py
@@ -28,7 +28,7 @@ class MoonshotParametersDict(RemoteModelParametersDict, total=False):
 
 class MoonshotChat(OpenAILikeChat):
     model_type: ClassVar[str] = 'moonshot'
-    avaliable_models: ClassVar[List[str]] = ['moonshot-v1-8k', 'moonshot-v1-32k', 'moonshot-v1-128k']
+    available_models: ClassVar[List[str]] = ['moonshot-v1-8k', 'moonshot-v1-32k', 'moonshot-v1-128k']
 
     parameters: MoonshotChatParameters
     settings: MoonshotSettings
@@ -62,5 +62,5 @@ def stream_generate(self, prompt: Prompt, **kwargs: Unpack[MoonshotParametersDic
     async def async_stream_generate(
         self, prompt: Prompt, **kwargs: Unpack[MoonshotParametersDict]
     ) -> AsyncIterator[ChatCompletionStreamOutput]:
-        async for i in super().async_stream_generate(prompt, **kwargs):
-            yield i
+        async for stream_output in super().async_stream_generate(prompt, **kwargs):
+            yield stream_output
diff --git a/generate/chat_completion/models/openai.py b/generate/chat_completion/models/openai.py
index 5489cdc..a6088f9 100644
--- a/generate/chat_completion/models/openai.py
+++ b/generate/chat_completion/models/openai.py
@@ -61,7 +61,7 @@ class OpenAIChatParametersDict(RemoteModelParametersDict, total=False):
 
 class OpenAIChat(OpenAILikeChat, ToolCallMixin):
     model_type: ClassVar[str] = 'openai'
-    avaliable_models: ClassVar[List[str]] = [
+    available_models: ClassVar[List[str]] = [
         'gpt-4-turbo-preview',
         'gpt-3.5-turbo',
         'gpt-4-vision-preview',
@@ -100,8 +100,8 @@ def stream_generate(
     async def async_stream_generate(
         self, prompt: Prompt, **kwargs: Unpack[OpenAIChatParametersDict]
    ) -> AsyncIterator[ChatCompletionStreamOutput]:
-        async for i in super().async_stream_generate(prompt, **kwargs):
-            yield i
+        async for stream_output in super().async_stream_generate(prompt, **kwargs):
+            yield stream_output
 
     @override
     def add_tools(self, tools: OrIterable[Tool]) -> None:
diff --git a/generate/chat_completion/models/openai_like.py b/generate/chat_completion/models/openai_like.py
index 3f8efcb..773ffff 100644
--- a/generate/chat_completion/models/openai_like.py
+++ b/generate/chat_completion/models/openai_like.py
@@ -7,6 +7,7 @@
 from typing import Any, Callable, Dict, List, Literal, Type, Union, cast
 
 from typing_extensions import NotRequired, TypedDict, override
+import uuid
 
 from generate.chat_completion.base import RemoteChatCompletionModel
 from generate.chat_completion.cost_caculator import GeneralCostCalculator
@@ -26,6 +27,7 @@
     UserMultiPartMessage,
     ensure_messages,
 )
+from generate.chat_completion.message.core import Messages
 from generate.chat_completion.model_output import ChatCompletionOutput, ChatCompletionStreamOutput
 from generate.chat_completion.stream_manager import StreamManager
 from generate.chat_completion.tool import FunctionJsonSchema, Tool
@@ -262,7 +264,7 @@ class OpenAILikeChat(RemoteChatCompletionModel, ABC):
     def _get_request_parameters(self, prompt: Prompt, stream: bool = False, **kwargs: Any) -> HttpxPostKwargs:
         messages = ensure_messages(prompt)
         parameters = self.parameters.clone_with_changes(**kwargs)
-        openai_messages = [convert_to_openai_message(message) for message in messages]
+        openai_messages = self._convert_to_openai_messages(messages)
         headers = {
             'Authorization': f'Bearer {self.settings.api_key.get_secret_value()}',
         }
@@ -280,6 +282,13 @@
             'json': params,
         }
 
+    def _convert_to_openai_messages(self, messages: Messages) -> List[OpenAIMessage]:
+        return [convert_to_openai_message(message) for message in messages]
+
+    @staticmethod
+    def generate_tool_call_id() -> str:
+        return f'call_{uuid.uuid4()}'
+
     @override
     def _process_reponse(self, response: Dict[str, Any]) -> ChatCompletionOutput:
         return process_openai_like_model_reponse(response, model_type=self.model_type)
@@ -291,7 +300,7 @@ def _process_stream_line(self, line: str, stream_manager: StreamManager) -> Chat
         except json.JSONDecodeError:
             return None
 
-        delta_dict = data['choices'][0]['delta']
+        delta_dict = data['choices'][0].get('delta', {})
         self._update_delta(delta_dict, stream_manager=stream_manager)
         stream_manager.extra = self._extract_extra_info(data)
         stream_manager.cost = self._calculate_cost(data)
@@ -347,11 +356,16 @@ def _calculate_cost(self, response: ResponseValue) -> float | None:
         )
         cost_calculator = GeneralCostCalculator()
+        usage = response['usage']
+        # Some platforms (e.g. the Minimax v2 endpoint) report only total_tokens;
+        # when the split is missing, bill the full total at the output-token rate.
+        input_tokens = usage.get('prompt_tokens', 0)
+        output_tokens = usage.get('completion_tokens', usage.get('total_tokens', 0))
         return cost_calculator.calculate(
             model_type=self.model_type,
             model_name=response['model'],
-            input_tokens=response['usage']['prompt_tokens'],
-            output_tokens=response['usage']['completion_tokens'],
+            input_tokens=input_tokens,
+            output_tokens=output_tokens,
         )
 
     def _determine_finish_reason(self, response: ResponseValue) -> str | None:
diff --git a/generate/chat_completion/models/wenxin.py b/generate/chat_completion/models/wenxin.py
index 95cf51e..e93e700 100644
--- a/generate/chat_completion/models/wenxin.py
+++ b/generate/chat_completion/models/wenxin.py
@@ -140,7 +140,7 @@ class WenxinChat(RemoteChatCompletionModel, ToolCallMixin):
         'ERNIE-Bot-turbo': 'eb-instant',
         'ERNIE-Bot-4': 'completions_pro',
     }
-    avaliable_models: ClassVar[List[str]] = ['ERNIE-Bot', 'ERNIE-Bot-turbo', 'ERNIE-Bot-4']
+    available_models: ClassVar[List[str]] = ['ERNIE-Bot', 'ERNIE-Bot-turbo', 'ERNIE-Bot-4']
 
     parameters: WenxinChatParameters
     settings: QianfanSettings
diff --git a/generate/chat_completion/models/yi.py b/generate/chat_completion/models/yi.py
index 5dbf763..145ebbf 100644
--- a/generate/chat_completion/models/yi.py
+++ b/generate/chat_completion/models/yi.py
@@ -25,7 +25,7 @@ class YiParametersDict(RemoteModelParametersDict, total=False):
 
 class YiChat(OpenAILikeChat):
     model_type: ClassVar[str] = 'yi'
-    avaliable_models: ClassVar[List[str]] = ['yi-34b-chat', 'Yi-34B-Chat-200K']
+    available_models: ClassVar[List[str]] = ['yi-34b-chat', 'Yi-34B-Chat-200K']
 
     parameters: YiChatParameters
     settings: YiSettings
@@ -59,5 +59,5 @@ def stream_generate(self, prompt: Prompt, **kwargs: Unpack[YiParametersDict]) ->
     async def async_stream_generate(
         self, prompt: Prompt, **kwargs: Unpack[YiParametersDict]
     ) -> AsyncIterator[ChatCompletionStreamOutput]:
-        async for i in super().async_stream_generate(prompt, **kwargs):
-            yield i
+        async for stream_output in super().async_stream_generate(prompt, **kwargs):
+            yield stream_output
diff --git a/generate/chat_completion/models/zhipu.py b/generate/chat_completion/models/zhipu.py
index 9e8a958..0b07b6c 100644
--- a/generate/chat_completion/models/zhipu.py
+++ b/generate/chat_completion/models/zhipu.py
@@ -332,7 +332,7 @@ def determine_finish_reason(self, response: ResponseValue) -> str | None:
 
 class ZhipuChat(RemoteChatCompletionModel):
     model_type: ClassVar[str] = 'zhipu'
-    avaliable_models: ClassVar[List[str]] = ['glm-4', 'glm-3-turbo', 'glm-4v']
+    available_models: ClassVar[List[str]] = ['glm-4', 'glm-3-turbo', 'glm-4v']
 
     parameters: ZhipuChatParameters
     settings: ZhipuSettings
@@ -455,7 +455,7 @@ class ZhipuCharacterChatParametersDict(RemoteModelParametersDict, total=False):
 
 class ZhipuCharacterChat(RemoteChatCompletionModel):
     model_type: ClassVar[str] = 'zhipu_character'
-    avaliable_models: ClassVar[List[str]] = ['charglm-3']
+    available_models: ClassVar[List[str]] = ['charglm-3']
 
     parameters: ZhipuCharacterChatParameters
     settings: ZhipuSettings
diff --git a/generate/platforms/minimax.py b/generate/platforms/minimax.py
index f909d66..985a7d7 100644
--- a/generate/platforms/minimax.py
+++ b/generate/platforms/minimax.py
@@ -1,13 +1,13 @@
 from pydantic import SecretStr
 from pydantic_settings import SettingsConfigDict
 
-from generate.platforms.base import PlatformSettings
+from generate.platforms.openai_like import OpenAILikeSettings
 
 
-class MinimaxSettings(PlatformSettings):
+class MinimaxSettings(OpenAILikeSettings):
    model_config = SettingsConfigDict(extra='ignore', env_prefix='minimax_', env_file='.env')
 
-    group_id: str
+    group_id: str = ''
     api_key: SecretStr
-    api_base: str = 'https://api.minimax.chat/v1/'
+    api_base: str = 'https://api.minimax.chat/v1'
     platform_url: str = 'https://api.minimax.chat/document/introduction'
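
For reviewers, a minimal usage sketch of the split this patch introduces. Model names, endpoints, and the group_id behavior are taken from the diff; the prompts are illustrative:

    from generate import MinimaxChat, MinimaxLegacyChat

    # New default client: targets the OpenAI-compatible /text/chatcompletion_v2
    # endpoint via OpenAILikeChat, so only MINIMAX_API_KEY needs to be set.
    chat = MinimaxChat(model='abab6-chat')
    print(chat.generate('你好').message.content)

    # The old /text/chatcompletion endpoint lives on as MinimaxLegacyChat and
    # now fails fast with ValueError unless MINIMAX_GROUP_ID (or
    # settings.group_id) is configured.
    legacy = MinimaxLegacyChat(model='abab5.5-chat')
    print(legacy.generate('你好').message.content)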
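One quirk worth calling out in MinimaxChat._get_request_parameters: per the in-diff comment, the v2 endpoint expects each function's parameters schema as a JSON string rather than an object, so the override re-serializes what the OpenAI-style base class built. A sketch of the transformation, with an invented tool definition for illustration:

    import json

    # Tool entry as OpenAILikeChat would place it in the request body...
    tool = {
        'type': 'function',
        'function': {
            'name': 'get_weather',
            'parameters': {'type': 'object', 'properties': {'city': {'type': 'string'}}},
        },
    }
    # ...and the re-serialization MinimaxChat applies before sending.
    tool['function']['parameters'] = json.dumps(tool['function']['parameters'])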
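On pricing: the new 'minimax' entries in GENERAL_COST_MAP read as (input, output) prices per 1,000 tokens (presumably in RMB), and they pair with the usage fallback added in openai_like.py. Because Minimax prices input and output identically, a response that reports only total_tokens can be billed entirely at the output rate and still match the legacy flat formula: for abab5.5-chat, 1,234 total tokens cost 0.015 * 1234 / 1000 ≈ 0.0185, exactly what MinimaxLegacyChat._calculate_cost returns.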