diff --git a/gptme/llm.py b/gptme/llm.py
index 798a7080..aec718a6 100644
--- a/gptme/llm.py
+++ b/gptme/llm.py
@@ -166,7 +166,8 @@ def _stream_openai(messages: list[Message], model: str) -> Generator[str, None,
         top_p=top_p,
         stream=True,
         # the llama-cpp-python server needs this explicitly set, otherwise unreliable results
-        max_tokens=1000 if not model.startswith("gpt-") else None,
+        # TODO: make this better
+        max_tokens=1000 if not model.startswith("gpt-") else 4096,
     ):
         if not chunk.choices:  # type: ignore
             # Got a chunk with no choices, Azure always sends one of these at the start
diff --git a/gptme/message.py b/gptme/message.py
index 64f87384..d3a23bdb 100644
--- a/gptme/message.py
+++ b/gptme/message.py
@@ -68,18 +68,24 @@ def __eq__(self, other):
     def to_dict(self, keys=None, anthropic=False) -> dict:
         """Return a dict representation of the message, serializable to JSON."""
         content: str | list[dict | str] = self.content
-
-        # if anthropic, make sure content is a list of dicts, to support multiple types of content
-        if anthropic:
-            content = [{"type": "text", "text": self.content}]
-            for f in self.files:
-                ext = f.suffix[1:]
-                if ext not in ["jpg", "jpeg", "png", "gif"]:
-                    logger.warning("Unsupported file type: %s", ext)
-                    continue
-                else:
-                    logger.warning("Found image file: %s", f)
-                media_type = f"image/{ext}"
+        allowed_file_exts = ["jpg", "jpeg", "png", "gif"]
+
+        content = [{"type": "text", "text": self.content}]
+        for f in self.files:
+            ext = f.suffix[1:]
+            if ext not in allowed_file_exts:
+                logger.warning("Unsupported file type: %s", ext)
+                continue
+            else:
+                logger.warning("Found image file: %s", f)
+            media_type = f"image/{ext}"
+            content.append(
+                {
+                    "type": "text",
+                    "text": f"![{f.name}]({f.name}):",
+                }
+            )
+            if anthropic:
                 content.append(
                     {
                         "type": "image",
@@ -90,8 +96,16 @@ def to_dict(self, keys=None, anthropic=False) -> dict:
                         },
                     }
                 )
-        else:
-            content = self.content
+            else:
+                # OpenAI format
+                content.append(
+                    {
+                        "type": "image_url",
+                        "image_url": {
+                            "url": f"data:{media_type};base64,{base64.b64encode(f.read_bytes()).decode('utf-8')}"
+                        },
+                    }
+                )
 
         d = {
             "role": self.role,