Skip to content

Commit

Permalink
core[patch]: remove prompt img loading (#27807)
Browse files Browse the repository at this point in the history
  • Loading branch information
baskaryan authored Oct 31, 2024
1 parent 33a5397 commit 7d481f1
Show file tree
Hide file tree
Showing 3 changed files with 45 additions and 75 deletions.
34 changes: 18 additions & 16 deletions libs/core/langchain_core/prompts/image.py
Original file line number Diff line number Diff line change
Expand Up @@ -4,7 +4,6 @@
from langchain_core.prompts.base import BasePromptTemplate
from langchain_core.pydantic_v1 import Field
from langchain_core.runnables import run_in_executor
from langchain_core.utils import image as image_utils


class ImagePromptTemplate(BasePromptTemplate[ImageURL]):
Expand Down Expand Up @@ -71,8 +70,8 @@ def format(
A formatted string.
Raises:
ValueError: If the url or path is not provided.
ValueError: If the path or url is not a string.
ValueError: If the url is not provided.
ValueError: If the url is not a string.
Example:
Expand All @@ -87,20 +86,24 @@ def format(
else:
formatted[k] = v
url = kwargs.get("url") or formatted.get("url")
path = kwargs.get("path") or formatted.get("path")
if kwargs.get("path") or formatted.get("path"):
msg = (
"Loading images from 'path' has been removed as of 0.3.15 for security "
"reasons. Please specify images by 'url'."
)
raise ValueError(msg)
detail = kwargs.get("detail") or formatted.get("detail")
if not url and not path:
raise ValueError("Must provide either url or path.")
if not url:
if not isinstance(path, str):
raise ValueError("path must be a string.")
url = image_utils.image_to_data_url(path)
if not isinstance(url, str):
raise ValueError("url must be a string.")
output: ImageURL = {"url": url}
if detail:
# Don't check literal values here: let the API check them
output["detail"] = detail # type: ignore[typeddict-item]
msg = "Must provide url."
raise ValueError(msg)
elif not isinstance(url, str):
msg = "url must be a string."
raise ValueError(msg)
else:
output: ImageURL = {"url": url}
if detail:
# Don't check literal values here: let the API check them
output["detail"] = detail # type: ignore[typeddict-item]
return output

async def aformat(self, **kwargs: Any) -> ImageURL:
Expand All @@ -113,7 +116,6 @@ async def aformat(self, **kwargs: Any) -> ImageURL:
A formatted string.
Raises:
ValueError: If the url is not provided, or if the removed 'path' argument is supplied.
ValueError: If the path or url is not a string.
"""
return await run_in_executor(None, self.format, **kwargs)
Expand Down
42 changes: 13 additions & 29 deletions libs/core/langchain_core/utils/image.py
Original file line number Diff line number Diff line change
@@ -1,29 +1,13 @@
import base64
import mimetypes


def encode_image(image_path: str) -> str:
    """Read an image file and return its contents as base64 text.

    NOTE(review): this opens whatever local path it is given; the removal
    notice elsewhere in this file calls that a vulnerability when paths come
    from user input.

    Args:
        image_path: The path to the image.

    Returns:
        The base64 string of the image.
    """
    with open(image_path, "rb") as handle:
        raw_bytes = handle.read()
    encoded = base64.b64encode(raw_bytes)
    return encoded.decode("utf-8")


def image_to_data_url(image_path: str) -> str:
    """Get data URL from image URI.

    NOTE(review): the file at ``image_path`` is read from local disk; the
    removal notice elsewhere in this file calls that a vulnerability when
    paths come from user input.

    Args:
        image_path: The path to the image.

    Returns:
        The data URL of the image, in the form
        ``data:<mime type>;base64,<payload>``.
    """
    encoding = encode_image(image_path)
    # guess_type() returns (None, None) when the extension is unknown; without
    # a fallback the URL would contain the literal text "None" as its MIME type.
    mime_type = mimetypes.guess_type(image_path)[0] or "application/octet-stream"
    return f"data:{mime_type};base64,{encoding}"
from typing import Any


def __getattr__(name: str) -> Any:
    """Handle lookups of module attributes that are not defined here.

    Python calls this module-level hook only after normal attribute lookup
    on the module has failed. It never returns a value; it always raises.

    Args:
        name: The attribute name being looked up on the module.

    Raises:
        ValueError: If ``name`` is one of the removed helpers
            (``encode_image`` or ``image_to_data_url``).
        AttributeError: For any other unknown attribute.
    """
    if name in ("encode_image", "image_to_data_url"):
        # NOTE: unlike AttributeError, a ValueError is not swallowed by
        # hasattr() nor converted to ImportError by `from ... import ...`,
        # so the explanation below reaches the caller unchanged.
        msg = (
            f"'{name}' has been removed for security reasons.\n\n"
            "Usage of this utility in environments with user-input paths is a "
            "security vulnerability. Out of an abundance of caution, the utility "
            "has been removed to prevent possible misuse."
        )
        raise ValueError(msg)
    # Mirror the message Python itself produces for a missing module attribute.
    msg = f"module {__name__!r} has no attribute {name!r}"
    raise AttributeError(msg)
44 changes: 14 additions & 30 deletions libs/core/tests/unit_tests/prompts/test_chat.py
Original file line number Diff line number Diff line change
Expand Up @@ -645,7 +645,7 @@ async def test_chat_tmpl_from_messages_multipart_image() -> None:


async def test_chat_tmpl_from_messages_multipart_formatting_with_path() -> None:
"""Verify that we can pass `path` for an image as a variable."""
"""Verify that we cannot pass `path` for an image as a variable."""
in_mem = "base64mem"
in_file_data = "base64file01"

Expand All @@ -672,35 +672,19 @@ async def test_chat_tmpl_from_messages_multipart_formatting_with_path() -> None:
),
]
)
expected = [
SystemMessage(content="You are an AI assistant named R2D2."),
HumanMessage(
content=[
{"type": "text", "text": "What's in this image?"},
{
"type": "image_url",
"image_url": {"url": f"data:image/jpeg;base64,{in_mem}"},
},
{
"type": "image_url",
"image_url": {"url": f"data:image/jpeg;base64,{in_file_data}"},
},
]
),
]
messages = template.format_messages(
name="R2D2",
in_mem=in_mem,
file_path=temp_file.name,
)
assert messages == expected

messages = await template.aformat_messages(
name="R2D2",
in_mem=in_mem,
file_path=temp_file.name,
)
assert messages == expected
with pytest.raises(ValueError):
template.format_messages(
name="R2D2",
in_mem=in_mem,
file_path=temp_file.name,
)

with pytest.raises(ValueError):
await template.aformat_messages(
name="R2D2",
in_mem=in_mem,
file_path=temp_file.name,
)


def test_messages_placeholder() -> None:
Expand Down

0 comments on commit 7d481f1

Please sign in to comment.