Skip to content

Commit

Permalink
partners: Fixed the procedure of initializing pad_token_id (#29500)
Browse files Browse the repository at this point in the history
- **Description:** Add a check for the pad_token_id and eos_token_id of the model
config. It seems that this is the same bug as the HuggingFace TGI bug.
It's the same bug as #29434
- **Issue:** #29431
- **Dependencies:** none
- **Twitter handle:** tell14

Example code is as follows:
```python
from langchain_huggingface.llms import HuggingFacePipeline

hf = HuggingFacePipeline.from_model_id(
    model_id="meta-llama/Llama-3.2-3B-Instruct",
    task="text-generation",
    pipeline_kwargs={"max_new_tokens": 10},
)

from langchain_core.prompts import PromptTemplate

template = """Question: {question}

Answer: Let's think step by step."""
prompt = PromptTemplate.from_template(template)

chain = prompt | hf

question = "What is electroencephalography?"

print(chain.invoke({"question": question}))
```
  • Loading branch information
tishizaki authored Feb 4, 2025
1 parent e8b9128 commit aeb42dc
Show file tree
Hide file tree
Showing 3 changed files with 23 additions and 9 deletions.
Original file line number Diff line number Diff line change
Expand Up @@ -202,7 +202,16 @@ def from_model_id(
model = model_cls.from_pretrained(model_id, **_model_kwargs)

if tokenizer.pad_token is None:
tokenizer.pad_token_id = model.config.eos_token_id
if model.config.pad_token_id is not None:
tokenizer.pad_token_id = model.config.pad_token_id
elif model.config.eos_token_id is not None and isinstance(
model.config.eos_token_id, int
):
tokenizer.pad_token_id = model.config.eos_token_id
elif tokenizer.eos_token_id is not None:
tokenizer.pad_token_id = tokenizer.eos_token_id
else:
tokenizer.add_special_tokens({"pad_token": "[PAD]"})

if (
(
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -67,12 +67,14 @@ def test_bind_runnables_as_tools(self, model: BaseChatModel) -> None:
super().test_bind_runnables_as_tools(model)

@pytest.mark.xfail(reason=("Not implemented"))
def test_structured_output(self, model: BaseChatModel) -> None:
super().test_structured_output(model)
def test_structured_output(self, model: BaseChatModel, schema_type: str) -> None:
super().test_structured_output(model, schema_type)

@pytest.mark.xfail(reason=("Not implemented"))
def test_structured_output_async(self, model: BaseChatModel) -> None: # type: ignore[override]
super().test_structured_output(model)
async def test_structured_output_async(
self, model: BaseChatModel, schema_type: str
) -> None: # type: ignore[override]
super().test_structured_output(model, schema_type)

@pytest.mark.xfail(reason=("Not implemented"))
def test_structured_output_pydantic_2_v1(self, model: BaseChatModel) -> None:
Expand Down
11 changes: 7 additions & 4 deletions libs/partners/huggingface/tests/unit_tests/test_chat_models.py
Original file line number Diff line number Diff line change
Expand Up @@ -248,10 +248,13 @@ def test_bind_tools_errors(

def test_bind_tools(chat_hugging_face: Any) -> None:
tools = [MagicMock(spec=BaseTool)]
with patch(
"langchain_huggingface.chat_models.huggingface.convert_to_openai_tool",
side_effect=lambda x: x,
), patch("langchain_core.runnables.base.Runnable.bind") as mock_super_bind:
with (
patch(
"langchain_huggingface.chat_models.huggingface.convert_to_openai_tool",
side_effect=lambda x: x,
),
patch("langchain_core.runnables.base.Runnable.bind") as mock_super_bind,
):
chat_hugging_face.bind_tools(tools, tool_choice="auto")
mock_super_bind.assert_called_once()
_, kwargs = mock_super_bind.call_args
Expand Down

0 comments on commit aeb42dc

Please sign in to comment.