Skip to content

Commit

Permalink
google-genai[minor]: Add tool calling support (#5507)
Browse files Browse the repository at this point in the history
* google-genai[minor]: Add tool calling support

* cr

* chore: lint files

* fixes

* docs and build error fixes

* dont set name in response

* add ls links

* cr

* Multi modal docs

* :cr

* fixup multi modal tool calls

* cr
  • Loading branch information
bracesproul authored May 22, 2024
1 parent 4482001 commit d574ca4
Show file tree
Hide file tree
Showing 11 changed files with 797 additions and 26 deletions.
153 changes: 139 additions & 14 deletions docs/core_docs/docs/how_to/tool_calls_multi_modal.ipynb
Original file line number Diff line number Diff line change
Expand Up @@ -27,7 +27,7 @@
},
{
"cell_type": "code",
"execution_count": 1,
"execution_count": 2,
"id": "0d9fd81a-b7f0-445a-8e3d-cfc2d31fdd59",
"metadata": {},
"outputs": [],
Expand Down Expand Up @@ -74,7 +74,7 @@
" {\n",
" name: \"multiply\",\n",
" args: { weather: \"sunny\" },\n",
" id: \"call_MbIAYS9ESBG1EWNM2sMlinjR\"\n",
" id: \"call_ZaBYUggmrTSuDjcuZpMVKpMR\"\n",
" }\n",
"]\n"
]
Expand Down Expand Up @@ -128,7 +128,7 @@
},
{
"cell_type": "code",
"execution_count": 8,
"execution_count": 3,
"id": "d90c4590-71c8-42b1-99ff-03a9eca8082e",
"metadata": {},
"outputs": [
Expand All @@ -140,7 +140,7 @@
" {\n",
" name: \"multiply\",\n",
" args: { weather: \"sunny\" },\n",
" id: \"toolu_01KnRZWQkgWYSzL2x28crXFm\"\n",
" id: \"toolu_01HLY1KmXZkKMn7Ar4ZtFuAM\"\n",
" }\n",
"]\n"
]
Expand Down Expand Up @@ -178,28 +178,153 @@
"console.log(response.tool_calls);"
]
},
{
"cell_type": "markdown",
"id": "a66b7d2f",
"metadata": {},
"source": [
"## Google Generative AI\n",
"\n",
"For Google GenAI, we can format a base64-encoded image into a content block of type \"media\", as below:"
]
},
{
"cell_type": "code",
"execution_count": null,
"id": "c3955ada",
"execution_count": 4,
"id": "f8184909",
"metadata": {},
"outputs": [],
"source": []
"outputs": [
{
"name": "stdout",
"output_type": "stream",
"text": [
"[ { name: 'multiply', args: { weather: 'sunny' } } ]\n"
]
}
],
"source": [
"import { ChatGoogleGenerativeAI } from \"@langchain/google-genai\";\n",
"import axios from \"axios\";\n",
"import { ChatPromptTemplate, MessagesPlaceholder } from \"@langchain/core/prompts\";\n",
"import { HumanMessage } from \"@langchain/core/messages\";\n",
"\n",
"const axiosRes = await axios.get(imageUrl, { responseType: \"arraybuffer\" });\n",
"const base64 = btoa(\n",
" new Uint8Array(axiosRes.data).reduce(\n",
" (data, byte) => data + String.fromCharCode(byte),\n",
" ''\n",
" )\n",
");\n",
"\n",
"const model = new ChatGoogleGenerativeAI({ model: \"gemini-1.5-pro-latest\" }).bindTools([weatherTool]);\n",
"\n",
"const prompt = ChatPromptTemplate.fromMessages([\n",
" [\"system\", \"describe the weather in this image\"],\n",
" new MessagesPlaceholder(\"message\")\n",
"]);\n",
"\n",
"const response = await prompt.pipe(model).invoke({\n",
" message: new HumanMessage({\n",
" content: [{\n",
" type: \"media\",\n",
" mimeType: \"image/jpeg\",\n",
" data: base64,\n",
" }]\n",
" })\n",
"});\n",
"console.log(response.tool_calls);"
]
},
{
"cell_type": "markdown",
"id": "c5dd4ef4",
"metadata": {},
"source": [
"### Audio input\n",
"\n",
"Google's Gemini also supports audio inputs. In this next example we'll see how we can pass an audio file to the model, and get back a summary in structured format."
]
},
{
"cell_type": "code",
"execution_count": 6,
"id": "c04c883e",
"metadata": {},
"outputs": [
{
"name": "stdout",
"output_type": "stream",
"text": [
"[\n",
" {\n",
" name: 'summary_tool',\n",
" args: { summary: 'The video shows a person clapping their hands.' }\n",
" }\n",
"]\n"
]
}
],
"source": [
"import { SystemMessage } from \"@langchain/core/messages\";\n",
"import { StructuredTool } from \"@langchain/core/tools\";\n",
"\n",
"class SummaryTool extends StructuredTool {\n",
" schema = z.object({\n",
" summary: z.string().describe(\"The summary of the content to log\")\n",
" })\n",
"\n",
" description = \"Log the summary of the content\"\n",
"\n",
" name = \"summary_tool\"\n",
"\n",
" async _call(input: z.infer<typeof this.schema>) {\n",
" return input.summary\n",
" }\n",
"}\n",
"const summaryTool = new SummaryTool()\n",
"\n",
"const audioUrl = \"https://www.pacdv.com/sounds/people_sound_effects/applause-1.wav\";\n",
"\n",
"const axiosRes = await axios.get(audioUrl, { responseType: \"arraybuffer\" });\n",
"const base64 = btoa(\n",
" new Uint8Array(axiosRes.data).reduce(\n",
" (data, byte) => data + String.fromCharCode(byte),\n",
" ''\n",
" )\n",
");\n",
"\n",
"const model = new ChatGoogleGenerativeAI({ model: \"gemini-1.5-pro-latest\" }).bindTools([summaryTool]);\n",
"\n",
"const response = await model.invoke([\n",
" new SystemMessage(\"Summarize this content. always use the summary_tool in your response\"),\n",
" new HumanMessage({\n",
" content: [{\n",
" type: \"media\",\n",
" mimeType: \"audio/wav\",\n",
" data: base64,\n",
" }]\n",
"})]);\n",
"\n",
"console.log(response.tool_calls);"
]
}
],
"metadata": {
"kernelspec": {
"display_name": "Deno",
"display_name": "TypeScript",
"language": "typescript",
"name": "deno"
"name": "tslab"
},
"language_info": {
"codemirror_mode": {
"mode": "typescript",
"name": "javascript",
"typescript": true
},
"file_extension": ".ts",
"mimetype": "text/x.typescript",
"mimetype": "text/typescript",
"name": "typescript",
"nb_converter": "script",
"pygments_lexer": "typescript",
"version": "5.3.3"
"version": "3.7.2"
}
},
"nbformat": 4,
Expand Down
20 changes: 20 additions & 0 deletions docs/core_docs/docs/integrations/chat/google_generativeai.mdx
Original file line number Diff line number Diff line change
Expand Up @@ -39,6 +39,26 @@ import GoogleGenerativeAI from "@examples/models/chat/googlegenerativeai.ts";

<CodeBlock language="typescript">{GoogleGenerativeAI}</CodeBlock>

## Tool calling

import GoogleGenerativeAIToolCalling from "@examples/models/chat/googlegenerativeai_tools.ts";

<CodeBlock language="typescript">{GoogleGenerativeAIToolCalling}</CodeBlock>

:::tip
See the above run's LangSmith trace [here](https://smith.langchain.com/public/31faf31b-dbd0-436c-a425-b9eb1bccf8b7/r)
:::

## `.withStructuredOutput`

import GoogleGenerativeAIWSO from "@examples/models/chat/googlegenerativeai_wso.ts";

<CodeBlock language="typescript">{GoogleGenerativeAIWSO}</CodeBlock>

:::tip
See the above run's LangSmith trace [here](https://smith.langchain.com/public/4506314e-21ea-43a9-9718-22cad0bbbb38/r)
:::

## Multimodal support

To provide an image, pass a human message with a `content` field set to an array of content objects. Each content object
Expand Down
52 changes: 52 additions & 0 deletions examples/src/models/chat/googlegenerativeai_tools.ts
Original file line number Diff line number Diff line change
@@ -0,0 +1,52 @@
import { StructuredTool } from "@langchain/core/tools";
import { ChatGoogleGenerativeAI } from "@langchain/google-genai";
import { z } from "zod";

// Gemini chat model that the tool is bound to further down.
const model = new ChatGoogleGenerativeAI({ model: "gemini-pro" });

/**
 * Stand-in "browser" tool used by this example.
 *
 * It performs no real web request: `_call` always resolves to the string
 * "fake_browser_tool", whatever arguments it receives.
 */
class FakeBrowserTool extends StructuredTool {
  // Argument shape for the tool: a required `url` and an optional `query`.
  schema = z.object({
    url: z.string(),
    query: z.string().optional(),
  });

  name = "fake_browser_tool";

  description =
    "useful for when you need to find something on the web or summarize a webpage.";

  /** Ignores its input and resolves to a fixed placeholder string. */
  async _call(_input: z.infer<this["schema"]>): Promise<string> {
    return "fake_browser_tool";
  }
}

// Attach the tool to the model through its call options.
// `.bindTools` is an alternative that works the same under the hood:
// const modelWithTools = model.bindTools([new FakeBrowserTool()]);
const browserTool = new FakeBrowserTool();
const modelWithTools = model.bind({ tools: [browserTool] });

// A single human turn, passed as a [role, content] tuple.
const userPrompt =
  "Search the web and tell me what the weather will be like tonight in new york. use a popular weather website";
const res = await modelWithTools.invoke([["human", userPrompt]]);

// Print only the tool calls found on the model's reply.
console.log(res.tool_calls);

/*
[
{
name: 'fake_browser_tool',
args: {
query: 'weather in new york',
url: 'https://www.google.com/search?q=weather+in+new+york'
}
}
]
*/
49 changes: 49 additions & 0 deletions examples/src/models/chat/googlegenerativeai_wso.ts
Original file line number Diff line number Diff line change
@@ -0,0 +1,49 @@
import { StructuredTool } from "@langchain/core/tools";
import { ChatGoogleGenerativeAI } from "@langchain/google-genai";
import { z } from "zod";

// Gemini chat model that `.withStructuredOutput` is applied to further down.
const model = new ChatGoogleGenerativeAI({ model: "gemini-pro" });

/**
 * Stand-in "browser" tool used by this example.
 *
 * It performs no real web request: `_call` always resolves to the string
 * "fake_browser_tool", whatever arguments it receives.
 */
class FakeBrowserTool extends StructuredTool {
  // Argument shape for the tool: a required `url` and an optional `query`.
  schema = z.object({
    url: z.string(),
    query: z.string().optional(),
  });

  name = "fake_browser_tool";

  description =
    "useful for when you need to find something on the web or summarize a webpage.";

  /** Ignores its input and resolves to a fixed placeholder string. */
  async _call(_input: z.infer<this["schema"]>): Promise<string> {
    return "fake_browser_tool";
  }
}
const tool = new FakeBrowserTool();

// Constrain the model's output to the tool's Zod schema. This is not plain
// tool binding: the value returned by `.invoke` is the structured object
// itself (see the sample output below), not a message carrying `tool_calls`.
const structuredModel = model.withStructuredOutput(tool.schema, {
  name: tool.name, // this is optional
});
// Optionally, you can pass just a Zod schema, or JSONified Zod schema
// const structuredModel = model.withStructuredOutput(
//   zodSchema,
// );

const res = await structuredModel.invoke([
  [
    "human",
    "Search the web and tell me what the weather will be like tonight in new york. use a popular weather website",
  ],
]);

// Log the structured result as-is.
console.log(res);
/*
{
url: 'https://www.accuweather.com/en/us/new-york-ny/10007/night-weather-forecast/349014',
query: 'weather tonight'
}
*/
6 changes: 4 additions & 2 deletions libs/langchain-google-genai/package.json
Original file line number Diff line number Diff line change
Expand Up @@ -40,7 +40,8 @@
"license": "MIT",
"dependencies": {
"@google/generative-ai": "^0.7.0",
"@langchain/core": ">0.1.5 <0.3.0"
"@langchain/core": ">0.1.5 <0.3.0",
"zod-to-json-schema": "^3.22.4"
},
"devDependencies": {
"@jest/globals": "^29.5.0",
Expand All @@ -65,7 +66,8 @@
"release-it": "^15.10.1",
"rollup": "^4.5.2",
"ts-jest": "^29.1.0",
"typescript": "<5.2.0"
"typescript": "<5.2.0",
"zod": "^3.22.4"
},
"publishConfig": {
"access": "public"
Expand Down
Loading

0 comments on commit d574ca4

Please sign in to comment.