Commit

Update

hinthornw committed Aug 3, 2023
1 parent 5c0caac commit 2e64045
Showing 4 changed files with 121 additions and 31 deletions.
2 changes: 1 addition & 1 deletion docs/evaluation/additional-resources/recommendations.mdx
@@ -23,7 +23,7 @@ These datasets can range from anywhere between 10-100+ examples and will continu

### Create domain-specific evaluators

-LangChain has strong and configurable built-in evaluators for common tasks, and everyone will benefit from your [contributions to these evaluators](https://github.com/hwchase17/langchain/tree/master/langchain/evaluation). However, often the best evaluation metrics are domain-specific. Some examples include:
+LangChain has strong and configurable built-in evaluators for common tasks, and everyone will benefit from your [contributions to these evaluators](https://github.com/langchain-ai/langchain/tree/master/libs/langchain/langchain/evaluation). However, often the best evaluation metrics are domain-specific. Some examples include:

- Evaluate the validity and efficiency of domain-specific code
- Applying custom rules to check the response output against a proprietary system
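Domain-specific checks like the ones listed above are often small deterministic functions that return a feedback score. As an illustrative sketch (the rule, the function name, and the `sql_validity` feedback key are all hypothetical, not part of LangChain), a check that an answer contains a plausible-looking SQL query could be:

```python
import re

def evaluate_sql_validity(prediction: str) -> dict:
    """Hypothetical domain rule: score 1 if the model's answer
    contains a plausible single-table SELECT statement, else 0."""
    pattern = re.compile(r"SELECT\s+.+?\s+FROM\s+\w+", re.IGNORECASE | re.DOTALL)
    return {"key": "sql_validity", "score": int(bool(pattern.search(prediction)))}

# A feedback dict in this shape can then be logged against a run,
# e.g. via the client's create_feedback method.
result = evaluate_sql_validity("SELECT name FROM users WHERE id = 1")
```

Real evaluators would typically parse or execute the candidate SQL rather than pattern-match, but the shape (inputs in, a keyed score out) is the same.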
4 changes: 2 additions & 2 deletions docs/evaluation/custom-evaluators.mdx
@@ -93,7 +93,7 @@ evaluation_config = RunEvalConfig(
run_on_dataset(
client,
"<my_dataset_name>",
-    "llm or function constructing ,
+    <llm or function constructing chain>,
evaluation=evaluation_config,
)
```
@@ -176,7 +176,7 @@ evaluation_config = RunEvalConfig(
run_on_dataset(
client,
"<my_dataset_name>",
-    "llm or function constructing ,
+    <llm or function constructing chain>,
evaluation=evaluation_config,
)
```
67 changes: 64 additions & 3 deletions docs/evaluation/datasets.mdx
@@ -37,6 +37,7 @@ From there, we select the dataset to organize it in and update the ground truth

![Modify example](static/modify_example.png)


### Upload a CSV

The easiest way to create a dataset from your own data is by clicking the 'upload a CSV dataset' button on the home page or in the top right-hand corner of the 'Datasets & Testing' page.
@@ -64,9 +64,71 @@ This will open a modal where you can select the format you want to export to.

You can create a dataset from existing runs or upload a CSV file (or a pandas DataFrame in Python).

Once you have a dataset created, you can continue to add new runs to it as examples. We recommend that you organize datasets to target a single "task", usually served by a single chain or LLM. For more discussions on datasets and evaluations, check out the [recommendations](additional-resources/recommendations).

### Create from list of examples

The most flexible way to make a dataset using the client is by creating examples from a list of inputs and optional outputs. Below is an example.

<CodeTabs
tabs={[
PythonBlock(`from langsmith import Client\n
example_inputs = [
("What is the largest mammal?", "The blue whale"),
("What do mammals and birds have in common?", "They are both warm-blooded"),
("What are reptiles known for?", "Having scales"),
("What's the main characteristic of amphibians?", "They live both in water and on land"),
]\n
client = Client()
dataset_name = "Elementary Animal Questions"\n
# Storing inputs in a dataset lets us
# run chains and LLMs over a shared set of examples.
dataset = client.create_dataset(
dataset_name=dataset_name, description="Questions and answers about animal phylogenetics.",
)
for input_prompt, output_answer in example_inputs:
client.create_example(
inputs={"question": input_prompt},
outputs={"answer": output_answer},
dataset_id=dataset.id,
)`),
TypeScriptBlock(`import { Client } from "langsmith";\n
const client = new Client({
// apiUrl: "https://api.langchain.com", // Defaults to the LANGCHAIN_ENDPOINT env var
// apiKey: "my_api_key", // Defaults to the LANGCHAIN_API_KEY env var
/* callerOptions: {
maxConcurrency?: Infinity; // Maximum number of concurrent requests to make
maxRetries?: 6; // Maximum number of retries to make
}*/
});\n
const exampleInputs: [string, string][] = [
["What is the largest mammal?", "The blue whale"],
["What do mammals and birds have in common?", "They are both warm-blooded"],
["What are reptiles known for?", "Having scales"],
["What's the main characteristic of amphibians?", "They live both in water and on land"],
];\n
const datasetName = "Elementary Animal Questions";\n
// Storing inputs in a dataset lets us
// run chains and LLMs over a shared set of examples.
const dataset = await client.createDataset(datasetName, {
description: "Questions and answers about animal phylogenetics",
});\n
for (const [inputPrompt, outputAnswer] of exampleInputs) {
await client.createExample(
{ question: inputPrompt },
{ answer: outputAnswer },
{
datasetId: dataset.id,
}
);
}`),
]}
groupId="client-language"
/>

### Create from existing runs

-If you have already logged runs to LangSmith, you can create a dataset from them using the client:
+To create datasets from existing runs, you can use the same approach. Below is an example:

<CodeTabs
tabs={[
@@ -121,8 +184,6 @@ for (const run of runs) {
groupId="client-language"
/>


### Create dataset from CSV

In this section, we will demonstrate how you can create a dataset by uploading a CSV file.
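The CSV itself just needs one column per input key and one per output key. Below is a minimal sketch of preparing such a file in Python; the upload call is commented out because it requires a LangSmith API key, and the dataset name and file path used here are assumptions for illustration:

```python
import csv
import io

# One column per input key ("question") and per output key ("answer").
rows = [
    {"question": "What is the largest mammal?", "answer": "The blue whale"},
    {"question": "What are reptiles known for?", "answer": "Having scales"},
]
buf = io.StringIO()
writer = csv.DictWriter(buf, fieldnames=["question", "answer"])
writer.writeheader()
writer.writerows(rows)
csv_text = buf.getvalue()  # write this to disk, e.g. animal_questions.csv

# Upload sketch: input_keys/output_keys tell LangSmith which columns
# are inputs vs. reference outputs.
# from langsmith import Client
# client = Client()
# client.upload_csv(
#     csv_file="animal_questions.csv",
#     input_keys=["question"],
#     output_keys=["answer"],
#     name="Elementary Animal Questions (CSV)",
# )
```

Columns not listed in either `input_keys` or `output_keys` are ignored, so you can keep extra bookkeeping columns in the same file.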
79 changes: 54 additions & 25 deletions src/components/QuickStart.js
Expand Up @@ -56,11 +56,33 @@ export const LangChainQuickStartCodeTabs = ({}) => (
tabs={[
PythonBlock(`from langchain.chat_models import ChatOpenAI\n
llm = ChatOpenAI()
-llm.predict("Hello, world!")
-`),
+llm.predict("Hello, world!")`),
TypeScriptBlock(`import { ChatOpenAI } from "langchain/chat_models/openai";\n
const llm = new ChatOpenAI()
-await llm.call("Hello, world!");`),
+await llm.predict("Hello, world!");
+/**
+ * For environments where process.env is not defined,
+ * initialize by explicitly passing keys:
+ */
+import { Client } from "langsmith";
+import { LangChainTracer } from "langchain/callbacks";
+const client = new Client({
+  apiUrl: "https://api.smith.langchain.com",
+  apiKey: "YOUR_API_KEY"
+});
+const tracer = new LangChainTracer({
+  projectName: "YOUR_PROJECT_NAME",
+  client
+});
+const model = new ChatOpenAI({
+  openAIApiKey: "YOUR_OPENAI_API_KEY",
+  callbacks: [tracer]
+});`),
]}
groupId="client-language"
/>
@@ -73,36 +95,43 @@ const TraceableQuickStart = {
content: `import datetime
from typing import Any\n
import openai
from langsmith.run_helpers import traceable

@traceable(run_type="llm")
def my_llm(prompt: str, temperature: float = 0.0, **kwargs: Any) -> str:
    messages = [
        {
            "role": "system",
            "content": "You are an AI Assistant. The time is "
            + str(datetime.datetime.now()),
        },
        {"role": "user", "content": prompt},
    ]
    return (
        openai.ChatCompletion.create(
            model="gpt-3.5-turbo", messages=messages, temperature=temperature, **kwargs
        )
        .choices[0]
        .message.content
    )

@traceable(run_type="tool")
def my_tool(tool_input: str) -> str:
    return tool_input.upper()

@traceable(run_type="chain")
def my_chat_bot(text: str) -> str:
    generated = my_llm(text, temperature=0.0)
    if "meeting" in generated:
        return my_tool(generated)
    else:
        return generated

-my_chat_bot("Summarize this morning's meetings.")`,
+my_chat_bot("Summarize this morning's meetings.")
+# See an example run at: https://smith.langchain.com/public/b5e2666d-f570-4b83-a611-86a2503ed91b/r`,
};

export const TraceableQuickStartCodeBlock = ({}) => (
@@ -223,7 +252,7 @@ outputs: {
// False means post all nested runs as a batch
// (don't exclude child runs)
await parentRun.postRun(false);
`),
]}
groupId="client-language"
