From 930643a00841cc46d3b43496e11b5e872ae980e5 Mon Sep 17 00:00:00 2001
From: shreya-51 <48033781+shreya-51@users.noreply.github.com>
Date: Thu, 5 Sep 2024 19:41:17 -0700
Subject: [PATCH 1/2] revise sgicl

---
 .../few_shot/example_generation/sg_icl.md | 103 ++++++++----
 docs/prompting/index.md                   |   2 +-
 mkdocs.yml                                |   2 +-
 3 files changed, 46 insertions(+), 61 deletions(-)

diff --git a/docs/prompting/few_shot/example_generation/sg_icl.md b/docs/prompting/few_shot/example_generation/sg_icl.md
index da8ac3513..2abcfd1e9 100644
--- a/docs/prompting/few_shot/example_generation/sg_icl.md
+++ b/docs/prompting/few_shot/example_generation/sg_icl.md
@@ -1,115 +1,100 @@
 ---
 title: "Generate In-Context Examples"
-description: ""
+description: "If we do not have examples for our task, we can utilize *self-generated* in-context learning (SG-ICL), where we use a model to generate in-context examples."
 ---

-How can we generate examples for our prompt?
+How can we generate examples of our task to improve model outputs?

-Self-Generated In-Context Learning (SG-ICL) is a technique which uses an LLM to generate examples to be used during the task. This allows for in-context learning, where examples of the task are provided in the prompt.
+In-context learning is a prompting technique where examples are provided in the prompt for the model to learn from at inference time. If we do not already have examples for our task, we can utilize *self-generated* in-context learning (SG-ICL), where we use a model to generate these in-context examples.

-We can implement SG-ICL using `instructor` as seen below.
+## Implementation

 ```python
 import instructor
-from pydantic import BaseModel
 from openai import OpenAI
-from typing import Literal
-
-n = 4  # num examples to generate per class
+from pydantic import BaseModel


-class GeneratedReview(BaseModel):
-    review: str
-    sentiment: Literal["positive", "negative"]
+class GeneratedExample(BaseModel):
+    input: str
+    output: str


-class SentimentPrediction(BaseModel):
-    sentiment: Literal["positive", "negative"]
+class Response(BaseModel):
+    output: str


 client = instructor.from_openai(OpenAI())


-def generate_sample(input_review, sentiment):
+def generate_example(task, input, case):
     return client.chat.completions.create(
         model="gpt-4o",
-        response_model=GeneratedReview,
+        response_model=GeneratedExample,
         messages=[
             {
                 "role": "user",
                 "content": f"""
-                Generate a '{sentiment}' review similar to: {input_review}
-                Generated review:
+                Generate an example for this task
+                {task}
+                that has this output
+                {case}
+                similar to this input
+                {input}
                 """,
             }
         ],
     )


-def predict_sentiment(input_review, in_context_samples):
+def inference(examples, task, input):
     return client.chat.completions.create(
         model="gpt-4o",
-        response_model=SentimentPrediction,
+        response_model=Response,
         messages=[
             {
                 "role": "user",
-                "content": "".join(
-                    [
-                        f"Review: {sample.review}\nSentiment: {sample.sentiment}\n\n"
-                        for sample in in_context_samples
-                    ]
-                )
-                + f"Review: {input_review}\nSentiment:",
+                "content": f"""
+                {examples}
+                {task}
+                {input}
+                """,
             }
         ],
-    ).sentiment
+    )


 if __name__ == "__main__":
-    input_review = (
-        "This movie was a rollercoaster of emotions, keeping me engaged throughout."
-    )
+    task = "Predict the sentiment of the following text:"
+    input = "This movie was a rollercoaster of emotions, keeping me engaged throughout."
+    example_cases = ["positive", "negative"]

-    # Generate in-context samples
-    samples = [
-        generate_sample(input_review, sentiment)
-        for sentiment in ('positive', 'negative')
-        for _ in range(n)
-    ]
-    for sample in samples:
-        print(sample)
-        """
-        review='This film was an emotional journey, keeping me captivated from start to finish.' sentiment='positive'
-        """
-        """
-        review='This film was an emotional journey, captivating me from start to finish.' sentiment='positive'
-        """
-        """
-        review='This film captivated me from start to finish with its thrilling plot and emotional depth.' sentiment='positive'
-        """
-        """
-        review='This movie was a breathtaking journey, capturing my attention from start to finish.' sentiment='positive'
-        """
+    examples = [
+        generate_example(task, input, case)
+        for case in example_cases
+        for _ in range(2)
+    ]  # Generate 2 examples per case
+
+    for example in examples:
+        print(example)
         """
-        review='This movie was a chaotic mess of emotions, losing me at every turn.' sentiment='negative'
+        input='The performance of the lead actor was stellar, leaving a lasting impression.' output='positive'
         """
         """
-        review='This movie was a confusing mess, leaving me disengaged throughout.' sentiment='negative'
+        input="The weather today has been absolutely wonderful, lifting everyone's spirits." output='positive'
         """
         """
-        review='This movie was a chore to sit through, leaving me bored most of the time.' sentiment='negative'
+        input='The meal was overpriced and underwhelming, not worth the hype.' output='negative'
         """
         """
-        review='This movie was a mishmash of confusing scenes, leaving me frustrated throughout.' sentiment='negative'
+        input='The customer service experience was frustrating and disappointing.' output='negative'
         """

-    # Predict sentiment
-    print(predict_sentiment(input_review, samples))
-    #> positive
+    print(inference(examples, task, input))
+    #> output='positive'
 ```

-### References
+## References

 1: [Self-Generated In-Context Learning: Leveraging Auto-regressive Language Models as a Demonstration Generator](https://arxiv.org/abs/2206.08082)

-\*: [The Prompt Report: A Systematic Survey of Prompting Techniques](https://arxiv.org/abs/2406.06608)
diff --git a/docs/prompting/index.md b/docs/prompting/index.md
index 0ed58b93d..a7ee4ae79 100644
--- a/docs/prompting/index.md
+++ b/docs/prompting/index.md
@@ -24,7 +24,7 @@ How do we increase the performance of our model without any examples?

 How do we choose effective examples to include in our prompt?

-1. [Auto-Generate Examples](few_shot/example_generation/sg_icl.md)
+1. [Generate Examples](few_shot/example_generation/sg_icl.md)
 2. [Re-Order Examples](few_shot/example_ordering.md)
 3. [Choose Examples Similar to the Query (KNN)](few_shot/exemplar_selection/knn.md)
 4. [Choose Examples Similar to the Query (Vote-K)](few_shot/exemplar_selection/vote_k.md)
diff --git a/mkdocs.yml b/mkdocs.yml
index 310438fb7..64ce475ef 100644
--- a/mkdocs.yml
+++ b/mkdocs.yml
@@ -231,7 +231,7 @@ nav:
           - Generate Follow-Up Questions: 'prompting/zero_shot/self_ask.md'
       - Few-Shot:
           - Example Generation:
-              - Generate In-Context Examples: 'prompting/few_shot/example_generation/sg_icl.md'
+              - Generate Examples: 'prompting/few_shot/example_generation/sg_icl.md'
           - Example Ordering: 'prompting/few_shot/example_ordering.md'
           - Exemplar Selection:
               - Select Effective Examples: 'prompting/few_shot/exemplar_selection/knn.md'

From 3f06ebfd7cf10a68fb5385dc1e8b108e63198888 Mon Sep 17 00:00:00 2001
From: shreya-51 <48033781+shreya-51@users.noreply.github.com>
Date: Sun, 8 Sep 2024 13:26:31 -0700
Subject: [PATCH 2/2] example ordering updates

---
 docs/prompting/few_shot/example_ordering.md | 76 ++++++++++++++++-----
 docs/prompting/index.md                     |  2 +-
 mkdocs.yml                                  |  2 +-
 3 files changed, 61 insertions(+), 19 deletions(-)

diff --git a/docs/prompting/few_shot/example_ordering.md b/docs/prompting/few_shot/example_ordering.md
index 574a448a2..1f4ea3587 100644
--- a/docs/prompting/few_shot/example_ordering.md
+++ b/docs/prompting/few_shot/example_ordering.md
@@ -1,34 +1,78 @@
 ---
 title: "Example Ordering"
-description: "LLM outputs are heavily impacted by ordering of few shot examples"
+description: "LLMs can be sensitive to the order of examples in prompts."
 ---

-# Example Ordering
+Does the order of in-context examples affect your task's output? If so, which ordering provides the best output?

-The order of few-shot examples in the prompt can affect LLM outputs 1234\*. Consider permutating the order of these examples in your prompt to achieve better results.
+LLMs can be sensitive to the order of examples in prompts1234. The script below uses `instructor` to test different example permutations and see how the output changes.

-## Choosing Your Examples
+## Implementation

-Depending on your use-case, here are a few different methods that you can consider using to improve the quality of your examples.
+```python
+from pydantic import BaseModel
+import instructor
+from openai import OpenAI
+from itertools import permutations

-### Combinatorics
+client = instructor.from_openai(OpenAI())

-One of the easiest methods is for us to manually iterate over each of the examples that we have and try all possible combinations we could create. This will in turn allow us to find the best combination that we can find.
-### KATE
+class Example(BaseModel):  # (1)!
+    input: str
+    output: str

-KATE (k-Nearest Example Tuning) is a method designed to enhance GPT-3's performance by selecting the most relevant in-context examples. The method involves:
-For each example in the test set, K nearest neighbors (examples) are retrieved based on semantic similarity.
-Among these K examples, those that appear most frequently across different queries are selected as the best in-context examples.
+class Response(BaseModel):
+    response: str

-### Using a Unsupervised Retriever
-![Retriever Image](../../img/retriever.png)
+def inference(examples, query):
+    return client.chat.completions.create(
+        model="gpt-4o",
+        response_model=Response,
+        messages=[
+            {
+                "role": "user",
+                "content": f"{examples} {query}",  # (2)!
+            }
+        ],
+    ).response

-We can use a large LLM to compute a single score for each example with respect to a given prompt. This allows us to create a training set that scores an example's relevance when compared against a prompt.
-Using this training set, we can train a model that mimics this functionality. This allows us to determine the top `k` most relevant and most irrelevant examples when a user makes a query so that we can include this in our final prompt.
-### References
+if __name__ == "__main__":
+    examples = [
+        Example(input="The movie was so good", output="positive"),
+        Example(input="The movie was somewhat good", output="negative"),
+    ]
+    query = "The movie was okay"
+
+    permutations = list(permutations(examples))
+    results = [inference(permutation, query) for permutation in permutations]
+    print(permutations)
+    """
+    [
+        (
+            Example(input='The movie was so good', output='positive'),
+            Example(input='The movie was somewhat good', output='negative'),
+        ),
+        (
+            Example(input='The movie was somewhat good', output='negative'),
+            Example(input='The movie was so good', output='positive'),
+        ),
+    ]
+    """
+    print(results)
+    #> ['negative', 'positive']
+```
+
+1. This class can be customized to a specific task
+2. This prompt can be customized to a specific task
+
+!!! info
+    For scenarios with a large number of examples, check out example selection techniques ([KNN](https://python.useinstructor.com/prompting/few_shot/exemplar_selection/knn/), [Vote-K](https://python.useinstructor.com/prompting/few_shot/exemplar_selection/vote_k/)).
+
+## References

 1: [Fantastically Ordered Prompts and Where to Find Them: Overcoming Few-Shot Prompt Order Sensitivity](https://arxiv.org/abs/2104.08786)

@@ -37,5 +81,3 @@ We can use a large LLM to compute a single score for each example with respect t
 3: [What Makes Good In-Context Examples for GPT-3?](https://arxiv.org/abs/2101.06804)

 4: [Learning To Retrieve Prompts for In-Context Learning](https://aclanthology.org/2022.naacl-main.191/)
-
-\*: [The Prompt Report: A Systematic Survey of Prompting Techniques](https://arxiv.org/abs/2406.06608)
diff --git a/docs/prompting/index.md b/docs/prompting/index.md
index a7ee4ae79..75d6959bd 100644
--- a/docs/prompting/index.md
+++ b/docs/prompting/index.md
@@ -25,7 +25,7 @@ How do we increase the performance of our model without any examples?
 How do we choose effective examples to include in our prompt?

 1. [Generate Examples](few_shot/example_generation/sg_icl.md)
-2. [Re-Order Examples](few_shot/example_ordering.md)
+2. [Order Examples](few_shot/example_ordering.md)
 3. [Choose Examples Similar to the Query (KNN)](few_shot/exemplar_selection/knn.md)
 4. [Choose Examples Similar to the Query (Vote-K)](few_shot/exemplar_selection/vote_k.md)

diff --git a/mkdocs.yml b/mkdocs.yml
index 64ce475ef..62063528d 100644
--- a/mkdocs.yml
+++ b/mkdocs.yml
@@ -232,7 +232,7 @@ nav:
       - Few-Shot:
           - Example Generation:
               - Generate Examples: 'prompting/few_shot/example_generation/sg_icl.md'
-          - Example Ordering: 'prompting/few_shot/example_ordering.md'
+          - Order Examples: 'prompting/few_shot/example_ordering.md'
           - Exemplar Selection:
               - Select Effective Examples: 'prompting/few_shot/exemplar_selection/knn.md'
               - Vote-K: 'prompting/few_shot/exemplar_selection/vote_k.md'