From ee1ec0317a88e88f9c2f8ee3710f0c34b756c254 Mon Sep 17 00:00:00 2001 From: NirantK Date: Mon, 4 Sep 2023 19:37:24 +0530 Subject: [PATCH 01/38] * feat(fine-tuned-RAG): add .gitignore file for ignoring *.parquet files * feat(fine-tuned-RAG): add DatasetPrep.ipynb file for dataset preparation * feat(DatasetPrep.ipynb): add code for downloading validation.json file --- examples/fine-tuned-RAG/.gitignore | 1 + examples/fine-tuned-RAG/DatasetPrep.ipynb | 213 ++++++++++++++++++++++ 2 files changed, 214 insertions(+) create mode 100644 examples/fine-tuned-RAG/.gitignore create mode 100644 examples/fine-tuned-RAG/DatasetPrep.ipynb diff --git a/examples/fine-tuned-RAG/.gitignore b/examples/fine-tuned-RAG/.gitignore new file mode 100644 index 0000000000..bccc1450f2 --- /dev/null +++ b/examples/fine-tuned-RAG/.gitignore @@ -0,0 +1 @@ +*.parquet \ No newline at end of file diff --git a/examples/fine-tuned-RAG/DatasetPrep.ipynb b/examples/fine-tuned-RAG/DatasetPrep.ipynb new file mode 100644 index 0000000000..d24a14f572 --- /dev/null +++ b/examples/fine-tuned-RAG/DatasetPrep.ipynb @@ -0,0 +1,213 @@ +{ + "cells": [ + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "# Validation" + ] + }, + { + "cell_type": "code", + "execution_count": 2, + "metadata": {}, + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "--2023-09-04 16:34:26-- https://rajpurkar.github.io/SQuAD-explorer/dataset/dev-v2.0.json\n", + "Resolving rajpurkar.github.io (rajpurkar.github.io)... 185.199.108.153, 185.199.109.153, 185.199.110.153, ...\n", + "Connecting to rajpurkar.github.io (rajpurkar.github.io)|185.199.108.153|:443... connected.\n", + "HTTP request sent, awaiting response... 
200 OK\n", + "Length: 4370528 (4.2M) [application/json]\n", + "Saving to: ‘validation.json’\n", + "\n", + "validation.json 100%[===================>] 4.17M 5.66MB/s in 0.7s \n", + "\n", + "2023-09-04 16:34:28 (5.66 MB/s) - ‘validation.json’ saved [4370528/4370528]\n", + "\n" + ] + } + ], + "source": [ + "!wget https://rajpurkar.github.io/SQuAD-explorer/dataset/dev-v2.0.json -O validation.json" + ] + }, + { + "cell_type": "code", + "execution_count": 2, + "metadata": {}, + "outputs": [], + "source": [ + "# !pip install pandas qdrant-client openai --quiet" + ] + }, + { + "cell_type": "code", + "execution_count": 20, + "metadata": {}, + "outputs": [], + "source": [ + "import pandas as pd\n", + "import json\n", + "\n", + "def json_to_dataframe_with_titles(json_data):\n", + " qas = []\n", + " context = []\n", + " is_impossible = []\n", + " answers = []\n", + " titles = []\n", + "\n", + " for article in json_data['data']:\n", + " title = article['title']\n", + " for paragraph in article['paragraphs']:\n", + " for qa in paragraph['qas']:\n", + " qas.append(qa['question'].strip())\n", + " context.append(paragraph['context'])\n", + " is_impossible.append(qa['is_impossible'])\n", + " \n", + " ans_list = []\n", + " for ans in qa['answers']:\n", + " ans_list.append(ans['text'])\n", + " answers.append(ans_list)\n", + " titles.append(title)\n", + "\n", + " df = pd.DataFrame({'title': titles, 'question': qas, 'context': context, 'is_impossible': is_impossible, 'answers': answers})\n", + " return df\n", + "\n", + "def get_diverse_sample(df, sample_size=100, random_state=42):\n", + " sample_df = df.groupby(['title', 'is_impossible']).apply(lambda x: x.sample(min(len(x), max(1, sample_size // 50)), random_state=random_state)).reset_index(drop=True)\n", + " \n", + " if len(sample_df) < sample_size:\n", + " remaining_sample_size = sample_size - len(sample_df)\n", + " remaining_df = df.drop(sample_df.index).sample(remaining_sample_size, random_state=random_state)\n", + " sample_df = 
pd.concat([sample_df, remaining_df]).sample(frac=1, random_state=random_state).reset_index(drop=True)\n", + "\n", + " return sample_df.sample(min(sample_size, len(sample_df)), random_state=random_state).reset_index(drop=True)\n", + "\n", + "validation = json.load(open('validation.json', 'r'))\n", + "validation_df = json_to_dataframe_with_titles(validation)\n", + "df = get_diverse_sample(validation_df, sample_size=1000, random_state=37)" + ] + }, + { + "cell_type": "code", + "execution_count": 17, + "metadata": {}, + "outputs": [ + { + "data": { + "text/plain": [ + "is_impossible\n", + "False 53\n", + "True 47\n", + "Name: count, dtype: int64" + ] + }, + "execution_count": 17, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "df.value_counts('is_impossible')" + ] + }, + { + "cell_type": "code", + "execution_count": 22, + "metadata": {}, + "outputs": [ + { + "data": { + "text/plain": [ + "title\n", + "Southern_California 33\n", + "Geology 33\n", + "Jacksonville,_Florida 32\n", + "University_of_Chicago 32\n", + "Imperialism 31\n", + "1973_oil_crisis 31\n", + "Fresno,_California 31\n", + "Harvard_University 31\n", + "Scottish_Parliament 31\n", + "Sky_(United_Kingdom) 31\n", + "Normans 30\n", + "Packet_switching 30\n", + "Ctenophora 30\n", + "Rhine 30\n", + "Pharmacy 30\n", + "Intergovernmental_Panel_on_Climate_Change 30\n", + "Warsaw 29\n", + "Steam_engine 29\n", + "Victoria_(Australia) 29\n", + "Computational_complexity_theory 29\n", + "Amazon_rainforest 29\n", + "Civil_disobedience 29\n", + "European_Union_law 29\n", + "Huguenot 28\n", + "Construction 28\n", + "Private_school 28\n", + "Black_Death 26\n", + "Yuan_dynasty 26\n", + "Islamism 25\n", + "Oxygen 24\n", + "Prime_number 24\n", + "Immune_system 24\n", + "Force 23\n", + "Economic_inequality 23\n", + "French_and_Indian_War 22\n", + "Name: count, dtype: int64" + ] + }, + "execution_count": 22, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + 
"df.title.value_counts()" + ] + }, + { + "cell_type": "code", + "execution_count": 21, + "metadata": {}, + "outputs": [], + "source": [ + "# write the sample to a JSON Lines file (one record per line)\n", + "df.to_json('v2_1K_Seed=37_sample.json', orient='records', lines=True)" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [] + } + ], + "metadata": { + "kernelspec": { + "display_name": "fst", + "language": "python", + "name": "python3" + }, + "language_info": { + "codemirror_mode": { + "name": "ipython", + "version": 3 + }, + "file_extension": ".py", + "mimetype": "text/x-python", + "name": "python", + "nbconvert_exporter": "python", + "pygments_lexer": "ipython3", + "version": "3.9.17" + }, + "orig_nbformat": 4 + }, + "nbformat": 4, + "nbformat_minor": 2 +} From 18f5031f5a4a0b1c6cd1b10b6b03c4b4f15bbc76 Mon Sep 17 00:00:00 2001 From: NirantK Date: Mon, 4 Sep 2023 20:45:56 +0530 Subject: [PATCH 02/38] Add model finetune nbs --- examples/fine-tuned-RAG/ModelFinetune.ipynb | 840 ++++++++++++++++++++ 1 file changed, 840 insertions(+) create mode 100644 examples/fine-tuned-RAG/ModelFinetune.ipynb diff --git a/examples/fine-tuned-RAG/ModelFinetune.ipynb b/examples/fine-tuned-RAG/ModelFinetune.ipynb new file mode 100644 index 0000000000..eb8af4d1e0 --- /dev/null +++ b/examples/fine-tuned-RAG/ModelFinetune.ipynb @@ -0,0 +1,840 @@ +{ + "cells": [ + { + "cell_type": "code", + "execution_count": 1, + "metadata": {}, + "outputs": [], + "source": [ + "# !pip install pandas openai tqdm tenacity pandarallel scikit-learn tiktoken" + ] + }, + { + "cell_type": "code", + "execution_count": 2, + "metadata": {}, + "outputs": [], + "source": [ + "import pandas as pd\n", + "import openai\n", + "import os\n", + "import time\n", + "from tenacity import retry, stop_after_attempt, wait_exponential\n", + "from sklearn.metrics import confusion_matrix\n", + "from tqdm import tqdm\n", + "tqdm.pandas()\n", + "openai.api_key = os.getenv(\"OPENAI_API_KEY\")" + ] + }, + { +
"cell_type": "code", + "execution_count": 3, + "metadata": {}, + "outputs": [], + "source": [ + "df = pd.read_json(\"v2_1K_Seed=37_sample.json\", lines=True)" + ] + }, + { + "cell_type": "code", + "execution_count": 4, + "metadata": {}, + "outputs": [ + { + "data": { + "text/html": [ + "
\n", + "\n", + "\n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + "
titlequestioncontextis_impossibleanswers
0Southern_CaliforniaWhat is the United State's second-busiest comm...Southern California is also home to the Port o...True[]
1Jacksonville,_FloridaWho did Jacksonville support with supplies dur...During the American Civil War, Jacksonville wa...True[]
2Imperialismcolonial rule would not be considered what typ...Imperialism is defined as \"A policy of extendi...True[]
3Intergovernmental_Panel_on_Climate_ChangeWhere are dissenting opinions published?Michael Oppenheimer, a long-time participant i...True[]
4Southern_CaliforniaHow many people does the Greater Los Angeles A...Southern California includes the heavily built...False[17.5 million, over 17.5 million, 17.5 million]
\n", + "
" + ], + "text/plain": [ + " title \\\n", + "0 Southern_California \n", + "1 Jacksonville,_Florida \n", + "2 Imperialism \n", + "3 Intergovernmental_Panel_on_Climate_Change \n", + "4 Southern_California \n", + "\n", + " question \\\n", + "0 What is the United State's second-busiest comm... \n", + "1 Who did Jacksonville support with supplies dur... \n", + "2 colonial rule would not be considered what typ... \n", + "3 Where are dissenting opinions published? \n", + "4 How many people does the Greater Los Angeles A... \n", + "\n", + " context is_impossible \\\n", + "0 Southern California is also home to the Port o... True \n", + "1 During the American Civil War, Jacksonville wa... True \n", + "2 Imperialism is defined as \"A policy of extendi... True \n", + "3 Michael Oppenheimer, a long-time participant i... True \n", + "4 Southern California includes the heavily built... False \n", + "\n", + " answers \n", + "0 [] \n", + "1 [] \n", + "2 [] \n", + "3 [] \n", + "4 [17.5 million, over 17.5 million, 17.5 million] " + ] + }, + "execution_count": 4, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "df.head()" + ] + }, + { + "cell_type": "code", + "execution_count": 30, + "metadata": {}, + "outputs": [], + "source": [ + "# Function to get prompt messages\n", + "def get_prompt(row):\n", + " return [\n", + " {'role': 'system', 'content': 'You are a helpful assistant.'},\n", + " {'role': 'user', 'content': f\"\"\"Answer the following Question based on the Context only. Only answer from the Context. 
If you don't know the answer, say 'I don't know'.\n", + " Question: {row.question}\\n\\n\n", + " Context: {row.context}\\n\\n\n", + " Answer:\\n\"\"\"},\n", + " ]\n", + "\n", + "# Function with tenacity for retries\n", + "@retry(wait=wait_exponential(multiplier=1, min=2, max=6))\n", + "def api_call(messages, model):\n", + " return openai.ChatCompletion.create(\n", + " model=model,\n", + " messages=messages,\n", + " stop=[\"\\n\\n\"],\n", + " max_tokens=100,\n", + " temperature=0.0,\n", + " )\n", + "\n", + "# Main function to answer question\n", + "def answer_question(row, model=\"gpt-3.5-turbo-0613\"):\n", + " messages = get_prompt(row)\n", + " response = api_call(messages, model)\n", + " return response['choices'][0]['message']['content']\n", + "\n", + "# Use progress_apply with tqdm for progress bar\n", + "df[\"generated_answer\"] = df.progress_apply(answer_question, axis=1)" + ] + }, + { + "cell_type": "code", + "execution_count": 6, + "metadata": {}, + "outputs": [ + { + "data": { + "text/plain": [ + "generated_answer\n", + "I don't know. 150\n", + "Oxygen 3\n", + "Arthur Woolf 2\n", + "For certain physical scenarios, forces are impossible to model as being due to the gradient of potentials because they arise from a macroscopic statistical average of microstates. 1\n", + "The derogatory term for the Christian academies that arose in the wake of school desegregation is \"segregation academies\". 1\n", + " ... \n", + "The Longwood Medical area is located in Boston. 1\n", + "On December 12, Washington and his men reached Fort Le Boeuf. 1\n", + "Non-governmental organizations are participants of the plenary sessions. 1\n", + "The Middle East was not a match for becoming another superpower confrontation with the USSR. 
1\n", + "Temür Khan 1\n", + "Name: count, Length: 848, dtype: int64" + ] + }, + "execution_count": 6, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "df[\"generated_answer\"].value_counts()" + ] + }, + { + "cell_type": "code", + "execution_count": 7, + "metadata": {}, + "outputs": [ + { + "data": { + "text/plain": [ + "is_impossible\n", + "False 503\n", + "True 497\n", + "Name: count, dtype: int64" + ] + }, + "execution_count": 7, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "df.is_impossible.value_counts()" + ] + }, + { + "cell_type": "code", + "execution_count": 8, + "metadata": {}, + "outputs": [], + "source": [ + "df.to_json(\"1K_with_generated_answers.json\", lines=True, orient=\"records\")" + ] + }, + { + "cell_type": "code", + "execution_count": 37, + "metadata": {}, + "outputs": [ + { + "data": { + "text/plain": [ + "Expected and Right 42.30%\n", + "Expected but IDK 0.00%\n", + "Expected but Wrong 8.00%\n", + "Hallucination 49.70%\n", + "Did not Expect and IDK 0.00%\n", + "Name: count, dtype: object" + ] + }, + "execution_count": 37, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "# Debugging and correcting the ConfusionMatrixEvaluator class to produce an accurate confusion matrix\n", + "class ConfusionMatrixEvaluator:\n", + " def __init__(self, df, answers_column=\"generated_answer\"):\n", + " self.df = df\n", + " self.y_pred = []\n", + " self.labels = [\n", + " 'Expected and Right', 'Expected but IDK', \n", + " 'Expected but Wrong', 'Hallucination', \n", + " 'Did not Expect and IDK'\n", + " ]\n", + " self.answers_column = answers_column\n", + " \n", + " def _evaluate_single_row(self, row):\n", + " is_impossible = row['is_impossible']\n", + " generated_answer = row[self.answers_column].lower()\n", + " actual_answers = [ans.lower() for ans in row['answers']]\n", + " \n", + " y_pred = (\n", + " 'Expected and Right' if not is_impossible and any(ans in generated_answer 
for ans in actual_answers) else\n", + " 'Expected but IDK' if not is_impossible and generated_answer == \"i don't know\" else\n", + " 'Expected but Wrong' if not is_impossible and generated_answer not in actual_answers else\n", + " 'Hallucination' if is_impossible and generated_answer != \"i don't know\" else\n", + " 'Did not Expect and IDK'\n", + " )\n", + " return y_pred\n", + " \n", + " def evaluate_answers(self):\n", + " self.y_pred = self.df.apply(self._evaluate_single_row, axis=1)\n", + " \n", + " def generate_matrices(self, use_percentages=False):\n", + " # Using value_counts to create a Series of frequencies, then reindexing to include missing labels with count 0\n", + " freq_series = self.y_pred.value_counts().reindex(self.labels, fill_value=0)\n", + " if use_percentages:\n", + " total = freq_series.sum()\n", + " freq_series = (freq_series / total * 100).apply(\"{0:.2f}%\".format)\n", + " return freq_series\n", + "\n", + "# Initialize the new evaluator\n", + "new_evaluator = ConfusionMatrixEvaluator(df, answers_column=\"generated_answer\")\n", + "\n", + "# Run the new evaluation\n", + "new_evaluator.evaluate_answers()\n", + "\n", + "# Generate the new confusion matrix\n", + "new_matrix = new_evaluator.generate_matrices(use_percentages=True)\n", + "\n", + "# Display the new matrix\n", + "new_matrix\n" + ] + }, + { + "cell_type": "code", + "execution_count": 31, + "metadata": {}, + "outputs": [ + { + "name": "stderr", + "output_type": "stream", + "text": [ + "100%|██████████| 1000/1000 [00:00<00:00, 59898.09it/s]\n" + ] + } + ], + "source": [ + "import json\n", + "def dataframe_to_jsonl_parallel(df):\n", + " def create_jsonl_entry(row):\n", + " answer = row['answers'][0] if row['answers'] else \"I don't know\"\n", + " messages = [\n", + " {'role': 'system', 'content': 'You are a helpful assistant.'},\n", + " {'role': 'user', 'content': f\"\"\"Answer the following Question based on the Context only. Only answer from the Context. 
If you don't know the answer, say 'I don't know'.\n", + " Question: {row.question}\\n\\n\n", + " Context: {row.context}\\n\\n\n", + " Answer:\\n\"\"\"},\n", + " {'role': 'assistant', 'content': answer}\n", + " ]\n", + " return json.dumps({'messages': messages})\n", + "\n", + " jsonl_output = df.progress_apply(create_jsonl_entry, axis=1)\n", + " return \"\\n\".join(jsonl_output)\n", + "\n", + "with open(\"squad-stratified-1000-ft-v2.json\", \"w\") as f:\n", + " f.write(dataframe_to_jsonl_parallel(df))" + ] + }, + { + "cell_type": "code", + "execution_count": 12, + "metadata": {}, + "outputs": [], + "source": [ + "# # We start by importing the required packages\n", + "\n", + "# import json\n", + "# import os\n", + "# import tiktoken\n", + "# import numpy as np\n", + "# from collections import defaultdict\n", + "\n", + "# # Next, we specify the data path and open the JSONL file\n", + "\n", + "# data_path = \"squad-stratified-100-ft-v1.json\"\n", + "\n", + "# # Load dataset\n", + "# with open(data_path) as f:\n", + "# dataset = [json.loads(line) for line in f]\n", + "\n", + "# # We can inspect the data quickly by checking the number of examples and the first item\n", + "\n", + "# # Initial dataset stats\n", + "# print(\"Num examples:\", len(dataset))\n", + "# print(\"First example:\")\n", + "# for message in dataset[0][\"messages\"]:\n", + "# print(message)\n", + "\n", + "# # Now that we have a sense of the data, we need to go through all the different examples and check to make sure the formatting is correct and matches the Chat completions message structure\n", + "\n", + "# # Format error checks\n", + "# format_errors = defaultdict(int)\n", + "\n", + "# for ex in dataset:\n", + "# if not isinstance(ex, dict):\n", + "# format_errors[\"data_type\"] += 1\n", + "# continue\n", + "\n", + "# messages = ex.get(\"messages\", None)\n", + "# if not messages:\n", + "# format_errors[\"missing_messages_list\"] += 1\n", + "# continue\n", + "\n", + "# for message in messages:\n", + 
"# if \"role\" not in message or \"content\" not in message:\n", + "# format_errors[\"message_missing_key\"] += 1\n", + "\n", + "# if any(k not in (\"role\", \"content\", \"name\") for k in message):\n", + "# format_errors[\"message_unrecognized_key\"] += 1\n", + "\n", + "# if message.get(\"role\", None) not in (\"system\", \"user\", \"assistant\"):\n", + "# format_errors[\"unrecognized_role\"] += 1\n", + "\n", + "# content = message.get(\"content\", None)\n", + "# if not content or not isinstance(content, str):\n", + "# format_errors[\"missing_content\"] += 1\n", + "\n", + "# if not any(message.get(\"role\", None) == \"assistant\" for message in messages):\n", + "# format_errors[\"example_missing_assistant_message\"] += 1\n", + "\n", + "# if format_errors:\n", + "# print(\"Found errors:\")\n", + "# for k, v in format_errors.items():\n", + "# print(f\"{k}: {v}\")\n", + "# else:\n", + "# print(\"No errors found\")\n", + "\n", + "# # Beyond the structure of the message, we also need to ensure that the length does not exceed the 4096 token limit.\n", + "\n", + "# # Token counting functions\n", + "# encoding = tiktoken.get_encoding(\"cl100k_base\")\n", + "\n", + "# # not exact!\n", + "# # simplified from https://github.com/openai/openai-cookbook/blob/main/examples/How_to_count_tokens_with_tiktoken.ipynb\n", + "# def num_tokens_from_messages(messages, tokens_per_message=3, tokens_per_name=1):\n", + "# num_tokens = 0\n", + "# for message in messages:\n", + "# num_tokens += tokens_per_message\n", + "# for key, value in message.items():\n", + "# num_tokens += len(encoding.encode(value))\n", + "# if key == \"name\":\n", + "# num_tokens += tokens_per_name\n", + "# num_tokens += 3\n", + "# return num_tokens\n", + "\n", + "# def num_assistant_tokens_from_messages(messages):\n", + "# num_tokens = 0\n", + "# for message in messages:\n", + "# if message[\"role\"] == \"assistant\":\n", + "# num_tokens += len(encoding.encode(message[\"content\"]))\n", + "# return num_tokens\n", + 
"\n", + "# def print_distribution(values, name):\n", + "# print(f\"\\n#### Distribution of {name}:\")\n", + "# print(f\"min / max: {min(values)}, {max(values)}\")\n", + "# print(f\"mean / median: {np.mean(values)}, {np.median(values)}\")\n", + "# print(f\"p5 / p95: {np.quantile(values, 0.1)}, {np.quantile(values, 0.9)}\")\n", + "\n", + "# # Last, we can look at the results of the different formatting operations before proceeding with creating a fine-tuning job:\n", + "\n", + "# # Warnings and tokens counts\n", + "# n_missing_system = 0\n", + "# n_missing_user = 0\n", + "# n_messages = []\n", + "# convo_lens = []\n", + "# assistant_message_lens = []\n", + "\n", + "# for ex in dataset:\n", + "# messages = ex[\"messages\"]\n", + "# if not any(message[\"role\"] == \"system\" for message in messages):\n", + "# n_missing_system += 1\n", + "# if not any(message[\"role\"] == \"user\" for message in messages):\n", + "# n_missing_user += 1\n", + "# n_messages.append(len(messages))\n", + "# convo_lens.append(num_tokens_from_messages(messages))\n", + "# assistant_message_lens.append(num_assistant_tokens_from_messages(messages))\n", + "\n", + "# print(\"Num examples missing system message:\", n_missing_system)\n", + "# print(\"Num examples missing user message:\", n_missing_user)\n", + "# print_distribution(n_messages, \"num_messages_per_example\")\n", + "# print_distribution(convo_lens, \"num_total_tokens_per_example\")\n", + "# print_distribution(assistant_message_lens, \"num_assistant_tokens_per_example\")\n", + "# n_too_long = sum(l > 4096 for l in convo_lens)\n", + "# print(f\"\\n{n_too_long} examples may be over the 4096 token limit, they will be truncated during fine-tuning\")\n", + "\n", + "# # Pricing and default n_epochs estimate\n", + "# MAX_TOKENS_PER_EXAMPLE = 4096\n", + "\n", + "# MIN_TARGET_EXAMPLES = 100\n", + "# MAX_TARGET_EXAMPLES = 25000\n", + "# TARGET_EPOCHS = 3\n", + "# MIN_EPOCHS = 1\n", + "# MAX_EPOCHS = 25\n", + "\n", + "# n_epochs = TARGET_EPOCHS\n", + 
"# n_train_examples = len(dataset)\n", + "# if n_train_examples * TARGET_EPOCHS < MIN_TARGET_EXAMPLES:\n", + "# n_epochs = min(MAX_EPOCHS, MIN_TARGET_EXAMPLES // n_train_examples)\n", + "# elif n_train_examples * TARGET_EPOCHS > MAX_TARGET_EXAMPLES:\n", + "# n_epochs = max(MIN_EPOCHS, MAX_TARGET_EXAMPLES // n_train_examples)\n", + "\n", + "# n_billing_tokens_in_dataset = sum(min(MAX_TOKENS_PER_EXAMPLE, length) for length in convo_lens)\n", + "# print(f\"Dataset has ~{n_billing_tokens_in_dataset} tokens that will be charged for during training\")\n", + "# print(f\"By default, you'll train for {n_epochs} epochs on this dataset\")\n", + "# print(f\"By default, you'll be charged for ~{n_epochs * n_billing_tokens_in_dataset} tokens\")\n", + "# print(\"See pricing page to estimate total costs\")" + ] + }, + { + "cell_type": "code", + "execution_count": 16, + "metadata": {}, + "outputs": [ + { + "data": { + "text/plain": [ + " JSON: {\n", + " \"object\": \"file\",\n", + " \"id\": \"file-TjkDD0C39b4S1JZstMly98pv\",\n", + " \"purpose\": \"fine-tune\",\n", + " \"filename\": \"file\",\n", + " \"bytes\": 131291,\n", + " \"created_at\": 1693836561,\n", + " \"status\": \"uploaded\",\n", + " \"status_details\": null\n", + "}" + ] + }, + "execution_count": 16, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "\n", + "file_object = openai.File.create(\n", + " file=open(\"squad-stratified-100-ft-v1.json\", \"r\"),\n", + " purpose='fine-tune',\n", + "\n", + ")\n", + "file_object" + ] + }, + { + "cell_type": "code", + "execution_count": 19, + "metadata": {}, + "outputs": [], + "source": [ + "while file_object['status'] != 'processed':\n", + " file_object = openai.File.retrieve(file_object['id'])\n", + " time.sleep(1)\n", + "\n", + "ft_job = openai.FineTuningJob.create(training_file=file_object['id'], model=\"gpt-3.5-turbo\", suffix=\"v1\")" + ] + }, + { + "cell_type": "code", + "execution_count": 20, + "metadata": {}, + "outputs": [ + { + "data": { + 
"text/plain": [ + " JSON: {\n", + " \"object\": \"list\",\n", + " \"data\": [\n", + " {\n", + " \"object\": \"fine_tuning.job\",\n", + " \"id\": \"ftjob-x29xZik7Ey48fp3P0y8W7s7j\",\n", + " \"model\": \"gpt-3.5-turbo-0613\",\n", + " \"created_at\": 1693836623,\n", + " \"finished_at\": null,\n", + " \"fine_tuned_model\": null,\n", + " \"organization_id\": \"org-eC1aoPhsmmzdbEKUb4A3nXwv\",\n", + " \"result_files\": [],\n", + " \"status\": \"running\",\n", + " \"validation_file\": null,\n", + " \"training_file\": \"file-TjkDD0C39b4S1JZstMly98pv\",\n", + " \"hyperparameters\": {\n", + " \"n_epochs\": 3\n", + " },\n", + " \"trained_tokens\": null\n", + " }\n", + " ],\n", + " \"has_more\": false\n", + "}" + ] + }, + "execution_count": 20, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "openai.FineTuningJob.list(limit=10)" + ] + }, + { + "cell_type": "code", + "execution_count": 26, + "metadata": {}, + "outputs": [], + "source": [ + "import time\n", + "while openai.FineTuningJob.retrieve(ft_job[\"id\"]).fine_tuned_model == None:\n", + " time.sleep(10)\n", + "ft_model = openai.FineTuningJob.retrieve(ft_job[\"id\"]).fine_tuned_model" + ] + }, + { + "cell_type": "code", + "execution_count": 28, + "metadata": {}, + "outputs": [], + "source": [ + "model_id = ft_model" + ] + }, + { + "cell_type": "code", + "execution_count": 29, + "metadata": {}, + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "{\n", + " \"role\": \"assistant\",\n", + " \"content\": \"I don't know\"\n", + "}\n" + ] + } + ], + "source": [ + "completion = openai.ChatCompletion.create(\n", + " model=model_id,\n", + " messages=[\n", + " {\"role\": \"system\", \"content\": \"You are a helpful assistant.\"},\n", + " {\"role\": \"user\", \"content\": \"Hello!\"},\n", + " {\"role\": \"assistant\", \"content\": \"Hi, how can I help you today?\"},\n", + " {\"role\": \"user\", \"content\": \"Can you answer the following question based on the given context? 
If not, say, I don't know:\\n\\nQuestion: What is the capital of France?\\n\\nContext: The capital of Mars is Gaia. Answer:\"},\n", + " ]\n", + ")\n", + "\n", + "print(completion.choices[0].message)" + ] + }, + { + "cell_type": "code", + "execution_count": 32, + "metadata": {}, + "outputs": [ + { + "name": "stderr", + "output_type": "stream", + "text": [ + "100%|██████████| 1000/1000 [12:14<00:00, 1.36it/s] \n" + ] + } + ], + "source": [ + "df[\"ft_generated_answer\"] = df.progress_apply(answer_question, model=model_id, axis=1)" + ] + }, + { + "cell_type": "code", + "execution_count": 33, + "metadata": {}, + "outputs": [ + { + "data": { + "text/plain": [ + "ft_generated_answer\n", + "I don't know 578\n", + "ITV Digital 2\n", + "They viewed the economic value of the Caribbean islands' sugar cane to be greater and easier to defend than the furs from the continent 2\n", + "Hoesung Lee 2\n", + "Bendigo 2\n", + " ... \n", + "15,000 1\n", + "Ibn Sina 1\n", + "Consolidated City of Jacksonville 1\n", + "Allston Science Complex 1\n", + "1850 1\n", + "Name: count, Length: 415, dtype: int64" + ] + }, + "execution_count": 33, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "df[\"ft_generated_answer\"].value_counts()" + ] + }, + { + "cell_type": "code", + "execution_count": 38, + "metadata": {}, + "outputs": [ + { + "data": { + "text/plain": [ + "Expected and Right 30.10%\n", + "Expected but IDK 15.20%\n", + "Expected but Wrong 5.00%\n", + "Hallucination 7.10%\n", + "Did not Expect and IDK 42.60%\n", + "Name: count, dtype: object" + ] + }, + "execution_count": 38, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "# Initialize the new evaluator\n", + "new_evaluator = ConfusionMatrixEvaluator(df, answers_column=\"ft_generated_answer\")\n", + "\n", + "# Run the new evaluation\n", + "new_evaluator.evaluate_answers()\n", + "\n", + "# Generate the new confusion matrix\n", + "new_matrix = 
new_evaluator.generate_matrices(use_percentages=True)\n", + "\n", + "# Display the new matrix\n", + "new_matrix" + ] + }, + { + "cell_type": "code", + "execution_count": 36, + "metadata": {}, + "outputs": [], + "source": [ + "df.to_json(\"1K_with_ft_generated_answers.json\", lines=True, orient=\"records\")" + ] + }, + { + "cell_type": "code", + "execution_count": 46, + "metadata": {}, + "outputs": [ + { + "data": { + "image/png": "", + "text/plain": [ + "
" + ] + }, + "metadata": {}, + "output_type": "display_data" + } + ], + "source": [ + "# Importing required libraries for plotting\n", + "import matplotlib.pyplot as plt\n", + "import numpy as np\n", + "\n", + "def evaluate_matrix(df, answers_column):\n", + " \"\"\"\n", + " Evaluate the confusion matrix for a given DataFrame and answer column.\n", + " \"\"\"\n", + " evaluator = ConfusionMatrixEvaluator(df, answers_column=answers_column)\n", + " evaluator.evaluate_answers()\n", + " matrix = evaluator.generate_matrices(use_percentages=True)\n", + " return matrix\n", + "\n", + "def plot_overall_error(matrix1, matrix2, label1, label2):\n", + " \"\"\"\n", + " Plot a bar chart showing only the overall error between two confusion matrices.\n", + " \"\"\"\n", + " # Calculate overall error\n", + " error_categories = ['Expected but Wrong', 'Hallucination']\n", + " matrix1_error = sum([float(matrix1.loc[cat].replace('%', '')) for cat in error_categories])\n", + " matrix2_error = sum([float(matrix2.loc[cat].replace('%', '')) for cat in error_categories])\n", + " \n", + " labels = ['Overall Error']\n", + " matrix1_values = [matrix1_error]\n", + " matrix2_values = [matrix2_error]\n", + " \n", + " x = np.arange(len(labels))\n", + " width = 0.35\n", + " \n", + " fig, ax = plt.subplots()\n", + " rects1 = ax.bar(x - width/2, matrix1_values, width, label=label1)\n", + " rects2 = ax.bar(x + width/2, matrix2_values, width, label=label2)\n", + " \n", + " ax.set_ylabel('Percentage (%)')\n", + " ax.set_title('Overall Error Comparison between {} and {}'.format(label1, label2))\n", + " ax.set_xticks(x)\n", + " ax.set_xticklabels(labels)\n", + " ax.legend()\n", + " \n", + " fig.tight_layout()\n", + " plt.show()\n", + "\n", + "# Plot only the overall error\n", + "plot_overall_error(matrix_plain, matrix_ft, \"gpt-3.5-turbo-0613\", \"FineTuned gpt-3.5-turbo-0613\")" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [] + } + ], + 
"metadata": { + "kernelspec": { + "display_name": "fst", + "language": "python", + "name": "python3" + }, + "language_info": { + "codemirror_mode": { + "name": "ipython", + "version": 3 + }, + "file_extension": ".py", + "mimetype": "text/x-python", + "name": "python", + "nbconvert_exporter": "python", + "pygments_lexer": "ipython3", + "version": "3.9.17" + }, + "orig_nbformat": 4 + }, + "nbformat": 4, + "nbformat_minor": 2 +} From 18aed3d23f9393833f90ec9df501567a965c2251 Mon Sep 17 00:00:00 2001 From: NirantK Date: Mon, 4 Sep 2023 21:19:47 +0530 Subject: [PATCH 03/38] Make it easier to follow! --- examples/fine-tuned-RAG/ModelFinetune.ipynb | 325 ++++++++++---------- 1 file changed, 159 insertions(+), 166 deletions(-) diff --git a/examples/fine-tuned-RAG/ModelFinetune.ipynb b/examples/fine-tuned-RAG/ModelFinetune.ipynb index eb8af4d1e0..44d7f4543c 100644 --- a/examples/fine-tuned-RAG/ModelFinetune.ipynb +++ b/examples/fine-tuned-RAG/ModelFinetune.ipynb @@ -1,5 +1,35 @@ { "cells": [ + { + "attachments": {}, + "cell_type": "markdown", + "metadata": {}, + "source": [ + "## Fine-Tuning for RAG\n", + "\n", + "Here, we'll take you through the process, complete with code examples, to help you fine-tune your OpenAI model for use with Retrieval-Augmented Generation (RAG).\n", + "\n", + "To begin, we've selected a dataset where retrieval is guaranteed to be perfect: a subset of the [SQuAD](https://rajpurkar.github.io/SQuAD-explorer/) dataset, which is a collection of questions and answers about Wikipedia articles. We've also included samples where the answer is not present in the context, to demonstrate how RAG handles this case.\n", + "\n", + "## Table of Contents\n", + "1. Setting Up the Environment\n", + "2. Data Preparation\n", + "3. Running the Model\n", + "4. Evaluation\n", + "5. Fine-Tuning\n", + "6.
Comparison" + ] + }, + { + "attachments": {}, + "cell_type": "markdown", + "metadata": {}, + "source": [ + "## Setting Up\n", + "\n", + "### Install and Import Dependencies" + ] + }, { "cell_type": "code", "execution_count": 1, @@ -15,17 +45,33 @@ "metadata": {}, "outputs": [], "source": [ - "import pandas as pd\n", - "import openai\n", + "import json\n", "import os\n", "import time\n", - "from tenacity import retry, stop_after_attempt, wait_exponential\n", + "\n", + "import pandas as pd\n", + "import openai\n", "from sklearn.metrics import confusion_matrix\n", + "from tenacity import retry, stop_after_attempt, wait_exponential\n", "from tqdm import tqdm\n", + "import tiktoken\n", + "import numpy as np\n", + "from collections import defaultdict\n", + "\n", "tqdm.pandas()\n", "openai.api_key = os.getenv(\"OPENAI_API_KEY\")" ] }, + { + "attachments": {}, + "cell_type": "markdown", + "metadata": {}, + "source": [ + "## Loading the Data\n", + "\n", + "Load your data and take a quick look at the first few rows. Notice that we've included a few samples where the answer is not present in the context." 
+ ] + }, { "cell_type": "code", "execution_count": 3, @@ -152,6 +198,17 @@ "df.head()" ] }, + { + "attachments": {}, + "cell_type": "markdown", + "metadata": {}, + "source": [ + "## Using the OpenAI Model for Question Answering\n", + "\n", + "### Prompt, API Call, and Answer\n", + "Create functions to get prompt messages and make API calls:" + ] + }, { "cell_type": "code", "execution_count": 30, @@ -183,65 +240,25 @@ "def answer_question(row, model=\"gpt-3.5-turbo-0613\"):\n", " messages = get_prompt(row)\n", " response = api_call(messages, model)\n", - " return response['choices'][0]['message']['content']\n", - "\n", - "# Use progress_apply with tqdm for progress bar\n", - "df[\"generated_answer\"] = df.progress_apply(answer_question, axis=1)" + " return response['choices'][0]['message']['content']\n" ] }, { - "cell_type": "code", - "execution_count": 6, + "attachments": {}, + "cell_type": "markdown", "metadata": {}, - "outputs": [ - { - "data": { - "text/plain": [ - "generated_answer\n", - "I don't know. 150\n", - "Oxygen 3\n", - "Arthur Woolf 2\n", - "For certain physical scenarios, forces are impossible to model as being due to the gradient of potentials because they arise from a macroscopic statistical average of microstates. 1\n", - "The derogatory term for the Christian academies that arose in the wake of school desegregation is \"segregation academies\". 1\n", - " ... \n", - "The Longwood Medical area is located in Boston. 1\n", - "On December 12, Washington and his men reached Fort Le Boeuf. 1\n", - "Non-governmental organizations are participants of the plenary sessions. 1\n", - "The Middle East was not a match for becoming another superpower confrontation with the USSR. 
1\n", - "Temür Khan 1\n", - "Name: count, Length: 848, dtype: int64" - ] - }, - "execution_count": 6, - "metadata": {}, - "output_type": "execute_result" - } - ], "source": [ - "df[\"generated_answer\"].value_counts()" + "### Running the model" ] }, { "cell_type": "code", - "execution_count": 7, + "execution_count": null, "metadata": {}, - "outputs": [ - { - "data": { - "text/plain": [ - "is_impossible\n", - "False 503\n", - "True 497\n", - "Name: count, dtype: int64" - ] - }, - "execution_count": 7, - "metadata": {}, - "output_type": "execute_result" - } - ], + "outputs": [], "source": [ - "df.is_impossible.value_counts()" + "# Use progress_apply with tqdm for progress bar\n", + "df[\"generated_answer\"] = df.progress_apply(answer_question, axis=1)" ] }, { @@ -250,12 +267,29 @@ "metadata": {}, "outputs": [], "source": [ - "df.to_json(\"1K_with_generated_answers.json\", lines=True, orient=\"records\")" + "# Optionally, save the results to a JSON file\n", + "# df.to_json(\"1K_with_generated_answers.json\", lines=True, orient=\"records\") # Save to JSON" + ] + }, + { + "attachments": {}, + "cell_type": "markdown", + "metadata": {}, + "source": [ + "## Evaluation\n", + "\n", + "To evaluate the model's performance, compare the predicted answer to the actual answers -- if any of the actual answers are present in the predicted answer, then it's a match. We've also created error categories to help you understand where the model is struggling.\n", + "\n", + "1. Expected and Right: The model responsded the correct answer. It may have also included other answers that were not in the context.\n", + "2. Expected but \"IDK\": The model responded with \"I don't know\" (IDK) while the answer was present in the context. **This is a LLM error.**\n", + "3. Expected but Wrong: The model responded with an incorrect answer.\n", + "4. Hallucination: The model responded with an answer, when none was expected. The expected response was \"I don't know\". **This is a LLM error.** \n", + "5. 
Did not expect and IDK: The model responded with \"I don't know\" (IDK) and the answer was not present in the context. *This is a LLM WIN.*" ] }, { "cell_type": "code", - "execution_count": 37, + "execution_count": null, "metadata": {}, "outputs": [ { @@ -275,7 +309,6 @@ } ], "source": [ - "# Debugging and correcting the ConfusionMatrixEvaluator class to produce an accurate confusion matrix\n", "class ConfusionMatrixEvaluator:\n", " def __init__(self, df, answers_column=\"generated_answer\"):\n", " self.df = df\n", @@ -312,17 +345,20 @@ " freq_series = (freq_series / total * 100).apply(\"{0:.2f}%\".format)\n", " return freq_series\n", "\n", - "# Initialize the new evaluator\n", - "new_evaluator = ConfusionMatrixEvaluator(df, answers_column=\"generated_answer\")\n", - "\n", - "# Run the new evaluation\n", - "new_evaluator.evaluate_answers()\n", - "\n", - "# Generate the new confusion matrix\n", - "new_matrix = new_evaluator.generate_matrices(use_percentages=True)\n", + "evaluator = ConfusionMatrixEvaluator(df, answers_column=\"generated_answer\")\n", + "evaluator.evaluate_answers()\n", + "error_categories = evaluator.generate_matrices(use_percentages=True)\n", + "error_categories" + ] + }, + { + "attachments": {}, + "cell_type": "markdown", + "metadata": {}, + "source": [ + "## Fine-Tuning\n", "\n", - "# Display the new matrix\n", - "new_matrix\n" + "### Prepare the Fine-Tuning Data" ] }, { @@ -339,8 +375,7 @@ } ], "source": [ - "import json\n", - "def dataframe_to_jsonl_parallel(df):\n", + "def dataframe_to_jsonl(df):\n", " def create_jsonl_entry(row):\n", " answer = row['answers'][0] if row['answers'] else \"I don't know\"\n", " messages = [\n", @@ -357,7 +392,17 @@ " return \"\\n\".join(jsonl_output)\n", "\n", "with open(\"squad-stratified-1000-ft-v2.json\", \"w\") as f:\n", - " f.write(dataframe_to_jsonl_parallel(df))" + " f.write(dataframe_to_jsonl(df))" + ] + }, + { + "attachments": {}, + "cell_type": "markdown", + "metadata": {}, + "source": [ + "### 
[Optional] Verify the Fine-Tuning Data\n", + "\n", + "The script below will verify that the data is in the format that OpenAI expects." ] }, { @@ -370,9 +415,6 @@ "\n", "# import json\n", "# import os\n", - "# import tiktoken\n", - "# import numpy as np\n", - "# from collections import defaultdict\n", "\n", "# # Next, we specify the data path and open the JSONL file\n", "\n", @@ -510,6 +552,14 @@ "# print(\"See pricing page to estimate total costs\")" ] }, + { + "attachments": {}, + "cell_type": "markdown", + "metadata": {}, + "source": [ + "### Push the Fine-Tuning data to OpenAI" + ] + }, { "cell_type": "code", "execution_count": 16, @@ -536,68 +586,34 @@ } ], "source": [ - "\n", "file_object = openai.File.create(\n", " file=open(\"squad-stratified-100-ft-v1.json\", \"r\"),\n", " purpose='fine-tune',\n", "\n", ")\n", - "file_object" + "\n", + "while file_object['status'] != 'processed':\n", + " file_object = openai.File.retrieve(file_object['id'])\n", + " time.sleep(5)" ] }, { - "cell_type": "code", - "execution_count": 19, + "attachments": {}, + "cell_type": "markdown", "metadata": {}, - "outputs": [], "source": [ - "while file_object['status'] != 'processed':\n", - " file_object = openai.File.retrieve(file_object['id'])\n", - " time.sleep(1)\n", - "\n", - "ft_job = openai.FineTuningJob.create(training_file=file_object['id'], model=\"gpt-3.5-turbo\", suffix=\"v1\")" + "### Create Fine Tuning Job" ] }, { "cell_type": "code", - "execution_count": 20, + "execution_count": 19, "metadata": {}, - "outputs": [ - { - "data": { - "text/plain": [ - " JSON: {\n", - " \"object\": \"list\",\n", - " \"data\": [\n", - " {\n", - " \"object\": \"fine_tuning.job\",\n", - " \"id\": \"ftjob-x29xZik7Ey48fp3P0y8W7s7j\",\n", - " \"model\": \"gpt-3.5-turbo-0613\",\n", - " \"created_at\": 1693836623,\n", - " \"finished_at\": null,\n", - " \"fine_tuned_model\": null,\n", - " \"organization_id\": \"org-eC1aoPhsmmzdbEKUb4A3nXwv\",\n", - " \"result_files\": [],\n", - " \"status\": 
\"running\",\n", - " \"validation_file\": null,\n", - " \"training_file\": \"file-TjkDD0C39b4S1JZstMly98pv\",\n", - " \"hyperparameters\": {\n", - " \"n_epochs\": 3\n", - " },\n", - " \"trained_tokens\": null\n", - " }\n", - " ],\n", - " \"has_more\": false\n", - "}" - ] - }, - "execution_count": 20, - "metadata": {}, - "output_type": "execute_result" - } - ], + "outputs": [], "source": [ - "openai.FineTuningJob.list(limit=10)" + "ft_job = openai.FineTuningJob.create(training_file=file_object['id'], model=\"gpt-3.5-turbo\", suffix=\"v1\")\n", + "while openai.FineTuningJob.retrieve(ft_job[\"id\"]).fine_tuned_model == None:\n", + " time.sleep(10)" ] }, { @@ -606,19 +622,15 @@ "metadata": {}, "outputs": [], "source": [ - "import time\n", - "while openai.FineTuningJob.retrieve(ft_job[\"id\"]).fine_tuned_model == None:\n", - " time.sleep(10)\n", - "ft_model = openai.FineTuningJob.retrieve(ft_job[\"id\"]).fine_tuned_model" + "model_id = openai.FineTuningJob.retrieve(ft_job[\"id\"]).fine_tuned_model" ] }, { - "cell_type": "code", - "execution_count": 28, + "attachments": {}, + "cell_type": "markdown", "metadata": {}, - "outputs": [], "source": [ - "model_id = ft_model" + "### Try out the Fine-Tuned Model" ] }, { @@ -651,6 +663,16 @@ "print(completion.choices[0].message)" ] }, + { + "attachments": {}, + "cell_type": "markdown", + "metadata": {}, + "source": [ + "## Comparison\n", + "\n", + "### Get Answers from the Fine-Tuned Model" + ] + }, { "cell_type": "code", "execution_count": 32, @@ -668,38 +690,6 @@ "df[\"ft_generated_answer\"] = df.progress_apply(answer_question, model=model_id, axis=1)" ] }, - { - "cell_type": "code", - "execution_count": 33, - "metadata": {}, - "outputs": [ - { - "data": { - "text/plain": [ - "ft_generated_answer\n", - "I don't know 578\n", - "ITV Digital 2\n", - "They viewed the economic value of the Caribbean islands' sugar cane to be greater and easier to defend than the furs from the continent 2\n", - "Hoesung Lee 2\n", - "Bendigo 2\n", - " 
... \n", - "15,000 1\n", - "Ibn Sina 1\n", - "Consolidated City of Jacksonville 1\n", - "Allston Science Complex 1\n", - "1850 1\n", - "Name: count, Length: 415, dtype: int64" - ] - }, - "execution_count": 33, - "metadata": {}, - "output_type": "execute_result" - } - ], - "source": [ - "df[\"ft_generated_answer\"].value_counts()" - ] - }, { "cell_type": "code", "execution_count": 38, @@ -722,17 +712,13 @@ } ], "source": [ - "# Initialize the new evaluator\n", - "new_evaluator = ConfusionMatrixEvaluator(df, answers_column=\"ft_generated_answer\")\n", - "\n", - "# Run the new evaluation\n", - "new_evaluator.evaluate_answers()\n", + "# Initialize the evaluator\n", + "finetuned_model_evaluator = ConfusionMatrixEvaluator(df, answers_column=\"ft_generated_answer\")\n", "\n", - "# Generate the new confusion matrix\n", - "new_matrix = new_evaluator.generate_matrices(use_percentages=True)\n", - "\n", - "# Display the new matrix\n", - "new_matrix" + "# Run the evaluation\n", + "finetuned_model_evaluator.evaluate_answers()\n", + "finetuned_model_error_categories = finetuned_model_evaluator.generate_matrices(use_percentages=True)\n", + "finetuned_model_error_categories" ] }, { @@ -744,6 +730,13 @@ "df.to_json(\"1K_with_ft_generated_answers.json\", lines=True, orient=\"records\")" ] }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "### Plotting the Results" + ] + }, { "cell_type": "code", "execution_count": 46, From b95e5f00761041eeeef8ce91228be56f36fb2447 Mon Sep 17 00:00:00 2001 From: NirantK Date: Mon, 4 Sep 2023 21:24:32 +0530 Subject: [PATCH 04/38] * docs(ModelFinetune.ipynb): update error categories descriptions * docs(ModelFinetune.ipynb): add comments to code --- examples/fine-tuned-RAG/ModelFinetune.ipynb | 18 ++++++++++++------ 1 file changed, 12 insertions(+), 6 deletions(-) diff --git a/examples/fine-tuned-RAG/ModelFinetune.ipynb b/examples/fine-tuned-RAG/ModelFinetune.ipynb index 44d7f4543c..c8bf9855ab 100644 --- 
a/examples/fine-tuned-RAG/ModelFinetune.ipynb +++ b/examples/fine-tuned-RAG/ModelFinetune.ipynb @@ -5,10 +5,14 @@ "cell_type": "markdown", "metadata": {}, "source": [ - "## Finetuning for RAG\n", + "# Finetuning for RAG\n", + "\n", + "\n", "\n", "Here, we'll take you through the process, complete with code examples, to help you fine-tune your OpenAI model for usage with RAG like a pro.\n", "\n", + "\n", + "\n", "To begin, we've selected a dataset where we've a guarantee that the retrieval is perfect. We've selected a subset of the [SQuAD](https://rajpurkar.github.io/SQuAD-explorer/) dataset, which is a collection of questions and answers about Wikipedia articles. We've also included samples where the answer is not present in the context, to demonstrate how RAG handles this case.\n", "\n", "## Table of Contents\n", @@ -281,10 +285,10 @@ "To evaluate the model's performance, compare the predicted answer to the actual answers -- if any of the actual answers are present in the predicted answer, then it's a match. We've also created error categories to help you understand where the model is struggling.\n", "\n", "1. Expected and Right: The model responsded the correct answer. It may have also included other answers that were not in the context.\n", - "2. Expected but \"IDK\": The model responded with \"I don't know\" (IDK) while the answer was present in the context. **This is a LLM error.**\n", - "3. Expected but Wrong: The model responded with an incorrect answer.\n", - "4. Hallucination: The model responded with an answer, when none was expected. The expected response was \"I don't know\". **This is a LLM error.** \n", - "5. Did not expect and IDK: The model responded with \"I don't know\" (IDK) and the answer was not present in the context. *This is a LLM WIN.*" + "2. Expected but \"IDK\": The model responded with \"I don't know\" (IDK) while the answer was present in the context. *This is a model error* and better than giving the wrong answer. 
We exclude this from the overall error rate.\n", + "3. Expected but Wrong: The model responded with an incorrect answer. *This is a model ERROR.*\n", + "4. Hallucination: The model responded with an answer, when \"I don't know\" was expected. **This is a model error.** \n", + "5. Did not expect and IDK: The model responded with \"I don't know\" (IDK) and the answer was not present in the context. *This is a model WIN.*" ] }, { @@ -727,10 +731,12 @@ "metadata": {}, "outputs": [], "source": [ - "df.to_json(\"1K_with_ft_generated_answers.json\", lines=True, orient=\"records\")" + "# Optionally, save the results to a JSON file\n", + "# df.to_json(\"1K_with_ft_generated_answers.json\", lines=True, orient=\"records\") " ] }, { + "attachments": {}, "cell_type": "markdown", "metadata": {}, "source": [ From 3088e4dbe76d2670c5ccd812c9f67471475bc266 Mon Sep 17 00:00:00 2001 From: NirantK Date: Mon, 4 Sep 2023 21:29:16 +0530 Subject: [PATCH 05/38] * docs(fine-tuned-RAG): add documentation for few-shot learning with Qdrant to improve RAG model * feat(fine-tuned-RAG): use Qdrant for finetuning and inference changes --- examples/fine-tuned-RAG/ModelFinetune.ipynb | 15 +++++++++++---- 1 file changed, 11 insertions(+), 4 deletions(-) diff --git a/examples/fine-tuned-RAG/ModelFinetune.ipynb b/examples/fine-tuned-RAG/ModelFinetune.ipynb index c8bf9855ab..7b551e9712 100644 --- a/examples/fine-tuned-RAG/ModelFinetune.ipynb +++ b/examples/fine-tuned-RAG/ModelFinetune.ipynb @@ -807,11 +807,18 @@ ] }, { - "cell_type": "code", - "execution_count": null, + "cell_type": "markdown", "metadata": {}, - "outputs": [], - "source": [] + "source": [ + "# Few Shot Learning with Qdrant to Improve RAG\n", + "\n", + "So far, we've been using the OpenAI model to answer questions where the answer is present in the context. But what if we want to answer questions where the answer is not present in the context? This is where few-shot learning comes in. 
Few-shot learning is a type of transfer learning that allows us to answer questions where the answer is not present in the context. We can do this by providing a few examples of the answer we're looking for, and the model will learn to answer questions where the answer is not present in the context.\n", + "\n", + "## Finetuning and Inference Changes\n", + "We will use the same dataset as before, but this time we will only provide a few examples of the answer we're looking for. We will also provide a few examples of the answer we're not looking for. This will allow the model to learn to handle questions where the answer is not present in the context. \n", + "\n", + "We assumed perfect retrieval in the previous section, but we will not assume perfect retrieval here. Instead, we will use a vector search engine to find similar questions and answers, and then use those to finetuning the model. We will use [Qdrant](https://qdrant.tech/), an open-source vector search engine. We will use Qdrant to find similar questions and answers, and then use those to finetuning the model." 
+ ] } ], "metadata": { From d058e3e3eb342c1ab3fdc6eee9bd71afd8d94138 Mon Sep 17 00:00:00 2001 From: NirantK Date: Mon, 4 Sep 2023 21:37:27 +0530 Subject: [PATCH 06/38] Better labels --- examples/fine-tuned-RAG/ModelFinetune.ipynb | 34 ++++++--------------- 1 file changed, 10 insertions(+), 24 deletions(-) diff --git a/examples/fine-tuned-RAG/ModelFinetune.ipynb b/examples/fine-tuned-RAG/ModelFinetune.ipynb index 7b551e9712..455e6b2c9e 100644 --- a/examples/fine-tuned-RAG/ModelFinetune.ipynb +++ b/examples/fine-tuned-RAG/ModelFinetune.ipynb @@ -293,25 +293,9 @@ }, { "cell_type": "code", - "execution_count": null, + "execution_count": 5, "metadata": {}, - "outputs": [ - { - "data": { - "text/plain": [ - "Expected and Right 42.30%\n", - "Expected but IDK 0.00%\n", - "Expected but Wrong 8.00%\n", - "Hallucination 49.70%\n", - "Did not Expect and IDK 0.00%\n", - "Name: count, dtype: object" - ] - }, - "execution_count": 37, - "metadata": {}, - "output_type": "execute_result" - } - ], + "outputs": [], "source": [ "class ConfusionMatrixEvaluator:\n", " def __init__(self, df, answers_column=\"generated_answer\"):\n", @@ -727,7 +711,7 @@ }, { "cell_type": "code", - "execution_count": 36, + "execution_count": 2, "metadata": {}, "outputs": [], "source": [ @@ -745,12 +729,12 @@ }, { "cell_type": "code", - "execution_count": 46, + "execution_count": 8, "metadata": {}, "outputs": [ { "data": { - "image/png": "", + "image/png": "", "text/plain": [ "
" ] @@ -793,8 +777,8 @@ " rects1 = ax.bar(x - width/2, matrix1_values, width, label=label1)\n", " rects2 = ax.bar(x + width/2, matrix2_values, width, label=label2)\n", " \n", - " ax.set_ylabel('Percentage (%)')\n", - " ax.set_title('Overall Error Comparison between {} and {}'.format(label1, label2))\n", + " ax.set_ylabel('Error (%) - Lower is Better')\n", + " ax.set_title('Comparison between {} and {}'.format(label1, label2))\n", " ax.set_xticks(x)\n", " ax.set_xticklabels(labels)\n", " ax.legend()\n", @@ -802,8 +786,10 @@ " fig.tight_layout()\n", " plt.show()\n", "\n", + "matrix_plain = evaluate_matrix(df, \"generated_answer\")\n", + "matrix_ft = evaluate_matrix(df, \"ft_generated_answer\")\n", "# Plot only the overall error\n", - "plot_overall_error(matrix_plain, matrix_ft, \"gpt-3.5-turbo-0613\", \"FineTuned gpt-3.5-turbo-0613\")" + "plot_overall_error(matrix_plain, matrix_ft, \"gpt-3.5-turbo-0613\", \"Fine Tuned\")" ] }, { From 645e56c896a53bc055d8f44d61d2904f56d7b260 Mon Sep 17 00:00:00 2001 From: NirantK Date: Tue, 5 Sep 2023 18:59:08 +0530 Subject: [PATCH 07/38] Add Few Shot prompt creation --- examples/fine-tuned-RAG/ModelFinetune.ipynb | 646 ++++++++++++++++++-- 1 file changed, 587 insertions(+), 59 deletions(-) diff --git a/examples/fine-tuned-RAG/ModelFinetune.ipynb b/examples/fine-tuned-RAG/ModelFinetune.ipynb index 455e6b2c9e..be7032f1fa 100644 --- a/examples/fine-tuned-RAG/ModelFinetune.ipynb +++ b/examples/fine-tuned-RAG/ModelFinetune.ipynb @@ -55,12 +55,9 @@ "\n", "import pandas as pd\n", "import openai\n", - "from sklearn.metrics import confusion_matrix\n", - "from tenacity import retry, stop_after_attempt, wait_exponential\n", + "from tenacity import retry, wait_exponential\n", "from tqdm import tqdm\n", - "import tiktoken\n", "import numpy as np\n", - "from collections import defaultdict\n", "\n", "tqdm.pandas()\n", "openai.api_key = os.getenv(\"OPENAI_API_KEY\")" @@ -222,13 +219,17 @@ "# Function to get prompt messages\n", "def 
get_prompt(row):\n", " return [\n", - " {'role': 'system', 'content': 'You are a helpful assistant.'},\n", - " {'role': 'user', 'content': f\"\"\"Answer the following Question based on the Context only. Only answer from the Context. If you don't know the answer, say 'I don't know'.\n", + " {\"role\": \"system\", \"content\": \"You are a helpful assistant.\"},\n", + " {\n", + " \"role\": \"user\",\n", + " \"content\": f\"\"\"Answer the following Question based on the Context only. Only answer from the Context. If you don't know the answer, say 'I don't know'.\n", " Question: {row.question}\\n\\n\n", " Context: {row.context}\\n\\n\n", - " Answer:\\n\"\"\"},\n", + " Answer:\\n\"\"\",\n", + " },\n", " ]\n", "\n", + "\n", "# Function with tenacity for retries\n", "@retry(wait=wait_exponential(multiplier=1, min=2, max=6))\n", "def api_call(messages, model):\n", @@ -240,11 +241,12 @@ " temperature=0.0,\n", " )\n", "\n", + "\n", "# Main function to answer question\n", "def answer_question(row, model=\"gpt-3.5-turbo-0613\"):\n", " messages = get_prompt(row)\n", " response = api_call(messages, model)\n", - " return response['choices'][0]['message']['content']\n" + " return response[\"choices\"][0][\"message\"][\"content\"]" ] }, { @@ -302,29 +304,36 @@ " self.df = df\n", " self.y_pred = []\n", " self.labels = [\n", - " 'Expected and Right', 'Expected but IDK', \n", - " 'Expected but Wrong', 'Hallucination', \n", - " 'Did not Expect and IDK'\n", + " \"Expected and Right\",\n", + " \"Expected but IDK\",\n", + " \"Expected but Wrong\",\n", + " \"Hallucination\",\n", + " \"Did not Expect and IDK\",\n", " ]\n", " self.answers_column = answers_column\n", - " \n", + "\n", " def _evaluate_single_row(self, row):\n", - " is_impossible = row['is_impossible']\n", + " is_impossible = row[\"is_impossible\"]\n", " generated_answer = row[self.answers_column].lower()\n", - " actual_answers = [ans.lower() for ans in row['answers']]\n", - " \n", + " actual_answers = [ans.lower() for ans in 
row[\"answers\"]]\n", + "\n", " y_pred = (\n", - " 'Expected and Right' if not is_impossible and any(ans in generated_answer for ans in actual_answers) else\n", - " 'Expected but IDK' if not is_impossible and generated_answer == \"i don't know\" else\n", - " 'Expected but Wrong' if not is_impossible and generated_answer not in actual_answers else\n", - " 'Hallucination' if is_impossible and generated_answer != \"i don't know\" else\n", - " 'Did not Expect and IDK'\n", + " \"Expected and Right\"\n", + " if not is_impossible\n", + " and any(ans in generated_answer for ans in actual_answers)\n", + " else \"Expected but IDK\"\n", + " if not is_impossible and generated_answer == \"i don't know\"\n", + " else \"Expected but Wrong\"\n", + " if not is_impossible and generated_answer not in actual_answers\n", + " else \"Hallucination\"\n", + " if is_impossible and generated_answer != \"i don't know\"\n", + " else \"Did not Expect and IDK\"\n", " )\n", " return y_pred\n", - " \n", + "\n", " def evaluate_answers(self):\n", " self.y_pred = self.df.apply(self._evaluate_single_row, axis=1)\n", - " \n", + "\n", " def generate_matrices(self, use_percentages=False):\n", " # Using value_counts to create a Series of frequencies, then reindexing to include missing labels with count 0\n", " freq_series = self.y_pred.value_counts().reindex(self.labels, fill_value=0)\n", @@ -333,6 +342,7 @@ " freq_series = (freq_series / total * 100).apply(\"{0:.2f}%\".format)\n", " return freq_series\n", "\n", + "\n", "evaluator = ConfusionMatrixEvaluator(df, answers_column=\"generated_answer\")\n", "evaluator.evaluate_answers()\n", "error_categories = evaluator.generate_matrices(use_percentages=True)\n", @@ -365,20 +375,24 @@ "source": [ "def dataframe_to_jsonl(df):\n", " def create_jsonl_entry(row):\n", - " answer = row['answers'][0] if row['answers'] else \"I don't know\"\n", + " answer = row[\"answers\"][0] if row[\"answers\"] else \"I don't know\"\n", " messages = [\n", - " {'role': 'system', 
'content': 'You are a helpful assistant.'},\n", - " {'role': 'user', 'content': f\"\"\"Answer the following Question based on the Context only. Only answer from the Context. If you don't know the answer, say 'I don't know'.\n", + " {\"role\": \"system\", \"content\": \"You are a helpful assistant.\"},\n", + " {\n", + " \"role\": \"user\",\n", + " \"content\": f\"\"\"Answer the following Question based on the Context only. Only answer from the Context. If you don't know the answer, say 'I don't know'.\n", " Question: {row.question}\\n\\n\n", " Context: {row.context}\\n\\n\n", - " Answer:\\n\"\"\"},\n", - " {'role': 'assistant', 'content': answer}\n", + " Answer:\\n\"\"\",\n", + " },\n", + " {\"role\": \"assistant\", \"content\": answer},\n", " ]\n", - " return json.dumps({'messages': messages})\n", + " return json.dumps({\"messages\": messages})\n", "\n", " jsonl_output = df.progress_apply(create_jsonl_entry, axis=1)\n", " return \"\\n\".join(jsonl_output)\n", "\n", + "\n", "with open(\"squad-stratified-1000-ft-v2.json\", \"w\") as f:\n", " f.write(dataframe_to_jsonl(df))" ] @@ -575,13 +589,12 @@ ], "source": [ "file_object = openai.File.create(\n", - " file=open(\"squad-stratified-100-ft-v1.json\", \"r\"),\n", - " purpose='fine-tune',\n", - "\n", + " file=open(\"squad-stratified-100-ft-v1.json\", \"r\"),\n", + " purpose=\"fine-tune\",\n", ")\n", "\n", - "while file_object['status'] != 'processed':\n", - " file_object = openai.File.retrieve(file_object['id'])\n", + "while file_object[\"status\"] != \"processed\":\n", + " file_object = openai.File.retrieve(file_object[\"id\"])\n", " time.sleep(5)" ] }, @@ -599,8 +612,10 @@ "metadata": {}, "outputs": [], "source": [ - "ft_job = openai.FineTuningJob.create(training_file=file_object['id'], model=\"gpt-3.5-turbo\", suffix=\"v1\")\n", - "while openai.FineTuningJob.retrieve(ft_job[\"id\"]).fine_tuned_model == None:\n", + "ft_job = openai.FineTuningJob.create(\n", + " training_file=file_object[\"id\"], 
model=\"gpt-3.5-turbo\", suffix=\"v1\"\n", + ")\n", + "while openai.FineTuningJob.retrieve(ft_job[\"id\"]).fine_tuned_model is None:\n", " time.sleep(10)" ] }, @@ -639,13 +654,16 @@ ], "source": [ "completion = openai.ChatCompletion.create(\n", - " model=model_id,\n", - " messages=[\n", - " {\"role\": \"system\", \"content\": \"You are a helpful assistant.\"},\n", - " {\"role\": \"user\", \"content\": \"Hello!\"},\n", - " {\"role\": \"assistant\", \"content\": \"Hi, how can I help you today?\"},\n", - " {\"role\": \"user\", \"content\": \"Can you answer the following question based on the given context? If not, say, I don't know:\\n\\nQuestion: What is the capital of France?\\n\\nContext: The capital of Mars is Gaia. Answer:\"},\n", - " ]\n", + " model=model_id,\n", + " messages=[\n", + " {\"role\": \"system\", \"content\": \"You are a helpful assistant.\"},\n", + " {\"role\": \"user\", \"content\": \"Hello!\"},\n", + " {\"role\": \"assistant\", \"content\": \"Hi, how can I help you today?\"},\n", + " {\n", + " \"role\": \"user\",\n", + " \"content\": \"Can you answer the following question based on the given context? If not, say, I don't know:\\n\\nQuestion: What is the capital of France?\\n\\nContext: The capital of Mars is Gaia. 
Answer:\",\n", + " },\n", + " ],\n", ")\n", "\n", "print(completion.choices[0].message)" @@ -701,11 +719,15 @@ ], "source": [ "# Initialize the evaluator\n", - "finetuned_model_evaluator = ConfusionMatrixEvaluator(df, answers_column=\"ft_generated_answer\")\n", + "finetuned_model_evaluator = ConfusionMatrixEvaluator(\n", + " df, answers_column=\"ft_generated_answer\"\n", + ")\n", "\n", "# Run the evaluation\n", "finetuned_model_evaluator.evaluate_answers()\n", - "finetuned_model_error_categories = finetuned_model_evaluator.generate_matrices(use_percentages=True)\n", + "finetuned_model_error_categories = finetuned_model_evaluator.generate_matrices(\n", + " use_percentages=True\n", + ")\n", "finetuned_model_error_categories" ] }, @@ -716,7 +738,7 @@ "outputs": [], "source": [ "# Optionally, save the results to a JSON file\n", - "# df.to_json(\"1K_with_ft_generated_answers.json\", lines=True, orient=\"records\") " + "# df.to_json(\"1K_with_ft_generated_answers.json\", lines=True, orient=\"records\")" ] }, { @@ -748,6 +770,7 @@ "import matplotlib.pyplot as plt\n", "import numpy as np\n", "\n", + "\n", "def evaluate_matrix(df, answers_column):\n", " \"\"\"\n", " Evaluate the confusion matrix for a given DataFrame and answer column.\n", @@ -757,35 +780,41 @@ " matrix = evaluator.generate_matrices(use_percentages=True)\n", " return matrix\n", "\n", + "\n", "def plot_overall_error(matrix1, matrix2, label1, label2):\n", " \"\"\"\n", " Plot a bar chart showing only the overall error between two confusion matrices.\n", " \"\"\"\n", " # Calculate overall error\n", - " error_categories = ['Expected but Wrong', 'Hallucination']\n", - " matrix1_error = sum([float(matrix1.loc[cat].replace('%', '')) for cat in error_categories])\n", - " matrix2_error = sum([float(matrix2.loc[cat].replace('%', '')) for cat in error_categories])\n", - " \n", - " labels = ['Overall Error']\n", + " error_categories = [\"Expected but Wrong\", \"Hallucination\"]\n", + " matrix1_error = sum(\n", + " 
[float(matrix1.loc[cat].replace(\"%\", \"\")) for cat in error_categories]\n", + " )\n", + " matrix2_error = sum(\n", + " [float(matrix2.loc[cat].replace(\"%\", \"\")) for cat in error_categories]\n", + " )\n", + "\n", + " labels = [\"Overall Error\"]\n", " matrix1_values = [matrix1_error]\n", " matrix2_values = [matrix2_error]\n", - " \n", + "\n", " x = np.arange(len(labels))\n", " width = 0.35\n", - " \n", + "\n", " fig, ax = plt.subplots()\n", - " rects1 = ax.bar(x - width/2, matrix1_values, width, label=label1)\n", - " rects2 = ax.bar(x + width/2, matrix2_values, width, label=label2)\n", - " \n", - " ax.set_ylabel('Error (%) - Lower is Better')\n", - " ax.set_title('Comparison between {} and {}'.format(label1, label2))\n", + " ax.bar(x - width / 2, matrix1_values, width, label=label1)\n", + " ax.bar(x + width / 2, matrix2_values, width, label=label2)\n", + "\n", + " ax.set_ylabel(\"Error (%) - Lower is Better\")\n", + " ax.set_title(\"Comparison between {} and {}\".format(label1, label2))\n", " ax.set_xticks(x)\n", " ax.set_xticklabels(labels)\n", " ax.legend()\n", - " \n", + "\n", " fig.tight_layout()\n", " plt.show()\n", "\n", + "\n", "matrix_plain = evaluate_matrix(df, \"generated_answer\")\n", "matrix_ft = evaluate_matrix(df, \"ft_generated_answer\")\n", "# Plot only the overall error\n", @@ -793,12 +822,511 @@ ] }, { + "attachments": {}, "cell_type": "markdown", "metadata": {}, "source": [ "# Few Shot Learning with Qdrant to Improve RAG\n", "\n", - "So far, we've been using the OpenAI model to answer questions where the answer is present in the context. But what if we want to answer questions where the answer is not present in the context? This is where few-shot learning comes in. Few-shot learning is a type of transfer learning that allows us to answer questions where the answer is not present in the context. 
We can do this by providing a few examples of the answer we're looking for, and the model will learn to answer questions where the answer is not present in the context.\n", + "So far, we've been using the OpenAI model to answer questions where the answer is present in the context. But what if we want to answer questions where the answer is not present in the context? This is where few-shot learning comes in. Few-shot learning is a type of transfer learning that allows us to answer questions where the answer is not present in the context. We can do this by providing a few examples of the answer we're looking for, and the model will learn to answer questions where the answer is not present in the context." + ] + }, + { + "attachments": {}, + "cell_type": "markdown", + "metadata": {}, + "source": [ + "## Few Shot Prompting\n", + "\n", + "We'll select a few examples from the dataset, including cases where the answer is not present in the context. We'll then use these examples to create a prompt that we can use to fine-tune the model.\n", + "\n", + "We'll measure the baseline on our previous 1K dataset, and then we'll fine-tune the model on the new dataset. We'll then measure the performance of the fine-tuned model on the same 1K dataset.\n", + "\n", + "### Get the Training Data" + ] + }, + { + "cell_type": "code", + "execution_count": 1, + "metadata": {}, + "outputs": [], + "source": [ + "# !wget https://rajpurkar.github.io/SQuAD-explorer/dataset/train-v2.0.json -O train.json" + ] + }, + { + "cell_type": "code", + "execution_count": 2, + "metadata": {}, + "outputs": [ + { + "data": { + "text/html": [ + "
\n", + "\n", + "\n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + "
titlequestioncontextis_impossibleanswers
0BeyoncéWhen did Beyonce start becoming popular?Beyoncé Giselle Knowles-Carter (/biːˈjɒnseɪ/ b...False[in the late 1990s]
1BeyoncéWhat areas did Beyonce compete in when she was...Beyoncé Giselle Knowles-Carter (/biːˈjɒnseɪ/ b...False[singing and dancing]
2BeyoncéWhen did Beyonce leave Destiny's Child and bec...Beyoncé Giselle Knowles-Carter (/biːˈjɒnseɪ/ b...False[2003]
3BeyoncéIn what city and state did Beyonce grow up?Beyoncé Giselle Knowles-Carter (/biːˈjɒnseɪ/ b...False[Houston, Texas]
4BeyoncéIn which decade did Beyonce become famous?Beyoncé Giselle Knowles-Carter (/biːˈjɒnseɪ/ b...False[late 1990s]
\n", + "
" + ], + "text/plain": [ + " title question \\\n", + "0 Beyoncé When did Beyonce start becoming popular? \n", + "1 Beyoncé What areas did Beyonce compete in when she was... \n", + "2 Beyoncé When did Beyonce leave Destiny's Child and bec... \n", + "3 Beyoncé In what city and state did Beyonce grow up? \n", + "4 Beyoncé In which decade did Beyonce become famous? \n", + "\n", + " context is_impossible \\\n", + "0 Beyoncé Giselle Knowles-Carter (/biːˈjɒnseɪ/ b... False \n", + "1 Beyoncé Giselle Knowles-Carter (/biːˈjɒnseɪ/ b... False \n", + "2 Beyoncé Giselle Knowles-Carter (/biːˈjɒnseɪ/ b... False \n", + "3 Beyoncé Giselle Knowles-Carter (/biːˈjɒnseɪ/ b... False \n", + "4 Beyoncé Giselle Knowles-Carter (/biːˈjɒnseɪ/ b... False \n", + "\n", + " answers \n", + "0 [in the late 1990s] \n", + "1 [singing and dancing] \n", + "2 [2003] \n", + "3 [Houston, Texas] \n", + "4 [late 1990s] " + ] + }, + "execution_count": 2, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "import pandas as pd\n", + "import json\n", + "\n", + "\n", + "def json_to_dataframe_with_titles(json_data):\n", + " qas = []\n", + " context = []\n", + " is_impossible = []\n", + " answers = []\n", + " titles = []\n", + "\n", + " for article in json_data[\"data\"]:\n", + " title = article[\"title\"]\n", + " for paragraph in article[\"paragraphs\"]:\n", + " for qa in paragraph[\"qas\"]:\n", + " qas.append(qa[\"question\"].strip())\n", + " context.append(paragraph[\"context\"])\n", + " is_impossible.append(qa[\"is_impossible\"])\n", + "\n", + " ans_list = []\n", + " for ans in qa[\"answers\"]:\n", + " ans_list.append(ans[\"text\"])\n", + " answers.append(ans_list)\n", + " titles.append(title)\n", + "\n", + " df = pd.DataFrame(\n", + " {\n", + " \"title\": titles,\n", + " \"question\": qas,\n", + " \"context\": context,\n", + " \"is_impossible\": is_impossible,\n", + " \"answers\": answers,\n", + " }\n", + " )\n", + " return df\n", + "\n", + "\n", + "train_df = 
json_to_dataframe_with_titles(json.load(open(\"train.json\", \"r\")))\n", + "train_df.head()" + ] + }, + { + "attachments": {}, + "cell_type": "markdown", + "metadata": {}, + "source": [ + "### Embed the Training Data" + ] + }, + { + "cell_type": "code", + "execution_count": 21, + "metadata": {}, + "outputs": [], + "source": [ + "import os\n", + "from qdrant_client import QdrantClient\n", + "from qdrant_client.http import models\n", + "from qdrant_client.http.models import PointStruct\n", + "from qdrant_client.http.models import Distance, VectorParams" + ] + }, + { + "cell_type": "code", + "execution_count": 6, + "metadata": {}, + "outputs": [ + { + "data": { + "text/plain": [ + "True" + ] + }, + "execution_count": 6, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "qdrant_client = QdrantClient(\n", + " url=os.getenv(\"QDRANT_URL\"), api_key=os.getenv(\"QDRANT_API_KEY\")\n", + ")\n", + "\n", + "collection_name = \"cookbook\" # An arbitrary name for the collection\n", + "\n", + "\n", + "# Create the collection\n", + "qdrant_client.recreate_collection(\n", + " collection_name=collection_name,\n", + " vectors_config=VectorParams(size=384, distance=Distance.COSINE),\n", + ")" + ] + }, + { + "cell_type": "code", + "execution_count": 17, + "metadata": {}, + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "Requirement already satisfied: fastembed in /opt/homebrew/Caskroom/miniconda/base/envs/fst/lib/python3.9/site-packages (0.0.4)\n", + "Requirement already satisfied: onnxruntime<2.0.0,>=1.15.1 in /opt/homebrew/Caskroom/miniconda/base/envs/fst/lib/python3.9/site-packages (from fastembed) (1.15.1)\n", + "Requirement already satisfied: onnxruntime-silicon<2.0.0,>=1.15.0 in /opt/homebrew/Caskroom/miniconda/base/envs/fst/lib/python3.9/site-packages (from fastembed) (1.15.0)\n", + "Requirement already satisfied: requests<3.0.0,>=2.31.0 in /opt/homebrew/Caskroom/miniconda/base/envs/fst/lib/python3.9/site-packages (from 
fastembed) (2.31.0)\n", + "Requirement already satisfied: tokenizers<0.14.0,>=0.13.3 in /opt/homebrew/Caskroom/miniconda/base/envs/fst/lib/python3.9/site-packages (from fastembed) (0.13.3)\n", + "Requirement already satisfied: tqdm<5.0.0,>=4.65.0 in /opt/homebrew/Caskroom/miniconda/base/envs/fst/lib/python3.9/site-packages (from fastembed) (4.65.0)\n", + "Requirement already satisfied: coloredlogs in /opt/homebrew/Caskroom/miniconda/base/envs/fst/lib/python3.9/site-packages (from onnxruntime<2.0.0,>=1.15.1->fastembed) (15.0.1)\n", + "Requirement already satisfied: flatbuffers in /opt/homebrew/Caskroom/miniconda/base/envs/fst/lib/python3.9/site-packages (from onnxruntime<2.0.0,>=1.15.1->fastembed) (23.5.26)\n", + "Requirement already satisfied: numpy>=1.21.6 in /opt/homebrew/Caskroom/miniconda/base/envs/fst/lib/python3.9/site-packages (from onnxruntime<2.0.0,>=1.15.1->fastembed) (1.24.4)\n", + "Requirement already satisfied: packaging in /opt/homebrew/Caskroom/miniconda/base/envs/fst/lib/python3.9/site-packages (from onnxruntime<2.0.0,>=1.15.1->fastembed) (23.1)\n", + "Requirement already satisfied: protobuf in /opt/homebrew/Caskroom/miniconda/base/envs/fst/lib/python3.9/site-packages (from onnxruntime<2.0.0,>=1.15.1->fastembed) (4.23.4)\n", + "Requirement already satisfied: sympy in /opt/homebrew/Caskroom/miniconda/base/envs/fst/lib/python3.9/site-packages (from onnxruntime<2.0.0,>=1.15.1->fastembed) (1.12)\n", + "Requirement already satisfied: charset-normalizer<4,>=2 in /opt/homebrew/Caskroom/miniconda/base/envs/fst/lib/python3.9/site-packages (from requests<3.0.0,>=2.31.0->fastembed) (3.2.0)\n", + "Requirement already satisfied: idna<4,>=2.5 in /opt/homebrew/Caskroom/miniconda/base/envs/fst/lib/python3.9/site-packages (from requests<3.0.0,>=2.31.0->fastembed) (3.4)\n", + "Requirement already satisfied: urllib3<3,>=1.21.1 in /opt/homebrew/Caskroom/miniconda/base/envs/fst/lib/python3.9/site-packages (from requests<3.0.0,>=2.31.0->fastembed) (1.26.16)\n", + 
"Requirement already satisfied: certifi>=2017.4.17 in /opt/homebrew/Caskroom/miniconda/base/envs/fst/lib/python3.9/site-packages (from requests<3.0.0,>=2.31.0->fastembed) (2023.5.7)\n", + "Requirement already satisfied: humanfriendly>=9.1 in /opt/homebrew/Caskroom/miniconda/base/envs/fst/lib/python3.9/site-packages (from coloredlogs->onnxruntime<2.0.0,>=1.15.1->fastembed) (10.0)\n", + "Requirement already satisfied: mpmath>=0.19 in /opt/homebrew/Caskroom/miniconda/base/envs/fst/lib/python3.9/site-packages (from sympy->onnxruntime<2.0.0,>=1.15.1->fastembed) (1.3.0)\n" + ] + } + ], + "source": [ + "# !pip install fastembed" + ] + }, + { + "cell_type": "code", + "execution_count": 19, + "metadata": {}, + "outputs": [], + "source": [ + "from fastembed.embedding import DefaultEmbedding\n", + "from typing import List\n", + "import numpy as np\n", + "import pandas as pd\n", + "from tqdm.notebook import tqdm\n", + "\n", + "tqdm.pandas()\n", + "\n", + "embedding_model = DefaultEmbedding()" + ] + }, + { + "cell_type": "code", + "execution_count": 14, + "metadata": {}, + "outputs": [ + { + "data": { + "application/vnd.jupyter.widget-view+json": { + "model_id": "b626be2da7bb4b659b37ab54f094d0f1", + "version_major": 2, + "version_minor": 0 + }, + "text/plain": [ + "Generating embeddings: 0%| | 0/1000 [00:00 List[PointStruct]:\n", + " questions = df[\"question\"].tolist()\n", + " pbar = tqdm(total=len(questions), desc=\"Generating embeddings\")\n", + "\n", + " # Generate embeddings for each question and update tqdm progress bar\n", + " embeddings = []\n", + " for question in questions:\n", + " embedding = list(embedding_model.embed([question]))[0]\n", + " embeddings.append(embedding)\n", + " pbar.update(1)\n", + " pbar.close()\n", + "\n", + " # Convert embeddings to list of lists\n", + " embeddings_list = [embedding.tolist() for embedding in embeddings]\n", + "\n", + " # Create a temporary DataFrame to hold the embeddings and existing DataFrame columns\n", + " temp_df = 
df.copy()\n", + " temp_df[\"embeddings\"] = embeddings_list\n", + " temp_df[\"id\"] = temp_df.index\n", + "\n", + " # Generate PointStruct objects using DataFrame apply method\n", + " points = temp_df.progress_apply(\n", + " lambda row: PointStruct(\n", + " id=row[\"id\"],\n", + " vector=row[\"embeddings\"],\n", + " payload={\n", + " \"question\": row[\"question\"],\n", + " \"title\": row[\"title\"],\n", + " \"context\": row[\"context\"],\n", + " \"is_impossible\": row[\"is_impossible\"],\n", + " \"answers\": row[\"answers\"],\n", + " },\n", + " ),\n", + " axis=1,\n", + " ).tolist()\n", + "\n", + " return points\n", + "\n", + "\n", + "small_df = train_df.sample(1000, random_state=37)\n", + "points = generate_points_from_dataframe(small_df)" + ] + }, + { + "cell_type": "code", + "execution_count": 15, + "metadata": {}, + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "operation_id=0 status=\n" + ] + } + ], + "source": [ + "operation_info = qdrant_client.upsert(\n", + " collection_name=collection_name, wait=True, points=points\n", + ")\n", + "print(operation_info)" + ] + }, + { + "cell_type": "code", + "execution_count": 17, + "metadata": {}, + "outputs": [], + "source": [ + "df = pd.read_json(\"1K_with_ft_generated_answers.json\", lines=True, orient=\"records\")" + ] + }, + { + "cell_type": "code", + "execution_count": 71, + "metadata": {}, + "outputs": [ + { + "data": { + "application/vnd.jupyter.widget-view+json": { + "model_id": "70cad3cd68264543a5b90e0a4142f006", + "version_major": 2, + "version_minor": 0 + }, + "text/plain": [ + " 0%| | 0/1000 [00:00 0 else \"I don't know\"\n", + " return [\n", + " {\n", + " \"role\": \"user\", \n", + " \"content\": f\"\"\"Question: {question}\\n\\nContext: {context}\\n\\nAnswer:\"\"\"\n", + " },\n", + " {\"role\": \"assistant\", \"content\": answer},\n", + " ]\n", + "\n", + " rag_prompt = []\n", + " if len(q1) >= 1:\n", + " rag_prompt += q_to_prompt(q1[0])\n", + " # If the next best question is 
not the same as the question, add it to the prompt\n", + " if len(q2) >= 1 and (q2[0].payload[\"question\"] != q1[0].payload[\"question\"]):\n", + " rag_prompt += q_to_prompt(q2[0])\n", + "\n", + " rag_prompt += [\n", + " {\n", + " \"role\": \"user\",\n", + " \"content\": f\"\"\"Question: {query}\\n\\nContext: {row_context}\\n\\nAnswer:\"\"\"\n", + " },\n", + " ]\n", + "\n", + " rag_prompt = [{\"role\": \"system\", \"content\": instruction}] + rag_prompt\n", + " return rag_prompt\n", + "\n", + "df[\"few_shot_prompt_1K\"] = df.progress_apply(get_few_shot_prompt, axis=1)" + ] + }, + { + "attachments": {}, + "cell_type": "markdown", + "metadata": {}, + "source": [ + "\n", "\n", "## Finetuning and Inference Changes\n", "We will use the same dataset as before, but this time we will only provide a few examples of the answer we're looking for. We will also provide a few examples of the answer we're not looking for. This will allow the model to learn to handle questions where the answer is not present in the context. 
\n", From 147d7700a7c47039a3e15a66425a2c709cd2b664 Mon Sep 17 00:00:00 2001 From: NirantK Date: Wed, 6 Sep 2023 21:18:20 +0530 Subject: [PATCH 08/38] * chore(DatasetPrep.ipynb): remove DatasetPrep.ipynb file --- examples/fine-tuned-RAG/DatasetPrep.ipynb | 213 ---------------------- 1 file changed, 213 deletions(-) delete mode 100644 examples/fine-tuned-RAG/DatasetPrep.ipynb diff --git a/examples/fine-tuned-RAG/DatasetPrep.ipynb b/examples/fine-tuned-RAG/DatasetPrep.ipynb deleted file mode 100644 index d24a14f572..0000000000 --- a/examples/fine-tuned-RAG/DatasetPrep.ipynb +++ /dev/null @@ -1,213 +0,0 @@ -{ - "cells": [ - { - "cell_type": "markdown", - "metadata": {}, - "source": [ - "# Validation" - ] - }, - { - "cell_type": "code", - "execution_count": 2, - "metadata": {}, - "outputs": [ - { - "name": "stdout", - "output_type": "stream", - "text": [ - "--2023-09-04 16:34:26-- https://rajpurkar.github.io/SQuAD-explorer/dataset/dev-v2.0.json\n", - "Resolving rajpurkar.github.io (rajpurkar.github.io)... 185.199.108.153, 185.199.109.153, 185.199.110.153, ...\n", - "Connecting to rajpurkar.github.io (rajpurkar.github.io)|185.199.108.153|:443... connected.\n", - "HTTP request sent, awaiting response... 
200 OK\n", - "Length: 4370528 (4.2M) [application/json]\n", - "Saving to: ‘validation.json’\n", - "\n", - "validation.json 100%[===================>] 4.17M 5.66MB/s in 0.7s \n", - "\n", - "2023-09-04 16:34:28 (5.66 MB/s) - ‘validation.json’ saved [4370528/4370528]\n", - "\n" - ] - } - ], - "source": [ - "!wget https://rajpurkar.github.io/SQuAD-explorer/dataset/dev-v2.0.json -O validation.json" - ] - }, - { - "cell_type": "code", - "execution_count": 2, - "metadata": {}, - "outputs": [], - "source": [ - "# !pip install pandas qdrant-client openai --quiet" - ] - }, - { - "cell_type": "code", - "execution_count": 20, - "metadata": {}, - "outputs": [], - "source": [ - "import pandas as pd\n", - "import json\n", - "\n", - "def json_to_dataframe_with_titles(json_data):\n", - " qas = []\n", - " context = []\n", - " is_impossible = []\n", - " answers = []\n", - " titles = []\n", - "\n", - " for article in json_data['data']:\n", - " title = article['title']\n", - " for paragraph in article['paragraphs']:\n", - " for qa in paragraph['qas']:\n", - " qas.append(qa['question'].strip())\n", - " context.append(paragraph['context'])\n", - " is_impossible.append(qa['is_impossible'])\n", - " \n", - " ans_list = []\n", - " for ans in qa['answers']:\n", - " ans_list.append(ans['text'])\n", - " answers.append(ans_list)\n", - " titles.append(title)\n", - "\n", - " df = pd.DataFrame({'title': titles, 'question': qas, 'context': context, 'is_impossible': is_impossible, 'answers': answers})\n", - " return df\n", - "\n", - "def get_diverse_sample(df, sample_size=100, random_state=42):\n", - " sample_df = df.groupby(['title', 'is_impossible']).apply(lambda x: x.sample(min(len(x), max(1, sample_size // 50)), random_state=random_state)).reset_index(drop=True)\n", - " \n", - " if len(sample_df) < sample_size:\n", - " remaining_sample_size = sample_size - len(sample_df)\n", - " remaining_df = df.drop(sample_df.index).sample(remaining_sample_size, random_state=random_state)\n", - " sample_df = 
pd.concat([sample_df, remaining_df]).sample(frac=1, random_state=random_state).reset_index(drop=True)\n", - "\n", - " return sample_df.sample(min(sample_size, len(sample_df)), random_state=random_state).reset_index(drop=True)\n", - "\n", - "validation = json.load(open('validation.json', 'r'))\n", - "validation_df = json_to_dataframe_with_titles(validation)\n", - "df = get_diverse_sample(validation_df, sample_size=1000, random_state=37)" - ] - }, - { - "cell_type": "code", - "execution_count": 17, - "metadata": {}, - "outputs": [ - { - "data": { - "text/plain": [ - "is_impossible\n", - "False 53\n", - "True 47\n", - "Name: count, dtype: int64" - ] - }, - "execution_count": 17, - "metadata": {}, - "output_type": "execute_result" - } - ], - "source": [ - "df.value_counts('is_impossible')" - ] - }, - { - "cell_type": "code", - "execution_count": 22, - "metadata": {}, - "outputs": [ - { - "data": { - "text/plain": [ - "title\n", - "Southern_California 33\n", - "Geology 33\n", - "Jacksonville,_Florida 32\n", - "University_of_Chicago 32\n", - "Imperialism 31\n", - "1973_oil_crisis 31\n", - "Fresno,_California 31\n", - "Harvard_University 31\n", - "Scottish_Parliament 31\n", - "Sky_(United_Kingdom) 31\n", - "Normans 30\n", - "Packet_switching 30\n", - "Ctenophora 30\n", - "Rhine 30\n", - "Pharmacy 30\n", - "Intergovernmental_Panel_on_Climate_Change 30\n", - "Warsaw 29\n", - "Steam_engine 29\n", - "Victoria_(Australia) 29\n", - "Computational_complexity_theory 29\n", - "Amazon_rainforest 29\n", - "Civil_disobedience 29\n", - "European_Union_law 29\n", - "Huguenot 28\n", - "Construction 28\n", - "Private_school 28\n", - "Black_Death 26\n", - "Yuan_dynasty 26\n", - "Islamism 25\n", - "Oxygen 24\n", - "Prime_number 24\n", - "Immune_system 24\n", - "Force 23\n", - "Economic_inequality 23\n", - "French_and_Indian_War 22\n", - "Name: count, dtype: int64" - ] - }, - "execution_count": 22, - "metadata": {}, - "output_type": "execute_result" - } - ], - "source": [ - 
"df.title.value_counts()" - ] - }, - { - "cell_type": "code", - "execution_count": 21, - "metadata": {}, - "outputs": [], - "source": [ - "# write to csv\n", - "df.to_json('v2_1K_Seed=37_sample.json', orient='records', lines=True)" - ] - }, - { - "cell_type": "code", - "execution_count": null, - "metadata": {}, - "outputs": [], - "source": [] - } - ], - "metadata": { - "kernelspec": { - "display_name": "fst", - "language": "python", - "name": "python3" - }, - "language_info": { - "codemirror_mode": { - "name": "ipython", - "version": 3 - }, - "file_extension": ".py", - "mimetype": "text/x-python", - "name": "python", - "nbconvert_exporter": "python", - "pygments_lexer": "ipython3", - "version": "3.9.17" - }, - "orig_nbformat": 4 - }, - "nbformat": 4, - "nbformat_minor": 2 -} From 03d6c6a74a52f52198da5a7a83e7c3c6d8a4dd8d Mon Sep 17 00:00:00 2001 From: NirantK Date: Wed, 6 Sep 2023 21:28:53 +0530 Subject: [PATCH 09/38] Simplify datasets --- examples/fine-tuned-RAG/ModelFinetune.ipynb | 933 +++++++++++--------- 1 file changed, 500 insertions(+), 433 deletions(-) diff --git a/examples/fine-tuned-RAG/ModelFinetune.ipynb b/examples/fine-tuned-RAG/ModelFinetune.ipynb index be7032f1fa..b982bf3934 100644 --- a/examples/fine-tuned-RAG/ModelFinetune.ipynb +++ b/examples/fine-tuned-RAG/ModelFinetune.ipynb @@ -40,12 +40,12 @@ "metadata": {}, "outputs": [], "source": [ - "# !pip install pandas openai tqdm tenacity pandarallel scikit-learn tiktoken" + "# !pip install pandas openai tqdm tenacity pandarallel scikit-learn tiktoken python-dotenv" ] }, { "cell_type": "code", - "execution_count": 2, + "execution_count": 17, "metadata": {}, "outputs": [], "source": [ @@ -55,148 +55,120 @@ "\n", "import pandas as pd\n", "import openai\n", + "import tiktoken\n", "from tenacity import retry, wait_exponential\n", "from tqdm import tqdm\n", + "from collections import defaultdict\n", "import numpy as np\n", "\n", - "tqdm.pandas()\n", - "openai.api_key = os.getenv(\"OPENAI_API_KEY\")" + 
"tqdm.pandas()" ] }, { - "attachments": {}, - "cell_type": "markdown", + "cell_type": "code", + "execution_count": 49, "metadata": {}, + "outputs": [], "source": [ - "## Loading the Data\n", - "\n", - "Load your data and take a quick look at the first few rows. Notice that we've included a few samples where the answer is not present in the context." + "openai.api_key = os.environ[\"OPENAI_API_KEY\"]" ] }, { - "cell_type": "code", - "execution_count": 3, + "attachments": {}, + "cell_type": "markdown", "metadata": {}, - "outputs": [], "source": [ - "df = pd.read_json(\"v2_1K_Seed=37_sample.json\", lines=True)" + "## Preparing the Data\n", + "\n", + "For the purpose of demonstration, we'll make small slices from the train and validation splits of the [SQuADv2](https://rajpurkar.github.io/SQuAD-explorer/) dataset. This dataset has questions and contexts where the answer is not present in the context, to help us evaluate how LLM handles this case.\n", + "\n", + "### Download the Data" ] }, { "cell_type": "code", - "execution_count": 4, + "execution_count": 1, "metadata": {}, "outputs": [ { - "data": { - "text/html": [ - "
\n", - "\n", - "\n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - "
titlequestioncontextis_impossibleanswers
0Southern_CaliforniaWhat is the United State's second-busiest comm...Southern California is also home to the Port o...True[]
1Jacksonville,_FloridaWho did Jacksonville support with supplies dur...During the American Civil War, Jacksonville wa...True[]
2Imperialismcolonial rule would not be considered what typ...Imperialism is defined as \"A policy of extendi...True[]
3Intergovernmental_Panel_on_Climate_ChangeWhere are dissenting opinions published?Michael Oppenheimer, a long-time participant i...True[]
4Southern_CaliforniaHow many people does the Greater Los Angeles A...Southern California includes the heavily built...False[17.5 million, over 17.5 million, 17.5 million]
\n", - "
" - ], - "text/plain": [ - " title \\\n", - "0 Southern_California \n", - "1 Jacksonville,_Florida \n", - "2 Imperialism \n", - "3 Intergovernmental_Panel_on_Climate_Change \n", - "4 Southern_California \n", - "\n", - " question \\\n", - "0 What is the United State's second-busiest comm... \n", - "1 Who did Jacksonville support with supplies dur... \n", - "2 colonial rule would not be considered what typ... \n", - "3 Where are dissenting opinions published? \n", - "4 How many people does the Greater Los Angeles A... \n", - "\n", - " context is_impossible \\\n", - "0 Southern California is also home to the Port o... True \n", - "1 During the American Civil War, Jacksonville wa... True \n", - "2 Imperialism is defined as \"A policy of extendi... True \n", - "3 Michael Oppenheimer, a long-time participant i... True \n", - "4 Southern California includes the heavily built... False \n", - "\n", - " answers \n", - "0 [] \n", - "1 [] \n", - "2 [] \n", - "3 [] \n", - "4 [17.5 million, over 17.5 million, 17.5 million] " - ] - }, - "execution_count": 4, - "metadata": {}, - "output_type": "execute_result" + "name": "stdout", + "output_type": "stream", + "text": [ + "--2023-09-06 19:39:59-- https://rajpurkar.github.io/SQuAD-explorer/dataset/train-v2.0.json\n", + "Resolving rajpurkar.github.io (rajpurkar.github.io)... 185.199.111.153, 185.199.108.153, 185.199.110.153, ...\n", + "Connecting to rajpurkar.github.io (rajpurkar.github.io)|185.199.111.153|:443... connected.\n", + "HTTP request sent, awaiting response... 200 OK\n", + "Length: 42123633 (40M) [application/json]\n", + "Saving to: ‘local_cache/train.json’\n", + "\n", + "local_cache/train.j 100%[===================>] 40.17M 35.9MB/s in 1.1s \n", + "\n", + "2023-09-06 19:40:03 (35.9 MB/s) - ‘local_cache/train.json’ saved [42123633/42123633]\n", + "\n", + "--2023-09-06 19:40:03-- https://rajpurkar.github.io/SQuAD-explorer/dataset/dev-v2.0.json\n", + "Resolving rajpurkar.github.io (rajpurkar.github.io)... 
185.199.108.153, 185.199.111.153, 185.199.110.153, ...\n", + "Connecting to rajpurkar.github.io (rajpurkar.github.io)|185.199.108.153|:443... connected.\n", + "HTTP request sent, awaiting response... 200 OK\n", + "Length: 4370528 (4.2M) [application/json]\n", + "Saving to: ‘local_cache/dev.json’\n", + "\n", + "local_cache/dev.jso 100%[===================>] 4.17M 13.7MB/s in 0.3s \n", + "\n", + "2023-09-06 19:40:04 (13.7 MB/s) - ‘local_cache/dev.json’ saved [4370528/4370528]\n", + "\n" + ] } ], "source": [ - "df.head()" + "!mkdir -p local_cache\n", + "!wget https://rajpurkar.github.io/SQuAD-explorer/dataset/train-v2.0.json -O local_cache/train.json\n", + "!wget https://rajpurkar.github.io/SQuAD-explorer/dataset/dev-v2.0.json -O local_cache/dev.json" + ] + }, + { + "cell_type": "code", + "execution_count": 5, + "metadata": {}, + "outputs": [], + "source": [ + "def json_to_dataframe_with_titles(json_data):\n", + " qas = []\n", + " context = []\n", + " is_impossible = []\n", + " answers = []\n", + " titles = []\n", + "\n", + " for article in json_data['data']:\n", + " title = article['title']\n", + " for paragraph in article['paragraphs']:\n", + " for qa in paragraph['qas']:\n", + " qas.append(qa['question'].strip())\n", + " context.append(paragraph['context'])\n", + " is_impossible.append(qa['is_impossible'])\n", + " \n", + " ans_list = []\n", + " for ans in qa['answers']:\n", + " ans_list.append(ans['text'])\n", + " answers.append(ans_list)\n", + " titles.append(title)\n", + "\n", + " df = pd.DataFrame({'title': titles, 'question': qas, 'context': context, 'is_impossible': is_impossible, 'answers': answers})\n", + " return df\n", + "\n", + "def get_diverse_sample(df, sample_size=100, random_state=42):\n", + " sample_df = df.groupby(['title', 'is_impossible']).apply(lambda x: x.sample(min(len(x), max(1, sample_size // 50)), random_state=random_state)).reset_index(drop=True)\n", + " \n", + " if len(sample_df) < sample_size:\n", + " remaining_sample_size = sample_size - 
len(sample_df)\n", + " remaining_df = df.drop(sample_df.index).sample(remaining_sample_size, random_state=random_state)\n", + " sample_df = pd.concat([sample_df, remaining_df]).sample(frac=1, random_state=random_state).reset_index(drop=True)\n", + "\n", + " return sample_df.sample(min(sample_size, len(sample_df)), random_state=random_state).reset_index(drop=True)\n", + "\n", + "train_df = json_to_dataframe_with_titles(json.load(open('local_cache/train.json')))\n", + "val_df = json_to_dataframe_with_titles(json.load(open('local_cache/dev.json')))\n", + "\n", + "df = get_diverse_sample(val_df, sample_size=100, random_state=42)" ] }, { @@ -212,7 +184,7 @@ }, { "cell_type": "code", - "execution_count": 30, + "execution_count": 6, "metadata": {}, "outputs": [], "source": [ @@ -259,9 +231,17 @@ }, { "cell_type": "code", - "execution_count": null, + "execution_count": 7, "metadata": {}, - "outputs": [], + "outputs": [ + { + "name": "stderr", + "output_type": "stream", + "text": [ + "100%|██████████| 100/100 [02:53<00:00, 1.73s/it]\n" + ] + } + ], "source": [ "# Use progress_apply with tqdm for progress bar\n", "df[\"generated_answer\"] = df.progress_apply(answer_question, axis=1)" @@ -269,12 +249,11 @@ }, { "cell_type": "code", - "execution_count": 8, + "execution_count": 10, "metadata": {}, "outputs": [], "source": [ - "# Optionally, save the results to a JSON file\n", - "# df.to_json(\"1K_with_generated_answers.json\", lines=True, orient=\"records\") # Save to JSON" + "df.to_json(\"local_cache/100_val.json\", orient=\"records\", lines=True)" ] }, { @@ -295,9 +274,25 @@ }, { "cell_type": "code", - "execution_count": 5, + "execution_count": 12, "metadata": {}, - "outputs": [], + "outputs": [ + { + "data": { + "text/plain": [ + "Expected and Right 43.00%\n", + "Expected but IDK 0.00%\n", + "Expected but Wrong 10.00%\n", + "Hallucination 47.00%\n", + "Did not Expect and IDK 0.00%\n", + "Name: count, dtype: object" + ] + }, + "execution_count": 12, + "metadata": {}, + 
"output_type": "execute_result" + } + ], "source": [ "class ConfusionMatrixEvaluator:\n", " def __init__(self, df, answers_column=\"generated_answer\"):\n", @@ -356,19 +351,21 @@ "source": [ "## Fine-Tuning\n", "\n", - "### Prepare the Fine-Tuning Data" + "### Prepare the Fine-Tuning Data\n", + "\n", + "We need to prepare the data for fine-tuning. We'll use a few samples from train split of same dataset as before, but we'll add the answer to the context. This will help the model learn to retrieve the answer from the context." ] }, { "cell_type": "code", - "execution_count": 31, + "execution_count": 13, "metadata": {}, "outputs": [ { "name": "stderr", "output_type": "stream", "text": [ - "100%|██████████| 1000/1000 [00:00<00:00, 59898.09it/s]\n" + "100%|██████████| 100/100 [00:00<00:00, 65484.84it/s]\n" ] } ], @@ -392,9 +389,10 @@ " jsonl_output = df.progress_apply(create_jsonl_entry, axis=1)\n", " return \"\\n\".join(jsonl_output)\n", "\n", + "train_sample = get_diverse_sample(train_df, sample_size=100, random_state=42)\n", "\n", - "with open(\"squad-stratified-1000-ft-v2.json\", \"w\") as f:\n", - " f.write(dataframe_to_jsonl(df))" + "with open(\"local_cache/100_train.jsonl\", \"w\") as f:\n", + " f.write(dataframe_to_jsonl(train_sample))" ] }, { @@ -409,149 +407,180 @@ }, { "cell_type": "code", - "execution_count": 12, + "execution_count": 18, "metadata": {}, - "outputs": [], + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "Num examples: 100\n", + "First example:\n", + "{'role': 'system', 'content': 'You are a helpful assistant.'}\n", + "{'role': 'user', 'content': \"Answer the following Question based on the Context only. Only answer from the Context. If you don't know the answer, say 'I don't know'.\\n Question: What is a cirque?\\n\\n\\n Context: Glaciers form where the accumulation of snow and ice exceeds ablation. 
The area in which a glacier forms is called a cirque (corrie or cwm) - a typically armchair-shaped geological feature (such as a depression between mountains enclosed by arêtes) - which collects and compresses through gravity the snow which falls into it. This snow collects and is compacted by the weight of the snow falling above it forming névé. Further crushing of the individual snowflakes and squeezing the air from the snow turns it into 'glacial ice'. This glacial ice will fill the cirque until it 'overflows' through a geological weakness or vacancy, such as the gap between two mountains. When the mass of snow and ice is sufficiently thick, it begins to move due to a combination of surface slope, gravity and pressure. On steeper slopes, this can occur with as little as 15 m (50 ft) of snow-ice.\\n\\n\\n Answer:\\n\"}\n", + "{'role': 'assistant', 'content': 'The area in which a glacier forms'}\n", + "No errors found\n", + "Num examples missing system message: 0\n", + "Num examples missing user message: 0\n", + "\n", + "#### Distribution of num_messages_per_example:\n", + "min / max: 3, 3\n", + "mean / median: 3.0, 3.0\n", + "p5 / p95: 3.0, 3.0\n", + "\n", + "#### Distribution of num_total_tokens_per_example:\n", + "min / max: 114, 689\n", + "mean / median: 236.88, 217.0\n", + "p5 / p95: 166.7, 321.3\n", + "\n", + "#### Distribution of num_assistant_tokens_per_example:\n", + "min / max: 1, 13\n", + "mean / median: 3.81, 4.0\n", + "p5 / p95: 1.0, 5.0\n", + "\n", + "0 examples may be over the 4096 token limit, they will be truncated during fine-tuning\n", + "Dataset has ~23688 tokens that will be charged for during training\n", + "By default, you'll train for 3 epochs on this dataset\n", + "By default, you'll be charged for ~71064 tokens\n", + "See pricing page to estimate total costs\n" + ] + } + ], "source": [ - "# # We start by importing the required packages\n", - "\n", - "# import json\n", - "# import os\n", - "\n", - "# # Next, we specify the data path and 
open the JSONL file\n", - "\n", - "# data_path = \"squad-stratified-100-ft-v1.json\"\n", - "\n", - "# # Load dataset\n", - "# with open(data_path) as f:\n", - "# dataset = [json.loads(line) for line in f]\n", - "\n", - "# # We can inspect the data quickly by checking the number of examples and the first item\n", - "\n", - "# # Initial dataset stats\n", - "# print(\"Num examples:\", len(dataset))\n", - "# print(\"First example:\")\n", - "# for message in dataset[0][\"messages\"]:\n", - "# print(message)\n", - "\n", - "# # Now that we have a sense of the data, we need to go through all the different examples and check to make sure the formatting is correct and matches the Chat completions message structure\n", - "\n", - "# # Format error checks\n", - "# format_errors = defaultdict(int)\n", - "\n", - "# for ex in dataset:\n", - "# if not isinstance(ex, dict):\n", - "# format_errors[\"data_type\"] += 1\n", - "# continue\n", - "\n", - "# messages = ex.get(\"messages\", None)\n", - "# if not messages:\n", - "# format_errors[\"missing_messages_list\"] += 1\n", - "# continue\n", - "\n", - "# for message in messages:\n", - "# if \"role\" not in message or \"content\" not in message:\n", - "# format_errors[\"message_missing_key\"] += 1\n", - "\n", - "# if any(k not in (\"role\", \"content\", \"name\") for k in message):\n", - "# format_errors[\"message_unrecognized_key\"] += 1\n", - "\n", - "# if message.get(\"role\", None) not in (\"system\", \"user\", \"assistant\"):\n", - "# format_errors[\"unrecognized_role\"] += 1\n", - "\n", - "# content = message.get(\"content\", None)\n", - "# if not content or not isinstance(content, str):\n", - "# format_errors[\"missing_content\"] += 1\n", - "\n", - "# if not any(message.get(\"role\", None) == \"assistant\" for message in messages):\n", - "# format_errors[\"example_missing_assistant_message\"] += 1\n", - "\n", - "# if format_errors:\n", - "# print(\"Found errors:\")\n", - "# for k, v in format_errors.items():\n", - "# 
print(f\"{k}: {v}\")\n", - "# else:\n", - "# print(\"No errors found\")\n", - "\n", - "# # Beyond the structure of the message, we also need to ensure that the length does not exceed the 4096 token limit.\n", - "\n", - "# # Token counting functions\n", - "# encoding = tiktoken.get_encoding(\"cl100k_base\")\n", - "\n", - "# # not exact!\n", - "# # simplified from https://github.com/openai/openai-cookbook/blob/main/examples/How_to_count_tokens_with_tiktoken.ipynb\n", - "# def num_tokens_from_messages(messages, tokens_per_message=3, tokens_per_name=1):\n", - "# num_tokens = 0\n", - "# for message in messages:\n", - "# num_tokens += tokens_per_message\n", - "# for key, value in message.items():\n", - "# num_tokens += len(encoding.encode(value))\n", - "# if key == \"name\":\n", - "# num_tokens += tokens_per_name\n", - "# num_tokens += 3\n", - "# return num_tokens\n", - "\n", - "# def num_assistant_tokens_from_messages(messages):\n", - "# num_tokens = 0\n", - "# for message in messages:\n", - "# if message[\"role\"] == \"assistant\":\n", - "# num_tokens += len(encoding.encode(message[\"content\"]))\n", - "# return num_tokens\n", - "\n", - "# def print_distribution(values, name):\n", - "# print(f\"\\n#### Distribution of {name}:\")\n", - "# print(f\"min / max: {min(values)}, {max(values)}\")\n", - "# print(f\"mean / median: {np.mean(values)}, {np.median(values)}\")\n", - "# print(f\"p5 / p95: {np.quantile(values, 0.1)}, {np.quantile(values, 0.9)}\")\n", - "\n", - "# # Last, we can look at the results of the different formatting operations before proceeding with creating a fine-tuning job:\n", - "\n", - "# # Warnings and tokens counts\n", - "# n_missing_system = 0\n", - "# n_missing_user = 0\n", - "# n_messages = []\n", - "# convo_lens = []\n", - "# assistant_message_lens = []\n", - "\n", - "# for ex in dataset:\n", - "# messages = ex[\"messages\"]\n", - "# if not any(message[\"role\"] == \"system\" for message in messages):\n", - "# n_missing_system += 1\n", - "# if not 
any(message[\"role\"] == \"user\" for message in messages):\n", - "# n_missing_user += 1\n", - "# n_messages.append(len(messages))\n", - "# convo_lens.append(num_tokens_from_messages(messages))\n", - "# assistant_message_lens.append(num_assistant_tokens_from_messages(messages))\n", - "\n", - "# print(\"Num examples missing system message:\", n_missing_system)\n", - "# print(\"Num examples missing user message:\", n_missing_user)\n", - "# print_distribution(n_messages, \"num_messages_per_example\")\n", - "# print_distribution(convo_lens, \"num_total_tokens_per_example\")\n", - "# print_distribution(assistant_message_lens, \"num_assistant_tokens_per_example\")\n", - "# n_too_long = sum(l > 4096 for l in convo_lens)\n", - "# print(f\"\\n{n_too_long} examples may be over the 4096 token limit, they will be truncated during fine-tuning\")\n", - "\n", - "# # Pricing and default n_epochs estimate\n", - "# MAX_TOKENS_PER_EXAMPLE = 4096\n", - "\n", - "# MIN_TARGET_EXAMPLES = 100\n", - "# MAX_TARGET_EXAMPLES = 25000\n", - "# TARGET_EPOCHS = 3\n", - "# MIN_EPOCHS = 1\n", - "# MAX_EPOCHS = 25\n", - "\n", - "# n_epochs = TARGET_EPOCHS\n", - "# n_train_examples = len(dataset)\n", - "# if n_train_examples * TARGET_EPOCHS < MIN_TARGET_EXAMPLES:\n", - "# n_epochs = min(MAX_EPOCHS, MIN_TARGET_EXAMPLES // n_train_examples)\n", - "# elif n_train_examples * TARGET_EPOCHS > MAX_TARGET_EXAMPLES:\n", - "# n_epochs = max(MIN_EPOCHS, MAX_TARGET_EXAMPLES // n_train_examples)\n", - "\n", - "# n_billing_tokens_in_dataset = sum(min(MAX_TOKENS_PER_EXAMPLE, length) for length in convo_lens)\n", - "# print(f\"Dataset has ~{n_billing_tokens_in_dataset} tokens that will be charged for during training\")\n", - "# print(f\"By default, you'll train for {n_epochs} epochs on this dataset\")\n", - "# print(f\"By default, you'll be charged for ~{n_epochs * n_billing_tokens_in_dataset} tokens\")\n", - "# print(\"See pricing page to estimate total costs\")" + "# Specify the data path and open the JSONL 
file\n", + "\n", + "data_path = \"local_cache/100_train.jsonl\"\n", + "\n", + "# Load dataset\n", + "with open(data_path) as f:\n", + " dataset = [json.loads(line) for line in f]\n", + "\n", + "# We can inspect the data quickly by checking the number of examples and the first item\n", + "\n", + "# Initial dataset stats\n", + "print(\"Num examples:\", len(dataset))\n", + "print(\"First example:\")\n", + "for message in dataset[0][\"messages\"]:\n", + " print(message)\n", + "\n", + "# Now that we have a sense of the data, we need to go through all the different examples and check to make sure the formatting is correct and matches the Chat completions message structure\n", + "\n", + "# Format error checks\n", + "format_errors = defaultdict(int)\n", + "\n", + "for ex in dataset:\n", + " if not isinstance(ex, dict):\n", + " format_errors[\"data_type\"] += 1\n", + " continue\n", + "\n", + " messages = ex.get(\"messages\", None)\n", + " if not messages:\n", + " format_errors[\"missing_messages_list\"] += 1\n", + " continue\n", + "\n", + " for message in messages:\n", + " if \"role\" not in message or \"content\" not in message:\n", + " format_errors[\"message_missing_key\"] += 1\n", + "\n", + " if any(k not in (\"role\", \"content\", \"name\") for k in message):\n", + " format_errors[\"message_unrecognized_key\"] += 1\n", + "\n", + " if message.get(\"role\", None) not in (\"system\", \"user\", \"assistant\"):\n", + " format_errors[\"unrecognized_role\"] += 1\n", + "\n", + " content = message.get(\"content\", None)\n", + " if not content or not isinstance(content, str):\n", + " format_errors[\"missing_content\"] += 1\n", + "\n", + " if not any(message.get(\"role\", None) == \"assistant\" for message in messages):\n", + " format_errors[\"example_missing_assistant_message\"] += 1\n", + "\n", + "if format_errors:\n", + " print(\"Found errors:\")\n", + " for k, v in format_errors.items():\n", + " print(f\"{k}: {v}\")\n", + "else:\n", + " print(\"No errors found\")\n", + "\n", 
+ "# Beyond the structure of the message, we also need to ensure that the length does not exceed the 4096 token limit.\n", + "\n", + "# Token counting functions\n", + "encoding = tiktoken.get_encoding(\"cl100k_base\")\n", + "\n", + "# not exact!\n", + "# simplified from https://github.com/openai/openai-cookbook/blob/main/examples/How_to_count_tokens_with_tiktoken.ipynb\n", + "def num_tokens_from_messages(messages, tokens_per_message=3, tokens_per_name=1):\n", + " num_tokens = 0\n", + " for message in messages:\n", + " num_tokens += tokens_per_message\n", + " for key, value in message.items():\n", + " num_tokens += len(encoding.encode(value))\n", + " if key == \"name\":\n", + " num_tokens += tokens_per_name\n", + " num_tokens += 3\n", + " return num_tokens\n", + "\n", + "def num_assistant_tokens_from_messages(messages):\n", + " num_tokens = 0\n", + " for message in messages:\n", + " if message[\"role\"] == \"assistant\":\n", + " num_tokens += len(encoding.encode(message[\"content\"]))\n", + " return num_tokens\n", + "\n", + "def print_distribution(values, name):\n", + " print(f\"\\n#### Distribution of {name}:\")\n", + " print(f\"min / max: {min(values)}, {max(values)}\")\n", + " print(f\"mean / median: {np.mean(values)}, {np.median(values)}\")\n", + " print(f\"p5 / p95: {np.quantile(values, 0.1)}, {np.quantile(values, 0.9)}\")\n", + "\n", + "# Last, we can look at the results of the different formatting operations before proceeding with creating a fine-tuning job:\n", + "\n", + "# Warnings and tokens counts\n", + "n_missing_system = 0\n", + "n_missing_user = 0\n", + "n_messages = []\n", + "convo_lens = []\n", + "assistant_message_lens = []\n", + "\n", + "for ex in dataset:\n", + " messages = ex[\"messages\"]\n", + " if not any(message[\"role\"] == \"system\" for message in messages):\n", + " n_missing_system += 1\n", + " if not any(message[\"role\"] == \"user\" for message in messages):\n", + " n_missing_user += 1\n", + " n_messages.append(len(messages))\n", + " 
convo_lens.append(num_tokens_from_messages(messages))\n", + " assistant_message_lens.append(num_assistant_tokens_from_messages(messages))\n", + "\n", + "print(\"Num examples missing system message:\", n_missing_system)\n", + "print(\"Num examples missing user message:\", n_missing_user)\n", + "print_distribution(n_messages, \"num_messages_per_example\")\n", + "print_distribution(convo_lens, \"num_total_tokens_per_example\")\n", + "print_distribution(assistant_message_lens, \"num_assistant_tokens_per_example\")\n", + "n_too_long = sum(l > 4096 for l in convo_lens)\n", + "print(f\"\\n{n_too_long} examples may be over the 4096 token limit, they will be truncated during fine-tuning\")\n", + "\n", + "# Pricing and default n_epochs estimate\n", + "MAX_TOKENS_PER_EXAMPLE = 4096\n", + "\n", + "MIN_TARGET_EXAMPLES = 100\n", + "MAX_TARGET_EXAMPLES = 25000\n", + "TARGET_EPOCHS = 3\n", + "MIN_EPOCHS = 1\n", + "MAX_EPOCHS = 25\n", + "\n", + "n_epochs = TARGET_EPOCHS\n", + "n_train_examples = len(dataset)\n", + "if n_train_examples * TARGET_EPOCHS < MIN_TARGET_EXAMPLES:\n", + " n_epochs = min(MAX_EPOCHS, MIN_TARGET_EXAMPLES // n_train_examples)\n", + "elif n_train_examples * TARGET_EPOCHS > MAX_TARGET_EXAMPLES:\n", + " n_epochs = max(MIN_EPOCHS, MAX_TARGET_EXAMPLES // n_train_examples)\n", + "\n", + "n_billing_tokens_in_dataset = sum(min(MAX_TOKENS_PER_EXAMPLE, length) for length in convo_lens)\n", + "print(f\"Dataset has ~{n_billing_tokens_in_dataset} tokens that will be charged for during training\")\n", + "print(f\"By default, you'll train for {n_epochs} epochs on this dataset\")\n", + "print(f\"By default, you'll be charged for ~{n_epochs * n_billing_tokens_in_dataset} tokens\")\n", + "print(\"See pricing page to estimate total costs\")" ] }, { @@ -564,38 +593,46 @@ }, { "cell_type": "code", - "execution_count": 16, + "execution_count": 50, + "metadata": {}, + "outputs": [], + "source": [ + "file_object = openai.File.create(\n", + " file=open(\"local_cache/100_train.jsonl\", 
\"r\"),\n", + " purpose=\"fine-tune\",\n", + ")" + ] + }, + { + "cell_type": "code", + "execution_count": 51, "metadata": {}, "outputs": [ { "data": { "text/plain": [ - " JSON: {\n", + " JSON: {\n", " \"object\": \"file\",\n", - " \"id\": \"file-TjkDD0C39b4S1JZstMly98pv\",\n", + " \"id\": \"file-iuSjUY6kK84A1cOB9QffTxfD\",\n", " \"purpose\": \"fine-tune\",\n", " \"filename\": \"file\",\n", - " \"bytes\": 131291,\n", - " \"created_at\": 1693836561,\n", - " \"status\": \"uploaded\",\n", + " \"bytes\": 120415,\n", + " \"created_at\": 1694012894,\n", + " \"status\": \"processed\",\n", " \"status_details\": null\n", "}" ] }, - "execution_count": 16, + "execution_count": 51, "metadata": {}, "output_type": "execute_result" } ], "source": [ - "file_object = openai.File.create(\n", - " file=open(\"squad-stratified-100-ft-v1.json\", \"r\"),\n", - " purpose=\"fine-tune\",\n", - ")\n", - "\n", - "while file_object[\"status\"] != \"processed\":\n", - " file_object = openai.File.retrieve(file_object[\"id\"])\n", - " time.sleep(5)" + "while file_object.status!='processed':\n", + " time.sleep(5)\n", + " file_object.refresh()\n", + "file_object" ] }, { @@ -608,24 +645,105 @@ }, { "cell_type": "code", - "execution_count": 19, + "execution_count": 52, "metadata": {}, "outputs": [], "source": [ "ft_job = openai.FineTuningJob.create(\n", - " training_file=file_object[\"id\"], model=\"gpt-3.5-turbo\", suffix=\"v1\"\n", - ")\n", - "while openai.FineTuningJob.retrieve(ft_job[\"id\"]).fine_tuned_model is None:\n", - " time.sleep(10)" + " training_file=file_object[\"id\"], model=\"gpt-3.5-turbo\", suffix=\"100train20230906\"\n", + ")" ] }, { "cell_type": "code", - "execution_count": 26, + "execution_count": 58, "metadata": {}, - "outputs": [], + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "Status: running\n", + "Status: running\n", + "Status: running\n", + "Status: running\n", + "Status: running\n", + "Status: running\n", + "Status: running\n", + "Status: 
running\n", + "Status: running\n", + "Status: running\n", + "Status: running\n", + "Status: running\n", + "Status: running\n", + "Status: running\n", + "Status: running\n", + "Status: running\n", + "Status: running\n", + "Status: running\n", + "Status: running\n", + "Status: running\n", + "Status: running\n", + "Status: running\n", + "Status: running\n", + "Status: running\n", + "Status: running\n", + "Status: running\n", + "Status: running\n", + "Status: running\n", + "Status: running\n", + "Status: running\n", + "Status: running\n", + "Status: running\n", + "Status: running\n", + "Status: running\n", + "Status: running\n", + "Status: running\n", + "Status: running\n", + "Status: running\n", + "Status: running\n", + "Status: running\n", + "Status: running\n", + "Status: running\n", + "Status: running\n", + "Status: running\n", + "Status: running\n", + "Status: running\n", + "Status: running\n", + "Status: running\n", + "Status: running\n", + "Status: running\n", + "Status: running\n", + "Status: succeeded\n" + ] + } + ], "source": [ - "model_id = openai.FineTuningJob.retrieve(ft_job[\"id\"]).fine_tuned_model" + "while ft_job.status!='succeeded':\n", + " time.sleep(15)\n", + " ft_job.refresh()\n", + " print(\"Status: \", ft_job.status)" + ] + }, + { + "cell_type": "code", + "execution_count": 63, + "metadata": {}, + "outputs": [ + { + "data": { + "text/plain": [ + "'ft:gpt-3.5-turbo-0613:qdrant:100train20230906:7vp2AzMY'" + ] + }, + "execution_count": 63, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "model_id = openai.FineTuningJob.retrieve(ft_job[\"id\"]).fine_tuned_model\n", + "model_id" ] }, { @@ -638,7 +756,7 @@ }, { "cell_type": "code", - "execution_count": 29, + "execution_count": 60, "metadata": {}, "outputs": [ { @@ -681,14 +799,14 @@ }, { "cell_type": "code", - "execution_count": 32, + "execution_count": 61, "metadata": {}, "outputs": [ { "name": "stderr", "output_type": "stream", "text": [ - "100%|██████████| 1000/1000 
[12:14<00:00, 1.36it/s] \n" + "100%|██████████| 100/100 [05:16<00:00, 3.17s/it]\n" ] } ], @@ -698,21 +816,21 @@ }, { "cell_type": "code", - "execution_count": 38, + "execution_count": 62, "metadata": {}, "outputs": [ { "data": { "text/plain": [ - "Expected and Right 30.10%\n", - "Expected but IDK 15.20%\n", - "Expected but Wrong 5.00%\n", - "Hallucination 7.10%\n", - "Did not Expect and IDK 42.60%\n", + "Expected and Right 30.00%\n", + "Expected but IDK 21.00%\n", + "Expected but Wrong 2.00%\n", + "Hallucination 5.00%\n", + "Did not Expect and IDK 42.00%\n", "Name: count, dtype: object" ] }, - "execution_count": 38, + "execution_count": 62, "metadata": {}, "output_type": "execute_result" } @@ -733,12 +851,12 @@ }, { "cell_type": "code", - "execution_count": 2, + "execution_count": 64, "metadata": {}, "outputs": [], "source": [ "# Optionally, save the results to a JSON file\n", - "# df.to_json(\"1K_with_ft_generated_answers.json\", lines=True, orient=\"records\")" + "df.to_json(\"local_cache/100_val_ft.json\", orient=\"records\", lines=True)" ] }, { @@ -751,12 +869,12 @@ }, { "cell_type": "code", - "execution_count": 8, + "execution_count": 65, "metadata": {}, "outputs": [ { "data": { - "image/png": "", + "image/png": "", "text/plain": [ "
" ] @@ -840,23 +958,12 @@ "\n", "We'll select a few examples from the dataset, including cases where the answer is not present in the context. We'll then use these examples to create a prompt that we can use to fine-tune the model.\n", "\n", - "We'll measure the baseline on our previous 1K dataset, and then we'll fine-tune the model on the new dataset. We'll then measure the performance of the fine-tuned model on the same 1K dataset.\n", - "\n", - "### Get the Training Data" - ] - }, - { - "cell_type": "code", - "execution_count": 1, - "metadata": {}, - "outputs": [], - "source": [ - "# !wget https://rajpurkar.github.io/SQuAD-explorer/dataset/train-v2.0.json -O train.json" + "We'll measure the baseline on our previous 1K dataset, and then we'll fine-tune the model on the new dataset. We'll then measure the performance of the fine-tuned model on the same 1K dataset." ] }, { "cell_type": "code", - "execution_count": 2, + "execution_count": 66, "metadata": {}, "outputs": [ { @@ -955,50 +1062,12 @@ "4 [late 1990s] " ] }, - "execution_count": 2, + "execution_count": 66, "metadata": {}, "output_type": "execute_result" } ], "source": [ - "import pandas as pd\n", - "import json\n", - "\n", - "\n", - "def json_to_dataframe_with_titles(json_data):\n", - " qas = []\n", - " context = []\n", - " is_impossible = []\n", - " answers = []\n", - " titles = []\n", - "\n", - " for article in json_data[\"data\"]:\n", - " title = article[\"title\"]\n", - " for paragraph in article[\"paragraphs\"]:\n", - " for qa in paragraph[\"qas\"]:\n", - " qas.append(qa[\"question\"].strip())\n", - " context.append(paragraph[\"context\"])\n", - " is_impossible.append(qa[\"is_impossible\"])\n", - "\n", - " ans_list = []\n", - " for ans in qa[\"answers\"]:\n", - " ans_list.append(ans[\"text\"])\n", - " answers.append(ans_list)\n", - " titles.append(title)\n", - "\n", - " df = pd.DataFrame(\n", - " {\n", - " \"title\": titles,\n", - " \"question\": qas,\n", - " \"context\": context,\n", - " 
\"is_impossible\": is_impossible,\n", - " \"answers\": answers,\n", - " }\n", - " )\n", - " return df\n", - "\n", - "\n", - "train_df = json_to_dataframe_with_titles(json.load(open(\"train.json\", \"r\")))\n", "train_df.head()" ] }, @@ -1012,7 +1081,7 @@ }, { "cell_type": "code", - "execution_count": 21, + "execution_count": 67, "metadata": {}, "outputs": [], "source": [ @@ -1025,7 +1094,7 @@ }, { "cell_type": "code", - "execution_count": 6, + "execution_count": 68, "metadata": {}, "outputs": [ { @@ -1034,7 +1103,7 @@ "True" ] }, - "execution_count": 6, + "execution_count": 68, "metadata": {}, "output_type": "execute_result" } @@ -1056,41 +1125,16 @@ }, { "cell_type": "code", - "execution_count": 17, + "execution_count": 69, "metadata": {}, - "outputs": [ - { - "name": "stdout", - "output_type": "stream", - "text": [ - "Requirement already satisfied: fastembed in /opt/homebrew/Caskroom/miniconda/base/envs/fst/lib/python3.9/site-packages (0.0.4)\n", - "Requirement already satisfied: onnxruntime<2.0.0,>=1.15.1 in /opt/homebrew/Caskroom/miniconda/base/envs/fst/lib/python3.9/site-packages (from fastembed) (1.15.1)\n", - "Requirement already satisfied: onnxruntime-silicon<2.0.0,>=1.15.0 in /opt/homebrew/Caskroom/miniconda/base/envs/fst/lib/python3.9/site-packages (from fastembed) (1.15.0)\n", - "Requirement already satisfied: requests<3.0.0,>=2.31.0 in /opt/homebrew/Caskroom/miniconda/base/envs/fst/lib/python3.9/site-packages (from fastembed) (2.31.0)\n", - "Requirement already satisfied: tokenizers<0.14.0,>=0.13.3 in /opt/homebrew/Caskroom/miniconda/base/envs/fst/lib/python3.9/site-packages (from fastembed) (0.13.3)\n", - "Requirement already satisfied: tqdm<5.0.0,>=4.65.0 in /opt/homebrew/Caskroom/miniconda/base/envs/fst/lib/python3.9/site-packages (from fastembed) (4.65.0)\n", - "Requirement already satisfied: coloredlogs in /opt/homebrew/Caskroom/miniconda/base/envs/fst/lib/python3.9/site-packages (from onnxruntime<2.0.0,>=1.15.1->fastembed) (15.0.1)\n", - 
"Requirement already satisfied: flatbuffers in /opt/homebrew/Caskroom/miniconda/base/envs/fst/lib/python3.9/site-packages (from onnxruntime<2.0.0,>=1.15.1->fastembed) (23.5.26)\n", - "Requirement already satisfied: numpy>=1.21.6 in /opt/homebrew/Caskroom/miniconda/base/envs/fst/lib/python3.9/site-packages (from onnxruntime<2.0.0,>=1.15.1->fastembed) (1.24.4)\n", - "Requirement already satisfied: packaging in /opt/homebrew/Caskroom/miniconda/base/envs/fst/lib/python3.9/site-packages (from onnxruntime<2.0.0,>=1.15.1->fastembed) (23.1)\n", - "Requirement already satisfied: protobuf in /opt/homebrew/Caskroom/miniconda/base/envs/fst/lib/python3.9/site-packages (from onnxruntime<2.0.0,>=1.15.1->fastembed) (4.23.4)\n", - "Requirement already satisfied: sympy in /opt/homebrew/Caskroom/miniconda/base/envs/fst/lib/python3.9/site-packages (from onnxruntime<2.0.0,>=1.15.1->fastembed) (1.12)\n", - "Requirement already satisfied: charset-normalizer<4,>=2 in /opt/homebrew/Caskroom/miniconda/base/envs/fst/lib/python3.9/site-packages (from requests<3.0.0,>=2.31.0->fastembed) (3.2.0)\n", - "Requirement already satisfied: idna<4,>=2.5 in /opt/homebrew/Caskroom/miniconda/base/envs/fst/lib/python3.9/site-packages (from requests<3.0.0,>=2.31.0->fastembed) (3.4)\n", - "Requirement already satisfied: urllib3<3,>=1.21.1 in /opt/homebrew/Caskroom/miniconda/base/envs/fst/lib/python3.9/site-packages (from requests<3.0.0,>=2.31.0->fastembed) (1.26.16)\n", - "Requirement already satisfied: certifi>=2017.4.17 in /opt/homebrew/Caskroom/miniconda/base/envs/fst/lib/python3.9/site-packages (from requests<3.0.0,>=2.31.0->fastembed) (2023.5.7)\n", - "Requirement already satisfied: humanfriendly>=9.1 in /opt/homebrew/Caskroom/miniconda/base/envs/fst/lib/python3.9/site-packages (from coloredlogs->onnxruntime<2.0.0,>=1.15.1->fastembed) (10.0)\n", - "Requirement already satisfied: mpmath>=0.19 in /opt/homebrew/Caskroom/miniconda/base/envs/fst/lib/python3.9/site-packages (from 
sympy->onnxruntime<2.0.0,>=1.15.1->fastembed) (1.3.0)\n" - ] - } - ], + "outputs": [], "source": [ "# !pip install fastembed" ] }, { "cell_type": "code", - "execution_count": 19, + "execution_count": 70, "metadata": {}, "outputs": [], "source": [ @@ -1107,40 +1151,18 @@ }, { "cell_type": "code", - "execution_count": 14, + "execution_count": 72, "metadata": {}, "outputs": [ { "data": { "application/vnd.jupyter.widget-view+json": { - "model_id": "b626be2da7bb4b659b37ab54f094d0f1", - "version_major": 2, - "version_minor": 0 - }, - "text/plain": [ - "Generating embeddings: 0%| | 0/1000 [00:00 Date: Wed, 6 Sep 2023 21:29:05 +0530 Subject: [PATCH 10/38] * chore(.gitignore): add examples/fine-tuned-RAG/local_cache/ to the gitignore file --- .gitignore | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/.gitignore b/.gitignore index fdb9d24eca..e4a66a7b25 100644 --- a/.gitignore +++ b/.gitignore @@ -132,4 +132,5 @@ dmypy.json *transactions*.jsonl /examples/data/transactions* *.DS_Store -tmp_* \ No newline at end of file +tmp_* +examples/fine-tuned-RAG/local_cache/* From 3c233d6c30b880dbba6518d58e73e890cb8a78be Mon Sep 17 00:00:00 2001 From: NirantK Date: Wed, 6 Sep 2023 21:55:56 +0530 Subject: [PATCH 11/38] Clean dataset prep for 1 shot, embed the few shot dataset --- examples/fine-tuned-RAG/ModelFinetune.ipynb | 114 ++++++++------------ 1 file changed, 44 insertions(+), 70 deletions(-) diff --git a/examples/fine-tuned-RAG/ModelFinetune.ipynb b/examples/fine-tuned-RAG/ModelFinetune.ipynb index b982bf3934..f306875964 100644 --- a/examples/fine-tuned-RAG/ModelFinetune.ipynb +++ b/examples/fine-tuned-RAG/ModelFinetune.ipynb @@ -45,7 +45,7 @@ }, { "cell_type": "code", - "execution_count": 17, + "execution_count": 1, "metadata": {}, "outputs": [], "source": [ @@ -66,7 +66,7 @@ }, { "cell_type": "code", - "execution_count": 49, + "execution_count": 2, "metadata": {}, "outputs": [], "source": [ @@ -87,47 +87,18 @@ }, { "cell_type": "code", - "execution_count": 1, 
+ "execution_count": 3, "metadata": {}, - "outputs": [ - { - "name": "stdout", - "output_type": "stream", - "text": [ - "--2023-09-06 19:39:59-- https://rajpurkar.github.io/SQuAD-explorer/dataset/train-v2.0.json\n", - "Resolving rajpurkar.github.io (rajpurkar.github.io)... 185.199.111.153, 185.199.108.153, 185.199.110.153, ...\n", - "Connecting to rajpurkar.github.io (rajpurkar.github.io)|185.199.111.153|:443... connected.\n", - "HTTP request sent, awaiting response... 200 OK\n", - "Length: 42123633 (40M) [application/json]\n", - "Saving to: ‘local_cache/train.json’\n", - "\n", - "local_cache/train.j 100%[===================>] 40.17M 35.9MB/s in 1.1s \n", - "\n", - "2023-09-06 19:40:03 (35.9 MB/s) - ‘local_cache/train.json’ saved [42123633/42123633]\n", - "\n", - "--2023-09-06 19:40:03-- https://rajpurkar.github.io/SQuAD-explorer/dataset/dev-v2.0.json\n", - "Resolving rajpurkar.github.io (rajpurkar.github.io)... 185.199.108.153, 185.199.111.153, 185.199.110.153, ...\n", - "Connecting to rajpurkar.github.io (rajpurkar.github.io)|185.199.108.153|:443... connected.\n", - "HTTP request sent, awaiting response... 
200 OK\n", - "Length: 4370528 (4.2M) [application/json]\n", - "Saving to: ‘local_cache/dev.json’\n", - "\n", - "local_cache/dev.jso 100%[===================>] 4.17M 13.7MB/s in 0.3s \n", - "\n", - "2023-09-06 19:40:04 (13.7 MB/s) - ‘local_cache/dev.json’ saved [4370528/4370528]\n", - "\n" - ] - } - ], + "outputs": [], "source": [ - "!mkdir -p local_cache\n", - "!wget https://rajpurkar.github.io/SQuAD-explorer/dataset/train-v2.0.json -O local_cache/train.json\n", - "!wget https://rajpurkar.github.io/SQuAD-explorer/dataset/dev-v2.0.json -O local_cache/dev.json" + "# !mkdir -p local_cache\n", + "# !wget https://rajpurkar.github.io/SQuAD-explorer/dataset/train-v2.0.json -O local_cache/train.json\n", + "# !wget https://rajpurkar.github.io/SQuAD-explorer/dataset/dev-v2.0.json -O local_cache/dev.json" ] }, { "cell_type": "code", - "execution_count": 5, + "execution_count": 4, "metadata": {}, "outputs": [], "source": [ @@ -358,14 +329,18 @@ }, { "cell_type": "code", - "execution_count": 13, + "execution_count": 2, "metadata": {}, "outputs": [ { - "name": "stderr", - "output_type": "stream", - "text": [ - "100%|██████████| 100/100 [00:00<00:00, 65484.84it/s]\n" + "ename": "NameError", + "evalue": "name 'get_diverse_sample' is not defined", + "output_type": "error", + "traceback": [ + "\u001b[0;31m---------------------------------------------------------------------------\u001b[0m", + "\u001b[0;31mNameError\u001b[0m Traceback (most recent call last)", + "Cell \u001b[0;32mIn[2], line 20\u001b[0m\n\u001b[1;32m 17\u001b[0m jsonl_output \u001b[39m=\u001b[39m df\u001b[39m.\u001b[39mprogress_apply(create_jsonl_entry, axis\u001b[39m=\u001b[39m\u001b[39m1\u001b[39m)\n\u001b[1;32m 18\u001b[0m \u001b[39mreturn\u001b[39;00m \u001b[39m\"\u001b[39m\u001b[39m\\n\u001b[39;00m\u001b[39m\"\u001b[39m\u001b[39m.\u001b[39mjoin(jsonl_output)\n\u001b[0;32m---> 20\u001b[0m train_sample \u001b[39m=\u001b[39m get_diverse_sample(train_df, sample_size\u001b[39m=\u001b[39m\u001b[39m100\u001b[39m, 
random_state\u001b[39m=\u001b[39m\u001b[39m42\u001b[39m)\n\u001b[1;32m 22\u001b[0m \u001b[39mwith\u001b[39;00m \u001b[39mopen\u001b[39m(\u001b[39m\"\u001b[39m\u001b[39mlocal_cache/100_train.jsonl\u001b[39m\u001b[39m\"\u001b[39m, \u001b[39m\"\u001b[39m\u001b[39mw\u001b[39m\u001b[39m\"\u001b[39m) \u001b[39mas\u001b[39;00m f:\n\u001b[1;32m 23\u001b[0m f\u001b[39m.\u001b[39mwrite(dataframe_to_jsonl(train_sample))\n", + "\u001b[0;31mNameError\u001b[0m: name 'get_diverse_sample' is not defined" ] } ], @@ -963,7 +938,7 @@ }, { "cell_type": "code", - "execution_count": 66, + "execution_count": 5, "metadata": {}, "outputs": [ { @@ -1062,7 +1037,7 @@ "4 [late 1990s] " ] }, - "execution_count": 66, + "execution_count": 5, "metadata": {}, "output_type": "execute_result" } @@ -1081,7 +1056,7 @@ }, { "cell_type": "code", - "execution_count": 67, + "execution_count": 9, "metadata": {}, "outputs": [], "source": [ @@ -1094,7 +1069,7 @@ }, { "cell_type": "code", - "execution_count": 68, + "execution_count": 10, "metadata": {}, "outputs": [ { @@ -1103,7 +1078,7 @@ "True" ] }, - "execution_count": 68, + "execution_count": 10, "metadata": {}, "output_type": "execute_result" } @@ -1113,7 +1088,7 @@ " url=os.getenv(\"QDRANT_URL\"), api_key=os.getenv(\"QDRANT_API_KEY\")\n", ")\n", "\n", - "collection_name = \"cookbook\" # An arbitrary name for the collection\n", + "collection_name = \"squadv2-cookbook\"\n", "\n", "\n", "# Create the collection\n", @@ -1125,7 +1100,7 @@ }, { "cell_type": "code", - "execution_count": 69, + "execution_count": 9, "metadata": {}, "outputs": [], "source": [ @@ -1134,7 +1109,7 @@ }, { "cell_type": "code", - "execution_count": 70, + "execution_count": 12, "metadata": {}, "outputs": [], "source": [ @@ -1151,13 +1126,13 @@ }, { "cell_type": "code", - "execution_count": 72, + "execution_count": 13, "metadata": {}, "outputs": [ { "data": { "application/vnd.jupyter.widget-view+json": { - "model_id": "8e0d768e97884ee78c778c90d1351019", + "model_id": 
"994a3daab89c4e64b78caf39bcfee553", "version_major": 2, "version_minor": 0 }, @@ -1171,25 +1146,33 @@ ], "source": [ "def generate_points_from_dataframe(df: pd.DataFrame) -> List[PointStruct]:\n", + " batch_size = 512\n", " questions = df[\"question\"].tolist()\n", + " total_batches = len(questions) // batch_size + 1\n", + " \n", " pbar = tqdm(total=len(questions), desc=\"Generating embeddings\")\n", - "\n", - " # Generate embeddings for each question and update tqdm progress bar\n", + " \n", + " # Generate embeddings in batches to improve performance\n", " embeddings = []\n", - " for question in questions:\n", - " embedding = list(embedding_model.embed([question]))[0]\n", - " embeddings.append(embedding)\n", - " pbar.update(1)\n", + " for i in range(total_batches):\n", + " start_idx = i * batch_size\n", + " end_idx = min((i + 1) * batch_size, len(questions))\n", + " batch = questions[start_idx:end_idx]\n", + " \n", + " batch_embeddings = embedding_model.embed(batch, batch_size=batch_size)\n", + " embeddings.extend(batch_embeddings)\n", + " pbar.update(len(batch))\n", + " \n", " pbar.close()\n", - "\n", + " \n", " # Convert embeddings to list of lists\n", " embeddings_list = [embedding.tolist() for embedding in embeddings]\n", - "\n", + " \n", " # Create a temporary DataFrame to hold the embeddings and existing DataFrame columns\n", " temp_df = df.copy()\n", " temp_df[\"embeddings\"] = embeddings_list\n", " temp_df[\"id\"] = temp_df.index\n", - "\n", + " \n", " # Generate PointStruct objects using DataFrame apply method\n", " points = temp_df.progress_apply(\n", " lambda row: PointStruct(\n", @@ -1211,15 +1194,6 @@ "points = generate_points_from_dataframe(train_df)" ] }, - { - "cell_type": "code", - "execution_count": null, - "metadata": {}, - "outputs": [], - "source": [ - "collection_name = \"squadv2-cookbook\"" - ] - }, { "cell_type": "code", "execution_count": null, From 350393d66d2c6351b559a34d56faaa4d357f9a32 Mon Sep 17 00:00:00 2001 From: NirantK Date: Thu, 
7 Sep 2023 15:41:27 +0530 Subject: [PATCH 12/38] Add few shot RAG with Qdrant --- examples/fine-tuned-RAG/ModelFinetune.ipynb | 594 ++++++++++++++++---- 1 file changed, 492 insertions(+), 102 deletions(-) diff --git a/examples/fine-tuned-RAG/ModelFinetune.ipynb b/examples/fine-tuned-RAG/ModelFinetune.ipynb index f306875964..0c2cc587e5 100644 --- a/examples/fine-tuned-RAG/ModelFinetune.ipynb +++ b/examples/fine-tuned-RAG/ModelFinetune.ipynb @@ -155,7 +155,7 @@ }, { "cell_type": "code", - "execution_count": 6, + "execution_count": 57, "metadata": {}, "outputs": [], "source": [ @@ -186,8 +186,8 @@ "\n", "\n", "# Main function to answer question\n", - "def answer_question(row, model=\"gpt-3.5-turbo-0613\"):\n", - " messages = get_prompt(row)\n", + "def answer_question(row, prompt_func=get_prompt, model=\"gpt-3.5-turbo-0613\"):\n", + " messages = prompt_func(row)\n", " response = api_call(messages, model)\n", " return response[\"choices\"][0][\"message\"][\"content\"]" ] @@ -245,23 +245,36 @@ }, { "cell_type": "code", - "execution_count": 12, + "execution_count": 61, "metadata": {}, "outputs": [ { - "data": { - "text/plain": [ - "Expected and Right 43.00%\n", - "Expected but IDK 0.00%\n", - "Expected but Wrong 10.00%\n", - "Hallucination 47.00%\n", - "Did not Expect and IDK 0.00%\n", - "Name: count, dtype: object" - ] - }, - "execution_count": 12, - "metadata": {}, - "output_type": "execute_result" + "ename": "KeyError", + "evalue": "'generated_answer'", + "output_type": "error", + "traceback": [ + "\u001b[0;31m---------------------------------------------------------------------------\u001b[0m", + "\u001b[0;31mKeyError\u001b[0m Traceback (most recent call last)", + "File \u001b[0;32m/opt/homebrew/Caskroom/miniconda/base/envs/fst/lib/python3.9/site-packages/pandas/core/indexes/base.py:3653\u001b[0m, in \u001b[0;36mIndex.get_loc\u001b[0;34m(self, key)\u001b[0m\n\u001b[1;32m 3652\u001b[0m \u001b[39mtry\u001b[39;00m:\n\u001b[0;32m-> 3653\u001b[0m 
\u001b[39mreturn\u001b[39;00m \u001b[39mself\u001b[39;49m\u001b[39m.\u001b[39;49m_engine\u001b[39m.\u001b[39;49mget_loc(casted_key)\n\u001b[1;32m 3654\u001b[0m \u001b[39mexcept\u001b[39;00m \u001b[39mKeyError\u001b[39;00m \u001b[39mas\u001b[39;00m err:\n", + "File \u001b[0;32m/opt/homebrew/Caskroom/miniconda/base/envs/fst/lib/python3.9/site-packages/pandas/_libs/index.pyx:147\u001b[0m, in \u001b[0;36mpandas._libs.index.IndexEngine.get_loc\u001b[0;34m()\u001b[0m\n", + "File \u001b[0;32m/opt/homebrew/Caskroom/miniconda/base/envs/fst/lib/python3.9/site-packages/pandas/_libs/index.pyx:176\u001b[0m, in \u001b[0;36mpandas._libs.index.IndexEngine.get_loc\u001b[0;34m()\u001b[0m\n", + "File \u001b[0;32mpandas/_libs/hashtable_class_helper.pxi:7080\u001b[0m, in \u001b[0;36mpandas._libs.hashtable.PyObjectHashTable.get_item\u001b[0;34m()\u001b[0m\n", + "File \u001b[0;32mpandas/_libs/hashtable_class_helper.pxi:7088\u001b[0m, in \u001b[0;36mpandas._libs.hashtable.PyObjectHashTable.get_item\u001b[0;34m()\u001b[0m\n", + "\u001b[0;31mKeyError\u001b[0m: 'generated_answer'", + "\nThe above exception was the direct cause of the following exception:\n", + "\u001b[0;31mKeyError\u001b[0m Traceback (most recent call last)", + "Cell \u001b[0;32mIn[61], line 46\u001b[0m\n\u001b[1;32m 42\u001b[0m \u001b[39mreturn\u001b[39;00m freq_series\n\u001b[1;32m 45\u001b[0m evaluator \u001b[39m=\u001b[39m ConfusionMatrixEvaluator(df, answers_column\u001b[39m=\u001b[39m\u001b[39m\"\u001b[39m\u001b[39mgenerated_answer\u001b[39m\u001b[39m\"\u001b[39m)\n\u001b[0;32m---> 46\u001b[0m evaluator\u001b[39m.\u001b[39;49mevaluate_answers()\n\u001b[1;32m 47\u001b[0m error_categories \u001b[39m=\u001b[39m evaluator\u001b[39m.\u001b[39mgenerate_matrices(use_percentages\u001b[39m=\u001b[39m\u001b[39mTrue\u001b[39;00m)\n\u001b[1;32m 48\u001b[0m error_categories\n", + "Cell \u001b[0;32mIn[61], line 34\u001b[0m, in \u001b[0;36mConfusionMatrixEvaluator.evaluate_answers\u001b[0;34m(self)\u001b[0m\n\u001b[1;32m 33\u001b[0m 
\u001b[39mdef\u001b[39;00m \u001b[39mevaluate_answers\u001b[39m(\u001b[39mself\u001b[39m):\n\u001b[0;32m---> 34\u001b[0m \u001b[39mself\u001b[39m\u001b[39m.\u001b[39my_pred \u001b[39m=\u001b[39m \u001b[39mself\u001b[39;49m\u001b[39m.\u001b[39;49mdf\u001b[39m.\u001b[39;49mapply(\u001b[39mself\u001b[39;49m\u001b[39m.\u001b[39;49m_evaluate_single_row, axis\u001b[39m=\u001b[39;49m\u001b[39m1\u001b[39;49m)\n", + "File \u001b[0;32m/opt/homebrew/Caskroom/miniconda/base/envs/fst/lib/python3.9/site-packages/pandas/core/frame.py:9423\u001b[0m, in \u001b[0;36mDataFrame.apply\u001b[0;34m(self, func, axis, raw, result_type, args, **kwargs)\u001b[0m\n\u001b[1;32m 9412\u001b[0m \u001b[39mfrom\u001b[39;00m \u001b[39mpandas\u001b[39;00m\u001b[39m.\u001b[39;00m\u001b[39mcore\u001b[39;00m\u001b[39m.\u001b[39;00m\u001b[39mapply\u001b[39;00m \u001b[39mimport\u001b[39;00m frame_apply\n\u001b[1;32m 9414\u001b[0m op \u001b[39m=\u001b[39m frame_apply(\n\u001b[1;32m 9415\u001b[0m \u001b[39mself\u001b[39m,\n\u001b[1;32m 9416\u001b[0m func\u001b[39m=\u001b[39mfunc,\n\u001b[0;32m (...)\u001b[0m\n\u001b[1;32m 9421\u001b[0m kwargs\u001b[39m=\u001b[39mkwargs,\n\u001b[1;32m 9422\u001b[0m )\n\u001b[0;32m-> 9423\u001b[0m \u001b[39mreturn\u001b[39;00m op\u001b[39m.\u001b[39;49mapply()\u001b[39m.\u001b[39m__finalize__(\u001b[39mself\u001b[39m, method\u001b[39m=\u001b[39m\u001b[39m\"\u001b[39m\u001b[39mapply\u001b[39m\u001b[39m\"\u001b[39m)\n", + "File \u001b[0;32m/opt/homebrew/Caskroom/miniconda/base/envs/fst/lib/python3.9/site-packages/pandas/core/apply.py:678\u001b[0m, in \u001b[0;36mFrameApply.apply\u001b[0;34m(self)\u001b[0m\n\u001b[1;32m 675\u001b[0m \u001b[39melif\u001b[39;00m \u001b[39mself\u001b[39m\u001b[39m.\u001b[39mraw:\n\u001b[1;32m 676\u001b[0m \u001b[39mreturn\u001b[39;00m \u001b[39mself\u001b[39m\u001b[39m.\u001b[39mapply_raw()\n\u001b[0;32m--> 678\u001b[0m \u001b[39mreturn\u001b[39;00m \u001b[39mself\u001b[39;49m\u001b[39m.\u001b[39;49mapply_standard()\n", + "File 
\u001b[0;32m/opt/homebrew/Caskroom/miniconda/base/envs/fst/lib/python3.9/site-packages/pandas/core/apply.py:798\u001b[0m, in \u001b[0;36mFrameApply.apply_standard\u001b[0;34m(self)\u001b[0m\n\u001b[1;32m 797\u001b[0m \u001b[39mdef\u001b[39;00m \u001b[39mapply_standard\u001b[39m(\u001b[39mself\u001b[39m):\n\u001b[0;32m--> 798\u001b[0m results, res_index \u001b[39m=\u001b[39m \u001b[39mself\u001b[39;49m\u001b[39m.\u001b[39;49mapply_series_generator()\n\u001b[1;32m 800\u001b[0m \u001b[39m# wrap results\u001b[39;00m\n\u001b[1;32m 801\u001b[0m \u001b[39mreturn\u001b[39;00m \u001b[39mself\u001b[39m\u001b[39m.\u001b[39mwrap_results(results, res_index)\n", + "File \u001b[0;32m/opt/homebrew/Caskroom/miniconda/base/envs/fst/lib/python3.9/site-packages/pandas/core/apply.py:814\u001b[0m, in \u001b[0;36mFrameApply.apply_series_generator\u001b[0;34m(self)\u001b[0m\n\u001b[1;32m 811\u001b[0m \u001b[39mwith\u001b[39;00m option_context(\u001b[39m\"\u001b[39m\u001b[39mmode.chained_assignment\u001b[39m\u001b[39m\"\u001b[39m, \u001b[39mNone\u001b[39;00m):\n\u001b[1;32m 812\u001b[0m \u001b[39mfor\u001b[39;00m i, v \u001b[39min\u001b[39;00m \u001b[39menumerate\u001b[39m(series_gen):\n\u001b[1;32m 813\u001b[0m \u001b[39m# ignore SettingWithCopy here in case the user mutates\u001b[39;00m\n\u001b[0;32m--> 814\u001b[0m results[i] \u001b[39m=\u001b[39m \u001b[39mself\u001b[39;49m\u001b[39m.\u001b[39;49mf(v)\n\u001b[1;32m 815\u001b[0m \u001b[39mif\u001b[39;00m \u001b[39misinstance\u001b[39m(results[i], ABCSeries):\n\u001b[1;32m 816\u001b[0m \u001b[39m# If we have a view on v, we need to make a copy because\u001b[39;00m\n\u001b[1;32m 817\u001b[0m \u001b[39m# series_generator will swap out the underlying data\u001b[39;00m\n\u001b[1;32m 818\u001b[0m results[i] \u001b[39m=\u001b[39m results[i]\u001b[39m.\u001b[39mcopy(deep\u001b[39m=\u001b[39m\u001b[39mFalse\u001b[39;00m)\n", + "Cell \u001b[0;32mIn[61], line 16\u001b[0m, in 
\u001b[0;36mConfusionMatrixEvaluator._evaluate_single_row\u001b[0;34m(self, row)\u001b[0m\n\u001b[1;32m 14\u001b[0m \u001b[39mdef\u001b[39;00m \u001b[39m_evaluate_single_row\u001b[39m(\u001b[39mself\u001b[39m, row):\n\u001b[1;32m 15\u001b[0m is_impossible \u001b[39m=\u001b[39m row[\u001b[39m\"\u001b[39m\u001b[39mis_impossible\u001b[39m\u001b[39m\"\u001b[39m]\n\u001b[0;32m---> 16\u001b[0m generated_answer \u001b[39m=\u001b[39m row[\u001b[39mself\u001b[39;49m\u001b[39m.\u001b[39;49manswers_column]\u001b[39m.\u001b[39mlower()\n\u001b[1;32m 17\u001b[0m actual_answers \u001b[39m=\u001b[39m [ans\u001b[39m.\u001b[39mlower() \u001b[39mfor\u001b[39;00m ans \u001b[39min\u001b[39;00m row[\u001b[39m\"\u001b[39m\u001b[39manswers\u001b[39m\u001b[39m\"\u001b[39m]]\n\u001b[1;32m 19\u001b[0m y_pred \u001b[39m=\u001b[39m (\n\u001b[1;32m 20\u001b[0m \u001b[39m\"\u001b[39m\u001b[39mExpected and Right\u001b[39m\u001b[39m\"\u001b[39m\n\u001b[1;32m 21\u001b[0m \u001b[39mif\u001b[39;00m \u001b[39mnot\u001b[39;00m is_impossible\n\u001b[0;32m (...)\u001b[0m\n\u001b[1;32m 29\u001b[0m \u001b[39melse\u001b[39;00m \u001b[39m\"\u001b[39m\u001b[39mDid not Expect and IDK\u001b[39m\u001b[39m\"\u001b[39m\n\u001b[1;32m 30\u001b[0m )\n", + "File \u001b[0;32m/opt/homebrew/Caskroom/miniconda/base/envs/fst/lib/python3.9/site-packages/pandas/core/series.py:1007\u001b[0m, in \u001b[0;36mSeries.__getitem__\u001b[0;34m(self, key)\u001b[0m\n\u001b[1;32m 1004\u001b[0m \u001b[39mreturn\u001b[39;00m \u001b[39mself\u001b[39m\u001b[39m.\u001b[39m_values[key]\n\u001b[1;32m 1006\u001b[0m \u001b[39melif\u001b[39;00m key_is_scalar:\n\u001b[0;32m-> 1007\u001b[0m \u001b[39mreturn\u001b[39;00m \u001b[39mself\u001b[39;49m\u001b[39m.\u001b[39;49m_get_value(key)\n\u001b[1;32m 1009\u001b[0m \u001b[39mif\u001b[39;00m is_hashable(key):\n\u001b[1;32m 1010\u001b[0m \u001b[39m# Otherwise index.get_value will raise InvalidIndexError\u001b[39;00m\n\u001b[1;32m 1011\u001b[0m \u001b[39mtry\u001b[39;00m:\n\u001b[1;32m 1012\u001b[0m 
\u001b[39m# For labels that don't resolve as scalars like tuples and frozensets\u001b[39;00m\n", + "File \u001b[0;32m/opt/homebrew/Caskroom/miniconda/base/envs/fst/lib/python3.9/site-packages/pandas/core/series.py:1116\u001b[0m, in \u001b[0;36mSeries._get_value\u001b[0;34m(self, label, takeable)\u001b[0m\n\u001b[1;32m 1113\u001b[0m \u001b[39mreturn\u001b[39;00m \u001b[39mself\u001b[39m\u001b[39m.\u001b[39m_values[label]\n\u001b[1;32m 1115\u001b[0m \u001b[39m# Similar to Index.get_value, but we do not fall back to positional\u001b[39;00m\n\u001b[0;32m-> 1116\u001b[0m loc \u001b[39m=\u001b[39m \u001b[39mself\u001b[39;49m\u001b[39m.\u001b[39;49mindex\u001b[39m.\u001b[39;49mget_loc(label)\n\u001b[1;32m 1118\u001b[0m \u001b[39mif\u001b[39;00m is_integer(loc):\n\u001b[1;32m 1119\u001b[0m \u001b[39mreturn\u001b[39;00m \u001b[39mself\u001b[39m\u001b[39m.\u001b[39m_values[loc]\n", + "File \u001b[0;32m/opt/homebrew/Caskroom/miniconda/base/envs/fst/lib/python3.9/site-packages/pandas/core/indexes/base.py:3655\u001b[0m, in \u001b[0;36mIndex.get_loc\u001b[0;34m(self, key)\u001b[0m\n\u001b[1;32m 3653\u001b[0m \u001b[39mreturn\u001b[39;00m \u001b[39mself\u001b[39m\u001b[39m.\u001b[39m_engine\u001b[39m.\u001b[39mget_loc(casted_key)\n\u001b[1;32m 3654\u001b[0m \u001b[39mexcept\u001b[39;00m \u001b[39mKeyError\u001b[39;00m \u001b[39mas\u001b[39;00m err:\n\u001b[0;32m-> 3655\u001b[0m \u001b[39mraise\u001b[39;00m \u001b[39mKeyError\u001b[39;00m(key) \u001b[39mfrom\u001b[39;00m \u001b[39merr\u001b[39;00m\n\u001b[1;32m 3656\u001b[0m \u001b[39mexcept\u001b[39;00m \u001b[39mTypeError\u001b[39;00m:\n\u001b[1;32m 3657\u001b[0m \u001b[39m# If we have a listlike key, _check_indexing_error will raise\u001b[39;00m\n\u001b[1;32m 3658\u001b[0m \u001b[39m# InvalidIndexError. 
Otherwise we fall through and re-raise\u001b[39;00m\n\u001b[1;32m 3659\u001b[0m \u001b[39m# the TypeError.\u001b[39;00m\n\u001b[1;32m 3660\u001b[0m \u001b[39mself\u001b[39m\u001b[39m.\u001b[39m_check_indexing_error(key)\n", + "\u001b[0;31mKeyError\u001b[0m: 'generated_answer'" + ] } ], "source": [ @@ -1069,33 +1082,22 @@ }, { "cell_type": "code", - "execution_count": 10, + "execution_count": 22, "metadata": {}, - "outputs": [ - { - "data": { - "text/plain": [ - "True" - ] - }, - "execution_count": 10, - "metadata": {}, - "output_type": "execute_result" - } - ], + "outputs": [], "source": [ "qdrant_client = QdrantClient(\n", - " url=os.getenv(\"QDRANT_URL\"), api_key=os.getenv(\"QDRANT_API_KEY\")\n", + " url=os.getenv(\"QDRANT_URL\"), api_key=os.getenv(\"QDRANT_API_KEY\"), timeout=6000, prefer_grpc=True\n", ")\n", "\n", - "collection_name = \"squadv2-cookbook\"\n", + "# collection_name = \"squadv2-cookbook\"\n", "\n", "\n", - "# Create the collection\n", - "qdrant_client.recreate_collection(\n", - " collection_name=collection_name,\n", - " vectors_config=VectorParams(size=384, distance=Distance.COSINE),\n", - ")" + "# # Create the collection\n", + "# qdrant_client.recreate_collection(\n", + "# collection_name=collection_name,\n", + "# vectors_config=VectorParams(size=384, distance=Distance.COSINE),\n", + "# )" ] }, { @@ -1124,6 +1126,17 @@ "embedding_model = DefaultEmbedding()" ] }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "## Embedding the Questions\n", + "\n", + "We embed all the questions in the training set. We'll use question-to-question similarity to find the most similar questions to the question we're looking for. This workflow is used in RAG to leverage the OpenAI model's ability to do in-context learning from more examples. This is what we call Few-Shot Learning here.\n", + "\n", + "### ❗️ Important Note: This step can take up to 3 hours to complete. Please be patient. 
If you see Out of Memory errors or Kernel Crashes, please reduce the batch size to 32, restart the kernel and run the notebook again. This code needs to be run only ONCE. " + ] + }, { "cell_type": "code", "execution_count": 13, @@ -1142,61 +1155,84 @@ }, "metadata": {}, "output_type": "display_data" + }, + { + "data": { + "application/vnd.jupyter.widget-view+json": { + "model_id": "3cf2827c330b434fa58295794d94356e", + "version_major": 2, + "version_minor": 0 + }, + "text/plain": [ + " 0%| | 0/130319 [00:00 List[PointStruct]:\n", - " batch_size = 512\n", - " questions = df[\"question\"].tolist()\n", - " total_batches = len(questions) // batch_size + 1\n", + "# def generate_points_from_dataframe(df: pd.DataFrame) -> List[PointStruct]:\n", + "# batch_size = 512\n", + "# questions = df[\"question\"].tolist()\n", + "# total_batches = len(questions) // batch_size + 1\n", " \n", - " pbar = tqdm(total=len(questions), desc=\"Generating embeddings\")\n", + "# pbar = tqdm(total=len(questions), desc=\"Generating embeddings\")\n", " \n", - " # Generate embeddings in batches to improve performance\n", - " embeddings = []\n", - " for i in range(total_batches):\n", - " start_idx = i * batch_size\n", - " end_idx = min((i + 1) * batch_size, len(questions))\n", - " batch = questions[start_idx:end_idx]\n", + "# # Generate embeddings in batches to improve performance\n", + "# embeddings = []\n", + "# for i in range(total_batches):\n", + "# start_idx = i * batch_size\n", + "# end_idx = min((i + 1) * batch_size, len(questions))\n", + "# batch = questions[start_idx:end_idx]\n", " \n", - " batch_embeddings = embedding_model.embed(batch, batch_size=batch_size)\n", - " embeddings.extend(batch_embeddings)\n", - " pbar.update(len(batch))\n", + "# batch_embeddings = embedding_model.embed(batch, batch_size=batch_size)\n", + "# embeddings.extend(batch_embeddings)\n", + "# pbar.update(len(batch))\n", " \n", - " pbar.close()\n", + "# pbar.close()\n", " \n", - " # Convert embeddings to list of 
lists\n", - " embeddings_list = [embedding.tolist() for embedding in embeddings]\n", + "# # Convert embeddings to list of lists\n", + "# embeddings_list = [embedding.tolist() for embedding in embeddings]\n", " \n", - " # Create a temporary DataFrame to hold the embeddings and existing DataFrame columns\n", - " temp_df = df.copy()\n", - " temp_df[\"embeddings\"] = embeddings_list\n", - " temp_df[\"id\"] = temp_df.index\n", + "# # Create a temporary DataFrame to hold the embeddings and existing DataFrame columns\n", + "# temp_df = df.copy()\n", + "# temp_df[\"embeddings\"] = embeddings_list\n", + "# temp_df[\"id\"] = temp_df.index\n", " \n", - " # Generate PointStruct objects using DataFrame apply method\n", - " points = temp_df.progress_apply(\n", - " lambda row: PointStruct(\n", - " id=row[\"id\"],\n", - " vector=row[\"embeddings\"],\n", - " payload={\n", - " \"question\": row[\"question\"],\n", - " \"title\": row[\"title\"],\n", - " \"context\": row[\"context\"],\n", - " \"is_impossible\": row[\"is_impossible\"],\n", - " \"answers\": row[\"answers\"],\n", - " },\n", - " ),\n", - " axis=1,\n", - " ).tolist()\n", - "\n", - " return points\n", + "# # Generate PointStruct objects using DataFrame apply method\n", + "# points = temp_df.progress_apply(\n", + "# lambda row: PointStruct(\n", + "# id=row[\"id\"],\n", + "# vector=row[\"embeddings\"],\n", + "# payload={\n", + "# \"question\": row[\"question\"],\n", + "# \"title\": row[\"title\"],\n", + "# \"context\": row[\"context\"],\n", + "# \"is_impossible\": row[\"is_impossible\"],\n", + "# \"answers\": row[\"answers\"],\n", + "# },\n", + "# ),\n", + "# axis=1,\n", + "# ).tolist()\n", + "\n", + "# return points\n", + "\n", + "# points = generate_points_from_dataframe(train_df)" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "### Upload the Embeddings to Qdrant\n", "\n", - "points = generate_points_from_dataframe(train_df)" + "Note that configuring Qdrant is outside the scope of this notebook. 
Please refer to the [Qdrant documentation](https://qdrant.tech) for more information. We used a timeout of 6000 seconds for the upload, and gRPC (`prefer_grpc=True`) to speed up the upload." ] }, { "cell_type": "code", - "execution_count": null, + "execution_count": 23, "metadata": {}, "outputs": [ { @@ -1208,35 +1244,44 @@ } ], "source": [ - "operation_info = qdrant_client.upsert(\n", - " collection_name=collection_name, wait=True, points=points\n", - ")\n", - "print(operation_info)" + "# operation_info = qdrant_client.upsert(\n", + "# collection_name=collection_name, wait=True, points=points\n", + "# )\n", + "# print(operation_info)" ] }, { "cell_type": "code", - "execution_count": null, + "execution_count": 25, "metadata": {}, "outputs": [], "source": [ - "train_sample.describe()" + "train_sample = get_diverse_sample(train_df, sample_size=100, random_state=42)" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "# Using Qdrant to Improve RAG Prompt\n", + "\n", + "Now that we've uploaded the embeddings to Qdrant, we can use Qdrant to find the most similar questions to the question we're looking for. We'll use the top 5 most similar questions to create a prompt that we can use to fine-tune the model. We'll then measure the performance of the fine-tuned model on the same validation set, but with few-shot prompting!" 
] }, { "cell_type": "code", - "execution_count": 79, + "execution_count": 87, "metadata": {}, "outputs": [ { "data": { "application/vnd.jupyter.widget-view+json": { - "model_id": "dcd828a1f36945a59b8b0d91d641e390", + "model_id": "ff39612710a749fdae166745b2bd2d5a", "version_major": 2, "version_minor": 0 }, "text/plain": [ - " 0%| | 0/1000 [00:00= 1:\n", - " rag_prompt += q_to_prompt(q1[0])\n", " # If the next best question is not the same as the question, add it to the prompt\n", - " if len(q2) >= 1 and (q2[0].payload[\"question\"] != q1[0].payload[\"question\"]):\n", - " rag_prompt += q_to_prompt(q2[0])\n", + " if len(q2) >= 1:\n", + " rag_prompt += q_to_prompt(q2[1])\n", + " if len(q1) >= 1:\n", + " rag_prompt += q_to_prompt(q1[1])\n", + " rag_prompt += q_to_prompt(q1[2])\n", + " if len(q2) >= 1:\n", + " rag_prompt += q_to_prompt(q2[2])\n", + " \n", + " \n", "\n", " rag_prompt += [\n", " {\n", @@ -1326,7 +1376,7 @@ }, { "cell_type": "code", - "execution_count": 73, + "execution_count": 88, "metadata": {}, "outputs": [ { @@ -1340,40 +1390,380 @@ "Name: few_shot_prompt_1K, dtype: object" ] }, - "execution_count": 73, + "execution_count": 88, "metadata": {}, "output_type": "execute_result" } ], "source": [ - "df[\"few_shot_prompt_1K\"].head()" + "train_sample[\"few_shot_prompt_1K\"].head()" ] }, { "cell_type": "code", - "execution_count": 80, + "execution_count": 89, + "metadata": {}, + "outputs": [ + { + "data": { + "text/plain": [ + "[{'role': 'system',\n", + " 'content': \"Answer the following Question based on the Context only. Only answer from the Context. If you don't know the answer, say 'I don't know'.\\n\\n\"},\n", + " {'role': 'user',\n", + " 'content': 'Question: What is the Italian Quarter?\\n\\nContext: Small Armenian trading and religious communities have existed outside of Armenia for centuries. 
For example, a community has existed for over a millennium in the Holy Land, and one of the four quarters of the walled Old City of Jerusalem has been called the Armenian Quarter. An Armenian Catholic monastic community of 35 founded in 1717 exists on an island near Venice, Italy. There are also remnants of formerly populous communities in India, Myanmar, Thailand, Belgium, Portugal, Italy, Poland, Austria, Hungary, Bulgaria, Romania, Serbia, Ethiopia, Sudan and Egypt.[citation needed]\\n\\nAnswer:'},\n", + " {'role': 'assistant', 'content': \"I don't know\"},\n", + " {'role': 'user',\n", + " 'content': \"Question: What shape is a cirque, generally?\\n\\nContext: Glaciers form where the accumulation of snow and ice exceeds ablation. The area in which a glacier forms is called a cirque (corrie or cwm) - a typically armchair-shaped geological feature (such as a depression between mountains enclosed by arêtes) - which collects and compresses through gravity the snow which falls into it. This snow collects and is compacted by the weight of the snow falling above it forming névé. Further crushing of the individual snowflakes and squeezing the air from the snow turns it into 'glacial ice'. This glacial ice will fill the cirque until it 'overflows' through a geological weakness or vacancy, such as the gap between two mountains. When the mass of snow and ice is sufficiently thick, it begins to move due to a combination of surface slope, gravity and pressure. On steeper slopes, this can occur with as little as 15 m (50 ft) of snow-ice.\\n\\nAnswer:\"},\n", + " {'role': 'assistant', 'content': 'armchair-shaped'},\n", + " {'role': 'user',\n", + " 'content': 'Question: On which side is a cirque opened?\\n\\nContext: At the start of a classic valley glacier is a bowl-shaped cirque, which has escarped walls on three sides but is open on the side that descends into the valley. Cirques are where ice begins to accumulate in a glacier. 
Two glacial cirques may form back to back and erode their backwalls until only a narrow ridge, called an arête is left. This structure may result in a mountain pass. If multiple cirques encircle a single mountain, they create pointed pyramidal peaks; particularly steep examples are called horns.\\n\\nAnswer:'},\n", + " {'role': 'assistant', 'content': 'the side that descends into the valley'},\n", + " {'role': 'user',\n", + " 'content': 'Question: What are two indications of a larger cerebrum?\\n\\nContext: In intelligent mammals, such as primates, the cerebrum is larger relative to the rest of the brain. Intelligence itself is not easy to define, but indications of intelligence include the ability to learn, matched with behavioral flexibility. Rats, for example, are considered to be highly intelligent, as they can learn and perform new tasks, an ability that may be important when they first colonize a fresh habitat. In some mammals, food gathering appears to be related to intelligence: a deer feeding on plants has a brain smaller than a cat, which must think to outwit its prey.\\n\\nAnswer:'},\n", + " {'role': 'assistant', 'content': \"I don't know\"},\n", + " {'role': 'user',\n", + " 'content': \"Question: What is a cirque?\\n\\nContext: Glaciers form where the accumulation of snow and ice exceeds ablation. The area in which a glacier forms is called a cirque (corrie or cwm) - a typically armchair-shaped geological feature (such as a depression between mountains enclosed by arêtes) - which collects and compresses through gravity the snow which falls into it. This snow collects and is compacted by the weight of the snow falling above it forming névé. Further crushing of the individual snowflakes and squeezing the air from the snow turns it into 'glacial ice'. This glacial ice will fill the cirque until it 'overflows' through a geological weakness or vacancy, such as the gap between two mountains. 
When the mass of snow and ice is sufficiently thick, it begins to move due to a combination of surface slope, gravity and pressure. On steeper slopes, this can occur with as little as 15 m (50 ft) of snow-ice.\\n\\nAnswer:\"}]" + ] + }, + "execution_count": 89, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "train_sample[\"few_shot_prompt_1K\"].iloc[0]" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "## OpenAI Model Fine-Tuning\n", + "\n", + "### Upload the Fine-Tuning Data to OpenAI" + ] + }, + { + "cell_type": "code", + "execution_count": 90, + "metadata": {}, + "outputs": [ + { + "data": { + "application/vnd.jupyter.widget-view+json": { + "model_id": "9bf7fea68e3d45e4ade09e63f7abe907", + "version_major": 2, + "version_minor": 0 + }, + "text/plain": [ + " 0%| | 0/100 [00:00 Date: Thu, 7 Sep 2023 15:41:55 +0530 Subject: [PATCH 13/38] Remove unused code --- examples/fine-tuned-RAG/ModelFinetune.ipynb | 5 ----- 1 file changed, 5 deletions(-) diff --git a/examples/fine-tuned-RAG/ModelFinetune.ipynb b/examples/fine-tuned-RAG/ModelFinetune.ipynb index 0c2cc587e5..d0dc1b5fbd 100644 --- a/examples/fine-tuned-RAG/ModelFinetune.ipynb +++ b/examples/fine-tuned-RAG/ModelFinetune.ipynb @@ -1483,11 +1483,6 @@ "metadata": {}, "outputs": [], "source": [ - "\n", - "os.environ[\"OPENAI_API_KEY\"] = \"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\"\n", - "openai.api_key = os.environ[\"OPENAI_API_KEY\"]\n", - "os.environ[\"QDRANT_URL\"] = \"https://4541e874-8a3f-439d-81c8-4ac59b2816e1.us-east-1-0.aws.cloud.qdrant.io:6333\"\n", - "os.environ[\"QDRANT_API_KEY\"] = \"A64kdbyOowZ9099dWCBMN1kDcKKUfaBGU3avHVU05jnRshUABoR5aQ\"\n", "# Create OpenAI File\n", "file_object = openai.File.create(\n", " file=open(\"local_cache/100_train_few_shot.jsonl\", \"r\"),\n", From fe59ced6261baed798ec85e91a6cd2a2138b8839 Mon Sep 17 00:00:00 2001 From: NirantK Date: Thu, 7 Sep 2023 15:44:12 +0530 Subject: [PATCH 14/38] more inline output --- 
examples/fine-tuned-RAG/ModelFinetune.ipynb | 10 +++++----- 1 file changed, 5 insertions(+), 5 deletions(-) diff --git a/examples/fine-tuned-RAG/ModelFinetune.ipynb b/examples/fine-tuned-RAG/ModelFinetune.ipynb index d0dc1b5fbd..95495c19a6 100644 --- a/examples/fine-tuned-RAG/ModelFinetune.ipynb +++ b/examples/fine-tuned-RAG/ModelFinetune.ipynb @@ -1654,16 +1654,16 @@ }, { "cell_type": "code", - "execution_count": null, + "execution_count": 95, "metadata": {}, "outputs": [ { "data": { "text/plain": [ - "'ft:gpt-3.5-turbo-0613:qdrant:trnfewshot20230907:7w54vei9'" + "'ft:gpt-3.5-turbo-0613:qdrant:trnfewshot20230907:7w6Gapu0'" ] }, - "execution_count": 72, + "execution_count": 95, "metadata": {}, "output_type": "execute_result" } @@ -1675,13 +1675,13 @@ }, { "cell_type": "code", - "execution_count": null, + "execution_count": 96, "metadata": {}, "outputs": [ { "data": { "application/vnd.jupyter.widget-view+json": { - "model_id": "4333f061f0e54cb5a8a30133308896a5", + "model_id": "e39ab52659a44ec59b5748399c2e24fa", "version_major": 2, "version_minor": 0 }, From 324a4f3402b4230b12acd4a47c55365392b2a524 Mon Sep 17 00:00:00 2001 From: NirantK Date: Thu, 7 Sep 2023 15:47:31 +0530 Subject: [PATCH 15/38] * fix(ModelFinetune.ipynb): update execution counts for code cells * fix(ModelFinetune.ipynb): update generated answer for few-shot question * fix(ModelFinetune.ipynb): update count percentages for different answer types --- examples/fine-tuned-RAG/ModelFinetune.ipynb | 28 ++++++++++----------- 1 file changed, 14 insertions(+), 14 deletions(-) diff --git a/examples/fine-tuned-RAG/ModelFinetune.ipynb b/examples/fine-tuned-RAG/ModelFinetune.ipynb index 95495c19a6..b3c9215849 100644 --- a/examples/fine-tuned-RAG/ModelFinetune.ipynb +++ b/examples/fine-tuned-RAG/ModelFinetune.ipynb @@ -45,7 +45,7 @@ }, { "cell_type": "code", - "execution_count": 1, + "execution_count": 99, "metadata": {}, "outputs": [], "source": [ @@ -1706,44 +1706,44 @@ }, { "cell_type": "code", - 
"execution_count": null, + "execution_count": 100, "metadata": {}, "outputs": [ { "data": { "text/plain": [ - "{'ft_generated_answer_few_shot': 'illegal boycotts',\n", - " 'question': 'What is a type of disobedience against the federal government?',\n", - " 'context': 'Some forms of civil disobedience, such as illegal boycotts, refusals to pay taxes, draft dodging, distributed denial-of-service attacks, and sit-ins, make it more difficult for a system to function. In this way, they might be considered coercive. Brownlee notes that \"although civil disobedients are constrained in their use of coercion by their conscientious aim to engage in moral dialogue, nevertheless they may find it necessary to employ limited coercion in order to get their issue onto the table.\" The Plowshares organization temporarily closed GCSB Waihopai by padlocking the gates and using sickles to deflate one of the large domes covering two satellite dishes.',\n", - " 'is_impossible': False}" + "{'ft_generated_answer_few_shot': \"I don't know\",\n", + " 'question': 'When was the deportation of Canadians?',\n", + " 'context': 'Britain gained control of French Canada and Acadia, colonies containing approximately 80,000 primarily French-speaking Roman Catholic residents. The deportation of Acadians beginning in 1755 resulted in land made available to migrants from Europe and the colonies further south. The British resettled many Acadians throughout its North American provinces, but many went to France, and some went to New Orleans, which they had expected to remain French. Some were sent to colonize places as diverse as French Guiana and the Falkland Islands; these latter efforts were unsuccessful. Others migrated to places like Saint-Domingue, and fled to New Orleans after the Haitian Revolution. The Louisiana population contributed to the founding of the modern Cajun population. 
(The French word \"Acadien\" evolved to \"Cadien\", then to \"Cajun\".)',\n", + " 'is_impossible': True}" ] }, - "execution_count": 86, + "execution_count": 100, "metadata": {}, "output_type": "execute_result" } ], "source": [ - "df[[\"ft_generated_answer_few_shot\", \"question\", \"context\", \"is_impossible\"]].iloc[5].to_dict()" + "df[[\"ft_generated_answer_few_shot\", \"question\", \"context\", \"is_impossible\"]].iloc[4].to_dict()" ] }, { "cell_type": "code", - "execution_count": null, + "execution_count": 98, "metadata": {}, "outputs": [ { "data": { "text/plain": [ "Expected and Right 46.00%\n", - "Expected but IDK 0.00%\n", - "Expected but Wrong 7.00%\n", - "Hallucination 44.00%\n", - "Did not Expect and IDK 3.00%\n", + "Expected but IDK 5.00%\n", + "Expected but Wrong 2.00%\n", + "Hallucination 27.00%\n", + "Did not Expect and IDK 20.00%\n", "Name: count, dtype: object" ] }, - "execution_count": 75, + "execution_count": 98, "metadata": {}, "output_type": "execute_result" } From b3d2dfe6485839c98fca011cc4e8b48a1acf6121 Mon Sep 17 00:00:00 2001 From: NirantK Date: Thu, 7 Sep 2023 16:01:56 +0530 Subject: [PATCH 16/38] * chore(ModelFinetune.ipynb): update section titles and content * feat(ModelFinetune.ipynb): add new sections for Qdrant integration and Few-Shot Learning * fix(ModelFinetune.ipynb): fix typo in section title --- examples/fine-tuned-RAG/ModelFinetune.ipynb | 61 ++++++++++++++------- 1 file changed, 40 insertions(+), 21 deletions(-) diff --git a/examples/fine-tuned-RAG/ModelFinetune.ipynb b/examples/fine-tuned-RAG/ModelFinetune.ipynb index b3c9215849..0f810efd83 100644 --- a/examples/fine-tuned-RAG/ModelFinetune.ipynb +++ b/examples/fine-tuned-RAG/ModelFinetune.ipynb @@ -9,19 +9,18 @@ "\n", "\n", "\n", - "Here, we'll take you through the process, complete with code examples, to help you fine-tune your OpenAI model for usage with RAG like a pro.\n", - "\n", - "\n", + "Intent: Show how to fine-tune OpenAI models for RAG and improve them using Qdrant 
and Few-Shot Learning! 🤩\n", "\n", "To begin, we've selected a dataset where we've a guarantee that the retrieval is perfect. We've selected a subset of the [SQuAD](https://rajpurkar.github.io/SQuAD-explorer/) dataset, which is a collection of questions and answers about Wikipedia articles. We've also included samples where the answer is not present in the context, to demonstrate how RAG handles this case.\n", "\n", "## Table of Contents\n", "1. Setting up the Environment\n", "2. Data Preparation\n", - "3. Running the Model\n", - "4. Evaluation\n", - "5. Fine-Tuning\n", - "6. Comparison" + "3. OpenAI Model Fine-Tuning\n", + "4. Evaluation with OpenAI\n", + "5. Using Qdrant to Improve RAG Prompt\n", + "6. Few-Shot Learning with Qdrant\n", + "7. Comparison and Results" ] }, { @@ -78,10 +77,12 @@ "cell_type": "markdown", "metadata": {}, "source": [ - "## Preparing the Data\n", + "## Data Preparation: SQuADv2 Data Subsets\n", "\n", "For the purpose of demonstration, we'll make small slices from the train and validation splits of the [SQuADv2](https://rajpurkar.github.io/SQuAD-explorer/) dataset. 
This dataset has questions and contexts where the answer is not present in the context, to help us evaluate how LLM handles this case.\n", "\n", + "We'll read the data from the JSON files and create a dataframe with the following columns: `question`, `context`, `answer`, `is_impossible`.\n", + "\n", "### Download the Data" ] }, @@ -96,6 +97,13 @@ "# !wget https://rajpurkar.github.io/SQuAD-explorer/dataset/dev-v2.0.json -O local_cache/dev.json" ] }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "## JSON to DataFrame" + ] + }, { "cell_type": "code", "execution_count": 4, @@ -333,7 +341,9 @@ "cell_type": "markdown", "metadata": {}, "source": [ - "## Fine-Tuning\n", + "## Fine-Tuning the OpenAI Model\n", + "\n", + "For the complete fine-tuning process, please refer to the [OpenAI Fine-Tuning Docs](https://platform.openai.com/docs/guides/fine-tuning/use-a-fine-tuned-model).\n", "\n", "### Prepare the Fine-Tuning Data\n", "\n", @@ -388,7 +398,7 @@ "cell_type": "markdown", "metadata": {}, "source": [ - "### [Optional] Verify the Fine-Tuning Data\n", + "#### [Optional] Verify the Fine-Tuning Data\n", "\n", "The script below will verify that the data is in the format that OpenAI expects." ] @@ -576,7 +586,7 @@ "cell_type": "markdown", "metadata": {}, "source": [ - "### Push the Fine-Tuning data to OpenAI" + "### Upload the Fine-Tuning data to OpenAI" ] }, { @@ -1064,7 +1074,9 @@ "cell_type": "markdown", "metadata": {}, "source": [ - "### Embed the Training Data" + "### Embed the Training Data\n", + "\n", + "Embeddings are a way to represent sentences as an array of floats. We'll use the embeddings to find the most similar questions to the ones we're looking for." 
] }, { @@ -1072,6 +1084,15 @@ "execution_count": 9, "metadata": {}, "outputs": [], + "source": [ + "# !pip install fastembed" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], "source": [ "import os\n", "from qdrant_client import QdrantClient\n", @@ -1080,6 +1101,13 @@ "from qdrant_client.http.models import Distance, VectorParams" ] }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "Now that we've the Qdrant imports in place, " + ] + }, { "cell_type": "code", "execution_count": 22, @@ -1100,15 +1128,6 @@ "# )" ] }, - { - "cell_type": "code", - "execution_count": 9, - "metadata": {}, - "outputs": [], - "source": [ - "# !pip install fastembed" - ] - }, { "cell_type": "code", "execution_count": 12, From 219fdab9f6bf979ccd4f85044dc640dfea153512 Mon Sep 17 00:00:00 2001 From: NirantK Date: Thu, 7 Sep 2023 16:23:32 +0530 Subject: [PATCH 17/38] * chore(ModelFinetune.ipynb): update headings and descriptions in the notebook * fix(ModelFinetune.ipynb): fix heading for the setting up section * fix(ModelFinetune.ipynb): fix heading for the data preparation section --- examples/fine-tuned-RAG/ModelFinetune.ipynb | 242 +++++--------------- 1 file changed, 56 insertions(+), 186 deletions(-) diff --git a/examples/fine-tuned-RAG/ModelFinetune.ipynb b/examples/fine-tuned-RAG/ModelFinetune.ipynb index 0f810efd83..f77fbc2209 100644 --- a/examples/fine-tuned-RAG/ModelFinetune.ipynb +++ b/examples/fine-tuned-RAG/ModelFinetune.ipynb @@ -5,11 +5,9 @@ "cell_type": "markdown", "metadata": {}, "source": [ - "# Finetuning for RAG\n", + "# Fine-Tuning OpenAI Models for Retrieval Augmented Generation (RAG) with Qdrant and Few-Shot Learning\n", "\n", - "\n", - "\n", - "Intent: Show how to fine-tune OpenAI models for RAG and improve them using Qdrant and Few-Shot Learning! 🤩\n", + "The aim of this blog is to walk through a comprehensive example of how to fine-tune OpenAI models for Retrieval Augmented Generation (RAG). 
We will also be integrating Qdrant and Few-Shot Learning to boost the model's performance and reduce hallucinations. This could serve as a practical guide for ML practitioners, data scientists, and researchers interested in leveraging the power of OpenAI models for specific use-cases. 🤩\n", "\n", "To begin, we've selected a dataset where we've a guarantee that the retrieval is perfect. We've selected a subset of the [SQuAD](https://rajpurkar.github.io/SQuAD-explorer/) dataset, which is a collection of questions and answers about Wikipedia articles. We've also included samples where the answer is not present in the context, to demonstrate how RAG handles this case.\n", "\n", @@ -28,7 +26,7 @@ "cell_type": "markdown", "metadata": {}, "source": [ - "## Setting Up\n", + "## 1. Setting Up\n", "\n", "### Install and Import Dependencies" ] @@ -77,7 +75,7 @@ "cell_type": "markdown", "metadata": {}, "source": [ - "## Data Preparation: SQuADv2 Data Subsets\n", + "## 2.Data Preparation: SQuADv2 Data Subsets\n", "\n", "For the purpose of demonstration, we'll make small slices from the train and validation splits of the [SQuADv2](https://rajpurkar.github.io/SQuAD-explorer/) dataset. This dataset has questions and contexts where the answer is not present in the context, to help us evaluate how LLM handles this case.\n", "\n", @@ -1502,193 +1500,65 @@ "metadata": {}, "outputs": [], "source": [ - "# Create OpenAI File\n", - "file_object = openai.File.create(\n", - " file=open(\"local_cache/100_train_few_shot.jsonl\", \"r\"),\n", - " purpose=\"fine-tune\",\n", - ")" - ] - }, - { - "cell_type": "markdown", - "metadata": {}, - "source": [ - "💡 Wait: The file object status needs to change to \"processed\" before we can start our fine tuning job. 
" - ] - }, - { - "cell_type": "code", - "execution_count": 92, - "metadata": {}, - "outputs": [ - { - "name": "stdout", - "output_type": "stream", - "text": [ - "uploaded\n", - "uploaded\n", - "uploaded\n", - "uploaded\n", - "uploaded\n", - "uploaded\n", - "uploaded\n", - "uploaded\n", - "uploaded\n", - "uploaded\n", - "uploaded\n", - "uploaded\n", - "uploaded\n", - "uploaded\n", - "uploaded\n", - "uploaded\n", - "uploaded\n", - "uploaded\n", - "uploaded\n", - "uploaded\n", - "uploaded\n", - "uploaded\n", - "uploaded\n", - "uploaded\n", - "uploaded\n", - "uploaded\n", - "uploaded\n", - "uploaded\n", - "uploaded\n", - "uploaded\n", - "uploaded\n", - "uploaded\n", - "uploaded\n", - "uploaded\n", - "uploaded\n", - "uploaded\n", - "uploaded\n", - "uploaded\n", - "uploaded\n", - "uploaded\n", - "uploaded\n", - "uploaded\n", - "uploaded\n", - "uploaded\n", - "uploaded\n", - "uploaded\n", - "uploaded\n", - "uploaded\n", - "uploaded\n", - "uploaded\n", - "uploaded\n", - "uploaded\n", - "uploaded\n", - "uploaded\n", - "uploaded\n", - "uploaded\n", - "uploaded\n", - "uploaded\n", - "uploaded\n", - "uploaded\n", - "uploaded\n", - "uploaded\n", - "uploaded\n", - "uploaded\n", - "uploaded\n", - "uploaded\n", - "uploaded\n", - "uploaded\n", - "uploaded\n", - "uploaded\n", - "uploaded\n", - "uploaded\n", - "uploaded\n", - "uploaded\n", - "processed\n" - ] - } - ], - "source": [ - "while file_object.status!='processed':\n", - " time.sleep(20)\n", - " file_object.refresh()\n", - " print(file_object.status)" + "class OpenAIFineTuner:\n", + " def __init__(self, training_file_path, model_name, suffix):\n", + " self.training_file_path = training_file_path\n", + " self.model_name = model_name\n", + " self.suffix = suffix\n", + " self.file_object = None\n", + " self.fine_tuning_job = None\n", + " self.model_id = None\n", + "\n", + " def create_openai_file(self):\n", + " self.file_object = openai.File.create(\n", + " file=open(self.training_file_path, \"r\"),\n", + " 
purpose=\"fine-tune\",\n", + " )\n", + "\n", + " def wait_for_file_processing(self, sleep_time=20):\n", + " while self.file_object.status != 'processed':\n", + " time.sleep(sleep_time)\n", + " self.file_object.refresh()\n", + " print(\"File Status: \", self.file_object.status)\n", + "\n", + " def create_fine_tuning_job(self):\n", + " self.fine_tuning_job = openai.FineTuningJob.create(\n", + " training_file=self.file_object[\"id\"],\n", + " model=self.model_name,\n", + " suffix=self.suffix,\n", + " )\n", + "\n", + " def wait_for_fine_tuning(self, sleep_time=45):\n", + " while self.fine_tuning_job.status != 'succeeded':\n", + " time.sleep(sleep_time)\n", + " self.fine_tuning_job.refresh()\n", + " print(\"Job Status: \", self.fine_tuning_job.status)\n", + "\n", + " def retrieve_fine_tuned_model(self):\n", + " self.model_id = openai.FineTuningJob.retrieve(self.fine_tuning_job[\"id\"]).fine_tuned_model\n", + " return self.model_id\n", + "\n", + " def fine_tune_model(self):\n", + " self.create_openai_file()\n", + " self.wait_for_file_processing()\n", + " self.create_fine_tuning_job()\n", + " self.wait_for_fine_tuning()\n", + " return self.retrieve_fine_tuned_model()" ] }, { "cell_type": "code", - "execution_count": 93, + "execution_count": null, "metadata": {}, "outputs": [], "source": [ - "# Finetuning job\n", - "ft_job = openai.FineTuningJob.create(\n", - " training_file=file_object[\"id\"], model=\"gpt-3.5-turbo\", suffix=\"trnfewshot20230907\"\n", - ")" - ] - }, - { - "cell_type": "markdown", - "metadata": {}, - "source": [ - "💡 Wait: We've to wait till the fine-tuning job is complete i.e. status changes to \"succeeded\" before we can use the fine-tuned model." 
- ] - }, - { - "cell_type": "code", - "execution_count": 94, - "metadata": {}, - "outputs": [ - { - "name": "stdout", - "output_type": "stream", - "text": [ - "Status: created\n", - "Status: running\n", - "Status: running\n", - "Status: running\n", - "Status: running\n", - "Status: running\n", - "Status: running\n", - "Status: running\n", - "Status: running\n", - "Status: running\n", - "Status: running\n", - "Status: running\n", - "Status: running\n", - "Status: running\n", - "Status: running\n", - "Status: running\n", - "Status: running\n", - "Status: running\n", - "Status: running\n", - "Status: running\n", - "Status: running\n", - "Status: running\n", - "Status: succeeded\n" - ] - } - ], - "source": [ - "while ft_job.status!='succeeded':\n", - " ft_job.refresh()\n", - " print(\"Status: \", ft_job.status)\n", - " time.sleep(45)" - ] - }, - { - "cell_type": "code", - "execution_count": 95, - "metadata": {}, - "outputs": [ - { - "data": { - "text/plain": [ - "'ft:gpt-3.5-turbo-0613:qdrant:trnfewshot20230907:7w6Gapu0'" - ] - }, - "execution_count": 95, - "metadata": {}, - "output_type": "execute_result" - } - ], - "source": [ - "model_id = openai.FineTuningJob.retrieve(ft_job[\"id\"]).fine_tuned_model\n", + "fine_tuner = OpenAIFineTuner(\n", + " training_file_path=\"local_cache/100_train_few_shot.jsonl\",\n", + " model_name=\"gpt-3.5-turbo\",\n", + " suffix=\"trnfewshot20230907\"\n", + " )\n", + "\n", + "model_id = fine_tuner.fine_tune_model()\n", "model_id" ] }, From 3fc4256ff41f1848cff342013c536a0c2c97bc6a Mon Sep 17 00:00:00 2001 From: NirantK Date: Thu, 7 Sep 2023 16:48:12 +0530 Subject: [PATCH 18/38] Reorganise --- examples/fine-tuned-RAG/ModelFinetune.ipynb | 883 ++++---------------- 1 file changed, 185 insertions(+), 698 deletions(-) diff --git a/examples/fine-tuned-RAG/ModelFinetune.ipynb b/examples/fine-tuned-RAG/ModelFinetune.ipynb index f77fbc2209..4f412c7cbd 100644 --- a/examples/fine-tuned-RAG/ModelFinetune.ipynb +++ 
b/examples/fine-tuned-RAG/ModelFinetune.ipynb @@ -13,11 +13,15 @@ "\n", "## Table of Contents\n", "1. Setting up the Environment\n", + "\n", + "### Section A: Zero-Shot Learning\n", "2. Data Preparation\n", "3. OpenAI Model Fine-Tuning\n", - "4. Evaluation with OpenAI\n", + "4. Baseline Results\n", + "\n", + "### Section B: Few-Shot Learning\n", "5. Using Qdrant to Improve RAG Prompt\n", - "6. Few-Shot Learning with Qdrant\n", + "6. Fine-Tuning OpenAI Model with Qdrant\n", "7. Comparison and Results" ] }, @@ -37,12 +41,12 @@ "metadata": {}, "outputs": [], "source": [ - "# !pip install pandas openai tqdm tenacity pandarallel scikit-learn tiktoken python-dotenv" + "# !pip install pandas openai tqdm tenacity matplotlib scikit-learn tiktoken python-dotenv " ] }, { "cell_type": "code", - "execution_count": 99, + "execution_count": 2, "metadata": {}, "outputs": [], "source": [ @@ -57,13 +61,17 @@ "from tqdm import tqdm\n", "from collections import defaultdict\n", "import numpy as np\n", + "import matplotlib.pyplot as plt\n", + "import numpy as np\n", + "from sklearn.metrics import confusion_matrix\n", + "\n", "\n", "tqdm.pandas()" ] }, { "cell_type": "code", - "execution_count": 2, + "execution_count": 3, "metadata": {}, "outputs": [], "source": [ @@ -75,7 +83,7 @@ "cell_type": "markdown", "metadata": {}, "source": [ - "## 2.Data Preparation: SQuADv2 Data Subsets\n", + "## 2. Data Preparation: SQuADv2 Data Subsets\n", "\n", "For the purpose of demonstration, we'll make small slices from the train and validation splits of the [SQuADv2](https://rajpurkar.github.io/SQuAD-explorer/) dataset. 
This dataset has questions and contexts where the answer is not present in the context, to help us evaluate how LLM handles this case.\n", "\n", @@ -86,7 +94,7 @@ }, { "cell_type": "code", - "execution_count": 3, + "execution_count": 4, "metadata": {}, "outputs": [], "source": [ @@ -99,12 +107,12 @@ "cell_type": "markdown", "metadata": {}, "source": [ - "## JSON to DataFrame" + "## Read JSON to DataFrame" ] }, { "cell_type": "code", - "execution_count": 4, + "execution_count": 5, "metadata": {}, "outputs": [], "source": [ @@ -153,15 +161,14 @@ "cell_type": "markdown", "metadata": {}, "source": [ - "## Using the OpenAI Model for Question Answering\n", + "## Setting up Baseline Model Performance\n", "\n", - "### Prompt, API Call, and Answer\n", - "Create functions to get prompt messages and make API calls:" + "### Utility Functions: Zero Shot Prompt, API Call to OpenAI" ] }, { "cell_type": "code", - "execution_count": 57, + "execution_count": 6, "metadata": {}, "outputs": [], "source": [ @@ -198,14 +205,6 @@ " return response[\"choices\"][0][\"message\"][\"content\"]" ] }, - { - "attachments": {}, - "cell_type": "markdown", - "metadata": {}, - "source": [ - "### Running the model" - ] - }, { "cell_type": "code", "execution_count": 7, @@ -215,7 +214,7 @@ "name": "stderr", "output_type": "stream", "text": [ - "100%|██████████| 100/100 [02:53<00:00, 1.73s/it]\n" + " 25%|██▌ | 25/100 [00:33<01:35, 1.28s/it]" ] } ], @@ -226,7 +225,7 @@ }, { "cell_type": "code", - "execution_count": 10, + "execution_count": null, "metadata": {}, "outputs": [], "source": [ @@ -234,108 +233,6 @@ ] }, { - "attachments": {}, - "cell_type": "markdown", - "metadata": {}, - "source": [ - "## Evaluation\n", - "\n", - "To evaluate the model's performance, compare the predicted answer to the actual answers -- if any of the actual answers are present in the predicted answer, then it's a match. We've also created error categories to help you understand where the model is struggling.\n", - "\n", - "1. 
Expected and Right: The model responsded the correct answer. It may have also included other answers that were not in the context.\n", - "2. Expected but \"IDK\": The model responded with \"I don't know\" (IDK) while the answer was present in the context. *This is a model error* and better than giving the wrong answer. We exclude this from the overall error rate.\n", - "3. Expected but Wrong: The model responded with an incorrect answer. *This is a model ERROR.*\n", - "4. Hallucination: The model responded with an answer, when \"I don't know\" was expected. **This is a model error.** \n", - "5. Did not expect and IDK: The model responded with \"I don't know\" (IDK) and the answer was not present in the context. *This is a model WIN.*" - ] - }, - { - "cell_type": "code", - "execution_count": 61, - "metadata": {}, - "outputs": [ - { - "ename": "KeyError", - "evalue": "'generated_answer'", - "output_type": "error", - "traceback": [ - "\u001b[0;31m---------------------------------------------------------------------------\u001b[0m", - "\u001b[0;31mKeyError\u001b[0m Traceback (most recent call last)", - "File \u001b[0;32m/opt/homebrew/Caskroom/miniconda/base/envs/fst/lib/python3.9/site-packages/pandas/core/indexes/base.py:3653\u001b[0m, in \u001b[0;36mIndex.get_loc\u001b[0;34m(self, key)\u001b[0m\n\u001b[1;32m 3652\u001b[0m \u001b[39mtry\u001b[39;00m:\n\u001b[0;32m-> 3653\u001b[0m \u001b[39mreturn\u001b[39;00m \u001b[39mself\u001b[39;49m\u001b[39m.\u001b[39;49m_engine\u001b[39m.\u001b[39;49mget_loc(casted_key)\n\u001b[1;32m 3654\u001b[0m \u001b[39mexcept\u001b[39;00m \u001b[39mKeyError\u001b[39;00m \u001b[39mas\u001b[39;00m err:\n", - "File \u001b[0;32m/opt/homebrew/Caskroom/miniconda/base/envs/fst/lib/python3.9/site-packages/pandas/_libs/index.pyx:147\u001b[0m, in \u001b[0;36mpandas._libs.index.IndexEngine.get_loc\u001b[0;34m()\u001b[0m\n", - "File 
\u001b[0;32m/opt/homebrew/Caskroom/miniconda/base/envs/fst/lib/python3.9/site-packages/pandas/_libs/index.pyx:176\u001b[0m, in \u001b[0;36mpandas._libs.index.IndexEngine.get_loc\u001b[0;34m()\u001b[0m\n", - "File \u001b[0;32mpandas/_libs/hashtable_class_helper.pxi:7080\u001b[0m, in \u001b[0;36mpandas._libs.hashtable.PyObjectHashTable.get_item\u001b[0;34m()\u001b[0m\n", - "File \u001b[0;32mpandas/_libs/hashtable_class_helper.pxi:7088\u001b[0m, in \u001b[0;36mpandas._libs.hashtable.PyObjectHashTable.get_item\u001b[0;34m()\u001b[0m\n", - "\u001b[0;31mKeyError\u001b[0m: 'generated_answer'", - "\nThe above exception was the direct cause of the following exception:\n", - "\u001b[0;31mKeyError\u001b[0m Traceback (most recent call last)", - "Cell \u001b[0;32mIn[61], line 46\u001b[0m\n\u001b[1;32m 42\u001b[0m \u001b[39mreturn\u001b[39;00m freq_series\n\u001b[1;32m 45\u001b[0m evaluator \u001b[39m=\u001b[39m ConfusionMatrixEvaluator(df, answers_column\u001b[39m=\u001b[39m\u001b[39m\"\u001b[39m\u001b[39mgenerated_answer\u001b[39m\u001b[39m\"\u001b[39m)\n\u001b[0;32m---> 46\u001b[0m evaluator\u001b[39m.\u001b[39;49mevaluate_answers()\n\u001b[1;32m 47\u001b[0m error_categories \u001b[39m=\u001b[39m evaluator\u001b[39m.\u001b[39mgenerate_matrices(use_percentages\u001b[39m=\u001b[39m\u001b[39mTrue\u001b[39;00m)\n\u001b[1;32m 48\u001b[0m error_categories\n", - "Cell \u001b[0;32mIn[61], line 34\u001b[0m, in \u001b[0;36mConfusionMatrixEvaluator.evaluate_answers\u001b[0;34m(self)\u001b[0m\n\u001b[1;32m 33\u001b[0m \u001b[39mdef\u001b[39;00m \u001b[39mevaluate_answers\u001b[39m(\u001b[39mself\u001b[39m):\n\u001b[0;32m---> 34\u001b[0m \u001b[39mself\u001b[39m\u001b[39m.\u001b[39my_pred \u001b[39m=\u001b[39m \u001b[39mself\u001b[39;49m\u001b[39m.\u001b[39;49mdf\u001b[39m.\u001b[39;49mapply(\u001b[39mself\u001b[39;49m\u001b[39m.\u001b[39;49m_evaluate_single_row, axis\u001b[39m=\u001b[39;49m\u001b[39m1\u001b[39;49m)\n", - "File 
\u001b[0;32m/opt/homebrew/Caskroom/miniconda/base/envs/fst/lib/python3.9/site-packages/pandas/core/frame.py:9423\u001b[0m, in \u001b[0;36mDataFrame.apply\u001b[0;34m(self, func, axis, raw, result_type, args, **kwargs)\u001b[0m\n\u001b[1;32m 9412\u001b[0m \u001b[39mfrom\u001b[39;00m \u001b[39mpandas\u001b[39;00m\u001b[39m.\u001b[39;00m\u001b[39mcore\u001b[39;00m\u001b[39m.\u001b[39;00m\u001b[39mapply\u001b[39;00m \u001b[39mimport\u001b[39;00m frame_apply\n\u001b[1;32m 9414\u001b[0m op \u001b[39m=\u001b[39m frame_apply(\n\u001b[1;32m 9415\u001b[0m \u001b[39mself\u001b[39m,\n\u001b[1;32m 9416\u001b[0m func\u001b[39m=\u001b[39mfunc,\n\u001b[0;32m (...)\u001b[0m\n\u001b[1;32m 9421\u001b[0m kwargs\u001b[39m=\u001b[39mkwargs,\n\u001b[1;32m 9422\u001b[0m )\n\u001b[0;32m-> 9423\u001b[0m \u001b[39mreturn\u001b[39;00m op\u001b[39m.\u001b[39;49mapply()\u001b[39m.\u001b[39m__finalize__(\u001b[39mself\u001b[39m, method\u001b[39m=\u001b[39m\u001b[39m\"\u001b[39m\u001b[39mapply\u001b[39m\u001b[39m\"\u001b[39m)\n", - "File \u001b[0;32m/opt/homebrew/Caskroom/miniconda/base/envs/fst/lib/python3.9/site-packages/pandas/core/apply.py:678\u001b[0m, in \u001b[0;36mFrameApply.apply\u001b[0;34m(self)\u001b[0m\n\u001b[1;32m 675\u001b[0m \u001b[39melif\u001b[39;00m \u001b[39mself\u001b[39m\u001b[39m.\u001b[39mraw:\n\u001b[1;32m 676\u001b[0m \u001b[39mreturn\u001b[39;00m \u001b[39mself\u001b[39m\u001b[39m.\u001b[39mapply_raw()\n\u001b[0;32m--> 678\u001b[0m \u001b[39mreturn\u001b[39;00m \u001b[39mself\u001b[39;49m\u001b[39m.\u001b[39;49mapply_standard()\n", - "File \u001b[0;32m/opt/homebrew/Caskroom/miniconda/base/envs/fst/lib/python3.9/site-packages/pandas/core/apply.py:798\u001b[0m, in \u001b[0;36mFrameApply.apply_standard\u001b[0;34m(self)\u001b[0m\n\u001b[1;32m 797\u001b[0m \u001b[39mdef\u001b[39;00m \u001b[39mapply_standard\u001b[39m(\u001b[39mself\u001b[39m):\n\u001b[0;32m--> 798\u001b[0m results, res_index \u001b[39m=\u001b[39m 
\u001b[39mself\u001b[39;49m\u001b[39m.\u001b[39;49mapply_series_generator()\n\u001b[1;32m 800\u001b[0m \u001b[39m# wrap results\u001b[39;00m\n\u001b[1;32m 801\u001b[0m \u001b[39mreturn\u001b[39;00m \u001b[39mself\u001b[39m\u001b[39m.\u001b[39mwrap_results(results, res_index)\n", - "File \u001b[0;32m/opt/homebrew/Caskroom/miniconda/base/envs/fst/lib/python3.9/site-packages/pandas/core/apply.py:814\u001b[0m, in \u001b[0;36mFrameApply.apply_series_generator\u001b[0;34m(self)\u001b[0m\n\u001b[1;32m 811\u001b[0m \u001b[39mwith\u001b[39;00m option_context(\u001b[39m\"\u001b[39m\u001b[39mmode.chained_assignment\u001b[39m\u001b[39m\"\u001b[39m, \u001b[39mNone\u001b[39;00m):\n\u001b[1;32m 812\u001b[0m \u001b[39mfor\u001b[39;00m i, v \u001b[39min\u001b[39;00m \u001b[39menumerate\u001b[39m(series_gen):\n\u001b[1;32m 813\u001b[0m \u001b[39m# ignore SettingWithCopy here in case the user mutates\u001b[39;00m\n\u001b[0;32m--> 814\u001b[0m results[i] \u001b[39m=\u001b[39m \u001b[39mself\u001b[39;49m\u001b[39m.\u001b[39;49mf(v)\n\u001b[1;32m 815\u001b[0m \u001b[39mif\u001b[39;00m \u001b[39misinstance\u001b[39m(results[i], ABCSeries):\n\u001b[1;32m 816\u001b[0m \u001b[39m# If we have a view on v, we need to make a copy because\u001b[39;00m\n\u001b[1;32m 817\u001b[0m \u001b[39m# series_generator will swap out the underlying data\u001b[39;00m\n\u001b[1;32m 818\u001b[0m results[i] \u001b[39m=\u001b[39m results[i]\u001b[39m.\u001b[39mcopy(deep\u001b[39m=\u001b[39m\u001b[39mFalse\u001b[39;00m)\n", - "Cell \u001b[0;32mIn[61], line 16\u001b[0m, in \u001b[0;36mConfusionMatrixEvaluator._evaluate_single_row\u001b[0;34m(self, row)\u001b[0m\n\u001b[1;32m 14\u001b[0m \u001b[39mdef\u001b[39;00m \u001b[39m_evaluate_single_row\u001b[39m(\u001b[39mself\u001b[39m, row):\n\u001b[1;32m 15\u001b[0m is_impossible \u001b[39m=\u001b[39m row[\u001b[39m\"\u001b[39m\u001b[39mis_impossible\u001b[39m\u001b[39m\"\u001b[39m]\n\u001b[0;32m---> 16\u001b[0m generated_answer \u001b[39m=\u001b[39m 
row[\u001b[39mself\u001b[39;49m\u001b[39m.\u001b[39;49manswers_column]\u001b[39m.\u001b[39mlower()\n\u001b[1;32m 17\u001b[0m actual_answers \u001b[39m=\u001b[39m [ans\u001b[39m.\u001b[39mlower() \u001b[39mfor\u001b[39;00m ans \u001b[39min\u001b[39;00m row[\u001b[39m\"\u001b[39m\u001b[39manswers\u001b[39m\u001b[39m\"\u001b[39m]]\n\u001b[1;32m 19\u001b[0m y_pred \u001b[39m=\u001b[39m (\n\u001b[1;32m 20\u001b[0m \u001b[39m\"\u001b[39m\u001b[39mExpected and Right\u001b[39m\u001b[39m\"\u001b[39m\n\u001b[1;32m 21\u001b[0m \u001b[39mif\u001b[39;00m \u001b[39mnot\u001b[39;00m is_impossible\n\u001b[0;32m (...)\u001b[0m\n\u001b[1;32m 29\u001b[0m \u001b[39melse\u001b[39;00m \u001b[39m\"\u001b[39m\u001b[39mDid not Expect and IDK\u001b[39m\u001b[39m\"\u001b[39m\n\u001b[1;32m 30\u001b[0m )\n", - "File \u001b[0;32m/opt/homebrew/Caskroom/miniconda/base/envs/fst/lib/python3.9/site-packages/pandas/core/series.py:1007\u001b[0m, in \u001b[0;36mSeries.__getitem__\u001b[0;34m(self, key)\u001b[0m\n\u001b[1;32m 1004\u001b[0m \u001b[39mreturn\u001b[39;00m \u001b[39mself\u001b[39m\u001b[39m.\u001b[39m_values[key]\n\u001b[1;32m 1006\u001b[0m \u001b[39melif\u001b[39;00m key_is_scalar:\n\u001b[0;32m-> 1007\u001b[0m \u001b[39mreturn\u001b[39;00m \u001b[39mself\u001b[39;49m\u001b[39m.\u001b[39;49m_get_value(key)\n\u001b[1;32m 1009\u001b[0m \u001b[39mif\u001b[39;00m is_hashable(key):\n\u001b[1;32m 1010\u001b[0m \u001b[39m# Otherwise index.get_value will raise InvalidIndexError\u001b[39;00m\n\u001b[1;32m 1011\u001b[0m \u001b[39mtry\u001b[39;00m:\n\u001b[1;32m 1012\u001b[0m \u001b[39m# For labels that don't resolve as scalars like tuples and frozensets\u001b[39;00m\n", - "File \u001b[0;32m/opt/homebrew/Caskroom/miniconda/base/envs/fst/lib/python3.9/site-packages/pandas/core/series.py:1116\u001b[0m, in \u001b[0;36mSeries._get_value\u001b[0;34m(self, label, takeable)\u001b[0m\n\u001b[1;32m 1113\u001b[0m \u001b[39mreturn\u001b[39;00m 
\u001b[39mself\u001b[39m\u001b[39m.\u001b[39m_values[label]\n\u001b[1;32m 1115\u001b[0m \u001b[39m# Similar to Index.get_value, but we do not fall back to positional\u001b[39;00m\n\u001b[0;32m-> 1116\u001b[0m loc \u001b[39m=\u001b[39m \u001b[39mself\u001b[39;49m\u001b[39m.\u001b[39;49mindex\u001b[39m.\u001b[39;49mget_loc(label)\n\u001b[1;32m 1118\u001b[0m \u001b[39mif\u001b[39;00m is_integer(loc):\n\u001b[1;32m 1119\u001b[0m \u001b[39mreturn\u001b[39;00m \u001b[39mself\u001b[39m\u001b[39m.\u001b[39m_values[loc]\n", - "File \u001b[0;32m/opt/homebrew/Caskroom/miniconda/base/envs/fst/lib/python3.9/site-packages/pandas/core/indexes/base.py:3655\u001b[0m, in \u001b[0;36mIndex.get_loc\u001b[0;34m(self, key)\u001b[0m\n\u001b[1;32m 3653\u001b[0m \u001b[39mreturn\u001b[39;00m \u001b[39mself\u001b[39m\u001b[39m.\u001b[39m_engine\u001b[39m.\u001b[39mget_loc(casted_key)\n\u001b[1;32m 3654\u001b[0m \u001b[39mexcept\u001b[39;00m \u001b[39mKeyError\u001b[39;00m \u001b[39mas\u001b[39;00m err:\n\u001b[0;32m-> 3655\u001b[0m \u001b[39mraise\u001b[39;00m \u001b[39mKeyError\u001b[39;00m(key) \u001b[39mfrom\u001b[39;00m \u001b[39merr\u001b[39;00m\n\u001b[1;32m 3656\u001b[0m \u001b[39mexcept\u001b[39;00m \u001b[39mTypeError\u001b[39;00m:\n\u001b[1;32m 3657\u001b[0m \u001b[39m# If we have a listlike key, _check_indexing_error will raise\u001b[39;00m\n\u001b[1;32m 3658\u001b[0m \u001b[39m# InvalidIndexError. 
Otherwise we fall through and re-raise\u001b[39;00m\n\u001b[1;32m 3659\u001b[0m \u001b[39m# the TypeError.\u001b[39;00m\n\u001b[1;32m 3660\u001b[0m \u001b[39mself\u001b[39m\u001b[39m.\u001b[39m_check_indexing_error(key)\n", - "\u001b[0;31mKeyError\u001b[0m: 'generated_answer'" - ] - } - ], - "source": [ - "class ConfusionMatrixEvaluator:\n", - " def __init__(self, df, answers_column=\"generated_answer\"):\n", - " self.df = df\n", - " self.y_pred = []\n", - " self.labels = [\n", - " \"Expected and Right\",\n", - " \"Expected but IDK\",\n", - " \"Expected but Wrong\",\n", - " \"Hallucination\",\n", - " \"Did not Expect and IDK\",\n", - " ]\n", - " self.answers_column = answers_column\n", - "\n", - " def _evaluate_single_row(self, row):\n", - " is_impossible = row[\"is_impossible\"]\n", - " generated_answer = row[self.answers_column].lower()\n", - " actual_answers = [ans.lower() for ans in row[\"answers\"]]\n", - "\n", - " y_pred = (\n", - " \"Expected and Right\"\n", - " if not is_impossible\n", - " and any(ans in generated_answer for ans in actual_answers)\n", - " else \"Expected but IDK\"\n", - " if not is_impossible and generated_answer == \"i don't know\"\n", - " else \"Expected but Wrong\"\n", - " if not is_impossible and generated_answer not in actual_answers\n", - " else \"Hallucination\"\n", - " if is_impossible and generated_answer != \"i don't know\"\n", - " else \"Did not Expect and IDK\"\n", - " )\n", - " return y_pred\n", - "\n", - " def evaluate_answers(self):\n", - " self.y_pred = self.df.apply(self._evaluate_single_row, axis=1)\n", - "\n", - " def generate_matrices(self, use_percentages=False):\n", - " # Using value_counts to create a Series of frequencies, then reindexing to include missing labels with count 0\n", - " freq_series = self.y_pred.value_counts().reindex(self.labels, fill_value=0)\n", - " if use_percentages:\n", - " total = freq_series.sum()\n", - " freq_series = (freq_series / total * 100).apply(\"{0:.2f}%\".format)\n", - " return 
freq_series\n", - "\n", - "\n", - "evaluator = ConfusionMatrixEvaluator(df, answers_column=\"generated_answer\")\n", - "evaluator.evaluate_answers()\n", - "error_categories = evaluator.generate_matrices(use_percentages=True)\n", - "error_categories" - ] - }, - { - "attachments": {}, "cell_type": "markdown", "metadata": {}, "source": [ @@ -350,21 +247,9 @@ }, { "cell_type": "code", - "execution_count": 2, + "execution_count": null, "metadata": {}, - "outputs": [ - { - "ename": "NameError", - "evalue": "name 'get_diverse_sample' is not defined", - "output_type": "error", - "traceback": [ - "\u001b[0;31m---------------------------------------------------------------------------\u001b[0m", - "\u001b[0;31mNameError\u001b[0m Traceback (most recent call last)", - "Cell \u001b[0;32mIn[2], line 20\u001b[0m\n\u001b[1;32m 17\u001b[0m jsonl_output \u001b[39m=\u001b[39m df\u001b[39m.\u001b[39mprogress_apply(create_jsonl_entry, axis\u001b[39m=\u001b[39m\u001b[39m1\u001b[39m)\n\u001b[1;32m 18\u001b[0m \u001b[39mreturn\u001b[39;00m \u001b[39m\"\u001b[39m\u001b[39m\\n\u001b[39;00m\u001b[39m\"\u001b[39m\u001b[39m.\u001b[39mjoin(jsonl_output)\n\u001b[0;32m---> 20\u001b[0m train_sample \u001b[39m=\u001b[39m get_diverse_sample(train_df, sample_size\u001b[39m=\u001b[39m\u001b[39m100\u001b[39m, random_state\u001b[39m=\u001b[39m\u001b[39m42\u001b[39m)\n\u001b[1;32m 22\u001b[0m \u001b[39mwith\u001b[39;00m \u001b[39mopen\u001b[39m(\u001b[39m\"\u001b[39m\u001b[39mlocal_cache/100_train.jsonl\u001b[39m\u001b[39m\"\u001b[39m, \u001b[39m\"\u001b[39m\u001b[39mw\u001b[39m\u001b[39m\"\u001b[39m) \u001b[39mas\u001b[39;00m f:\n\u001b[1;32m 23\u001b[0m f\u001b[39m.\u001b[39mwrite(dataframe_to_jsonl(train_sample))\n", - "\u001b[0;31mNameError\u001b[0m: name 'get_diverse_sample' is not defined" - ] - } - ], + "outputs": [], "source": [ "def dataframe_to_jsonl(df):\n", " def create_jsonl_entry(row):\n", @@ -392,7 +277,6 @@ ] }, { - "attachments": {}, "cell_type": "markdown", "metadata": {}, "source": [ 
@@ -403,45 +287,9 @@ }, { "cell_type": "code", - "execution_count": 18, + "execution_count": null, "metadata": {}, - "outputs": [ - { - "name": "stdout", - "output_type": "stream", - "text": [ - "Num examples: 100\n", - "First example:\n", - "{'role': 'system', 'content': 'You are a helpful assistant.'}\n", - "{'role': 'user', 'content': \"Answer the following Question based on the Context only. Only answer from the Context. If you don't know the answer, say 'I don't know'.\\n Question: What is a cirque?\\n\\n\\n Context: Glaciers form where the accumulation of snow and ice exceeds ablation. The area in which a glacier forms is called a cirque (corrie or cwm) - a typically armchair-shaped geological feature (such as a depression between mountains enclosed by arêtes) - which collects and compresses through gravity the snow which falls into it. This snow collects and is compacted by the weight of the snow falling above it forming névé. Further crushing of the individual snowflakes and squeezing the air from the snow turns it into 'glacial ice'. This glacial ice will fill the cirque until it 'overflows' through a geological weakness or vacancy, such as the gap between two mountains. When the mass of snow and ice is sufficiently thick, it begins to move due to a combination of surface slope, gravity and pressure. 
On steeper slopes, this can occur with as little as 15 m (50 ft) of snow-ice.\\n\\n\\n Answer:\\n\"}\n", - "{'role': 'assistant', 'content': 'The area in which a glacier forms'}\n", - "No errors found\n", - "Num examples missing system message: 0\n", - "Num examples missing user message: 0\n", - "\n", - "#### Distribution of num_messages_per_example:\n", - "min / max: 3, 3\n", - "mean / median: 3.0, 3.0\n", - "p5 / p95: 3.0, 3.0\n", - "\n", - "#### Distribution of num_total_tokens_per_example:\n", - "min / max: 114, 689\n", - "mean / median: 236.88, 217.0\n", - "p5 / p95: 166.7, 321.3\n", - "\n", - "#### Distribution of num_assistant_tokens_per_example:\n", - "min / max: 1, 13\n", - "mean / median: 3.81, 4.0\n", - "p5 / p95: 1.0, 5.0\n", - "\n", - "0 examples may be over the 4096 token limit, they will be truncated during fine-tuning\n", - "Dataset has ~23688 tokens that will be charged for during training\n", - "By default, you'll train for 3 epochs on this dataset\n", - "By default, you'll be charged for ~71064 tokens\n", - "See pricing page to estimate total costs\n" - ] - } - ], + "outputs": [], "source": [ "# Specify the data path and open the JSONL file\n", "\n", @@ -580,16 +428,15 @@ ] }, { - "attachments": {}, "cell_type": "markdown", "metadata": {}, "source": [ - "### Upload the Fine-Tuning data to OpenAI" + "### Upload the Fine-Tuning Data to OpenAI" ] }, { "cell_type": "code", - "execution_count": 50, + "execution_count": null, "metadata": {}, "outputs": [], "source": [ @@ -599,31 +446,18 @@ ")" ] }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "💡 Wait: For file to be uploaded and then processed by OpenAI." 
+ ] + }, { "cell_type": "code", - "execution_count": 51, + "execution_count": null, "metadata": {}, - "outputs": [ - { - "data": { - "text/plain": [ - " JSON: {\n", - " \"object\": \"file\",\n", - " \"id\": \"file-iuSjUY6kK84A1cOB9QffTxfD\",\n", - " \"purpose\": \"fine-tune\",\n", - " \"filename\": \"file\",\n", - " \"bytes\": 120415,\n", - " \"created_at\": 1694012894,\n", - " \"status\": \"processed\",\n", - " \"status_details\": null\n", - "}" - ] - }, - "execution_count": 51, - "metadata": {}, - "output_type": "execute_result" - } - ], + "outputs": [], "source": [ "while file_object.status!='processed':\n", " time.sleep(5)\n", @@ -632,16 +466,15 @@ ] }, { - "attachments": {}, "cell_type": "markdown", "metadata": {}, "source": [ - "### Create Fine Tuning Job" + "### Create a Fine Tuning Job" ] }, { "cell_type": "code", - "execution_count": 52, + "execution_count": null, "metadata": {}, "outputs": [], "source": [ @@ -650,70 +483,18 @@ ")" ] }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "💡 Wait: For the fine-tuning job to complete and status to be \"succeeded\"." 
+ ] + }, { "cell_type": "code", - "execution_count": 58, + "execution_count": null, "metadata": {}, - "outputs": [ - { - "name": "stdout", - "output_type": "stream", - "text": [ - "Status: running\n", - "Status: running\n", - "Status: running\n", - "Status: running\n", - "Status: running\n", - "Status: running\n", - "Status: running\n", - "Status: running\n", - "Status: running\n", - "Status: running\n", - "Status: running\n", - "Status: running\n", - "Status: running\n", - "Status: running\n", - "Status: running\n", - "Status: running\n", - "Status: running\n", - "Status: running\n", - "Status: running\n", - "Status: running\n", - "Status: running\n", - "Status: running\n", - "Status: running\n", - "Status: running\n", - "Status: running\n", - "Status: running\n", - "Status: running\n", - "Status: running\n", - "Status: running\n", - "Status: running\n", - "Status: running\n", - "Status: running\n", - "Status: running\n", - "Status: running\n", - "Status: running\n", - "Status: running\n", - "Status: running\n", - "Status: running\n", - "Status: running\n", - "Status: running\n", - "Status: running\n", - "Status: running\n", - "Status: running\n", - "Status: running\n", - "Status: running\n", - "Status: running\n", - "Status: running\n", - "Status: running\n", - "Status: running\n", - "Status: running\n", - "Status: running\n", - "Status: succeeded\n" - ] - } - ], + "outputs": [], "source": [ "while ft_job.status!='succeeded':\n", " time.sleep(15)\n", @@ -723,27 +504,15 @@ }, { "cell_type": "code", - "execution_count": 63, + "execution_count": null, "metadata": {}, - "outputs": [ - { - "data": { - "text/plain": [ - "'ft:gpt-3.5-turbo-0613:qdrant:100train20230906:7vp2AzMY'" - ] - }, - "execution_count": 63, - "metadata": {}, - "output_type": "execute_result" - } - ], + "outputs": [], "source": [ "model_id = openai.FineTuningJob.retrieve(ft_job[\"id\"]).fine_tuned_model\n", "model_id" ] }, { - "attachments": {}, "cell_type": "markdown", "metadata": {}, "source": [ 
@@ -752,20 +521,9 @@ }, { "cell_type": "code", - "execution_count": 60, + "execution_count": null, "metadata": {}, - "outputs": [ - { - "name": "stdout", - "output_type": "stream", - "text": [ - "{\n", - " \"role\": \"assistant\",\n", - " \"content\": \"I don't know\"\n", - "}\n" - ] - } - ], + "outputs": [], "source": [ "completion = openai.ChatCompletion.create(\n", " model=model_id,\n", @@ -784,70 +542,118 @@ ] }, { - "attachments": {}, "cell_type": "markdown", "metadata": {}, "source": [ - "## Comparison\n", - "\n", - "### Get Answers from the Fine-Tuned Model" + "### Using the Fine-Tuned Model" ] }, { "cell_type": "code", - "execution_count": 61, + "execution_count": null, "metadata": {}, - "outputs": [ - { - "name": "stderr", - "output_type": "stream", - "text": [ - "100%|██████████| 100/100 [05:16<00:00, 3.17s/it]\n" - ] - } - ], + "outputs": [], "source": [ "df[\"ft_generated_answer\"] = df.progress_apply(answer_question, model=model_id, axis=1)" ] }, + { + "attachments": {}, + "cell_type": "markdown", + "metadata": {}, + "source": [ + "## Evaluate Baseline Model Performance\n", + "\n", + "To evaluate the model's performance, compare the predicted answer to the actual answers -- if any of the actual answers are present in the predicted answer, then it's a match. We've also created error categories to help you understand where the model is struggling.\n", + "\n", + "1. Expected and Right: The model responsded the correct answer. It may have also included other answers that were not in the context.\n", + "2. Expected but \"IDK\": The model responded with \"I don't know\" (IDK) while the answer was present in the context. *This is a model error* and better than giving the wrong answer. We exclude this from the overall error rate.\n", + "3. Expected but Wrong: The model responded with an incorrect answer. *This is a model ERROR.*\n", + "4. Hallucination: The model responded with an answer, when \"I don't know\" was expected. 
**This is a model error.** \n", + "5. Did not expect and IDK: The model responded with \"I don't know\" (IDK) and the answer was not present in the context. *This is a model WIN.*" + ] + }, { "cell_type": "code", - "execution_count": 62, + "execution_count": null, "metadata": {}, - "outputs": [ - { - "data": { - "text/plain": [ - "Expected and Right 30.00%\n", - "Expected but IDK 21.00%\n", - "Expected but Wrong 2.00%\n", - "Hallucination 5.00%\n", - "Did not Expect and IDK 42.00%\n", - "Name: count, dtype: object" - ] - }, - "execution_count": 62, - "metadata": {}, - "output_type": "execute_result" - } - ], + "outputs": [], "source": [ - "# Initialize the evaluator\n", - "finetuned_model_evaluator = ConfusionMatrixEvaluator(\n", - " df, answers_column=\"ft_generated_answer\"\n", - ")\n", + "class ConfusionMatrixEvaluator:\n", + " def __init__(self, df, answers_column=\"generated_answer\"):\n", + " self.df = df\n", + " self.y_pred = []\n", + " self.labels = [\n", + " \"Expected and Right\",\n", + " \"Expected but IDK\",\n", + " \"Expected but Wrong\",\n", + " \"Hallucination\",\n", + " \"Did not Expect and IDK\",\n", + " ]\n", + " self.answers_column = answers_column\n", "\n", - "# Run the evaluation\n", - "finetuned_model_evaluator.evaluate_answers()\n", - "finetuned_model_error_categories = finetuned_model_evaluator.generate_matrices(\n", - " use_percentages=True\n", - ")\n", - "finetuned_model_error_categories" + " def _evaluate_single_row(self, row):\n", + " is_impossible = row[\"is_impossible\"]\n", + " generated_answer = row[self.answers_column].lower()\n", + " actual_answers = [ans.lower() for ans in row[\"answers\"]]\n", + "\n", + " y_pred = (\n", + " \"Expected and Right\"\n", + " if not is_impossible\n", + " and any(ans in generated_answer for ans in actual_answers)\n", + " else \"Expected but IDK\"\n", + " if not is_impossible and generated_answer == \"i don't know\"\n", + " else \"Expected but Wrong\"\n", + " if not is_impossible and generated_answer 
not in actual_answers\n", + " else \"Hallucination\"\n", + " if is_impossible and generated_answer != \"i don't know\"\n", + " else \"Did not Expect and IDK\"\n", + " )\n", + " return y_pred\n", + "\n", + " def evaluate_answers(self):\n", + " self.y_pred = self.df.apply(self._evaluate_single_row, axis=1)\n", + "\n", + " def generate_matrices(self, use_percentages=False):\n", + " # Using value_counts to create a Series of frequencies, then reindexing to include missing labels with count 0\n", + " freq_series = self.y_pred.value_counts().reindex(self.labels, fill_value=0)\n", + " if use_percentages:\n", + " total = freq_series.sum()\n", + " freq_series = (freq_series / total * 100).apply(\"{0:.2f}%\".format)\n", + " return freq_series\n", + "\n", + "\n", + "def evaluate_model(df, answers_column):\n", + " \"\"\"\n", + " Evaluate the confusion matrix for a given DataFrame and answer column.\n", + " \"\"\"\n", + " evaluator = ConfusionMatrixEvaluator(df, answers_column=answers_column)\n", + " evaluator.evaluate_answers()\n", + " error_categories = evaluator.generate_matrices(use_percentages=True)\n", + " return error_categories" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "evaluate_model(df, \"generated_answer\")" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "evaluate_model(df, \"ft_generated_answer\")" ] }, { "cell_type": "code", - "execution_count": 64, + "execution_count": null, "metadata": {}, "outputs": [], "source": [ @@ -865,62 +671,27 @@ }, { "cell_type": "code", - "execution_count": 65, + "execution_count": null, "metadata": {}, - "outputs": [ - { - "data": { - "image/png": "", - "text/plain": [ - "
" - ] - }, - "metadata": {}, - "output_type": "display_data" - } - ], + "outputs": [], "source": [ - "# Importing required libraries for plotting\n", - "import matplotlib.pyplot as plt\n", - "import numpy as np\n", - "\n", - "\n", - "def evaluate_matrix(df, answers_column):\n", - " \"\"\"\n", - " Evaluate the confusion matrix for a given DataFrame and answer column.\n", - " \"\"\"\n", - " evaluator = ConfusionMatrixEvaluator(df, answers_column=answers_column)\n", - " evaluator.evaluate_answers()\n", - " matrix = evaluator.generate_matrices(use_percentages=True)\n", - " return matrix\n", - "\n", - "\n", - "def plot_overall_error(matrix1, matrix2, label1, label2):\n", - " \"\"\"\n", - " Plot a bar chart showing only the overall error between two confusion matrices.\n", - " \"\"\"\n", - " # Calculate overall error\n", + "def plot_overall_error(df, answer_columns):\n", " error_categories = [\"Expected but Wrong\", \"Hallucination\"]\n", - " matrix1_error = sum(\n", - " [float(matrix1.loc[cat].replace(\"%\", \"\")) for cat in error_categories]\n", - " )\n", - " matrix2_error = sum(\n", - " [float(matrix2.loc[cat].replace(\"%\", \"\")) for cat in error_categories]\n", - " )\n", - "\n", " labels = [\"Overall Error\"]\n", - " matrix1_values = [matrix1_error]\n", - " matrix2_values = [matrix2_error]\n", - "\n", " x = np.arange(len(labels))\n", - " width = 0.35\n", + " width = 0.35 / len(answer_columns)\n", "\n", " fig, ax = plt.subplots()\n", - " ax.bar(x - width / 2, matrix1_values, width, label=label1)\n", - " ax.bar(x + width / 2, matrix2_values, width, label=label2)\n", + "\n", + " for i, col in enumerate(answer_columns):\n", + " results = evaluate_model(df, col)\n", + " matrix_error = sum(\n", + " [float(results[col].loc[cat].replace(\"%\", \"\")) for cat in error_categories]\n", + " )\n", + " ax.bar(x + (i - len(answer_columns)/2) * width, [matrix_error], width, label=col)\n", "\n", " ax.set_ylabel(\"Error (%) - Lower is Better\")\n", - " ax.set_title(\"Comparison 
between {} and {}\".format(label1, label2))\n", + " ax.set_title(\"Overall Error Comparison\")\n", " ax.set_xticks(x)\n", " ax.set_xticklabels(labels)\n", " ax.legend()\n", @@ -928,11 +699,8 @@ " fig.tight_layout()\n", " plt.show()\n", "\n", - "\n", - "matrix_plain = evaluate_matrix(df, \"generated_answer\")\n", - "matrix_ft = evaluate_matrix(df, \"ft_generated_answer\")\n", "# Plot only the overall error\n", - "plot_overall_error(matrix_plain, matrix_ft, \"gpt-3.5-turbo-0613\", \"Fine Tuned\")" + "plot_overall_error(df, answer_columns=[\"generated_answer\", \"ft_generated_answer\"])" ] }, { @@ -940,131 +708,14 @@ "cell_type": "markdown", "metadata": {}, "source": [ - "# Few Shot Learning with Qdrant to Improve RAG\n", + "# Few Shot Learning\n", "\n", - "So far, we've been using the OpenAI model to answer questions where the answer is present in the context. But what if we want to answer questions where the answer is not present in the context? This is where few-shot learning comes in. Few-shot learning is a type of transfer learning that allows us to answer questions where the answer is not present in the context. We can do this by providing a few examples of the answer we're looking for, and the model will learn to answer questions where the answer is not present in the context." - ] - }, - { - "attachments": {}, - "cell_type": "markdown", - "metadata": {}, - "source": [ - "## Few Shot Prompting\n", "\n", - "We'll select a few examples from the dataset, including cases where the answer is not present in the context. We'll then use these examples to create a prompt that we can use to fine-tune the model.\n", + "We'll select a few examples from the dataset, including cases where the answer is not present in the context. We'll then use these examples to create a prompt that we can use to fine-tune the model. 
We'll then measure the performance of the fine-tuned model.\n", "\n", - "We'll measure the baseline on our previous 1K dataset, and then we'll fine-tune the model on the new dataset. We'll then measure the performance of the fine-tuned model on the same 1K dataset." - ] - }, - { - "cell_type": "code", - "execution_count": 5, - "metadata": {}, - "outputs": [ - { - "data": { - "text/html": [ - "
\n", - "\n", - "\n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - "
titlequestioncontextis_impossibleanswers
0BeyoncéWhen did Beyonce start becoming popular?Beyoncé Giselle Knowles-Carter (/biːˈjɒnseɪ/ b...False[in the late 1990s]
1BeyoncéWhat areas did Beyonce compete in when she was...Beyoncé Giselle Knowles-Carter (/biːˈjɒnseɪ/ b...False[singing and dancing]
2BeyoncéWhen did Beyonce leave Destiny's Child and bec...Beyoncé Giselle Knowles-Carter (/biːˈjɒnseɪ/ b...False[2003]
3BeyoncéIn what city and state did Beyonce grow up?Beyoncé Giselle Knowles-Carter (/biːˈjɒnseɪ/ b...False[Houston, Texas]
4BeyoncéIn which decade did Beyonce become famous?Beyoncé Giselle Knowles-Carter (/biːˈjɒnseɪ/ b...False[late 1990s]
\n", - "
" - ], - "text/plain": [ - " title question \\\n", - "0 Beyoncé When did Beyonce start becoming popular? \n", - "1 Beyoncé What areas did Beyonce compete in when she was... \n", - "2 Beyoncé When did Beyonce leave Destiny's Child and bec... \n", - "3 Beyoncé In what city and state did Beyonce grow up? \n", - "4 Beyoncé In which decade did Beyonce become famous? \n", - "\n", - " context is_impossible \\\n", - "0 Beyoncé Giselle Knowles-Carter (/biːˈjɒnseɪ/ b... False \n", - "1 Beyoncé Giselle Knowles-Carter (/biːˈjɒnseɪ/ b... False \n", - "2 Beyoncé Giselle Knowles-Carter (/biːˈjɒnseɪ/ b... False \n", - "3 Beyoncé Giselle Knowles-Carter (/biːˈjɒnseɪ/ b... False \n", - "4 Beyoncé Giselle Knowles-Carter (/biːˈjɒnseɪ/ b... False \n", - "\n", - " answers \n", - "0 [in the late 1990s] \n", - "1 [singing and dancing] \n", - "2 [2003] \n", - "3 [Houston, Texas] \n", - "4 [late 1990s] " - ] - }, - "execution_count": 5, - "metadata": {}, - "output_type": "execute_result" - } - ], - "source": [ - "train_df.head()" + "## 5. Fine-Tuning OpenAI Model with Qdrant\n", + "\n", + "So far, we've been using the OpenAI model to answer questions where the answer is present in the context. But what if we want to answer questions where the answer is not present in the context? This is where few-shot learning comes in. Few-shot learning is a type of transfer learning that allows us to answer questions where the answer is not present in the context. We can do this by providing a few examples of the answer we're looking for, and the model will learn to answer questions where the answer is not present in the context." 
] }, { @@ -1079,7 +730,7 @@ }, { "cell_type": "code", - "execution_count": 9, + "execution_count": null, "metadata": {}, "outputs": [], "source": [ @@ -1108,7 +759,7 @@ }, { "cell_type": "code", - "execution_count": 22, + "execution_count": null, "metadata": {}, "outputs": [], "source": [ @@ -1118,7 +769,6 @@ "\n", "# collection_name = \"squadv2-cookbook\"\n", "\n", - "\n", "# # Create the collection\n", "# qdrant_client.recreate_collection(\n", "# collection_name=collection_name,\n", @@ -1128,7 +778,7 @@ }, { "cell_type": "code", - "execution_count": 12, + "execution_count": null, "metadata": {}, "outputs": [], "source": [ @@ -1156,38 +806,9 @@ }, { "cell_type": "code", - "execution_count": 13, + "execution_count": null, "metadata": {}, - "outputs": [ - { - "data": { - "application/vnd.jupyter.widget-view+json": { - "model_id": "994a3daab89c4e64b78caf39bcfee553", - "version_major": 2, - "version_minor": 0 - }, - "text/plain": [ - "Generating embeddings: 0%| | 0/130319 [00:00 List[PointStruct]:\n", "# batch_size = 512\n", @@ -1249,17 +870,9 @@ }, { "cell_type": "code", - "execution_count": 23, + "execution_count": null, "metadata": {}, - "outputs": [ - { - "name": "stdout", - "output_type": "stream", - "text": [ - "operation_id=0 status=\n" - ] - } - ], + "outputs": [], "source": [ "# operation_info = qdrant_client.upsert(\n", "# collection_name=collection_name, wait=True, points=points\n", @@ -1267,15 +880,6 @@ "# print(operation_info)" ] }, - { - "cell_type": "code", - "execution_count": 25, - "metadata": {}, - "outputs": [], - "source": [ - "train_sample = get_diverse_sample(train_df, sample_size=100, random_state=42)" - ] - }, { "cell_type": "markdown", "metadata": {}, @@ -1287,24 +891,9 @@ }, { "cell_type": "code", - "execution_count": 87, + "execution_count": null, "metadata": {}, - "outputs": [ - { - "data": { - "application/vnd.jupyter.widget-view+json": { - "model_id": "ff39612710a749fdae166745b2bd2d5a", - "version_major": 2, - "version_minor": 0 - }, - 
"text/plain": [ - " 0%| | 0/100 [00:00 Date: Thu, 7 Sep 2023 18:39:59 +0530 Subject: [PATCH 19/38] =?UTF-8?q?Add=20pretty=20plots=20=E2=9C=A8?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit --- examples/fine-tuned-RAG/ModelFinetune.ipynb | 554 ++++++++++++++++---- 1 file changed, 461 insertions(+), 93 deletions(-) diff --git a/examples/fine-tuned-RAG/ModelFinetune.ipynb b/examples/fine-tuned-RAG/ModelFinetune.ipynb index 4f412c7cbd..78f2c91a28 100644 --- a/examples/fine-tuned-RAG/ModelFinetune.ipynb +++ b/examples/fine-tuned-RAG/ModelFinetune.ipynb @@ -37,16 +37,16 @@ }, { "cell_type": "code", - "execution_count": 1, + "execution_count": 76, "metadata": {}, "outputs": [], "source": [ - "# !pip install pandas openai tqdm tenacity matplotlib scikit-learn tiktoken python-dotenv " + "# !pip install pandas openai tqdm tenacity scikit-learn tiktoken python-dotenv seaborn --upgrade --quiet" ] }, { "cell_type": "code", - "execution_count": 2, + "execution_count": 78, "metadata": {}, "outputs": [], "source": [ @@ -57,6 +57,7 @@ "import pandas as pd\n", "import openai\n", "import tiktoken\n", + "import seaborn as sns\n", "from tenacity import retry, wait_exponential\n", "from tqdm import tqdm\n", "from collections import defaultdict\n", @@ -214,7 +215,7 @@ "name": "stderr", "output_type": "stream", "text": [ - " 25%|██▌ | 25/100 [00:33<01:35, 1.28s/it]" + "100%|██████████| 100/100 [02:21<00:00, 1.41s/it]\n" ] } ], @@ -225,7 +226,7 @@ }, { "cell_type": "code", - "execution_count": null, + "execution_count": 8, "metadata": {}, "outputs": [], "source": [ @@ -247,9 +248,17 @@ }, { "cell_type": "code", - "execution_count": null, + "execution_count": 9, "metadata": {}, - "outputs": [], + "outputs": [ + { + "name": "stderr", + "output_type": "stream", + "text": [ + "100%|██████████| 100/100 [00:00<00:00, 68067.25it/s]\n" + ] + } + ], "source": [ "def dataframe_to_jsonl(df):\n", " def create_jsonl_entry(row):\n", @@ -287,9 +296,45 @@ }, { 
"cell_type": "code", - "execution_count": null, + "execution_count": 10, "metadata": {}, - "outputs": [], + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "Num examples: 100\n", + "First example:\n", + "{'role': 'system', 'content': 'You are a helpful assistant.'}\n", + "{'role': 'user', 'content': \"Answer the following Question based on the Context only. Only answer from the Context. If you don't know the answer, say 'I don't know'.\\n Question: What is a cirque?\\n\\n\\n Context: Glaciers form where the accumulation of snow and ice exceeds ablation. The area in which a glacier forms is called a cirque (corrie or cwm) - a typically armchair-shaped geological feature (such as a depression between mountains enclosed by arêtes) - which collects and compresses through gravity the snow which falls into it. This snow collects and is compacted by the weight of the snow falling above it forming névé. Further crushing of the individual snowflakes and squeezing the air from the snow turns it into 'glacial ice'. This glacial ice will fill the cirque until it 'overflows' through a geological weakness or vacancy, such as the gap between two mountains. When the mass of snow and ice is sufficiently thick, it begins to move due to a combination of surface slope, gravity and pressure. 
On steeper slopes, this can occur with as little as 15 m (50 ft) of snow-ice.\\n\\n\\n Answer:\\n\"}\n", + "{'role': 'assistant', 'content': 'The area in which a glacier forms'}\n", + "No errors found\n", + "Num examples missing system message: 0\n", + "Num examples missing user message: 0\n", + "\n", + "#### Distribution of num_messages_per_example:\n", + "min / max: 3, 3\n", + "mean / median: 3.0, 3.0\n", + "p5 / p95: 3.0, 3.0\n", + "\n", + "#### Distribution of num_total_tokens_per_example:\n", + "min / max: 114, 689\n", + "mean / median: 236.88, 217.0\n", + "p5 / p95: 166.7, 321.3\n", + "\n", + "#### Distribution of num_assistant_tokens_per_example:\n", + "min / max: 1, 13\n", + "mean / median: 3.81, 4.0\n", + "p5 / p95: 1.0, 5.0\n", + "\n", + "0 examples may be over the 4096 token limit, they will be truncated during fine-tuning\n", + "Dataset has ~23688 tokens that will be charged for during training\n", + "By default, you'll train for 3 epochs on this dataset\n", + "By default, you'll be charged for ~71064 tokens\n", + "See pricing page to estimate total costs\n" + ] + } + ], "source": [ "# Specify the data path and open the JSONL file\n", "\n", @@ -436,7 +481,7 @@ }, { "cell_type": "code", - "execution_count": null, + "execution_count": 11, "metadata": {}, "outputs": [], "source": [ @@ -455,9 +500,29 @@ }, { "cell_type": "code", - "execution_count": null, + "execution_count": 12, "metadata": {}, - "outputs": [], + "outputs": [ + { + "data": { + "text/plain": [ + " JSON: {\n", + " \"object\": \"file\",\n", + " \"id\": \"file-X0QsHJXqT3DrG1OaXHKMnwXg\",\n", + " \"purpose\": \"fine-tune\",\n", + " \"filename\": \"file\",\n", + " \"bytes\": 120415,\n", + " \"created_at\": 1694085592,\n", + " \"status\": \"processed\",\n", + " \"status_details\": null\n", + "}" + ] + }, + "execution_count": 12, + "metadata": {}, + "output_type": "execute_result" + } + ], "source": [ "while file_object.status!='processed':\n", " time.sleep(5)\n", @@ -474,7 +539,7 @@ }, { 
"cell_type": "code", - "execution_count": null, + "execution_count": 13, "metadata": {}, "outputs": [], "source": [ @@ -492,9 +557,93 @@ }, { "cell_type": "code", - "execution_count": null, + "execution_count": 14, "metadata": {}, - "outputs": [], + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "Status: running\n", + "Status: running\n", + "Status: running\n", + "Status: running\n", + "Status: running\n", + "Status: running\n", + "Status: running\n", + "Status: running\n", + "Status: running\n", + "Status: running\n", + "Status: running\n", + "Status: running\n", + "Status: running\n", + "Status: running\n", + "Status: running\n", + "Status: running\n", + "Status: running\n", + "Status: running\n", + "Status: running\n", + "Status: running\n", + "Status: running\n", + "Status: running\n", + "Status: running\n", + "Status: running\n", + "Status: running\n", + "Status: running\n", + "Status: running\n", + "Status: running\n", + "Status: running\n", + "Status: running\n", + "Status: running\n", + "Status: running\n", + "Status: running\n", + "Status: running\n", + "Status: running\n", + "Status: running\n", + "Status: running\n", + "Status: running\n", + "Status: running\n", + "Status: running\n", + "Status: running\n", + "Status: running\n", + "Status: running\n", + "Status: running\n", + "Status: running\n", + "Status: running\n", + "Status: running\n", + "Status: running\n", + "Status: running\n", + "Status: running\n", + "Status: running\n", + "Status: running\n", + "Status: running\n", + "Status: running\n", + "Status: running\n", + "Status: running\n", + "Status: running\n", + "Status: running\n", + "Status: running\n", + "Status: running\n", + "Status: running\n", + "Status: running\n", + "Status: running\n", + "Status: running\n", + "Status: running\n", + "Status: running\n", + "Status: running\n", + "Status: running\n", + "Status: running\n", + "Status: running\n", + "Status: running\n", + "Status: running\n", + "Status: 
running\n", + "Status: running\n", + "Status: running\n", + "Status: running\n", + "Status: succeeded\n" + ] + } + ], "source": [ "while ft_job.status!='succeeded':\n", " time.sleep(15)\n", @@ -504,9 +653,20 @@ }, { "cell_type": "code", - "execution_count": null, + "execution_count": 15, "metadata": {}, - "outputs": [], + "outputs": [ + { + "data": { + "text/plain": [ + "'ft:gpt-3.5-turbo-0613:qdrant:100train20230906:7w7eYRbu'" + ] + }, + "execution_count": 15, + "metadata": {}, + "output_type": "execute_result" + } + ], "source": [ "model_id = openai.FineTuningJob.retrieve(ft_job[\"id\"]).fine_tuned_model\n", "model_id" @@ -521,9 +681,20 @@ }, { "cell_type": "code", - "execution_count": null, + "execution_count": 16, "metadata": {}, - "outputs": [], + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "{\n", + " \"role\": \"assistant\",\n", + " \"content\": \"I don't know\"\n", + "}\n" + ] + } + ], "source": [ "completion = openai.ChatCompletion.create(\n", " model=model_id,\n", @@ -550,9 +721,17 @@ }, { "cell_type": "code", - "execution_count": null, + "execution_count": 17, "metadata": {}, - "outputs": [], + "outputs": [ + { + "name": "stderr", + "output_type": "stream", + "text": [ + "100%|██████████| 100/100 [04:54<00:00, 2.94s/it]\n" + ] + } + ], "source": [ "df[\"ft_generated_answer\"] = df.progress_apply(answer_question, model=model_id, axis=1)" ] @@ -575,7 +754,7 @@ }, { "cell_type": "code", - "execution_count": null, + "execution_count": 18, "metadata": {}, "outputs": [], "source": [ @@ -635,25 +814,57 @@ }, { "cell_type": "code", - "execution_count": null, + "execution_count": 19, "metadata": {}, - "outputs": [], + "outputs": [ + { + "data": { + "text/plain": [ + "Expected and Right 43.00%\n", + "Expected but IDK 0.00%\n", + "Expected but Wrong 10.00%\n", + "Hallucination 47.00%\n", + "Did not Expect and IDK 0.00%\n", + "Name: count, dtype: object" + ] + }, + "execution_count": 19, + "metadata": {}, + "output_type": 
"execute_result" + } + ], "source": [ "evaluate_model(df, \"generated_answer\")" ] }, { "cell_type": "code", - "execution_count": null, + "execution_count": 20, "metadata": {}, - "outputs": [], + "outputs": [ + { + "data": { + "text/plain": [ + "Expected and Right 34.00%\n", + "Expected but IDK 17.00%\n", + "Expected but Wrong 2.00%\n", + "Hallucination 8.00%\n", + "Did not Expect and IDK 39.00%\n", + "Name: count, dtype: object" + ] + }, + "execution_count": 20, + "metadata": {}, + "output_type": "execute_result" + } + ], "source": [ "evaluate_model(df, \"ft_generated_answer\")" ] }, { "cell_type": "code", - "execution_count": null, + "execution_count": 21, "metadata": {}, "outputs": [], "source": [ @@ -671,36 +882,48 @@ }, { "cell_type": "code", - "execution_count": null, + "execution_count": 144, "metadata": {}, - "outputs": [], + "outputs": [ + { + "data": { + "image/png": "", + "text/plain": [ + "
" + ] + }, + "metadata": {}, + "output_type": "display_data" + } + ], "source": [ - "def plot_overall_error(df, answer_columns):\n", + "def plot_overall_error(df, answer_columns, labels):\n", " error_categories = [\"Expected but Wrong\", \"Hallucination\"]\n", - " labels = [\"Overall Error\"]\n", - " x = np.arange(len(labels))\n", - " width = 0.35 / len(answer_columns)\n", - "\n", - " fig, ax = plt.subplots()\n", - "\n", + " \n", + " # Prepare data for Seaborn\n", + " data = []\n", " for i, col in enumerate(answer_columns):\n", " results = evaluate_model(df, col)\n", - " matrix_error = sum(\n", - " [float(results[col].loc[cat].replace(\"%\", \"\")) for cat in error_categories]\n", - " )\n", - " ax.bar(x + (i - len(answer_columns)/2) * width, [matrix_error], width, label=col)\n", + " matrix_error = sum([float(results.loc[cat].replace(\"%\", \"\")) for cat in error_categories])\n", + " data.append([labels[i], matrix_error])\n", "\n", - " ax.set_ylabel(\"Error (%) - Lower is Better\")\n", - " ax.set_title(\"Overall Error Comparison\")\n", - " ax.set_xticks(x)\n", - " ax.set_xticklabels(labels)\n", - " ax.legend()\n", + " df_plot = pd.DataFrame(data, columns=[\"Model\", \"Overall Error\"])\n", + " \n", + " # Create the plot\n", + " plt.figure(figsize=(10, 6))\n", + " ax = sns.barplot(x=\"Overall Error\", y=\"Model\", data=df_plot, palette=\"icefire\")\n", + " \n", + " # Add annotations\n", + " for i, p in enumerate(ax.patches):\n", + " ax.text(p.get_width() - 6, p.get_y() + p.get_height() / 2, f\"{p.get_width():.0f}%\", \n", + " va='center', color='white', fontweight='bold')\n", "\n", - " fig.tight_layout()\n", + " plt.xlabel(\"Error (%) - Lower is Better\")\n", + " plt.title(\"Total Error Comparison\")\n", " plt.show()\n", "\n", "# Plot only the overall error\n", - "plot_overall_error(df, answer_columns=[\"generated_answer\", \"ft_generated_answer\"])" + "plot_overall_error(df, answer_columns=[\"generated_answer\", \"ft_generated_answer\"], labels=[\"Base 
gpt-3.5-turbo-0613 Model\", \"Fine-Tuned Model\"])\n" ] }, { @@ -739,7 +962,7 @@ }, { "cell_type": "code", - "execution_count": null, + "execution_count": 51, "metadata": {}, "outputs": [], "source": [ @@ -759,7 +982,7 @@ }, { "cell_type": "code", - "execution_count": null, + "execution_count": 57, "metadata": {}, "outputs": [], "source": [ @@ -767,7 +990,7 @@ " url=os.getenv(\"QDRANT_URL\"), api_key=os.getenv(\"QDRANT_API_KEY\"), timeout=6000, prefer_grpc=True\n", ")\n", "\n", - "# collection_name = \"squadv2-cookbook\"\n", + "collection_name = \"squadv2-cookbook\"\n", "\n", "# # Create the collection\n", "# qdrant_client.recreate_collection(\n", @@ -778,7 +1001,7 @@ }, { "cell_type": "code", - "execution_count": null, + "execution_count": 58, "metadata": {}, "outputs": [], "source": [ @@ -806,7 +1029,7 @@ }, { "cell_type": "code", - "execution_count": null, + "execution_count": 59, "metadata": {}, "outputs": [], "source": [ @@ -870,7 +1093,7 @@ }, { "cell_type": "code", - "execution_count": null, + "execution_count": 60, "metadata": {}, "outputs": [], "source": [ @@ -891,9 +1114,24 @@ }, { "cell_type": "code", - "execution_count": null, + "execution_count": 62, "metadata": {}, - "outputs": [], + "outputs": [ + { + "data": { + "application/vnd.jupyter.widget-view+json": { + "model_id": "ca88ff9dbfe24e9592ca03fc5c33c80e", + "version_major": 2, + "version_minor": 0 + }, + "text/plain": [ + " 0%| | 0/100 [00:00" + ] + }, + "metadata": {}, + "output_type": "display_data" + } + ], "source": [ - "# Evaluate this using the Evaluator\n", + "plot_overall_error(df, answer_columns=[\"generated_answer\", \"ft_generated_answer\", \"ft_generated_answer_few_shot\"], labels=[\"Base Model\", \"Fine-Tuned Model\", \"Fine-Tuned Model with Few-Shot\"])" + ] + }, + { + "cell_type": "code", + "execution_count": 141, + "metadata": {}, + "outputs": [ + { + "name": "stderr", + "output_type": "stream", + "text": [ + 
"/opt/homebrew/Caskroom/miniconda/base/envs/fst/lib/python3.9/site-packages/seaborn/axisgrid.py:118: UserWarning: The figure layout has changed to tight\n", + " self._figure.tight_layout(*args, **kwargs)\n", + "/var/folders/b4/grpbcmrd36gc7q5_11whbn540000gn/T/ipykernel_3534/765969223.py:24: UserWarning: The figure layout has changed to tight\n", + " plt.tight_layout()\n" + ] + }, + { + "data": { + "image/png": "iVBORw0KGgoAAAANSUhEUgAABBwAAAJYCAYAAADBgvVFAAAAOXRFWHRTb2Z0d2FyZQBNYXRwbG90bGliIHZlcnNpb24zLjcuMiwgaHR0cHM6Ly9tYXRwbG90bGliLm9yZy8pXeV/AAAACXBIWXMAAA9hAAAPYQGoP6dpAADBq0lEQVR4nOzdd1gUV//+8XsRUCyoJIoNu2AXbNixIJZorJHEFo0xJoolauwxxsRoEmPBhjWW2GvUaOy9V+zmsaFoYsOGIAjs7w9/zNcNaBAXUXm/rstL98yZmc8s8zxh7z1zjslsNpsFAAAAAABgRTbJXQAAAAAAAHj7EDgAAAAAAACrI3AAAAAAAABWR+AAAAAAAACsjsABAAAAAABYHYEDAAAAAACwOgIHAAAAAABgdQQOAAAAAADA6ggcAAAAAACA1RE4AACAt16/fv3k5uamLVu2xLu9WbNmcnNzU6tWreLdvmLFCrm5uWnUqFGSpDZt2sjNzU33799PspoTa9y4cXJzc/vPPzVr1kzuUhMlIiJCM2bMsGiL/fmePn06mapKPvXr15ebm5uGDBmS3KUAQBy2yV0AAABAUvP09NTy5ct19OhR1ahRw2Lb3bt3derUKdnY2CgwMFAPHz5UunTpLPocOnRIklSxYsVXVvPLqlWrlooUKfLM7RkyZHiF1VhP69atdfHiRX3yySdGm7e3t3LmzKl33303GSt79Y4dO6bz58/LwcFBq1evVr9+/ZQmTZrkLgsADAQOAADgrefp6SlJCgwMjLNtz549iomJUZ06dbRu3Trt378/Tihx6NAhpU6dWqVLl34l9VqDt7e3mjZtmtxlWN3t27fjtHl7e8vb2zsZqklev//+u0wmkzp06KDx48frzz//VOPGjZO7LAAw8EgFAAB46+XIkUMuLi46duyYYmJiLLbt3r1btra26tKliyRp165dFtvv3r2rCxcuyMPDQ6lTp35lNQPP8/jxY/3xxx9ydXWVr6+vbGxstHjx4uQuCwAsEDgAAIAUwdPTUw8fPtT//vc/i/Zdu3apZMmScnNzk4uLi3bv3m2x/fDhwzKbzfE+TnH9+nX16dNHnp6ecnd310cffaQ9e/bE6RcZGanJkyerfv36KlGihCpWrKhevXrpypUrFv2WLVsmNzc37dmzR9OnT5ePj4+KFy8ub29vTZo0SdHR0VZ4JyzduXNHlStXVtGiRXXy5EmLbV9//bXc3Nw0adIki/p27Nih8ePHq2rVqnJ3d5evr+8z58dYu3atPvzwQ3l4eKh06dL6+OOPtXfv3nj7bty4UW3atFHZsmXl6empdu3a6cCBA5Kk4OBgubm56erVq3rw4IHc3NzUr18/SXHncIjtO27cOG3atEnNmzdXyZIlVbFiRQ0aNEghISFxzv3nn3+qefPm8vDwUNWqVTVy5Ejt3r1bbm5uWrZs2X++jyEhIfrxxx9Vr149lSpVSqVKldJ7772ngIAARUVFGf327dtnHHPJkiVq2LChSpQooWrVqunHH39UeHj4f55LkrZt26Y7d+6oSpUqypo1q8qWLauDBw/q4sWLcfq+6H21c+dOffzxx
6pYsaJKliyphg0bavLkyYqMjJQkzZ07V25ublq6dKnFfnPmzJGbm5u++uori/bTp0/Lzc1NY8eONdpu3rypIUOGqFq1aipevLhq1qypn3/+WaGhoRb7tmnTRjVr1tS2bdtUs2ZNlSpVSt27d0/QewQg+RE4AACAFKF8+fKSpKNHjxptQUFBunr1qipVqiRJqlSpks6fP6/r168bfZ43f8PHH3+sEydOqGnTpqpVq5YCAwPVoUMHiw/ujx8/VseOHTVq1CilS5dOrVu3VtWqVbV+/Xo1b95cf/31V5zj/vzzzxo/frzKlCmjVq1a6dGjRxozZoz8/f2t8l48LXPmzPrmm28UHR2tb775xhgBsmPHDi1atEgeHh767LPPLPYZPXq0pkyZomrVqun999/XpUuX9MUXX8T5ADp27Fj16NFDN27cUJMmTdSkSROdO3dO7du31++//27Rd/LkyerSpYvOnz+vOnXq6L333tOpU6fUrl077dq1S46OjvLz81OGDBlkb28vPz+//3yMYsuWLfLz81OWLFnUpk0bOTs7a/HixercubNFv1mzZql79+76559/1KhRI1WrVk2//fabBg8enKD38MGDB2rRooVmz56tggULqm3btmrQoIFu3ryp0aNH65dffomzz2+//aYhQ4aoUKFCatOmjVKnTq0ZM2Zo0KBBCTpn7PtXv359i7+fN8ohIffVwYMH9fnnn+vChQuqX7++WrdurVSpUmnUqFHGxJTVqlWTpDjBUezrgwcPWrTv2LFDkoxHla5du6bmzZtrwYIFKlasmNq1a6d8+fJp2rRpatOmjcLCwiz2v3Pnjnr06KHSpUurSZMmKlu2bILeIwCvATMAAEAK8M8//5hdXV3N/fr1M9rmzp1rdnV1NR84cMBsNpvNf/zxh9nV1dW8dOlSo4+vr6+5TJky5qioKKOtdevWZldXV3PHjh3NkZGRRvuvv/5qdnV1NX/33XdG29SpU82urq7mn376yaKeY8eOmYsVK2Zu1qyZ0bZ06VKzq6uruUyZMuZLly4Z7VeuXDEXK1bMXKlSpf+8Tn9/f7Orq6v5iy++MPv7+z/zz7lz5yz269Gjh9nV1dX822+/me/fv2+uVq2a2d3d3aKO2PqKFCliPnLkiNEeFBRkLlu2rLlMmTLme/fumc1mszkwMNDs5uZmbt26tTksLMzoGxISYq5du7a5VKlS5tu3b5vNZrP5woUL5qJFi5rr1q1rvnHjhtH30qVLZnd3d3ODBg2Mtho1apjLlCljUXvfvn3Nrq6u5lOnThnvl6urq9nV1dW8Zs0ao19kZKT5vffeM7u6uhrX//fff5tLlChh9vb2Nt+8edPoe/LkSXOxYsXi3A/xmTx5stnV1dW8aNEii/Zr166Zixcvbq5cubLRtnfvXuM9PHz4sNF+//59c4UKFcxFixY1h4aGPvd8d+/eNRcrVszs4+NjtIWEhBj3yNP3pNn8YvdV165dza6urubLly9bvG+NGjUyFylSxPzgwQOz2Ww216lTx1ylShWjT3R0tLlcuXJmd3d3s6urqzk4ONjY1rp1a3PFihXNMTExZrPZbO7YsaPZzc3NvGXLFos6Z82aZXZ1dTX/+OOPFvu6urqahw8f/tz3BMDriREOAAAgRXB2dlbevHktRjjs3r1badOmlbu7uySpQoUKMplMxmMVkZGROnnypMqVK6dUqVLFOeZnn30mOzs743XsUpPBwcFG25IlS+To6Kgvv/zSYt8SJUqobt26On78eJzHPHx8fJQnTx7jda5cuVSgQAHdunVLERERCbreTZs2afz48c/88++h919//bWcnJzk7++vr7/+Wv/884/69u1rUUes+vXrG++ZJOXOnVutWrXSgwcPtHXrVuO6zWaz+vTpIwcHB6Nv5syZ1bFjR4WHh2vt2rWSnjzOEBUVpc6dOytLlixG3zx58qhv375q1qyZHj9+nKDrfpqLi4vq1atnvLazszNGqly9elXSk
0c+IiIi1KlTJ4tVLooWLaomTZok6DxVqlTRt99+G2fCxuzZs8vFxSXeRzjKlSsnDw8P43WGDBnk4eGhqKgo/fPPP88935o1a/T48WO99957RlvmzJlVuXJl3bp165mPtyTkvood4XL8+HGjn52dnaZOnap9+/Ypffr0kiQvLy/duHFD58+flySdOnVK9+7dU4sWLSTJeBQmNDRUR44cUbVq1WQymXTjxg1t375dXl5eql69ukV9rVu3Vvbs2bV8+fJ4awfw5mGVCgAAkGJ4enpq0aJFun//vtKlS6d9+/apfPnysrV98iuRk5OTChcurP3790t6suxgZGTkM5fD/PeH8UyZMkmSHj58aPx98eJFZcmSxZgH4Wm3bt2S9OQZ90KFChntefPmjdM3dhnLyMjIBE1eOXz48BdapcLJyUmDBw9Wjx49tHbtWnl5eenDDz+Mt2/s4ylPK1mypCTpzJkzev/9943HStavX2+EELFiP1DHzrlw5swZSbIIMWI9q4aE+K/3Ufq/D9ax9T+tdOnSWrRo0X+ep2jRoipatKgePnyowMBABQUF6dKlSzp+/LiCgoLinXvjebX9V7gS+zjF04GDJDVs2FBbt27V4sWL4/2AnpD76oMPPtDGjRv15ZdfauzYsapataqqVaumChUqyN7e3tjPy8tLM2fO1J49e1SgQAHt3btXNjY2ateunebMmaODBw+qcePG2rt3rx4/fmyEC6dOnZLZbNbdu3c1bty4OPXY2dnp77//1vXr1+Xs7Gy058qV67nvCYDXE4EDAABIMTw9PbVw4UIdPXpUjo6Oun//fpwwoWLFipoxY4YuX76sw4cPG23x+a8P/rET4N28eVPjx49/Zr979+5ZvH76g10sk8kkSTKbzc8958uoXLmy0qVLp4cPH8b74T/W0x8EY8WODoi95gcPHkiSpkyZ8szjxF73/fv3Jcn49txanvc+xrpz544kWYxuiJU1a9YEnSciIkKjRo3SwoULjUkfnZ2dVa5cOWXOnFk3b958odqe9zMOCgrSkSNHJP3fvA3/tnPnTv3zzz/Kli3bC5/Ty8tLs2fP1vTp07V7927NmTNHc+bMUaZMmeTn56c2bdpIksqWLau0adNqz549at26tfbu3avChQsre/bsKlq0qDHCYceOHbKzs1OVKlUk/d/P+ujRoxajjf7t7t27FvdZmjRpntkXwOuLwAEAAKQYsd/MnzhxQjY2T54sfVbgcPjwYR0+fFhZsmSxGH3wItKmTSvpyYezuXPnvkTlr8YPP/yghw8fKlOmTAoICFCdOnVUoECBOP0ePXoUpy02YMicObOkJ9eeKlUqBQYGWjx2Ep/Y9+nhw4fG/k+fy97e3vh5WVtsyPHw4UM5OTlZbPv3ignPMmLECM2bN0916tRRq1at5ObmZox2qVevXryBQ2KtWLFC0pPHf+J73OXEiRM6efKklixZIj8/v0Sdo3z58ipfvrzCwsJ08OBBbd26VcuXL9f333+v3Llzy8vLS/b29qpQoYL279+vyMhIHTp0yHiconz58po+fbpu3rypnTt3qnTp0sb7HPuz7ty5M6tNACkAczgAAIAUI0uWLMqfP79OnjypQ4cO6d1335Wbm5tFn3LlysnOzk5nz55VYGCgPD09E32+DBkyKEeOHDp37ly8H9JXrFihcePGWcz5kFy2bdum5cuXq2rVqpowYYIiIyM1cOBA45n+pz39fH+s2G/dYx9NcHNzU3R0tPHYxNOOHj2qkSNHGqsZuLq6SnryCMu/ff/99ypVqlScJUStpVixYs88d2BgYIKOsXr1ar3zzjsaO3asPD09jbDh0aNHunbtmiTrjEwxm81auXKlTCaThg8frqFDh8b5M2DAAElPlsJMzDlnzZqlMWPGSHoSDlSrVk2DBw/WN998I+n/Vm2RnoyGuH//vpYtW6awsDCVK1dOkoz/zSxevFjBwcEWczXE/u/txIkT8Z7f399fU6ZMMR55AfBmI
3AAAAApiqenp44fP66jR4+qQoUKcbY7ODjI3d1dW7ZsUUhIyDMfp0ioJk2a6O7duxo5cqTFh/dz585p6NCh+vXXX40PqMnlwYMH+vrrr5UmTRp98803Klu2rJo2baojR45o5syZcfovWrTImCxQki5evKg5c+bI2dnZGDofO+HiDz/8YDFSIDQ0VEOGDNHUqVONuQ0aNGggGxsbBQQEGI84SNLly5e1du1aubi4yMXFRdKTZ/yjoqKsdu0NGzaUnZ2dAgICLCZ3/N///qeFCxcm6BipU6dWRESE8biAJEVHR2vYsGFG0JSYSS//7eDBgwoODlbZsmWVI0eOePuULVtWefPm1dWrV7Vr164XPsfOnTsVEBAQ53GH2Ek2nz6vl5eXpCePzdjY2BiBQ5kyZWRra6sZM2ZIkkXg4OLionLlymn79u36888/Lc6xYsUKTZgwQTt27Ij38Q8Abx4eqQAAACmKp6en5s+fL0mqVKlSvH0qVqwof39/498v47PPPtPOnTs1Z84cHTp0SOXLl9f9+/f1559/Kjw8XCNHjrT63AWStHHjRuND4rN8+OGHypIli3744Qddv35dvXr1Mj7Y9+nTR1u2bNHYsWNVo0YN5cuXz9gvJiZGLVq0UN26dWU2m7V+/Xo9evRIP/30kzGvRYUKFdSmTRvNmTNH7733njEMf+PGjfr777/14YcfGt+EFyhQQH5+fvL391ejRo1Uo0YNmc1mrVmzRhERERoxYoRx7qxZs+rSpUvq3bu3qlSpEmdliBeVM2dOdevWTb/88osaNWqkWrVq6dGjR1q3bp1xLf/1OEfDhg01Y8YMNWvWTN7e3oqKitLOnTt18eJFOTk5KSQkRHfv3k3wnBDPEvs4xfvvv//cfk2aNNHo0aO1ePFiIwBKqK5du2rfvn1q27at6tatK2dnZ507d05btmxRgQIFLM6dPXt2ubq66q+//lKRIkWUMWNGSU8eUylatKiOHTum3LlzK3/+/BbnGDp0qFq1aqXu3burWrVqKlSokC5evKitW7cqU6ZMxmgKAG8+AgcAAJCieHp6ymQyyWw2/2fg4OLiopw5c77U+dKkSaPZs2dr2rRpWrNmjebNm6cMGTKodOnS6tSpU7wrPljDpk2btGnTpuf28fb21pkzZ7Rs2TK5urqqffv2xrZMmTKpX79+6tOnjwYMGGAxB0WnTp109+5dLV++XBEREXJ3d1fXrl3jTDQ5aNAglShRQvPnz9fKlSuVKlUq5cuXT127do2z5GSXLl2UL18+zZo1S7///rtMJpM8PDzUrVs3ixUkvvrqKw0YMEB//vmnbt++/dKBg/QkFHrnnXc0a9YsLV26VJkyZdLHH38sJycnDRs2zGJZz/h8+eWXSpcunVauXKl58+bJyclJBQoU0KBBg3T+/Hn98MMP2rZtmz744INE1xgREaF169bJ3t5edevWfW7fxo0ba+zYsdq0aVO8S3I+T8mSJfXbb79p0qRJ2rt3r0JCQpQ1a1a1bdtWX3zxhTEHQ6xq1arpr7/+inMfe3p66tixY8YoiKflz59fy5Yt08SJE7Vt2zbt2bNHWbNmVaNGjdSlSxcj9ALw5jOZk3KqYwAAALw1li1bpv79+6t///5q165dcpdjFXfu3FF0dHS8q1T4+/trwoQJWrx4cbzLZgIAno85HAAAAJBi7du3T5UrV46zbGlISIiWL1+ujBkzqnDhwslUHQC82XikAgAAAClW1apVlTNnTk2YMEHHjx+Xq6ur7t27p40bN+rOnTsaMWIEExgCQCIROAAAACDFSpcunRYsWKBp06Zp69at2rNnj9KmTavixYurQ4cOLz1pKACkZMzhAAAAAAAArI45HAAAAAAAgNUROAAAAAAAAKsjcACAt9jZs2d19uzZ5C4DAAAAKRCBAwC8xSIjIxUaGqqIiIjkLgXJLCIiQocOHeJeAPcCDNwLiMW9gKRC4AAAKUB0dHRyl4BkFnsPcC+AewGxuBcQi
3sBSYXAAQAAAAAAWB2BAwAAAAAAsDoCBwAAAAAAYHUEDgAAAAAAwOoIHAAAAAAAgNUROAAAAAAAAKsjcAAAAAAAAFZH4AAAAAAAAKyOwAEAAAAAAFgdgQMAAAAAALA6AgcAAAAAAGB1BA4AAAAAAMDqCBwAAAAAAIDVETgAAAAAAACrI3AAgBTAZDIldwlIZiaTSQ4ODtwL4F6AgXsBsbgXkFRMZrPZnNxFAACSxvHjxyVJJUqUSOZKAAAAEi8mxiwbGwKRN41tchcAAEh6Y2YHKvh6aHKXAQAA8MJyOadXj7alkrsMJAKBAwCkAMHXQ3Ux+H5ylwEAAIAUhDkcAAAAAACA1RE4AAAAAAAAqyNwAAAAAAAAVkfgAAAAAAAArI7AAQAAAAAAWB2BAwAAAAAAsDoCBwAAAAAAYHUEDgAAAAAAwOoIHAAAAAAAgNUROAAAAAAAAKsjcAAAAAAAvJF+/PFHubm5ad++fRbtYWFh8vf3V926dVWyZEl5e3tr1KhRCgsLi3OMTZs2ycfHRx4eHmrbtq3Onz8fp09ISIhKly6tOXPmJNm1vI0IHAAAAAAAb5xjx45p1qxZcdqjoqLUqVMnTZgwQVmzZlXr1q2VO3duTZ48Wa1bt1ZERITR9/r16+rRo4fs7Ozk6+uroKAgffrpp3r06JHFMadOnSpHR0f5+vom+XW9TWyTuwAAAAAAAF5EZGSkBgwYoOjo6Djbli5dqv3796tdu3bq37+/0f7LL79oypQpWrJkiVq1aiVJWrVqlaKiojRjxgw5Ozurbt268vX11bZt21SnTh1JT0KJefPmaeDAgbK3t381F/iWYIQDAAAAAOCNEhAQoEuXLqlSpUpxtl26dEmZM2fWZ599ZtH+3nvvSZKOHDlitAUHB8vJyUnOzs6SpCJFihjtsSZNmqSsWbOqadOmVr+Otx2BAwAAAADgjXHmzBlNmTJFnTp1UsGCBeNs79u3r/bu3at33nnHov3ChQuSpHfffddoy5gxo8LCwmQ2myVJoaGhkqQMGTJIkq5cuaIlS5bIz89PtrY8IPCiCBwAAAAAAG+E6OhoDRw4UHny5FGnTp0StM/du3e1atUqffvtt3J0dFTLli2NbaVKlVJYWJjmzJmjhw8fatq0aTKZTHJ3d5ckTZgwQXny5FHDhg2T4nLeekQ0AAAAAIA3wvTp03Xq1CnNmzcvQfMpLF68WIMGDZIkpU2bVtOmTVPu3LmN7TVr1lSNGjU0bNgwDRs2TJLUoUMHubq66vz581q5cqVGjRolG5sn39XHxMQY/8Z/450CAAAAALz2Ll68qPHjx6tly5by8PBI0D5OTk7q2LGjGjRooKioKH366afasWOHRZ9JkyZp4sSJ6tu3r+bOnas+ffpIkvz9/eXm5qY6deooKChIvr6+Klq0qKpXr661a9da/freRoxwAAAAAAC81sxmswYOHKh33nlHPXv2TPB+tWrVUq1atSRJJ0+e1Icffqg+ffpo06ZNSps2rSTJZDIZfWKdPn1a69atU0BAgEwmk3r37q1Hjx5p8uTJ2rZtm3r37q3ChQsrX7581rvItxAjHAAAAAAAr7W5c+fq0KFDGjJkiNKlS5eoYxQrVkyNGjVSSEiIjh49+ty+Y8aMkbu7u6pXr64zZ87o2LFj+vLLL+Xl5aUBAwbI0dFRixYtSlQdKQkjHAAAAAAAr7V169ZJUpylLmO1bdtWkrRp0yb9/fffunfvnry9veP0y5EjhyTpzp07zzzXkSNHtHXrVs2cOVOSFBQUJEnKkyePJMnW1lYuLi66fPly4i4mBSFwAAAAAAC81po0aaLy5cvHad+xY4cCAwPVpEkT5cyZU46Ojvrkk0909epV7d69WxkzZrTof+bMGUmSi4vLM881evRoeXp6qmLFipKerIzx9N+SFBERIZPJ9NLX9bYjcAAAAAAAvNaaNm0ab/v9+/eNwMHT01OSVK9ePQUEBOiXX37R0KFDjb5bt27V+vXr5
erqqhIlSsR7vD179mjfvn2aP3++0RY7T0NgYKBcXV117949Xbp0SVWrVrXW5b21CBwAAAAAAG+Njh07asuWLVq4cKH++usveXh4KCgoSJs3b1amTJn0yy+/PHN0wujRo1WtWjWVLl3aaCtSpIiKFSum4cOH6+zZszp48KCio6Pl6+v7qi7pjcWkkQAAAACAt0b69Ok1b948ffLJJ7px44bmzJmjY8eOqWnTplq2bJlcXV3j3W/z5s0KDAxUjx494mybMGGCPDw8tGDBAoWHh8vf3/+5j2XgCZPZbDYndxEAgKRx/PhxSdKsP+/rYvD9ZK4GAADgxeXL5aiRX1VO7jKQCIxwAAAAAAAAVkfgAAAAAAAArI7AAQAAAAAAWB2BAwAAAAAAsDoCBwAAAAAAYHUEDgAAAAAAwOoIHAAAAAAAgNUROAAAAAAAAKsjcAAAAAAAAFZH4AAAAAAAAKyOwAEAAAAAAFgdgQMAAAAAALA6AgcAAAAAAGB1BA4AAAAAAMDqbJO7AABA0svlnD65SwAAAEgUfo95cxE4AEAK0KNtqeQuAQAAINFiYsyysTEldxl4QTxSAQApwKNHj5K7BCSz8PBwnTp1SuHh4cldCpIZ9wJicS8g1ptwLxA2vJkIHAAgBTCbzcldApKZ2WxWeHg49wK4F2DgXkAs7gUkFQIHAAAAAABgdQQOAAAAAADA6ggcAAAAAACA1RE4AAAAAAAAqyNwAAAAAAAAVkfgAAAAAAAArI7AAQAAAAAAWB2BAwAAAAAAsDoCBwAAAAAAYHUEDgAAAAAAwOoIHAAgBYmKitLMmTNVv359lSxZUrVq1dKECRP0+PFji36LFi2Sl5eXypQpoy+++ELXr1+Pc6zz58+rSJEi2rx586sqHwAAAG8QAgcASEGGDh2q4cOHK1OmTGrbtq2cnZ3l7++vXr16GX1OnDihr7/+Ws7OzmrWrJkOHTokPz8/mc1mi2ONHTtWJUqUUM2aNV/1ZQAAAOANYJvcBQAAXo3Dhw9r4cKFqlOnjsaOHSuTySSz2ax+/fppxYoV2rJli2rUqKGlS5cqY8aMmj17ttKkSaNSpUqpZ8+eOnnypIoXLy5JOnnypNavX6/p06cn81UBAADgdcUIBwAvpF+/fnJzc7P4U7x4cXl5eWnAgAHxDr1/XTxde1BQ0DP7ffvtt3Jzc1PlypWtdu59+/bJzc1N8+fPf+F93dzc9OWXX750DXPnzpUk+fn5yWQySZJMJpN69uwpk8mkxYsXS5KCg4OVN29epUmTRpJUuHBhoz3WmDFjVK5cOau+RwAAAHi7MMIBQKL0799fmTNnliRFRkbq4sWLWrRokQ4cOKDly5crffr0yVzh823cuFEdOnSI0242m7Vhw4ZkqCjpHTx4UJkzZ5arq6tFu7Ozs/LmzasDBw5IkhwdHfX3338b2x8+fChJypAhgyTp0KFD2r59uxFgAAAAAPEhcACQKN7e3sqVK5dFm4eHh/z8/LRixQq1bt06mSr7by4uLs8MHA4fPqybN2/KyckpGSpLOpGRkfrnn39UqlSpeLfnzJlTFy9eVEhIiNzd3bV69Wpt2LBBFStW1MyZM+Xg4KAiRYpIejK6oUqVKipbtuyrvAQAAAC8YQgcAFiNp6enJOncuXPJXMnz1a5dWzNnztStW7f07rvvWmzbsGGD8ubNq6xZs+rChQvJVKH13bt3T9L/jVL4t9j2Bw8eqEWLFlq+fLn8/PwkSTY2Nho4cKCcnJy0a9cu7d+/X0uWLDH2jYmJkY0NT+gBAADAEr8hArCaa9euSZLy5Mlj0b5//359/vnnqlChgooVK6ZKlSqpZ8+eRv9YmzZt0gcffKDSpUvLw8NDH330kTZu3BjnPCtXrlTTpk1VsmRJeXp6qnv37rp8+XKC66xdu7ZiYmLiXc5x/fr1qlOnTrz7Xb9+Xf3791elSpVUvHhx1atXT1OnTlV0dLRFv7t372rw4MGqXLmyPDw81K1bN928e
TPeY77stSRUVFSUJMne3j7e7bHtERERSp06tRYsWKDRo0erf//+WrJkiTFiZcyYMfL29laJEiV07NgxNWjQQEWLFlW9evW0b98+q9cNAACANxeBA4BEuX//vkJCQhQSEqIbN25o//79GjBggHLmzKlmzZoZ/fbs2aN27drp9u3b6ty5s77++mtVqVJFa9asUadOnYx+Bw8eVLdu3WRnZ6devXqpd+/eCgsLk5+fn/bs2WP0mzhxor766itlzZpVffv2VevWrXXgwAF98MEHz50I8mnFihVTzpw544QZJ0+e1NWrV+MNHK5du6ZmzZppzZo1atSokfr376+8efNq5MiR6tmzp9EvMjJSbdu21dKlS1W/fn317NlTt27d0tdffx3nmNa4loRKnTq1JOnx48fxbo+MjJQkOTg4SHoSQNSvX1/t2rVTsWLFJD2Z9+L48ePq1q2bHj9+rK5du8rJyUlTp06Vq6ur/Pz8jJEUAAAAAI9UAEiUJk2axGlLlSqVJk6cKEdHR6Pt119/VebMmTV79mzjw+yHH36oqKgo/fHHH7p+/bqcnZ31xx9/KCoqSuPHjzfmT6hfv74+/PBDnT17VhUrVtSVK1c0fvx4tWnTRoMGDTLO8cEHH6h+/foaOXKkxo0bl6D6a9eurXnz5ik0NNSY4HL9+vXKlSuX8QH7ab/88otu3rypuXPnGnMXtGrVSt9++63mzZunjRs3ytvbW0uXLtXZs2f1888/6/3335ckffTRR+rYsaN2795tHM+a15IQGTJkkI2NjUJDQ+Pd/uDBA6NffMxms8aOHav69evLzc1NmzZt0j///KMpU6bIzc1NJUuWVMWKFbVq1arXev4OAAAAvDqMcACQKD///LN+/fVX/frrr5oyZYq+++47FS9eXJ9//rlWrFhh9Js0aZJWr15thA2SFBoaanzjHhYWJknKli2bJOm7777TiRMnJEmZM2fWunXr1K5dO0lPvmGPjo6Wt7e3MboiJCRE9vb2Kl++vLZv3248OvBfateurcjISG3fvt1oW79+vXx8fOL0jY6O1ubNm1W+fPk4EyV27tzZqE2Stm7dKkdHRzVo0MDoY2trG+dDuDWvJSHs7OyUI0cOi6UtnxYcHCwnJydlypQp3u1//PGHzp8/r65du0qSMQIjb968kqSMGTMqc+bMSfI4CAAAAN5MjHAAkCilS5eOs0pFo0aN1LBhQw0fPlx169ZVmjRplCpVKv39998aP368/ve//yk4OFjXrl2T2WyW9GTCQUlq3bq1du/erTVr1mjNmjXKkiWLqlWrpkaNGhmTUcZ+yP3444+fWVdISIiyZs2aoPrfffddbdq0SfXr19e5c+d04cIFjRgxIk7fO3fuKCwsTPnz54+zLUuWLHJ0dNTVq1clPfngnitXrjiTKBYoUMDitTWvJaHKlCmj33//XRcvXlS+fPmM9uvXr+vSpUuqUaNGvPtFRUVp3LhxatSokbFfbBgSFRVlhEcREREymUxWqxcAAABvNgIHAFaTOnVq1ahRQzNnztSFCxdUtGhRzZw5U8OHD1fu3LlVrlw51ahRQ8WLF9eOHTs0efJkY9906dJp1qxZOn78uDZt2qSdO3dq+fLlWrp0qfz8/NS1a1cjnPD393/m0P+MGTMmqFYbGxvVqlVLf/zxhyIjI7V+/Xplz55dJUuWjNM3NhyJ/fvfYmJiZGdnJ0kymUx69OhRvH3ie22Na0moxo0b6/fff9fo0aM1ZswY2djYyGw2a9SoUZIkX1/fePdbvny5rl69qunTpxttseFLYGCgKlWqpPPnz+vBgwfGiAcAAACAwAGAVcV+kLaxsVFERITGjBkjDw8PzZ4922KFhJUrV1rsd+XKFd24cUNlypRRiRIl1KNHD/39999q166dZsyYIT8/P+XMmVOSlDVrVnl4eFjsHzux5LNWYYhP7dq1tXDhQu3bt0/r169X7dq14/2G3snJSWnTptXFixfjbLtx44ZCQ0ONR
0Jy5cqlvXv3KjIy0qKWK1euWOxn7WtJiEqVKql+/fpas2aNfH195enpqSNHjujgwYOqU6eOqlevHmefyMhITZw4Uc2bN7cY0VKlShVly5ZNffr00XvvvafNmzcrU6ZMatiwoVVrBgAAwJuLORwAWE14eLg2bdokJycnFSxYUI8ePVJ4eLjy5Mlj8eH52rVrWr9+vSQZS0qOGTNG7du3140bN4x+2bNnV9asWWVjYyOTyaSaNWtKkiZPnmwxYuDKlSv64osv9Msvv7zQkP4KFSrI0dFRc+bM0enTp5+5HGaqVKlUvXp17d+/XwcPHrTYFhAQIElGbT4+PgoPD9ecOXOMPmaz2eL10/2tdS0J9dNPP6lbt266c+eOZs2apVu3bqlbt24aOXJkvOdbuHChbt++rS+++MKiPU2aNJoyZYpy5sypuXPnKl26dAoICDAm4AQAAAAY4QAgUTZu3KjMmTNLevKB+vbt21q6dKmuXr2qYcOGydbWVhkzZpSHh4dWrVolR0dHubq66vLly1q0aJHCw8MlSQ8fPpQktW/fXuvXr1fr1q3VokULOTo6au/evdq/f78+//xzSVKhQoXUvn17/frrr2rVqpXq1aunR48e6bffflN0dLT69ev3QtdgZ2en6tWra+XKlcqSJYtKly79zL69evXS3r171aFDB7Vs2VK5cuXSrl27tGnTJtWqVUu1atWS9OSxhWXLlunnn3/WpUuXVLhwYW3atEmnT5+2OJ61r+VFrrlLly7q0qVLgvq3adNGbdq0iXebm5ubFi5caM3yAAAA8BYhcACQKMOHDzf+bWNjI0dHRxUpUkQ9e/aUt7e3sW3s2LEaMWKEVq9erUePHilbtmxq3ry5fHx81KJFC+3evVseHh4qXry4fv31V02cOFHTp09XaGio8ubNq0GDBlms8NCvXz/lz59f8+fP18iRI5U2bVoVL15cfn5+cnd3f+Hr8PHx0cqVK1W7du04Ez0+LVeuXFqyZInGjBmj5cuX6+HDh8qTJ4/69euntm3bGqMDbGxsNHXqVPn7+2v16tVauXKlypUrp1GjRhmrbSTVtQAAAACvE5P5WbOgAQDeeMePH5ckFSxY0GJpUqQ8YWFhOn36tIoUKaK0adMmdzlIRtwLiMW9gFjcC0gqzOEAAAAAAACsjsABAAAAAABYHYEDAAAAAACwOgIHAAAAAABgdQQOAAAAAADA6ggcAAAAAACA1RE4AAAAAAAAqyNwAAAAAAAAVkfgAAAAAAAArI7AAQAAAAAAWB2BAwAAAAAAsDoCBwAAAAAAYHUEDgAAAAAAwOoIHAAgBTCZTMldApKZyWSSnZ1dcpcBAABSEAIHAEgB0qRJk9wlIJk5ODioWNGihE8AAOCVsU3uAgAASW/viFG6f/lKcpeBZOSY20UV+vWUHj9O7lIAAEAKQeAAACnA/ctXdOfcheQuAwAAACkIj1QAAAAAAACrI3AAAAAAAABWR+AAAAAAAACsjsABAAAAAABYHYEDAAAAAACwOgIHAAAAAABgdQQOAAAAAADA6ggcAAAAAACA1RE4AAAAAAAAqyNwAAAAAAAAVkfgAABACnb9+nWVKVNGM2fOjHf7ihUr1LhxY7m7u6tatWoaPny4Hj58GKffpk2b5OPjIw8PD7Vt21bnz5+P0yckJESlS5fWnDlzrH0ZAADgNUTgAABACvXw4UN17dpVoaGh8W6fPHmy+vbtq5iYGLVu3VqFCxfWzJkz1aFDB0VGRhr9rl+/rh49esjOzk6+vr4KCgrSp59+qkePHlkcb+rUqXJ0dJSvr2+SXhcAAHg92CZ3AQAA4NW7evWqunbtqpMnTz5zu7+/vzw8PDRnzhzZ2dlJksaOHauJEydq0aJFat26tSRp1apVioqK0owZM+Ts7Ky6devK19dX27ZtU506dSQ9CSXmzZungQMHyt7e/tVcJAAASFaMcAAAIIWZOXOmGjZsqDNnzqhChQrx9lm0aJGioqLUqVMnI
2yQpM8//1zp06fX4sWLjbbg4GA5OTnJ2dlZklSkSBGjPdakSZOUNWtWNW3aNCkuCQAAvIYIHAAASGFmz56tnDlz6rffflOjRo3i7XPgwAFJUvny5S3aU6dOLXd3d505c0YPHjyQJGXMmFFhYWEym82SZDyikSFDBknSlStXtGTJEvn5+cnWlsGVAACkFAQOAACkMN9++61WrFih0qVLP7PP5cuX9e677ypdunRxtuXMmVOSdPHiRUlSqVKlFBYWpjlz5ujhw4eaNm2aTCaT3N3dJUkTJkxQnjx51LBhQ+tfDAAAeG3xNQMAAClM1apV/7PP3bt3lStXrni3xY5ciB3JULNmTdWoUUPDhg3TsGHDJEkdOnSQq6urzp8/r5UrV2rUqFGysXnyPUdMTIzxbwAA8PYicAAAAHFERUU9c3LH2PaIiAijbdKkSdq8ebOCgoJUsmRJlS1bVpLk7+8vNzc31alTR0FBQerTp48CAwOVLVs29e3bV/Xq1Uv6iwEAAMmCwAEAAMSRJk0aPX78ON5tsUtiOjg4GG0mk0m1atWy6Hf69GmtW7dOAQEBMplM6t27tx49eqTJkydr27Zt6t27twoXLqx8+fIl3YUAAIBkw3hGAAAQh6OjozEp5L/Ftsc+WvEsY8aMkbu7u6pXr64zZ87o2LFj+vLLL+Xl5aUBAwbI0dFRixYtsnrtAADg9UDgAAAA4sibN69u376tR48exdl29epV2djYKE+ePM/c/8iRI9q6dau6d+8uSQoKCpIkYx9bW1u5uLjo8uXLSVA9AAB4HRA4AACAOMqUKaOYmBgdPHjQoj0iIkJHjx5VwYIFlT59+mfuP3r0aHl6eqpixYqSpOjoaIu/Y49lMpmSoHoAAPA6IHAAAABxNGjQQKlSpdL48eONORskKSAgQKGhofL19X3mvnv27NG+ffvUo0cPoy12nobAwEBJ0r1793Tp0iXlzZs3SeoHAADJj0kjAQBAHAUKFNAnn3yiqVOnqnHjxqpRo4bOnTunrVu3qnTp0mrRosUz9x09erSqVaum0qVLG21FihRRsWLFNHz4cJ09e1YHDx5UdHT0c4MLAADwZmOEAwAAiFevXr00ePBgmUwmzZ49W//73//Url07TZky5ZlLZm7evFmBgYEWoxtiTZgwQR4eHlqwYIHCw8Pl7+8vFxeXJL4KAACQXBjhAABACta0aVM1bdo03m0mk0mtWrVSq1atEny8mjVr6uzZs/Fuy549u6ZPn56oOgEAwJuHEQ4AAAAAAMDqCBwAAAAAAIDVETgAAAAAAACrI3AAAAAAAABWR+AAAAAAAACsjsABAAAAAABYHYEDAAAAAACwOgIHAAAAAABgdQQOAAAAAADA6ggcAAAAAACA1RE4AAAAAAAAqyNwAAAAAAAAVkfgAAAAAAAArI7AAQAAAAAAWB2BAwAAAAAAsDrb5C4AAJD0HHO7JHcJSGbcAwAA4FUjcACAFKBCv57JXQJeA9FRUcldAgAASEF4pAIA3nKRkZEKDw9P7jKQzMLDw3Xy1CmZzebkLgUAAKQQBA4AkALwIRNms1mPHz9O7jIAAEAKQuAAAAAAAACsjsABAAAAAABYHYEDAAAAAACwOgIHAAAAAABgdQQOAAAAAADA6ggcAAAAAACA1RE4AAAAAAAAqyNwAAAAAAAAVkfgAAAAAAAArI7AAQAAAAAAWB2BAwAAAAAAsDoCBwAAAAAAYHUEDgAAAAAAwOoIHAAAAAAAgNUROAAAAAAAAKsjcAAAAAAAAFZH4AAAAAAAAKyOwAEAAAAAAFgdgQMAAAAAALA6AgcAAAAAAGB1BA4AAAAAAMDqCBwAAAAAAIDVETgAAAAAAACrI3AAAAAAAABWR+AAAAAAAACsjsABAAAAAABYHYEDAAAAAACwOgIHAAAAAABgdQQOAAAAAADA6ggcACAFMJlMyV0CkpnJZJKDgwP3ArgXYOBeAJDUbJO7AABA0rK3t5eDg0Nyl4Fk5uDgo
KJFiyZ3GXgNcC8g1tt6L8TEmGVjQ4gCvA4IHAAgBRgzO1DB10OTuwwAAJJULuf06tG2VHKXAeD/I3AAgBQg+HqoLgbfT+4yAAAAkIIwhwMAAAAAALA6AgcAAAAAAGB1BA4AAAAAAMDqCBwAAAAAAIDVETgAAAAAAACrI3AAAAAAAABWR+AAAAAAAACsjsABAAAAAABYHYEDAAAAAACwOgIHAAAAAABgdQQOAAAAAN5Kd+7c0ffffy9vb2+VLFlS9evX19SpUxUVFfXc/X777Te5ublp2bJlcbZt2rRJPj4+8vDwUNu2bXX+/Pk4fUJCQlS6dGnNmTPHatcCvIkIHAAAAAC8dUJDQ9WyZUvNmTNHBQsWVKtWrZQhQwaNHDlSfn5+MpvN8e539epV/fLLL/Fuu379unr06CE7Ozv5+voqKChIn376qR49emTRb+rUqXJ0dJSvr6/Vrwt4k9gmdwEAAAAAYG1TpkzRhQsXNHDgQLVt29Zo79Wrl1avXq1t27apevXqcfYbPHiwwsLC4j3mqlWrFBUVpRkzZsjZ2Vl169aVr6+vtm3bpjp16kh6EkrMmzdPAwcOlL29fZJcG/CmYIQDAAAAgLfO1atXlT17drVs2dKivX79+pKkI0eOxNln6dKl2rlzp7y8vOI9ZnBwsJycnOTs7CxJKlKkiNEea9KkScqaNauaNm1qlesA3mSMcAAAAADw1nnWYxEXLlyQJL377rsW7Tdu3NCIESPUpEkTFS5cWNu2bYuzb8aMGRUWFiaz2SyTyaTQ0FBJUoYMGSRJV65c0ZIlSzRs2DDZ2vJRC2CEAwAAAIC3mtls1u3btzV37lyNGzdOOXLk0Pvvv2/R59tvv5WdnZ369ev3zOOUKlVKYWFhmjNnjh4+fKhp06bJZDLJ3d1dkjRhwgTlyZNHDRs2TMrLAd4YxG4AAAAA3mpjx47VpEmTJD0Z2TB9+nRlzJjR2L5mzRpt3LhRo0ePVqZMmZ55nJo1a6pGjRoaNmyYhg0bJknq0KGDXF1ddf78ea1cuVKjRo2Sjc2T73VjYmKMfwMpEYEDAAAAgLeai4uLOnbsqEuXLmnTpk1q1aqVpk2bpmLFiikkJETfffedatSoYczv8DyTJk3S5s2bFRQUpJIlS6ps2bKSJH9/f7m5ualOnToKCgpSnz59FBgYqGzZsqlv376qV69eUl8m8NohcAAAAADwVmvWrJnx7y1btuiLL75Q3759tWrVKg0bNkwREREaMmRIgo5lMplUq1Yti7bTp09r3bp1CggIkMlkUu/evfXo0SNNnjxZ27ZtU+/evVW4cGHly5fPmpcFvPYY3wMAAAAgxahRo4YqVqyo//3vf5o7d65Wr16tXr16KVu2bIk+5pgxY+Tu7q7q1avrzJkzOnbsmL788kt5eXlpwIABcnR01KJFi6x4FcCbgcABAAAAwFslKipKu3fv1q5du+LdniNHDklPRjtI0tChQ+Xm5mb8GT58uCSpf//+cnNz0759+555riNHjmjr1q3q3r27JCkoKEiSlCdPHkmSra2tXFxcdPnyZetcHPAG4ZEKAAAAAG+dzz//XOnSpdPOnTuVKlUqi21nzpyRyWRSs2bNjBUmnnb06FHt3LlTtWrVUpEiRZQzZ85nnmf06NHy9PRUxYoVJUnR0dEWf0tSRESETCaTFa4KeLMQOAAAAAB4q9ja2qp27dpavXq1pk+frs8++8zYNm/ePJ04ceK5k0TOnDlTO3fulLe3t5o2bfrM8+zZs0f79u3T/PnzjbbYeRoCAwPl6uqqe/fu6dKlS6pataqVrg54cxA4AAAAAHjr9OnTRwcPHtQvv/yiffv2ydXVVadPn9aePXuUK1cuffvtty99jtGjR6tatWoqXbq00VakSBEVK1ZMw4cP19mzZ3Xw4EFFR0fL19f3pc8HvGmYwwEAAADAW8fZ2VlLlixRixYtdPbsWc2ePVtBQUH6+OOPtWTJEjk7O7/U8Tdv3qzAwED16
NEjzrYJEybIw8NDCxYsUHh4uPz9/eXi4vJS5wPeRCaz2WxO7iIAAEnj+PHjkqRZf97XxeD7yVwNAABJK18uR438qnJyl/HGCQsL0+nTp1WkSBGlTZs2ucvBW4QRDgAAAAAAwOoIHAAAAAAAgNUROAAAAAAAAKsjcAAAAAAAAFbHspgAAAAAkIIsW7ZM/fv3T1Dfs2fPJnE11rF//34tXLhQx44d0z///KM0adKoSJEiat68uRo2bCiTyZToY0dERGju3Ln65JNPrFhxykDgAAAAAAApUPny5VW+fHk9fvxYN2/eVJYsWWRnZ5fcZb2QyMhIDR06VIsXL1a6dOnk5eUlHx8fhYSEaPPmzfrqq6+0detWjRw5UjY2iRvg37p1a128eJHAIREIHAAAAAAgBSpfvry6du36Ri+L+e2332rJkiWqWbOmhg8frkyZMhnbQkND1aVLF/3xxx/Knj27vvrqq0Sd4/bt21aqNuVhDgcAAAAAwBtn7969WrJkiQoVKqSxY8dahA2SlD59eo0dO1Zp06bV3LlzdefOneQpNAUjcAAAAAAAPNeyZcvk5uamtWvXqkOHDipRooRq1KihK1euqE2bNqpZs6a2bdummjVrqlSpUurevbux765du9S+fXuVLl1aJUuWVJMmTTR37lzFxMRYnMPNzU39+vVTQECAypYtq7Jly2rmzJnPrGnJkiWSpE8//VT29vbx9smUKZOGDBmiYcOGWTwu8vDhQ02YMEGNGjWSh4eHSpQoIR8fH/30008KCwuTJAUHB8vNzU1Xr17VgwcPjPpihYaGauTIkfL29lbx4sVVtWpVffPNN/GOiLh165YGDx6sqlWrqlSpUmrZsqUOHz6sdu3aqWbNmhZ9IyMjFRAQoPr166t48eLy9PTUF198oePHjyfoZzJ+/Hi5ublp8eLFceq4evWqChcurF69ej3zfbUmHqkAAAAAACTI999/r6xZs6pNmzYKDg6Wi4uLJOnOnTvq0aOHatWqpfTp06tAgQKSpDlz5uj7779XhgwZVLt2baVNm1Y7duzQ0KFDdfDgQY0aNcpiQscdO3Zow4YNatKkiW7duqVSpUo9s5YdO3ZIkqpUqfLcmhs1amTxOioqSu3bt9exY8dUpUoVValSRQ8fPtTmzZs1ffp0BQcHy9/fX46OjvLz89OsWbMUERGhzz77TEWKFJEkPXjwQC1bttRff/2lihUrysfHR8HBwVq0aJF27NihBQsWKGvWrMZ707JlSwUFBalKlSpyc3PT3r179fHHHytTpkwWQUhERITat2+vQ4cOydXVVR999JFu3bqljRs3aseOHRozZoy8vb2f+zNp3Lixxo8fr1WrVumDDz6w6Ltq1SqZzWY1btz4ue+ZtRA4AAAAAEAKtH//fo0bN+6Zk0ZWrVpV7u7uFvvY2tpq3rx5cnBwsGgPCwtT+/btLUYAXLlyRSNGjFCOHDk0e/ZsI5wICwvTF198oTVr1sjLy8viw++tW7c0adKkON/6/9ujR4909+5dpU+fXu++++4LXfe6desUGBiozz//XF9++aXR3rt3b9WpU0cbN25UeHi4HB0d1bVrVy1fvlz3799X165djb6jRo3SX3/9pcGDB6tVq1ZG+6ZNm9S5c2cNGzZMY8eOlSSNHz9eQUFB6tOnjzp06CBJiomJUc+ePbV27VrlzJnT2H/atGk6dOiQmjZtqu+++062tk8+sp88eVItW7ZU//79VaFCBaVPn97YJ76fSdmyZXXgwAHduHHDCD6kJ4FDlixZVKlSpRd6zxKLwAEAAAAAUqD9+/dr//79z9yeIUOGOIFDtWrV4oQNsXx8fCxer1y5UlFRUerSpYsRNkhS2rRpNWjQIDVo0EBLly61CBzSpEkjLy+v/6z9/v37kqR06dL9Z99/K1q0qL7//nvVqlXLoj19+vQqWrSotm/frnv37j3zOqOiorRixQoVKlTIImyQpFq1aql06dLasGGDQkND5eDgoFWrV
ilnzpxq166d0c/GxkZ9+vTR+vXrLfZfvny5HBwcNHDgQCNskKRixYqpZcuWmjFjhtavX6+mTZsa2+L7mTRu3FgHDhzQmjVrjPOeOnVK586dU/v27ZUqVaoEv18vg8ABAAAAAFIgPz+/F16lIleuXAnedubMGUlSuXLl4vQtVKiQHB0djT6xsmXLlqAPw7ETRMYGDy8iX758ypcvnyIiIhQYGKiLFy/q8uXLOnnypBHAREdHP3P/ixcvKiwsTNHR0Ro3blyc7REREYqOjtbZs2eVOXNm3bt3TxUqVIhzXTly5FC2bNmM16Ghobpy5YpKly5tMYIhVpkyZTRjxow471l8P5O6devqu+++06pVq4zAYdWqVZLiPmKSlAgcAAAAAAAJkjp16mduS5MmjcXr0NBQSU9GSsQna9asCgoKeu4xnsXe3l7Ozs66fv16nMcG/u327dtKlSqVEVLExMRo8uTJ+vXXX3Xv3j1J0jvvvCMPDw/lzJlT58+fl9lsfubxYkOOCxcuaPz48c/sF3tsSc987CNr1qy6ceOGpCcTWUrPf7+kJ4+TPC2+n0n69Onl7e2t1atXKygoSC4uLlq9erVcXV2NeShehQQHDrE3S2LEl84AAF4djyJZlDPriw85BJB4D8Mf6+6DyOQuA0hRcjnzueN1Evu4w/Xr1+Xk5BRn+7179+IsZfkiqlatqiVLlmjXrl1q0qTJM/uNHz9eCxYs0LfffqsWLVpoxowZGjNmjMqXL6+OHTuqSJEiypIli6QnK16cP38+QdfVqFEj/fTTT8/te/bsWUnP/jwdGzI8fdzr16/H2zc26Ejoe9a4cWOtXr1aa9euVZkyZXTjxg19/PHHCdrXWhIcOJQtW9Zi9tCEMplMOnXq1AvvBwCwnlYNXJO7BCDFiY6OfmXPyAL4PzExZtnYvPjnFlhf4cKFtWHDBh06dCjOt+pBQUG6efPmS01e2KRJEy1ZskRTp05Vw4YNLeY8iHXz5k2tWbNGJpNJFStWlCStXr1aqVKl0qRJkyy+HDebzbpw4YLx72fJly+f7O3tdfLkSZnN5jifk2fOnKmwsDB99NFHyp8/v9KmTatjx47FOc79+/d18eJFY+RC+vTplStXLl26dEkhISFxQpoDBw5IkgoWLJiQt0eVKlVSlixZtGXLFt2/f182NjZq2LBhgva1lgQHDvE9dwMAeDN0795d586dS+4ygBSjYMGCxuzkr5vw8HBdvHhR+fLle+aEaEgZ3tZ7gbDh9dGoUSNNmjRJAQEB8vLyslilYujQoUafxCpbtqzq16+vNWvWqGvXrvrpp58sHke4ceOGunXrprt376pNmzbG+VOnTq3o6GiFhIRYBA4TJkzQ1atXJT2ZGDKWnZ2dxevUqVOrfv36WrFihX799Vd98sknxrZ9+/bpp59+Uvbs2fX5558bH/IXLlyo+fPn66OPPpL05LGOn3/+WY8fP7a4piZNmmjcuHH64YcfNGLECItVKn777Tc5Ojr+5woesVKlSqWGDRtq5syZun79uipUqCBnZ+cE7WstCQ4c5syZk5R1AACS0Llz53TyxInkLgPAa8BsNis8PPy5394hZeBewH8tiylJ9evXV4ECBRJ1fBcXF/Xt21fDhg1TkyZN5O3trbRp02r79u26cuWK3nvvPYsVKhLjhx9+0IMHD7R582Z5eXmpRo0aypYtm65evart27fr4cOHql27tvr06WPs8/777+vo0aP66KOPVK9ePdnZ2Wnfvn06efKk3nnnHd2+fVt37941+mfNmlWXLl1S7969VaVKFTVu3Fh9+/bVkSNH9OOPP2rTpk0qWbKkrl+/rvXr18vW1lY//PCDbGxsJEk9evTQjh07NGTIEG3atEkFCxbUgQMHdOHCBaVJk8boJ0kdO3bUzp07tWrVKp09e1YVKlTQ7du3tXHjRpnNZo0ePfqFpixo0qSJZsyYob///ls9evR4qfc6Maw2aeSNGzd07949FSpUSFFRU
fEOZwEAAAAAvB7+a1lMSSpSpEiiAwdJatu2rfLmzavp06dr/fr1MpvNKlCggDp16qTmzZsn+rixHBwcNHnyZG3YsEFLlizRkSNHdOPGDTk4OKhUqVL64IMPVL9+fYt9WrZsKbPZrPnz52vx4sXKkCGD8uXLp1GjRil16tTq0qWLtm3bJg8PD0nSV199pQEDBujPP//U7du31bhxYzk5OWnRokXGuefMmSMnJyfVrFlTnTt3VuHChY3zOTk5af78+Ro5cqR27Nih/fv3q1SpUpo9e7Y6depkMcIoderUmjlzpqZPn65Vq1Zp/vz5cnR0VI0aNdSpUycVLVr0hd4fV1dXFShQQNeuXVPt2rVf4p1OHJP5JSLNR48eafz48Vq+fLlCQkKM+RqmTJminTt3asiQIcqfP7816wUAvIDjx49Lkvr168cIB+AVKla8uP7444/kLiNeL7L8Hd5u3AuIxb2QtC5fvqxs2bLJ3t7eoj0yMlKlS5dWxYoVNXXq1CQ594MHD1S5cmXVqVNHP//8c5Kc43ls/rtL/B4+fKiWLVtq2rRpsre3l4uLizEc69GjR9q/f79atWql4OBgqxULAAAAAMCbpHPnzqpcubKxykSsWbNm6fHjx/L09Eyyc0+dOlURERFq0aJFkp3jeRIdOEyaNEmnTp3SoEGDtHnzZovZLrt166YRI0bo3r17mjhxolUKBQAAAADgTfPRRx/p/v37atiwoYYOHaqff/5Z7du318iRI+Xm5qY2bdpY/ZytWrXSe++9p8mTJ6tChQrJtghEoidaWLt2rapWrarWrVtLUpylQBo3bqz169dr3759L1chAAAAAABvqFatWilLliyaM2eO1qxZo7CwMGXPnl2dOnVSp06dlDp1aqufM2PGjDpx4oQqV66sn376yerHT6hEBw43btzQe++999w++fLl086dOxN7CgAAAAAA3ng+Pj7y8fF5Zed7XZ40SPQjFU5OTjp//vxz+/zvf/+Tk5NTYk8BAAAAAADeUIkOHGrUqKEtW7Zo+/bt8W5ft26dtm/frmrVqiW6OAAAAAAA8GZK9CMVfn5+2rJliz7//HNVq1ZNd+/elSSNGzdOJ06c0Pbt2/XOO++oS5cu1qoVAAAAAAC8IRIdOLz77rtasGCBvvnmG23bts1YEnPChAmSpHLlymno0KFydna2TqUAAAAAAOCNkejAQZKyZ8+uKVOm6ObNmzp16pTu37+vtGnTys3NTbly5bJWjQAAAG+F69evq379+uratavatWtntNesWVNXr1597r7Dhw9X06ZNJUmbNm3Sjz/+qJs3b6pEiRL65ptvVKBAAYv+ISEh8vb21pdffpkkS64BAPBfXipwiJUlSxZ5eXlZ41AAAABvpYcPH6pr164KDQ2Ns61t27Z68OBBnPZHjx5pxowZSp06tUqUKCHpSWjRo0cP5c6dW76+vlq7dq0+/fRTrV27VmnSpDH2nTp1qhwdHeXr65t0FwUAwHMkOHBYsWJFok/SuHHjRO8LAADwprt69aq6du2qkydPxrv96dEOT/v+++8VExOjgQMHqlChQpKkVatWKSoqSjNmzJCzs7Pq1q0rX19fbdu2TXXq1JH0JJSYN2+eBg4cKHt7+yS5JgAA/kuCA4d+/frJZDIZr81mc5zXsZ5ulwgcgHHjxmn8+PHP7RM7VDa275o1a+IMj01K/fr10/Lly/+zX/ny5TVnzpxXUNGLia3/2LFjSp06dbx9goODVatWLUlSw4YNNXLkyHj73bp1S1WrVlVMTIyGDBmijz76yCo1tmnTRhcuXNCuXbteaL/kuicAWMfMmTPl7++vR48eqUKFCtq7d2+C9jt48KB+++03Va5cWR988IHRHhwcLCcnJ2OerCJFihjtsSZNmqSsWbMaj2AAAJAcEhw49O/f3+J1TEyMpk+frtDQUDVu3FgeHh7KlCmTHj58qOPHj2vp0qXKnDmzvvzyS6sXDbypPv/8c+XPnz/ebaVLl
5Yk1a5dW7lz537lE676+vqqYsWKxusLFy4oICBAtWvXVu3atY32d99995XWlVS2bdumqKgo2drG/b/BDRs2KCYmJhmqAvA2mj17tnLmzKlvv/1Wly5dSnDg8OOPPypVqlQaNGiQRXvGjBkVFhZmfPkT+4hGhgwZJElXrlzRkiVLNGzYsHj/Pw4A/s1kMsnBwSHOF8evm7CwMC1dulSrVq3SpUuX9OjRI+XJk0cNGjRQu3btnvml0+vs8ePHunXrlrJnz57cpSSJBP9X6OOPP7Z4HRAQoIcPH2ru3LkqVqyYxbb69eurefPm8vX11YkTJ1S3bl3rVAu84SpVqiRPT8/n9ilcuLAKFy78iir6Px4eHvLw8DBe79u3TwEBAXJzc1OjRo1eeT1JycXFRVeuXNH+/ftVqVKlONvXr18vJycnhYSEJEN1AN423377rSpVqqRUqVLp0qVLCdpn/fr1OnbsmFq0aBEnqC5VqpTCwsI0Z84cNWvWTNOmTZPJZJK7u7ukJyuG5cmTRw0bNrTylQB43cXEmGVj8+KhgYODg4oWLZoEFSW+pn+7fPmyOnfurIsXL6p+/fpq0KCBzGaz9uzZo1GjRmnLli2aMWOG0qZNa4WqX42rV6+qQ4cO+vjjj602ovZ1k+jYe8GCBfLx8YkTNsQqUKCA6tatqxUrVqh3796JLhAArK1atWpasmSJNmzYECdwuHv3rvbv369mzZpp4cKFyVQhgLdJ1apVX3ifX3/9VTY2NurQoUOcbTVr1lSNGjU0bNgwDRs2TJLUoUMHubq66vz581q5cqVGjRolGxsbSU9Gpcb+G8DbzcbGpDGzAxV8Pe7ktMkhl3N69Whb6qWPExkZqS5duujGjRtauHChihcvbmz7+OOPNXPmTA0fPtzi/xffBMHBwbp48WJyl5GkEv1fn3v37snBweE/+4WHhyf2FECKNG7cOLm5uen8+fOSpGXLlsnNzU3Hjx9X//795enpqVKlSql9+/Y6c+aMxb5ms1mzZs3Se++9pxIlSqhy5coaOHCgbt26ZdUaY2vavn27Rfv27dvl5uamZcuWGW1ubm4aN26c5s6dqzp16qh48eKqU6eO5s6dG+e4x44d06effqrSpUvL3d1drVu31p49e+L0O3jwoNq0aSMPDw9Vr15d06ZNe6H606ZNq0qVKmnz5s0W889I0ubNm2VjY6Pq1avHu++KFSvUtGlTlShRQuXKlVPnzp119uzZOP02btyoZs2aqVSpUqpTp84z58e4fv26+vfvr0qVKql48eJq0KBBvO8NgJTj1KlTOnz4sLy9vZU3b954+0yaNEkTJ05U3759NXfuXPXp00eS5O/vLzc3N9WpU0dBQUHy9fVV0aJFVb16da1du/YVXgWA5BJ8PVQXg++/Fn+sFXwsWLBAf/31l/r27WsRNsRq166dChcurD/++EP37t2zyjlhHYkOHFxdXbVx40bduHEj3u2XLl3SunXrjCWcAEgPHjxQSEhInD8RERH/uW/37t117do1devWTe3atdPBgwfVsWNHRUVFGX2+/vprDR8+XEWLFtWAAQPUpEkTrVmzRr6+vrpz505SXtpzLV++XBMnTlSjRo3Uv39/2draaujQodq2bZvRZ8+ePWrZsqVu3LghPz8/devWTQ8fPtQnn3yiP//80+h34MABtWvXTv/884/8/PzUvHlzTZ06VevWrXuhmnx8fPTPP//o+PHjFu3r169X5cqVlS5dujj7jBo1Sn379lXq1KnVu3dvtW3bVocPH9aHH36oY8eOGf1Wr14tPz8/xcTEqGfPnqpdu7aGDBmiEydOWBzv5s2batGihbZu3aoPP/xQ/fv3V+7cuTV06FB9//33L3Q9AN4esSuDtWjR4pl9TCaTatWqpU8++URly5aVJJ0+fVrr1q1T9+7dZTKZ1Lt3b4WFhWny5MmqWbOmevfu/dZ/kwbg7bR69WqlTZtW77///jP7TJw4Ubt27VLGjBklSefPn
1e3bt3k6empEiVKqFGjRlq8eLHFPrFfoq1du1Z16tRRiRIlNHjwYAUHB8vNzU3Tp09X27ZtVbx4cTVs2FDR0dGSnnzJ1rJlS7m7u6t06dLq2LFjvKsQnTx5Ul988YU8PT1VtmxZtWnTRvv37zfO3bZtW0nSkCFD5ObmZpX36nWT6EcqOnbsKD8/P3344Ydq27atihUrpnTp0unBgwc6fPiw5syZo/DwcPn5+VmzXuCN1qVLl3jb+/fv/8wl0WIVKFBAU6dONV7b2tpq/Pjx2rdvnypXrqwDBw5o8eLFcY5Vr149ffDBB5o8ebL69etnjct4YTdv3tS6deuUI0cOSZKXl5dq1aqllStXysvLSzExMRo8eLBcXV21cOFC2dnZSZJat26t1q1b6/vvv1fNmjVlb2+vn3/+WRkzZtSiRYuUOXNm4xqbNGnyQjXVrFlTtra22rhxo0qWLClJCg0N1a5duzR06NA4/c+fP6+pU6eqSpUqmjJlilKlSiVJatKkiRo0aKDBgwdrxYoViomJ0YgRI1SoUCEtWLDAmLyoatWqatu2rcVzhaNGjVJoaKh+//135cqVS5LUqlUr/fDDD5o1a5aaN2+eLPN5AEheW7ZsUaZMmSwm8k2IMWPGyN3dXdWrV9eZM2d07NgxTZo0SV5eXqpcubLWrl2rRYsWqWvXrklUOQBYn9ls1smTJ1W6dGnjd8T45MyZ0/j3yZMn1bp1a9nb26tly5bKnDmz1q9fr0GDBunChQvq27evxb4DBw6Ur6+vsmXLpty5cxvt48ePV6VKlTRo0CBFRkYqVapUWrFihfr166cyZcqoZ8+exkSWH330kWbOnGlMBH/48GG1a9dOjo6Oatu2rTJlyqQFCxbok08+0cyZM1WuXDl9/vnnCggIUNOmTVWhQgUrv3Ovh0QHDt7e3vruu+/0008/acSIEXGWyHRyctLYsWON1B2A1Ldv33g/QObLl+8/961Xr57F69hl0G7evClJxjf8NWvWtJjsMHv27CpUqJC2bNmSbIGDu7u7ETZIUq5cueTo6Gg86nH69GldvnxZ3bt314MHDyz29fb21i+//KITJ04ob968OnbsmFq3bm2EDdKTMKZatWrasGFDgmvKlCmTypYtq40bN6pnz56SnqxcYTabVatWLZ0+fdqi/+bNmxUTE6NOnToZYUPstbz//vtauHChgoODdefOHd28eVMdOnSwmCnZ09NTRYsWNUaFxcTEaMOGDfLw8FDatGktfmY+Pj6aNWuWtm7dSuAApDAXLlzQ5cuX1bRp0xdaYeLIkSPaunWrZs6cKUkKCgqSJOXJk0fSk5DaxcVFly9ftnrNAJCU7ty5o6ioKGXJkiXB+3z33XeKjo7W4sWLjQChdevW6ty5s2bMmKFGjRpZ/I5Vs2ZNixAidpnhzJkzy9/f3/jdLzQ0VN99951q1KihSZMmGf1bt26t999/X99//73xaPGIESPk4OCgZcuWKWvWrJKk9957Tz4+PpoyZYqmTJmiSpUqKSAgQCVLlnzrJmmP9VJrJX3wwQeqW7eutm7dqrNnz+r+/ftydHRUsWLF5OXl9UbNEAq8CsWKFfvPVSqe5Z133rF4bW9vL0nG8o2xv1w+vYTl02IT4QcPHujRo0cW2zJkyKA0adIkqq6EcHJyitNmb28fp/axY8dq7Nix8R7j2rVrsrW1ldlstkieYxUoUOCFAgfpyQf7oUOH6uLFi8qXL5/Wr1+vChUqyNHRMU7f2P/wxLesaYECBSQ9mWk4NjiIr8b8+fMbgcOdO3f04MED7dix45nfYl67du2FrgfAmy8wMFCSVKZMmRfab/To0fL09DT+/yR22G/s35IUERHx2i95BwD/Fjvp7dOPET/PrVu3dOTIETVt2tTi9zEbGxt9/vnn2rJlizZs2GARODxrdEHZsmUtvmjavXu3QkNDVadOnTirmXl5eWnevHm6fv26bG1tFRgYqJYtWxphg/TkC6+5c+fG+7vm2
+qlF2fOkCGDGjZsyNJLQBL7r18SY2JilDp1agUEBDy337Bhw+JMYDh8+HA1bdr0pWuMDRD+7b9mR4/dr3PnzipXrly8fQoWLKjr169LUpzA5Hnnfp7YkVobNmzQxx9/rO3bt2vAgAHx9o2dXPLfk0w+3WZnZ2f8nOKbl+PpfWM/BNSsWVNt2rSJ95xP/wcKQMpw6tQpSXrmKmDx2bNnj/bt26f58+cbbbEj5wIDA+Xq6qp79+7p0qVLiVoxAwCSU6ZMmWRvb6/bt28nqP/Vq1clxT+C+OkviZ727rvvxnusf3/hF/sl2b8fyXjatWvXjJAivol/CxUq9Mx930YvHTisXLlSS5Ys0dmzZxUeHq5MmTKpUKFCaty4MSEE8ArlzJlTO3fuVMGCBeN8UN28ebMyZcokSfr000/jTLhTsGDBFzpXbIAQGRlp0R77eMeLin3mLk2aNHGWqTx79qz+/vtvOTg4KGfOnDKZTPGuY5+YYcLOzs4qVaqUNm3apPz58ysyMlK1atWKt2/sHAsXLlyIM6TvwoULkqRs2bIZj1HENzHb0zU6OTnJwcFBkZGRca45JCREBw4cMIZCA0g5rly5IunFAsfRo0erWrVqxnPD0pPH7ooVK6bhw4fr7NmzOnjwoKKjo+Xr62v1mgEgqZUuXVqBgYGKjIw0Rvn+25IlS7Rx40Z99tlnzzxO7BdU/54L4llfjj09uuHp/QcPHvzMR6Lz589v/B7IqLKXWKXCbDarV69e6tu3r/bv328Mczabzdq1a5f69Omj3r17W7NWAM8R+0F54sSJFu1Hjx5V586dNWvWLElPwoVKlSpZ/HnRb9JjP3A/Pc+B2WxO9JJrxYsXV9asWfXbb79ZLGUUGRmpvn37qlu3boqKipKTk5PKlSunNWvWWDxuEBwcrM2bNyfq3LVr11ZgYKAWLlyocuXKxfv4h/Tk/TWZTJoyZYrFEOVr165p5cqVKly4sHLkyKGiRYsqZ86cWrBggUJD/28pqCNHjlisiGFraysvLy/t3r1bR48etTiXv7+/unXrpnPnziXqmgC8ue7evSvpyQjShNi8ebMCAwPVo0ePONsmTJggDw8PLViwQOHh4fL395eLi4sVqwWAV8PHx0fh4eFatWpVvNvNZrMWLVqkrVu3GiFB7BdCT3v6S6LEiP2SLGPGjHF+n06bNq1iYmKUJk0aY+6y+L4Q+/XXX/Xdd98l6vxvokSPcJg/f77++OMPVahQQYMGDbL4hjQoKEhDhw7VH3/8oYoVK6pZs2ZWKRbAs3l5ecnHx0fz58/X33//rWrVqun27dv67bff5OjoqO7du1vtXJ6ensqaNaumTJmiqKgoZc+eXWvXrjUeeXhRdnZ2Gjx4sLp3764mTZqoRYsWypAhg1asWKHTp0+rd+/exiSRAwYMUMuWLdWiRQu1adNGJpNJs2fPVvr06eM8S5cQPj4++vnnn7V9+3Z98803z+xXoEABffLJJ5o+fbpat26tevXq6f79+5o3b57MZrOxr8lk0jfffKPOnTurefPm8vX11YMHDzRr1qw4YUbv3r21b98+tWvXTh999JHy5s2rvXv3as2aNapevTpDn4G3UNOmTZ/7CNuCBQte6Hg1a9bU2bNn492WPXt2TZ8+/YWOBwCvow8++ECzZ8/Wzz//rMKFC8d57GzChAkKDAxUgwYNVKpUKZUqVUpr1qxR586djXkcYmJiNHnyZElSjRo1ElVH5cqVlSZNGk2fPl0+Pj7GaIu7d++qW7duMpvN2rJli7JmzapixYpp7dq16tKli/F77P379zVt2jRj/ojYcCQxjwa/KRIdOCxZskS5c+dWQEBAnMnm8uTJo/HjxxsztxM4AK/G6NGjNWPGDK1YsULDhw9XpkyZVKFCBXXv3j3eyQ4Ty9bWVtOmTdOPP/6oWbNmKXXq1PLx8dHAgQMT/ShV7dq1NXPmTE2aNElTpkyR2WxW/vz59dNPP1nM2lukSBHNmzdPI0eOV
EBAgBwcHNSiRQuL/4i8iNy5c8vNzU3/+9//5OPj89y+ffr0Ub58+TR37lz9/PPPSpcuncqXLy8/Pz+5uroa/by8vDR9+nT5+/trzJgxcnJyUo8ePRQYGKg9e/YY/VxcXLR48WL5+/vr999/14MHD5QjRw517dpVn3766X/OfQEAAJAS2Nvba/z48frkk0/k6+ur+vXry93dXQ8fPtS2bdt04MABFS9e3PgCaNCgQWrbtq0++OADtWzZUk5OTlq/fr3279+vNm3aqGjRoomqI3PmzOrVq5eGDRumZs2aqXHjxkqVKpUWLFigGzduaNSoUcYKQwMGDFD79u3VrFkzffjhh0qTJo0WL16sBw8eqFevXsbxJOmPP/6Qvb29mjRp8kIrFL0JTOb4ZkBLgFKlSql58+b6+uuvn9nnu+++0/Lly3X48OFEFwgASLzYxzj69eunkydOJHM1QMpRrHhx/fHHH8ldRrzCwsJ0+vRpFSlShBXFUjjuhbfTmNmBCr4e+t8dX4FczunVo20pqx0vdvTuxo0bde3aNT1+/Fj58uVTw4YN1bZtW4v5Hc6ePauxY8fqwIEDioyMVMGCBdWqVSuLUWbLli1T//79NXXqVFWrVs1oDw4OVq1atdSxY8d4pwnYsGGDpk+frjNnzsjOzk6urq767LPP5OXlZdHv2LFjGjt2rA4fPiw7OzuVKFFCPXv2tBihEbuUptls1u+//x7vSmdvskQHDmXKlJGPj4+GDx/+zD79+/fXhg0bdPDgwUQXCABIPAIHIHkQOOBNwL3w9omJMcvG5vWaqPB1rAmvTqLH65YsWVKbNm2Ks6RIrCtXrmjjxo0qUaJEoosDAAAAACRMYj/Yh4eH69SpUwoPD7dyRYmvCW+HRAcOn376qe7fv6+2bdtq+fLlunLliu7cuaMzZ85o7ty5atWqlUJDQ9WhQwdr1gsAAAAAsCKz2azw8HAlcvA78EyJnpGicuXKGjhwoH788UcNGDDAYpvZbJatra0GDBigKlWqvHSRAAAAAADgzfJSU2C2adNG1atX18qVK3X27FmFhoYqXbp0Kly4sN5//33WegYAAAAAIIVKcOBw7dq1eNtTpUqlJk2aPHefHDlyJKI0AAAAAADwpkpw4FCzZk2ZTC8+4YfJZNKpU6deeD8AAAAAAPDmeuFHKtKmTauyZcvK1valnsYAAAAAAABvsQSnBq1bt9aGDRt0/fp1HTlyRDVr1lTdunVVuXJl2dnZJWWNAAAAAADgDZPgwGHQoEEaNGiQjhw5onXr1mn9+vX6/ffflT59etWqVYvwAQAAAAAAGF74uQgPDw95eHioX79+OnbsmP78809t2LBBK1asUPr06VWjRg3Vq1dPVapUkb29fVLUDAAAAAAAXnMvNRFDyZIlVbJkSfXp00cnT540Rj6sWrVK6dKlU40aNVS3bl15e3tbq14AAAAAAPAGsLHWgYoVK6aePXvqzz//1OLFi1WoUCGtXr1aXbt2tdYpAAAAAADAG8JqS02EhoZq69atWr9+vXbs2KHw8HDZ2dmpYsWK1joFACCRqlevrgIFCiT5ee7fv6+bN28m+XmA113BggWTuwQASDCTySQHBweZTKbkLiWOffv2qW3btvLz80vUl9njxo3T+PHjtWbNmiT5XSi2viZNmmjEiBFG++XLl5U7d+7n7uvm5qb69etr9OjRVq/rdfFSgUNISIg2bdqk9evXa+/evXr8+LHSpEmjKlWqyMfHRzVr1lT69OmtVSsAIJG++uqrV3Iec3SMTKmsNngOeKNFR0crVapUyV0GgBQksf+/4+DgoKJFiyZBRSnz/ws7dOggR0fHtzpISKgXDhyuXbumDRs2aMOGDTpy5Iiio6OVNm1a1a5dWz4+PvLy8pKDg0NS1AoASKS9I0bp/uUrSXoOx9wuqtCvZ5KeA4kXHh6uixcvKl++fPx3+hVJab9gA0h+qVKlUvfu3XXu3LnkLkXSk9FeY8eOTe4yklSaN
GkkSalTpzbadu7cqfr16ydXSa+VBAcOAQEBWr9+vU6fPi1JypAhgxo2bCgfHx9WpACA19z9y1d059yF5C4DychsNis8PFxmszm5SwEAJKFz587p5IkTyV1GipE3b15JUv78+ZO3kNdUggOHMWPGyGQy6d1335W3t7cqVKggW1tbmc1m7dix47n71qpV66ULBQAAAADgWc6ePatRo0bp4MGDSp06tZo2bRrvaLOIiAhNnjxZq1at0t9//63MmTPL29tb3bp1U+bMmY1+bm5u8vPzk5OTk2bPnq2rV68qZ86catu2rVq1aiVJypgxo959910VLFhQwcHBxmffNWvWaM2aNZo9e7Y8PT0TVP8///yjli1bKjw8XLNnz1ahQoXUpk0bSVKXLl00evRonT59WhkyZFD9+vXVq1cvY4SFJF2/fl1jxozRtm3bdP/+fbm4uKhp06b65JNPlCpVKm3dulWdOnXSL7/8ogYNGhj7vf/++zp79qx27dqld999V5J0+/ZtVa5cWT169NDnn3/+gj+J//NCj1SYzWbdvHlTCxYs0IIFCxLU32QyGaMiAAAAAACwtosXL6ply5ZKnTq1Pv30U9na2mr+/Pm6c+eORb/Hjx/r008/1f79+9WgQQO1b99e586d08KFC7V7924tWrRIGTNmNPovX75cERERatWqlTJmzKh58+Zp6NChypUrl7y8vCRJu3btkiSFhYXpp59+Up8+feTu7q6WLVsmeKLKkJAQtWvXTg8fPtSsWbNUqFAhi2vr3LmzmjZtqmbNmmnjxo2aPXu27Ozs1KdPH0lPpj5o0aKFHjx4oJYtWypXrlzauXOnRo4cqRMnTmjs2LGqUKGCUqdOrT179hiBQ0hIiP766y9J0v79+41HQXbs2CGz2azq1asn4qfxfxIcOPj5+b3UiQAAAAAASAr+/v56/Pixli1bpjx58kiSmjZtqoYNGyosLMzot2zZMu3fv189evTQF198YbSXLVtWX375pSZMmKABAwYY7Tdv3tS6deuUI0cOSZKXl5dq1aqllStXGoFDrLRp06pRo0bq06ePcuTIoUaNGiWo9gcPHuiTTz7RnTt3NHPmTBUuXNhi+82bNzV69GgjDGjevLl8fHy0atUqI3D45ZdfdPPmTc2dO1dly5aVJLVq1Urffvut5s2bp40bN8rb21uenp7as2ePcey9e/fKxsZGTk5OcQKHHDlyxKnlRRE4AAAAAADeWDExMdq2bZsqVapkhA2S9M4776hhw4aaOXOm0bZx40alSZNGn3zyicUx6tevL39/f23cuNEicHB3dzfCBknKlSuXHB0ddevWLavUHh4ero4dO+rMmTNatGiRihQpEqePnZ2dateubby2sbGRm5ubNm/eLOnJSiCbN29W+fLljbAhVufOnS0CBy8vL23fvt1YtnPv3r0qUqSI8uTJowMHDkh68n7u2rVLdevWfenrY+0yAAAAAMAb6+7du3r48KFF2BDr3480BAcHK0eOHBarSjzd99q1a4qJiTHanJyc4vSzt7e36PMytmzZosDAQJnNZh0+fDjePhkyZJCdnd0za7hz547CwsLinbgyS5YscnR01NWrVyXJGJURO8ph37598vT0VOnSpXXu3DmFhITo+PHjunPnjmrUqPHS10fgAAAAAAB44z169ChO27+DAbPZ/MwVm2JiYmRraysbm//7mPz0v5NC2rRpjccoxo4dawQDT/uvGmKv53nXFRtYuLi4KH/+/NqzZ4/++ecfXbp0SZ6ensbElvv379eOHTvk4OCgChUqvMylPan9pY8AAAAAAEAyyZw5s9KnT69Lly7F2Xb58mWL17ly5dK1a9fiDScuXLigbNmyJVWZ8apevbo8PT317bffKjw8XEOGDHnhYzg5OSlt2rS6ePFinG03btxQaGioxXV5eXlpz5492rt3r2xtbVWmTBkVKlRI77zzjg4cOKDt27cbE0y+LAIHAAAAAMAby2QyqXbt2tq3b
5+OHTtmtD948EArVqyw6FurVi1FRETo119/tWj/888/denSJes8RmBj88KPXLi7u6tFixbavn27Vq9e/UL7pkqVStWrV9f+/ft18OBBi20BAQGSpJo1axptXl5eunv3rubOnauiRYsqffr0kqRy5cppy5YtOn78uFXeB+kFl8UEAAAAAOB18+WXX2rHjh1q3769Pv74Y2XIkEELFy6M85hBs2bNtHLlSo0ZM0bnz59X6dKldf78eS1cuFC5cuVSly5dXroWJycnHTp0SAsXLlTVqlUtJp18nt69e2vjxo364YcfVKVKFWXKlCnB5+zVq5f27t2rDh06GMti7tq1S5s2bVKtWrVUq1Yto2/ZsmWVLl06HTt2TB07djTaPT099eeff0rSSy+HGYvAAQAAAADeEgULFkzuEgyvshZnZ2ctWLBAP//8s+bMmSNJeu+991SgQAF9//33Rj97e3vNmDFDkyZN0h9//KE///xTWbJk0Ycffig/P78X+pD/LL1799Yvv/yi77//Xt99950aN26coP0cHR3Vr18/ffXVV/rxxx81fPjwBJ8zV65cWrJkicaMGaPly5cbk2j269dPbdu2lclkMvra2dmpcuXKWr9+vcqXL2+0x87jULRoUTk7Oyf43M9jMj9rZgkAwBvv+PHjkqS/J83QnXMXkvRcmQvml8/E0ZKerBc9btw4bdu2Tbdv31bGjBlVsWJFde/eXS4uLsY+ixYt0oQJExQaGqry5ctryJAhcf4Dd/78eTVo0EATJkywGA6IFxMWFqbTp0+rSJEiSps2bXKXg2TEvYBY3Atvn+joaKVKlSq5y7DwOtaEV4c5HAAAVnXz5k198MEHWrhwoQoUKKA2bdqoRIkSWr16tZo3b25M6HTixAl9/fXXcnZ2VrNmzXTo0CH5+fnFGfo4duxYlShRgrABAID/kNgP9uHh4Tp16pTCw8OtXFHia8LbgUcqAABWNW7cOP3999/q16+f2rdvb7T//vvv6tOnj0aMGKGAgAAtXbpUGTNm1OzZs5UmTRqVKlVKPXv21MmTJ1W8eHFJ0smTJ7V+/XpNnz49uS4HAIC3ntlsVnh4+DOXVQQSixEOAACr2rhxo5ycnPTxxx9btDdq1Ei5c+fWzp07FRMTo+DgYOXNm1dp0qSRJBUuXFiSFBwcbOwzZswYlStXTpUrV351FwAAAACrYIQDAMBqoqOj1alTJ9na2srGJm6mbW9vr8ePHysqKkqOjo76+++/jW0PHz6UJGXIkEGSdOjQIW3fvl1z5859NcUDAADAqggcAABWkypVqjgjG2KdP39eFy5cUO7cuWVvby93d3etXr1aGzZsUMWKFTVz5kw5ODioSJEikp6MbqhSpYrKli37Ki8BAAAAVkLgAABIcjExMfruu+8UExOjFi1aSJJatGih5cuXy8/PT5JkY2OjgQMHysnJSbt27dL+/fu1ZMkSi2PEN2oCAAAArycCBwBAkjKbzRo8eLD27Nmj4sWLGyMgUqdOrQULFmjjxo26ceOGypUrp2LFikl6MrrB29tbJUqU0LFjxzRgwACdO3dO+fLl05AhQ4x1ogEAAPD6InAAACSZqKgoff3111q2bJlcXFw0ceJE2dvbG9vt7e1Vv359i302btyo48eP6/fff9fjx4/VtWtX5cmTR1OnTtWSJUvk5+enjRs3KmPGjK/6cgAAAPACGJsKAEgS4eHh6ty5s5YtW6a8efNq9uzZcnZ2fu4+ZrNZY8eOVf369eXm5qbt27frn3/+0cCBA1W1alUNHTpUDx8+1KpVq17RVQAAACCxGOEAALC6e/fuqWPHjgoMDFTRokU1bdo0vfPOO/+53x9//KHz58/L399fkhQUFCRJyps3ryQpY8aMypw5sy5fvpxktQMAAMA6GOEAALCqiIgIderUSYGBgSpfvrzmzJmToLAhKipK48aNU6NGjZQvXz6j7em/Y49vMpmSpngAAABYDYEDAMCqRo0apSNHjsjDw
0NTp05V+vTpE7Tf8uXLdfXqVXXp0sVoy58/vyQpMDBQ0pOlNR88eGCMeAAAAMDri0cqAABWc/PmTc2dO1fSk7Bg6tSp8fb77LPPlDp1auN1ZGSkJk6cqObNmytXrlxGe5UqVZQtWzb16dNH7733njZv3qxMmTKpYcOGSXshAACkICaTSQ4ODq/dCMIvv/xSa9as0ebNm5UzZ06LbQEBARo9erSyZs2qHTt2xNm3Q4cO2rdvnw4ePKg0adK8qpLxLwQOAACrCQwM1OPHjyVJS5cufWa/jz/+2CJwWLhwoW7fvq0vvvjCol+aNGk0ZcoUDR48WHPnzlXBggX1008/JXjUBAAAKYk5OkamVC8+iN3BwUFFixZNgooSX5MklS9fXmvWrFFgYGCcwGH37t2ys7PTjRs3dP78eRUoUMDYFhMTo6NHj8rd3Z2wIZkROAAArMbb21tnz5594f3atGmjNm3axLvNzc1NCxcufNnSAAB465lS2WjviFG6f/lKcpciSXLM7aIK/Xomev/y5ctLko4ePWqxjPajR4905MgRNWzYUMuWLdPu3bstAoe//vpLoaGhqlChQuKLh1UQOAAAAADAW+L+5Su6c+5CcpdhFQUKFFCWLFmMuZxiHTp0SJGRkWrWrJn27dun3bt3W3xxcfjwYUkicHgNMGkkAAAAAOC1VK5cOZ06dUqRkZFG2549e5Q2bVqVKlVKnp6e2r9/v6Kjo43thw8floODg0qVKmWMopw4caJKly4tT09P7d69W5J0/fp19e/fX5UqVVLx4sVVr149TZ061eJY+/btk5ubm7Zt26YffvhBVapUUcmSJeXr66t9+/bFqXfu3LmqV6+eSpYsqYYNG2r9+vVq167dM0dyvu0IHAAAAAAAr6Xy5csrMjJSZ86cMdp2796tcuXKyc7OThUrVlRoaKiOHz9ubD98+LDKlCkjOzs7SdKJEye0ePFi9erVS82aNVPJkiV17do1NWvWTGvWrFGjRo3Uv39/5c2bVyNHjlTPnnEfA/n222918OBBffbZZ+rSpYsuXryozz77THfu3DH6jBo1SkOHDlWOHDnUt29fubu7q0ePHjp16lQSvkOvNx6pAAAAAAC8lp6ex6FkyZK6e/euTp8+rT59+kj6v8cmdu/eLXd3d924cUNXr17Vhx9+aBwjLCxMEyZMUKVKlYy2b775xlhdq2zZspKkVq1a6dtvv9W8efO0ceNGeXt7G/3TpUunhQsXGiFGlixZ1L9/f23YsEEtWrRQcHCwpk+fLm9vb40fP95Y8SN//vwaMWJEEr5DrzdGOAAAAAAAXkv/nsdh3759iomJUcWKFSVJWbNmVYECBYzHG+Kbv8HW1tYIFSQpOjpamzdvVvny5S3aJalz586SpI0bN1q0+/j4GGGDJGNVj5s3b0qSNm/erKioKH3yyScWy4u2atUqRa+uReAAAAAAAHhtlStXTkePHpX0ZCRD5syZ5ebmZmyvWLGisTT34cOHlSFDBhUrVszYniFDBtnb2xuv79y5o7CwMOXPnz/OubJkySJHR0ddvXrVot3JycnidWz4EBMTI0kKCgqSJOXLl8+in729vVxcXF70kt8aBA4AAAAAgNdW+fLlFRwcrJCQEO3du1cVKlSwGEVQsWJFhYeH69SpUzpy5IjKlSunVKlSGduf/rckmc1mi7//LSYmxmI0gyTZ2Dz/o/Pjx48lySLYiJU6dern7vs2I3AAAAAAALy2Yudx2Llzpy5dumQ8TvH0dhsbGx09elSnT5/+z+UwnZyclDZtWl28eDHOths3big0NFTZsmV7oRrz5MkjSXGOaTabjdEPKRGBAwAAAADgtRU7j8O8efMkKU7g4OjoqKJFi2rlypV6/PjxfwYOqVKlUvXq1bV//34dPHjQYltAQIAkqWbNmi9UY+3atWVjY2PUGGv16tUWK1mkNKxSAQAAAAB4rZUrV05r1qxRzpw5lTt37jjbK1asqKlTp+qdd96Rq6vrf
x6vV69e2rt3rzp06KCWLVsqV65c2rVrlzZt2qRatWqpVq1aL1Rf7ty51a5dO82YMUMhISGqVq2aLly4oEWLFsV5PCMlIXAAAAAAgLeEY+7XZ4JCa9ZSvnx5rVmz5pmjF2IDh/Lly1vM7/AsuXLl0pIlSzRmzBgtX75cDx8+VJ48edSvXz+1bds2Qcf4t6+++kqZM2fWokWLtGvXLuXLl0/+/v76+uuv453bISUwmZ81UwYA4I13/PhxSdLfk2bozrkLSXquzAXzy2fi6CQ9BxIvLCxMp0+fVpEiRZQ2bdrkLgfJiHsBsbgX3j7m6BiZUr1eT82/jjUlhbCwMJnNZqVLl86i3Ww2y93dXXXq1NFPP/2UTNUln7f/Jw8AAAAAKUBiP9jHrvAQHh5u5YoSX9Ob5tSpUypdurSWLl1q0b5582Y9evRIJUuWTKbKkhePVAAAAABACmY2mxUeHv7MZSLx30qVKqW8efPqhx9+UFBQkFxcXBQUFKT58+erQIECatasWXKXmCwIHAAAAAAAeAl2dnaaPXu2JkyYoFWrVunWrVt655131KRJE3Xt2lUODg7JXWKyIHAAgBTgVUwg9TpNUgUAAPCqOTs7a+jQocldxmuFwAEAUoAK/Xq+kvOklImhAAAA8N/4rRAA3nKRkZFJMglUfAgbAAAAEIvfDAEgBWASKAAAALxqBA4AAAAAAMDqCBwAAAAAAIDVETgAAAAAAACrI3AAAAAAAABWR+AAAAAAAACsjsABAAAAAABYHYEDAAAAAACwOgIHAAAAAABgdQQOAAAAAADA6ggcAAAAAACA1RE4AAAAAAAAqyNwAAAAAAAAVkfgAAAAAAAArI7AAQAAAAAAWB2BAwAAAAAAsDoCBwAAAAAAYHUEDgAAAAAAwOoIHAAAAAAAgNUROAAAAAAAAKsjcAAAAAAAAFZH4AAAAAAAAKyOwAEAAAAAAFgdgQMAAAAAALA6AgcAAAAAAGB1BA4AAAAAAMDqCBwAAAAAAIDVETgAAAAAAACrI3AAAAAAAABWR+AAAAAAAACsjsABAFIAk8mU3CUgmZlMJjk4OHAvgHsBBu4FxOJeQFIxmc1mc3IXAQBIGsePH5cklShRIpkrAQAAeDvFxJhlY0NYEx/b5C4AAJD0xswOVPD10OQuAwAA4K2Syzm9erQtldxlvLYIHAAgBQi+HqqLwfeTuwwAAACkIMzhAAAAAAAArI7AAQAAAAAAWB2BAwAAAAAAsDoCBwAAAAAAYHUEDgAAAAAAwOoIHAAAAAAAgNUROAAAAAAAAKsjcAAAAAAAAFZH4AAAAAAAAKyOwAEAAAAAAFgdgQMAAAAAAFb2448/ys3NTfv27Yuzbfv27WrTpo08PDzk6empDh066NixY3H6bdq0ST4+PvLw8FDbtm11/vz5OH1CQkJUunRpzZkzJ0mu42UQOAAAAAAAYEXHjh3TrFmz4t22aNEidezYUZcuXVLz5s1Vs2ZNHThwQC1btrQIHa5fv64ePXrIzs5Ovr6+CgoK0qeffqpHjx5ZHG/q1KlydHSUr69vkl5TYtgmdwEAAAAAALwtIiMjNWDAAEVHR8fZdu3aNQ0bNkwFChTQb7/9JicnJ0nShx9+qA8//FAjR47U7NmzJUmrVq1SVFSUZsyYIWdnZ9WtW1e+vr7atm2b6tSpI+lJKDFv3jwNHDhQ9vb2r+4iE4gRDgAAAAAAWElAQIAuXbqkSpUqxdm2ZMkSPXr0SIMGDTLCBkkqVaqUPv30UxUpUsRoCw4OlpOTk5ydnSXJ2BYcHGz0mTRpkrJmzaqmTZsm1eW8FEY4AAAAAABgBWfOnNGUKVPUqVMn3b9/X7t377bYvn37dmXMmFEVKlSIs2+vXr0sXmfMmFFhYWEym80ymUwKDQ2VJGXIkEGSdOXKFS1ZskTDhg2Tre3r+dGeEQ4AAAAAALyk6OhoDRw4UHny5FGnTp3ibDebzTp//rzy58+vmzdvqm/fvqpQoYLc3d3Vo
UMHnT592qJ/qVKlFBYWpjlz5ujhw4eaNm2aTCaT3N3dJUkTJkxQnjx51LBhw1dxeYnyesYgAAAAAAC8QaZPn65Tp05p3rx58c6n8ODBA4WFhSkiIkIffPCBHBwc1KBBA928eVMbNmxQy5YtNXv2bJUoUUKSVLNmTdWoUUPDhg3TsGHDJEkdOnSQq6urzp8/r5UrV2rUqFGysXkyjiAmJsb49+uCwAEAAAAAgJdw8eJFjR8/Xi1btpSHh0e8fcLDwyVJp06dUsWKFRUQEKA0adJIerL8ZefOnTV48GAtX77c2GfSpEnavHmzgoKCVLJkSZUtW1aS5O/vLzc3N9WpU0dBQUHq06ePAgMDlS1bNvXt21f16tVL4itOGAIHAAAAAAASyWw2a+DAgXrnnXfUs2fPZ/YzmUzGv/v162eEDZJUq1YtlS9fXvv379elS5eUN29eY59atWpZHOf06dNat26dAgICZDKZ1Lt3bz169EiTJ0/Wtm3b1Lt3bxUuXFj58uWz7oUmwus13gIAAAAAgDfI3LlzdejQIQ0ZMkTp0qV7Zr/YyR7t7Ozk6uoaZ3vsKhSXL19+7vnGjBkjd3d3Va9eXWfOnNGxY8f05ZdfysvLSwMGDJCjo6MWLVr0EldkPYxwAAAAAAAgkdatWydJ+uyzz+Ld3rZtW0lPHpvImjWrbt26pejo6DjzLURFRUmSHBwcnnmuI0eOaOvWrZo5c6YkKSgoSJKUJ08eSZKtra1cXFz+M7R4VQgcAAAAAABIpCZNmqh8+fJx2nfs2KHAwEA1adJEOXPmlKOjo8qWLas1a9bowIEDqlSpkkX/kydPytbWVgUKFHjmuUaPHi1PT09VrFhR0pOVMZ7+W5IiIiIsHt9ITgQOAAAAAAAkUtOmTeNtv3//vhE4eHp6SpJatGihNWvW6Oeff9acOXOUPn16SdKaNWt09OhR1a5dW05OTvEeb8+ePdq3b5/mz59vtMXO0xAYGChX1//X3p3H5ZT+/wN/VUoqWoxlVMZ632hTWlCJZBljn6YslW3sWceEGQzG4ENjqTGGbIPsLXajGBSyhFJGiMhaU0ILbef3R7/7fN3uIubOjV7Px6PHTNe5zjnvc7rudN7nWiR48uQJUlJS4OzsrMxLfGdMOBARERERERG9B23atIG3tzc2bdqEHj16oHPnznj48CEOHz6Mzz77DNOnTy9z36VLl6Jdu3awsbERy5o3bw4zMzMsWLAASUlJOH/+PIqKiuDp6fk+LueNOGkkERERERER0XsyY8YMLFiwAIaGhti6dSvOnj2L7t27Y8eOHTA2Ni51n6NHjyIuLg4TJ05U2LZixQpYW1tj27ZtyMvLQ0BAAExNTSv4KspHTRAEQdVBEBFRxbh8+TIA4M9DT3Hr7lMVR0NERET0aWloUgP+3zuqOowPFns4EBEREREREZHSMeFARERERERERErHhAMRERERERERKR0TDkRERERERESkdEw4EBEREREREZHSMeFARERERERERErHhAMRERERERERKR0TDkRERERERESkdEw4EBEREREREZHSMeFARERERERERErHhAMRERERERERKR0TDkRERERERESkdEw4EBEREREREZHSMeFAREREREREREpXRdUBEBFRxbNuXgvGtXVVHYZK5eQVIOtZvqrDICIiok+ISR09VYfwQWPCgYioEhjYXaLqEFSuqKgIGhoaqg6DiIiIPjHFxQLU1dVUHcYHiQkHIqJKYMKECbhx44aqw1CZJk2aYPny5aoOQ6Xy8vJw69YtNGzYENWqVVN1OKRCbAskw7ZAMmwL/w2TDWVjwoGIqBK4ceMGEhMSVB0GqZAgCMjLy4MgCKoOhVSMbYFk2BZIhm2BKgonjSQiIiIiIiIipWPCgYiIiIiIiIiUjgkHIiIiIiIiIlI6JhyIiIiIiIiISOmYcCAiIiIiIiIipWPCgYiIiIiIiIiUjgkHIiIiIiIiIlI6JhyIiIiIiIiISOmYc
CAiIiIiIiIipWPCgYiIiIiIiIiUjgkHIiKqdJYtWwapVFrq16RJk8R6O3bsgIuLC1q1aoXRo0fj0aNHCsdKTk5G8+bNcfTo0fd5CUREREQfvCqqDoCIiOh9u3r1KrS0tDBixAiFbU2bNgUAJCQkYObMmbCyskKXLl0QHh4OX19f7NixA2pqamL95cuXw8LCAq6uru8tfiIiIqKPARMORERU6Vy7dg1NmjTBuHHjyqwTEhICfX19bNy4Edra2rCyssLkyZORmJgIc3NzAEBiYiIOHz6MtWvXvq/QiYiIiD4aHFJBRESVSnZ2Nu7duwepVPraenfv3kWDBg2gra0NAGjWrJlYLrNs2TLY2dnB0dGx4gImIiIi+kh9cAmHwMDAMsfVyr5CQ0NVGmNmZiYWLlyILl26wMLCAra2tvD09MSff/6JgoICubrTpk2DVCrFixcvlBrDnTt33lhHdu7XfZ05c6ZC43wTb2/vN8YolUoxbdq09xpXeXl7e7/xQePMmTPidSxdurTMevHx8WK9EydOKC1GV1dXeHh4vPV+FdkmXh0nD5Q8xBUVFYnfl+felsfx48cxevRouLi4wNLSEl26dMGsWbNw8+bNch9D9jPcunXrf47nXZXnM0/lc/XqVQB4Y8KhRo0ayM3NFb/PyckBAFSvXh0AEBsbixMnTmDChAkVFCkRERHRx+2DHVIxatQoNGrUqNRtNjY27zma//Po0SN4eHjg+fPn6Nu3Lxo0aIDc3FzExMRg/vz5OHr0KNasWQNNTc0Ki2HWrFlISkrC9u3by1V/+vTpMDQ0LHVb48aNAQCenp5o06ZNhcZdmlGjRsHd3V38PjY2Ftu3b4enpydatWolltevX/+9xlVRjhw5ovCgLXP48OH3HI3qLFq0CMbGxuL3ISEhmDt3Ls6ePQsNDQ2lnKOgoACzZ8/Grl27YGFhgYEDB8LQ0BC3bt1CWFgYQkND8fPPP6NPnz5KOV9FGjZsGGrUqPHahBWVX1JSEoCS5PGQIUOQkJAAAGjTpg0mTpwo/tvTsmVL7Nu3DxEREWjTpg02bNiAatWqoXnz5gBKejc4OTnB1tZWNRdCRERE9IH7YBMObdu2hYODg6rDULBixQpkZmZi7969aNCggVg+ZMgQLF26FH/88Qd2794t9xCtbNHR0fjss8/KXd/NzQ0mJiavrWNtbQ1ra+v/Gtpbe/UNdlFREbZv346WLVuiV69e7z2eimRqaorr16/j9u3b+OKLLxS2R0REwMjICJmZmSqI7v169Wd77tw5PH/+XKnnCAgIwK5duzBp0iSMGjVKbtuYMWMwduxYTJs2DXXr1kWbNm2Uem5li46ORrdu3VQdxidDlnBYt24dXF1d8c033yApKQl//fUXTp06hU2bNqF58+bw8PBAWFgYfH19AQDq6ur48ccfYWRkhJMnT+Ls2bPYtWuXeNzi4mKoq39wHQeJiIiIVIZ/Gb2lCxcuoH79+nLJBpnBgwdDTU0NFy5ceP+B0QevU6dOAEoSC69KSkpCSkqKWIf+mzt37mDt2rXo0qWLQrIBAPT09BAQEABDQ0P89NNPEARBBVGSqmhoaMDY2Bjr1q1DYGAg/Pz8sHbtWixevBjPnj3DDz/8AACoWrUqtm3bhqVLl2L69OnYtWsXvLy8AJT0bnBzc4OFhQXi4+PRvXt3tGjRAl9++aU4VI2IiIiosvvoEw6PHj3C9OnT0bZtW5ibm6N79+4IDg4WtwcHB0MqleLixYtiWXFxMezt7WFtbY3CwkKx/PLly5BKpdi7d2+Z59PT00NKSgpOnz6tsM3Q0BDx8fGYP3++wrarV69i6NChaNmyJRwcHDBt2jRkZWXJ1Xn69CnmzZsHFxcXmJubo2PHjvj111+Rl5cn1pFKpbh37x7i4uKUOp/Fq+P1ZXNp3L17F76+vmjVqhVsbGzg6+srN2EaAOTn5yMwMBCdOnWCubk52rdvj4ULFyI7O1spscnIYkpOTpYr37p1q
9x8FHfv3oVUKsXOnTuxYsUKdOjQARYWFujZsycOHTqkcNwTJ05gwIABaNmyJWxsbDB8+HAkJiYq1IuMjMTXX38tLpEXFhb2VvHXr18fEokEkZGRCtsOHz6MOnXqwMrKSmFbcXEx/vzzT3z11VcwNzdHmzZt8P333+PevXsKdXfu3Inu3bvD0tISPXv2xPHjx0uN5ebNmxg/fjzs7e1haWmJvn374sCBA291PceOHYNUKsW+ffvkynv27AmpVIp///1XLMvIyECzZs3wxx9/AJCfw8Hb21u8l5aWlgrzdZw5cwb9+vWDpaUlnJycsGDBgjf2hti/fz+KioowaNCgMuvo6+vD3d0dt2/flvv9kJWVhVmzZsHR0RHW1tYYP3480tPTFfaXSqXw9/fH+PHjYWFhgfbt2+PJkycASn4O/fr1g42NjfhZ/t///ic3H0Z5PmOytgwABw4ckGvn9O5++uknHD16VKEXXc+ePWFnZ4crV66Ic3xoaWmhW7duGDx4MMzMzACU/C64fPkyxo8fj4KCAowbNw5GRkYICgqCRCKBr6+v2BaIiIiIKrMPNuHw7NkzZGZmKny9/Ad7eno6PDw8cOzYMfTr1w/Tp09H/fr1MXfuXMybNw8A0K5dOwCQSxAkJibiyZMnyM3NxZUrV8TyEydOQENDQ9ynNB4eHigsLMTgwYPRv39/rF69GpcuXRITF1paWqXuN3jwYNSpUwfTp0+Hi4sLwsLCMHXqVHH706dP0b9/f2zZsgXt27fHDz/8gFatWmH16tUYOnQo8vPzAZSMfTc0NET9+vWxaNEi2NnZvfFePn36tNR7KTvm6/j4+EBNTQ3ff/89+vbtiyNHjmDixIni9uLiYowePRqrVq2Co6MjfvzxR7i6umLz5s0YPHhwuc5RUVauXInw8HB4eXnhu+++w5MnTzBx4kRcu3ZNrBMeHo4RI0ZATU0NkydPxogRI5CSkoL+/fvL9VTZt28ffH19UVxcjMmTJ6NTp06YPXu2OPa7vDp37oy4uDi5h3GgJOHQuXNnqKmpKezz/fffY/78+TA2Nsb06dPRt29fREZGwt3dHampqWK91atXY8aMGTAyMoKfnx9sbW3h6+urcK7r16/Dw8MDCQkJGDZsGKZMmQJdXV1MmjQJGzZsKPe1tG7dGlWrVpX7bGVmZor39+zZs2J5VFQUBEFA+/btFY4zatQocQz8/Pnz4enpKW578uQJRo0aBQsLC/zwww8wMzPDhg0bsGjRotfGdv78eWhqasLS0vK19WRDKc6fPw+gJHnm4+ODkJAQdOvWDZMnT8a///6LmTNnlrp/cHAwMjIyMGPGDHh4eEBfXx+BgYGYMWMGPv/8c/j5+eH777/H559/jnXr1mHx4sUKx3jdZ8zIyEi81pYtW2LRokXivCtUMVq0aAEAColVGUEQsHz5cnTr1k2c4PXhw4f48ccf4ezsjLlz5yInJ+e1iWsiIiKiyuKDncNh7NixpZZPnz4dgwcPBgAsWbIE2dnZ2L17tzhHwcCBAzF//nz8+eefcHd3R7NmzdC4cWOcPn0aY8aMAQDExMTA0NAQOTk5OHPmjPhQEhUVBWtra+jr65cZV9++fZGVlYXly5fjwoUL4kNp9erV4ebmhrFjx8LU1FRhv2+//Va8Jk9PTzx8+BBRUVHIycmBrq4u1qxZgxs3bmDx4sXo2bMnAGDAgAFo2rQp/P39sXXrVgwaNAi9evXC8uXLYWhoWO45DsqaFG/FihVwc3N77b7Ozs6YM2eO+H12djbCwsKQkpKCBg0aYM+ePYiOjsZvv/0mNxzA0dERY8aMwfbt2+Ht7V2uOJXtxYsXOHTokDijfPPmzeHj44P9+/dDIpEgOzsbP//8Mzp06ICVK1eK+3l5eaFnz56YN28eQkNDUVxcjIULF6Jp06bYtm0bqlatCqDk3vj4+EBHR6fcMXXq1Am//fYbjhw5Ij5Yp6Sk4Nq1a5g5c6bCQ050dDT27duHb775R
kyiASWJC09PTyxcuBArVqxAVlYWVqxYAScnJwQFBYnjyM3NzTF9+nS5Y/7888/Q09NDeHg4atSoAaCkl8H48eOxZMkS9OzZE0ZGRm+8Fm1tbTg4OMglHGJiYqCurg4jIyOcPXtWnHcgKioK9erVE5cVfJmjoyP27t2L8+fPo3v37uL9BUomfpw3bx569+4NoCTh17VrV/z111+YNWtWmbGlpaXBwMDgjZOg1q5dW6wPlExemZSUJPc57N+/P4YPH45Tp04p7C8IAlatWgU9PT0x3j///BMdOnSQm+Bx4MCB6NixI6KiohSO8abPWK9eveDn54d69ep9cvOaqEJhYSGuXLkCQRBK7VEk6z3zcjt82f79+5GcnIyAgAAAwO3btwFAHGanr68PQ0NDripCREREhA+4h8PUqVOxfv16ha8uXboAKHmzHhERAWtra+jo6Mi9ue/cuTOAki7fAODi4oJLly6JQxNiYmJgb2+PFi1a4Ny5cwBK3qTGx8eX+gb2VUOHDsXx48cxb948dOnSBQYGBnj27BnCwsLQo0cPxMbGKuwje3iRsbCwQFFRkThBYGRkJExMTNCjRw+5eoMGDYKenl6p3fDLa/HixaXey/Ks9vHqRHWy2dllb80PHToEPT09tGrVSu5nIEvc/P333+8c93/l7OwsJhuA/3tzKesef+rUKWRnZ6NLly4KPT9cXFyQmJiIR48eITExEenp6ejbt6/cQ4iDg4N4zPJq1qwZ6tevL/fzPHz4MD777LNSZ7qX1Rs9erRcuZWVFRwdHXHixAnk5+cjJiYGz58/h6enp9ykdb1795ZboeTx48c4e/Ys2rVrh8LCQvGaHz9+jM6dO+PFixc4efJkua/HxcUF9+7dEx+uYmJi0Lx5c9jb24ufreLiYpw8eRIuLi7lPq5MlSpV5Nqguro6WrRogYyMDLklNN+V7F7JeigdO3YMNWrUQPfu3eVikI3bf5WZmZmYbAAATU1NnDx5Ev7+/nL1MjIyUKNGDXFZxZe96TNGylVcXIwBAwZg+PDhCm1IEARcvHgRVapUEX8OLyssLERgYCB69eqFhg0bimUv/xcoSXaW1luJiIiIqLL5YHs4mJmZvXaVisePH+PZs2eIiooqc4b5+/fvAyh5KFq3bh1iY2Ph4OCACxcuYMqUKbh79y527NiBoqIinDx5EkVFRejQoUO54jMwMMA333yDb775BsXFxYiLi8PatWsRERGBWbNmYf/+/XL1a9asKfe9trY2AIhDDu7evQt7e3uFP1K1tLRgampa6nj98rKxsXnjKhVleTVu2ZAR2R/qd+7cQXZ2dpk/A1ncWVlZKCgokNumr69f5hAUZXj1Lb3sXMXFxQD+783ky0NbXnX//n08fPgQQOlLczZq1Eh8O15ebm5u2Lx5M7Kzs6Gnp4fDhw/Dzc2t1Nnt7969C21tbbklJGUaN26M6OhopKWliT0jXo1RXV0dDRo0EK85NTUVgiBg+/btZS6rKvvclIeLiwt+/vlnnD59GvXr18eZM2fQsWNH1KtXD/v370dmZiZSU1Px+PHjcn+2Xla9enWFNqKtrQ1BEFBYWFjmEpq1a9fGrVu3kJ+f/9o2JvvZyVZ9uXv3LkxMTBR+FmUNY3j18wGUtLPo6GhERETg1q1buHPnjphYLG11mTd9xki5tLS00KFDBxw+fBirV6+WS+atW7cO165dQ+/evcXePy8LCwvDvXv3sHbtWrFMtoRmXFwc2rZti+TkZDx79qzUiYWJiIiIKpsPNuHwJrI/xl1dXcvssi/rLt2qVSvo6enh9OnT0NbWRm5uLhwcHMRZyq9cuYKoqCiYmpqiSZMmZZ7zxo0bCA0NxZdffgkLCwuxXF1dHdbW1vjtt9/g4+ODM2fOICsrCwYGBmKdsh6MZARBKHOm/OLi4jd2Da8ob3pLV1RUBGNjY7nu/i+T9QgYN26c3Jh+ANi4caNSlj6VPUy/6k3L08n2mzVrlvi28lWNGjXCo
0ePAEBu/hCZd1ndoHPnzli3bh1OnDgBa2trXL58GZMnTy61rqxdCIKg8LOQxa+pqSlue1OMss+Np6cnunbtWuo5SxsSVBZTU1M0atQIp0+fhouLC1JSUuDg4IB69eoBKJnH4caNG6hWrRpat25d7uPKvOlzUxZbW1tER0cjLi7utfOcyHphtGrVCkBJey9tQsqy2tir8QmCgHHjxiEiIgJWVlawsLBA3759YW1tjTlz5iAlJUXhGHwT/v5NnToVFy9exLJly3D27Fk0a9YMCQkJOHv2LJo0aaIwcSlQkhz+/fff4e7uLpfAdXJyQt26deHn54evvvoKR48ehYGBgUJvNSIiIqLK6KNNOBgZGaFatWrIz89H27Zt5bZlZmbi3Llz+OKLLwCUPJC1bdsWp06dgp6eHj777DM0adIEdevWhYaGBs6ePYuoqKgyH8BksrKysHbtWgiCIJdweFnTpk1x9uzZMsf/lsXExAS3bt1SeLDMz8/H3bt3yzyfqpmYmODChQuws7NTSIocOHBAfMs3depUPH36VG57aeP5X0eWQHi1p0RpKwiUh6zXgL6+vkIbunTpErKzs6GtrS0+gN+6dUvhGO8yTrtly5aoXbs2IiMjkZ6eDgMDA9jb25da18TEBNHR0bh3755CL5Vbt25BS0sLRkZGcjG+PC5dEASkpqaK+77cU+LVa05NTUVSUhKqVav2VtcjmwQ1JiYGVapUERN8NWvWxLlz53D58mVxgsn3pXv37vjtt9+wYcMGuYTDhQsXsGfPHgwePBifffYZduzYAWNjY3E4i4mJCWJiYhR6Rrw8OefrnD9/HhERERg6dKhCzxkOkfhwmJiYICQkBMuXL8eJEydw7tw51K5dG0OHDsWYMWPkhmLJbN++HRkZGQrDm7S1tbF69WrMmjULwcHBaNKkCRYtWiQ31IaIiIiosvpg53B4kypVqsDFxQWnTp3CpUuX5LYFBARg/PjxuHHjhljm4uKCf/75B5GRkeIDiJ6eHlq0aIGtW7ciPT39jfM3WFtbo379+ti2bRvi4+MVtmdkZCAiIgKOjo5v/dDWsWNH3Lt3T2Fm802bNiEnJ0euO7q6unqZb1zfN1dXV+Tm5iqsbnDgwAFMmjRJXDLR3Nwcbdu2lft63eScpalVqxYAyK0skp+fj4iIiHeK3dHREdra2li7dq3cahpZWVkYP348pk+fDg0NDbRo0QLGxsbYtm2b3FKfFy9exOXLl9/6vGpqanBzc8Px48dx6NAhuLm5oUqV0nN/HTt2BABxOUmZ+Ph4nDx5Es7OzmJCTVdXFxs3bpS7lgMHDiAjI0P8vnbt2rCwsMDevXvlHqIFQcDPP/+MsWPH4vHjx291PS4uLsjKykJwcDBatGghPmjZ2dnh77//xuXLl984nEKWTFJWuzY1NcW3336LyMhI/P7772Ivj2fPnuHEiRPo1asXfHx8kJ6ejmnTponJss6dOyMvLw+bNm0SjyUIgtz3ryNb6vbVnlLHjh1DSkqK3Dj/t/EhfeY/FXXq1MH8+fMRHR2NhIQEHD16FFOnTi012QCUTKwaHx+POnXqKGyTSqXYvn07EhISEB4eDmtr64oOn4iIiOij8NH2cACAKVOm4MyZM+ISlQ0aNEBMTAwOHDiA9u3bw9nZWawrW+oyISEB7u7uYrmDgwPWrFkDHR2dMt8yy2hoaGDJkiXi+bp06QIbGxtUrVoVN2/eRHh4ONTV1TF79uy3vpYRI0YgIiIC06ZNQ2xsLKRSKeLj4xEeHg4LCwsMGDBArGtkZITr168jODgYDg4Orx0GUtG++eYb7NmzB/7+/khKSoKtrS1u376N4OBgGBsbY9iwYUo7V6dOnfDLL79gwYIFSEtLQ/Xq1RESEvLOY90NDQ3x3Xff4ZdffsHXX3+N3r17Q0NDA9u2bUNaWhqWLFkiJgJ++uknjBkzBu7u7vD09MSzZ8/w559/lms1h9J07twZW7ZswYULFzBq1Kgy67Vr1
w5ffvkldu7cifT0dLRr1w4PHz7E5s2boa+vL75F19PTw7Rp0zBz5kwMGDAAvXr1wv3797Flyxa5oT0AMHPmTPj4+MDd3R0DBw5ErVq1EBkZiejoaPTv3x9NmzZ9q2uxtbWFrq4u4uPjMXz4cLHcwcEBhw4dAoA3JvNk93HFihVwdHQsc06QtzFu3DhxRZkjR46gS5cuMDIyQqdOnbBx40YkJiaiXr16cpMD9u7dG6GhoVi8eDFSUlLQrFkzHDlyBP/880+5zmljY4MaNWpg8eLFSEtLQ82aNREXF4fw8HBUrVoVubm5pQ6PeRMjIyPExsZi+/btcHZ2FoesEBERERF9yD7aHg5AyVvMnTt3olOnTti9ezfmzZuHK1euYNy4cVi+fLncGP7atWuLKwq8nFiQzSHg6OhYrgkMLSwscODAAXh5eeHatWtYsmQJ5s6diyNHjqBnz57Yu3fvW42Bl6lRowa2bdsGT09PHD16FPPnz8eFCxcwevRobN68WS62cePGwdDQEAsWLHjnt/vKoqWlhfXr12PEiBG4dOkS5s2bh4MHD6J79+4IDg4udVK9d2VoaIg1a9agcePGWLFiBVauXIk2bdrgp59+eudj+vj44LfffoOuri4CAwOxYsUK1KxZE6tWrZJbPcDFxQVr166FkZERli1bhrCwMEycOBGOjo7vdF47OzsYGBigRo0aCkMbXvXrr79iypQpuHPnDhYsWICwsDB07doVYWFh4rAhoGTJyICAABQVFWHx4sU4evQoFixYIE5qJ2NlZYXt27fD1tYWmzdvxsKFC5GWloYff/wRM2fOfOtr0dTUFO9DaZ+tFi1alPpW+GX9+/eHhYUFNmzYgDVr1rx1DKWpUqUK5syZg7Vr16J27drYvHkz5syZg4iICHTr1g0LFy6Ejo4Ounfvjt9//x1ASU+CoKAgDBkyBMeOHcOiRYugrq6OJUuWlOucNWvWxOrVq9G4cWMEBQXB398f//zzD2bMmAE/Pz/k5+eXuorNm0yZMgUAMG/ePIW5UIiIiIiIPlRqwrvMekdE9AnIz8/Hzp07oaamJteL6FMiG/Yzbdo0JCYkqDga1TEzN1dYPaiyyc3NxT///IPmzZtDR0dH1eGQCrEtkAzbAsmwLVBF+aiHVBAR/RdaWloYOHCgqsMgIiIiIvokfdRDKoiIiIiIiIjow8SEAxEREREREREpHRMORERERERERKR0TDgQERERERERkdIx4UBERERERERESseEAxEREREREREpHRMORERERERERKR0TDgQERERERERkdIx4UBERERERERESseEAxEREREREREpHRMORERERERERKR0VVQdABERVbz27dujcePGFX6ep0+fIj09vcLP87aaNGmi6hCIiIiIKh0mHIiIKoHvv//+vZxHKCqGmsaH2XmuqKgIGhoaqg6DiIiIqNJgwoGIqBKIWbgET++kVug5atQ3Retpkyv0HP8Fkw1ERERE7xcTDkRElcDTO6l4fOOmqsMgIiIiokrkw+z3SkREREREREQfNSYciIiIiIiIiEjpmHAgIiIiIiIiIqVjwoGIiIiIiIiIlI4JByIiIiIiIiJSOiYciIiIiIiIiEjpmHAgIiIiIiIiIqVjwoGIiIiIiIiIlI4JByIiIiIiIiJSOiYciIiIiIiIiEjpmHAgIiKlS09Px6xZs+Di4gJzc3M4OjpiypQpSE1Nlau3Y8cOuLi4oFWrVhg9ejQePXqkcKzk5GQ0b94cR48efV/hExEREZESMOFARERKlZ6ejm+++Qbbt29H48aN4e3tDQsLC+zbtw/u7u5ISUkBACQkJGDmzJmoU6cOvv76a8TGxsLX1xeCIMgdb/ny5bCwsICrq6sKroaIiIiI3lUVVQdARESflsDAQDx48ADTpk3DkCFDxPLdu3fDz88PCxcuxB9//IGQkBDo6+tj48aN0NbWhpWVFSZPnozExESYm5sDABITE3H48GGsXbtWVZdDRERER
O+IPRyIiEipIiMjYWRkhEGDBsmV9+rVC/Xr10d0dDSKi4tx9+5dNGjQANra2gCAZs2aAQDu3r0r7rNs2TLY2dnB0dHx/V0AERERESkFezgQEZHSFBUVYeTIkahSpQrU1RVz2lpaWigoKEBhYSFq1KiBBw8eiNtycnIAANWrVwcAxMbG4sSJEwgODn4/wRMRERGRUjHhQERESqOhoaHQs0EmOTkZN2/eRP369aGlpYWWLVti3759iIiIQJs2bbBhwwZUq1YNzZs3B1DSu8HJyQm2trbv8xKIiIiISEmYcCAiogpXXFyMn3/+GcXFxfDw8AAAeHh4ICwsDL6+vgAAdXV1/PjjjzAyMsLJkydx9uxZ7Nq1S+4YpfWaICIiIqIPExMORERUoQRBwKxZs3D69GmYm5uLPSCqVq2Kbdu2ITIyEmlpabCzs4OZmRmAkt4Nbm5usLCwQHx8PH744QfcuHEDDRs2xOzZs+Hg4KDKSyIiIiKicmDCgYiIKkxhYSFmzpyJ0NBQmJqa4vfff4eWlpa4XUtLC926dZPbJzIyEpcvX8bu3btRUFCAcePG4YsvvkBQUBB27doFX19fREZGQl9f/31fDhERERG9BfZNJSKiCpGXl4cxY8YgNDQUDRo0wMaNG1GnTp3X7iMIApYvX45u3bpBKpXixIkTePjwIX788Uc4Oztj7ty5yMnJwd69e9/TVRARERHRu2IPByIiUronT55g+PDhiIuLQ4sWLbBmzRrUrFnzjfvt378fycnJCAgIAADcvn0bANCgQQMAgL6+PgwNDXHnzp0Ki52IiIiIlIM9HIiISKlevHiBkSNHIi4uDvb29ti0aVO5kg2FhYUIDAxEr1690LBhQ7Hs5f/Kjq+mplYxwRMRERGR0jDhQERESrVkyRJcvHgR1tbWCAoKgp6eXrn2CwsLw7179zB27FixrFGjRgCAuLg4ACVLaz579kzs8UBEREREHy4OqSAiIqVJT09HcHAwgJJkQVBQUKn1RowYgapVq4rf5+fn4/fff4e7uztMTEzEcicnJ9StWxd+fn746quvcPToURgYGKBHjx4VeyFERERE9J8x4UBEREoTFxeHgoICAEBISEiZ9QYNGiSXcNi+fTsyMjIwevRouXra2tpYvXo1Zs2aheDgYDRp0gSLFi0qd68JIiIiIlIdJhyIiEhp3NzckJSU9Nb7eXt7w9vbu9RtUqkU27dv/6+hEREREdF7xjkciIiIiIiIiEjpmHAgIiIiIiIiIqVjwoGIiIiIiIiIlI4JByIiIiIiIiJSOiYciIiIiIiIiEjpmHAgIiIiIiIiIqVjwoGIiIiIiIiIlI4JByIiIiIiIiJSOiYciIiIiIiIiEjpmHAgIiIiIiIiIqVjwoGIiIiIiIiIlI4JByIiIiIiIiJSOiYciIiIiIiIiEjpmHAgIiIiIiIiIqVjwoGIiIiIiIiIlK6KqgMgIqKKV6O+6SdxDiIiIiL6eDDhQERUCbSeNvm9nEcoKoaaBjvPERERERGHVBARffLy8/ORl5f3Xs7FZAMRERERyfAvQyKiSkAQBFWHQERERESVDBMORERERERERKR0TDgQERERERERkdIx4UBERERERERESseEAxEREREREREpHRMORERERERERKR0TDgQERERERERkdIx4UBERERERERESseEAxEREREREREpHRMORERERERERKR0TDgQERERERERkdIx4UBERERERERESseEAxEREREREREpnZogCIKqgyAioopx4cIFCIIATU1NqKmpqTocUiFBEFBQUMC2QGwLJGJbIBm2hY+TlpYWpFKpqsN4rSqqDoCIiCqO7I8G/vFAampq0NLSUnUY9AFgWyAZtgWSYVugisIeDkRERERERESkdJzDgYiIiIiIiIiUjgkHIiIiIiIiIlI6JhyIiIiIiIiISOmYcCAiIiIiIiIipWPCgYiIiIiIiIiUjgkHIiIiIiIiIlI6JhyIiIiIiIiISOmYc
CAiIiIiIiIipWPCgYiIiIiIiIiUjgkHIiIiIiIiIlI6JhyIiIiIiIiISOmYcCAiIiIiIiIipWPCgYjoE3X//n1MmjQJrVu3RqtWrTB27FikpqaqOix6T1avXg1HR8dStz1//hz+/v7o0KEDrKys4OnpidOnT7/nCKmixcfHY/jw4bC1tYWFhQV69+6N8PBwuTpsC5VDUlISRowYAQcHB9jZ2WH8+PG4ffu2XB22hcrl3r17sLGxwbRp0+TK2Q5I2ZhwICL6BGVlZcHHxwenT5/GoEGDMGbMGFy6dAkDBw5EZmamqsOjCnb8+HEEBASUuf27777DunXr0LFjR0ydOhUFBQX49ttvcf78+fcYJVWk5ORkeHt7IykpCd9++y38/PxQrVo1TJ06FevXrxfrsS18+m7duoX+/fvj+vXrGDlyJEaMGIELFy7Aw8MDDx48EOuxLVQegiDghx9+QE5OjsI2tgNSOoGIiD45S5cuFaRSqXD58mWxLCkpSWjevLmwcOFCFUZGFam4uFjYtGmTYGZmJkgkEqFt27YKdU6dOiVIJBJh/fr1YllOTo7QsWNHoU+fPu8xWqpIw4cPF1q2bCk8fPhQLCsqKhI8PT2Fli1bCtnZ2WwLlcT48eMFS0tLITU1VSy7evWqIJFIhHnz5gmCwN8Llc3L/05MnTpVLGc7oIrAHg5ERJ+gffv2oWXLljA3NxfLJBIJWrdujX379qkwMqpInp6e+Pnnn+Hg4AAzM7NS6+zduxeamprw8PAQy3R0dODu7o7ExESkpKS8p2ipohQVFeHcuXNwdnZGnTp1xHJ1dXV8+eWXyM3NxT///MO2UElUqVIFX331FUxMTMQyqVQKAwMDXL16FQB/L1Qmd+7cwa+//gpfX1+FbWwHVBGYcCAi+sQ8efIEqampcskGGTMzM6SlpSEtLU0FkVFFu3//PubOnYs1a9ZAV1e31DoJCQlo2LAhdHR05MplCYqEhIQKj5Mqlrq6Ovbs2QM/Pz+FbbIhVRoaGmwLlcSvv/6K+fPny5U9ePAAWVlZqFevHgD+XqgsiouLMW3aNEilUgwaNEhhO9sBVYQqqg6AiIiU69GjRwAg92ZTpnbt2gBK/tiU/T99Oo4ePQotLa3X1nn06BEsLS0VymXt4f79+xUSG70/ampqMDU1VSjPzc1FSEgIdHR00KJFC7aFSigjIwMJCQnw9/eHjo4Ohg4dCoC/FyqLP//8EwkJCQgPD4e6uuJ7Z7YDqghMOBARfWJkk0BVq1ZNYZu2tjaAkgcP+vS8KdkAlLSP17WNvLw8pcdFqicIAmbMmIH09HSMHTsWVatWZVuohL7++mtxosgpU6ZAIpEA4O+FyuDmzZtYtmwZJkyYgEaNGuHFixcKddgOqCIw4UBE9IkRBAFAyVvOsrxuG1VubBufHkEQMHv2bOzfvx/29vYYPXp0ufZjW/j0TJo0CVpaWjh48CD8/f1x9+5dzJkz5437sS183IqKijB9+nQ0b94cQ4YMeefjsB3Qu2DCgYjoEyMbe1nam4jnz58DAPT09N5rTPTh0NHREdvBy9g2Pk0FBQWYNm0a9u3bB0tLS6xcuRKampoA2BYqo169egEAvvzyS0ycOBHbtm2Dl5cX28Inbt26dUhISMDGjRuRlZUFoOR3AwDk5+cjMzMTenp6bAdUIZhwICL6xBgbGwMA0tPTFbbJJossbX4Hqhzq1avHtlFJ5OXlYdy4cYiKioK9vT1Wrlwp98DAtlC5ffXVVzh48CCuXLnCtvCJO3HiBAoLCzFgwACFbfv378f+/fuxYMECtgOqEEw4EBF9YqpXr4769esjMTFRYVtiYiLq1q2LWrVqqSAy+hCYmZlhz549eP78uTguF4DYXiwsLFQVGilRQUEBfH19ER0djQ4dOmD58uWoWrWqXB22hU/fkydP4OHhAWdnZ8yYMUNum2y+H21tbbaFT9zUqVPx9OlTubKCggKMGDECTk5OGDZsGJo0aYLY2Fi2A
1I6LotJRPQJ6tq1K2JjY+WSDteuXUNMTAy6d++uwshI1bp27Yr8/Hxs27ZNLMvNzcWuXbtgaWmJ+vXrqzA6UpaAgABER0fD1dUVgYGBCskGgG2hMtDX14empib27t0r9+Y6Pz8fGzduhI6ODhwcHNgWPnHm5uZo27at3Ffr1q0BALVq1ULbtm1Ru3ZttgOqEOzhQET0CRo2bBjCw8MxbNgwDBs2DOrq6li/fj3q1KmDYcOGqTo8UiFnZ2c4Oztj8eLFePDgARo2bIgdO3bg4cOHWLhwoarDIyVIS0vD+vXrUaVKFTg5OeHAgQMKddq0acO2UEnMmTMHPj4+6N+/P/r37w91dXWEhobi+vXrmDdvHgwMDNgWCAD/faCKoSbIpjMnIqJPSmpqKhYsWIDTp09DS0sL9vb28PPzg6mpqapDo/fA29sbN2/exMmTJxW25eTkYOnSpThw4ADy8vIglUoxadIkODg4qCBSUrZDhw5hwoQJr60TFBSEdu3asS1UEufOnUNgYCDi4+MBlLzxHjlyJJydncU6bAuVy4sXL2BpaYk+ffrIJRPYDkjZmHAgIiIiIiIiIqXjHA5EREREREREpHRMOBARERERERGR0jHhQERERERERERKx4QDERERERERESkdEw5EREREREREpHRMOBARERERERGR0jHhQERERERERERKx4QDERERERERESkdEw5ERESkcoGBgZBKpQpfZmZmcHBwgLe3N3bv3q3qMCvMs2fPsHnzZlWH8dby8/PRvXt3rFu3TixLTk5G//79YWlpiR49euDIkSOl7tuvXz+MGzeu1G2nTp2Cra0t0tLSKiRuAHB1dYVUKn1tHVm7DA0NFcumTZsGqVSKf/75553OK5VK0atXr3faV5lk1xYZGamU44WGhpb6Gba2tka3bt2wdOlS5OTk/Ofz7Nu3D6mpqXJlaWlpCAkJ+c/HJiLlq6LqAIiIiIhkOnbsiObNm4vfFxYWIjMzEwcPHoSfnx9u3ryJSZMmqTDCitGlSxfUqlULXl5eqg7lrfzxxx/Iy8sT4xYEAZMmTcLDhw/Rr18/XLx4EePGjUNoaCiaNWsm7vf3338jLi4Oe/bsKfW4bdu2hbW1NebOnYvffvvtvVxLebm5ucHY2BifffaZqkP5T+zt7eHr64uGDRsq/bj29vYAgOLiYuTk5CAhIQF//PEHTp8+jc2bN0NLS+udjr148WKsWbMG4eHhYllGRga6du2K1q1b4+uvv1bGJRCREjHhQERERB8MNzc39O3bV6F82LBh6NOnD4KCguDh4QFjY2MVRFdxMjIyUKtWLVWH8VZSUlKwevVqzJkzR3yAvHz5MpKSkrB06VJ069YNeXl5cHFxwc6dOzFz5kwAJUmJ5cuXo1u3bmjatGmZx580aRL69OmDv//+Gx06dHgv11Qebm5ucHNzU3UY/5mDgwMcHByUflx7e/tSe67MnTsXwcHB2LNnD9zd3d/p2BkZGQpleXl5Suk5QUQVg0MqiIiI6IPXoEEDdOzYEUVFRYiOjlZ1OARg3bp10NXVRY8ePcSyu3fvAoA4VKFatWpo0KCBWA4ABw8exPXr1zF+/PjXHr9FixawsbHBqlWrKiB6et9kvQ/OnTun4kiI6H1iwoGIiIg+CnXq1AEAZGVlyZUfPHgQ/fr1g7W1NWxsbDBo0CDExMTI1Tlz5gykUim2bNmCyZMnw9LSEk5OToiNjQVQ8pY0ICAAXbt2hZWVFVxdXTFnzhxkZmbKHSc/Px+rVq1Ct27dYGFhgTZt2uC7775TGFMuG89++vRprF27Fp07d4a5uTnc3NywcuVKFBUVycUFAFevXoVUKkVgYKB4nNjYWPj6+sLJyQnm5uaws7PDkCFDFK4PAO7cuYPJkyeLwxGGDx+O5ORkdOrUCd7e3u90HWV5/Pgxdu/eja5du8p1j9fX1wcA5ObmimXZ2dnQ09MDABQVFSEwMBC9e/fGF1988cbz9OjRAxcvXsTFi
xfLFdf7UNYcDtu2bUOPHj1gZWWFjh07IigoCOHh4ZBKpThz5ozCcS5duoTBgwejZcuWsLe3x/jx4+USMzLp6emYPXs22rVrB3Nzc7i6umLx4sXIzs6Wq+ft7Q1XV1ccP34crq6usLKywoQJE8q8jtLmcLh9+zYmTJiADh06iOeaPXs20tPT3/Y2KdDQ0ACAUodTJCYmYsyYMXBwcIClpSV69eqFrVu3QhAEsY6rqyvCwsIAAL1794arqytCQ0PRsWNHAMCRI0cU5tu4ffs2pkyZgrZt28Lc3BxffvklVq1ahYKCArnzu7q6wtvbGyEhIeLnZ+HChf/5momICQciIiL6SNy5cwfA/yUeAGD58uWYOHEi0tLS0KdPH/Tp0wc3btzAkCFDSp1kcsWKFbh8+TK8vLzQokULmJmZIS8vD/3798eKFSugp6eHfv36icmJQYMGiQ92BQUFGD58OJYsWQJdXV14eXnB2dkZhw8fhru7O65du6ZwvsWLF+O3335Dq1atMHDgQDx//hzLli1DQEAAAMDY2Bi+vr4AgM8++wy+vr7i+PfIyEh4e3vj0qVLcHNzw6BBg2BtbY3Tp09j2LBhcg+8t2/fhqenJw4ePIhWrVqhX79+SE1NxYABAxQSNO9yHa+KjIzE8+fP4ezsLFferFkzVK1aFRs2bEB2djYiIiKQnJwMGxsbAMDu3buRmpqKsWPHvvEcAMTj79+/v1z1VWX+/Pn46aef8Pz5c3zzzTdo2bIlli1bVub8E/fu3ROTQAMHDkSzZs3w119/wcvLSy5Zc//+fbi7u2Pbtm0wMzPD4MGD0bBhQ6xZswbe3t5ydYGSRNDEiRNhY2ODPn36wNbWttzXkJmZicGDB+P48eOwt7fHkCFD0KRJE2zduhU+Pj4KD+lvS5YI6Ny5s1z58ePH0a9fP8TExKBDhw7w8vJCcXExZs+ejVmzZon1fHx8xHlAPD094ePjg+bNm8PHxwcA0LBhQ/j6+opzwCQmJuLrr7/GoUOH0Lp1awwePBj6+vpYsmQJRo8eLSb9ZK5fv465c+fCzc0NXbt2RcuWLf/T9RLR/ycQERERqVhAQIAgkUiEkJCQUrfHx8cLLVq0ECwtLYWMjAxBEAQhLi5OkEqlgpeXl5CbmyvWzczMFDp16iRYWVmJdWNiYgSJRCJYWVkJaWlpcsdetmyZIJFIhF9++UUoLi4Wy//44w9BIpEI69atEwRBEIKCggSJRCIsWrRIITYzMzPh66+/FstCQkIEiUQitGrVSkhJSRHLU1NTBTMzM6Ft27Zyx5BIJELPnj3lyrp06SLY29sL6enpcuWrV68WJBKJ8Ouvv4plI0eOFCQSiXDw4EGx7MWLF0L//v0FiUQieHl5ieVvcx1lmTJliiCRSIQHDx4obAsKChKkUqkgkUgEiUQi9OvXT8jPzxfy8/OFDh06CHPmzBHrvny/y2Jvby907979jfXeVocOHQSJRCIEBASU+eXl5aXQLqdOnSpIJBLhypUrgiCU3DepVCp4eHgI2dnZYr2///5bvAcxMTFiuaxs5cqVcvGMHj1akEgkwuHDh8Wy4cOHC1KpVPj777/l6v7555+CRCIR/ve//4llslgXLFhQruuXfeYiIiIEQRCETZs2CRKJRNi1a5dcvTlz5ggSiUQhhlfJ2ryXl5fcPVy4cKEwcOBAQSqVCkuWLJHbJzc3V2jdurXQpk0bITU1VSwvKioSxo0bJ0gkEuHYsWNi+av3XhBKPlMSiUQYPXq0WFZcXCx0795dsLCwEC5fvix3zvnz5wsSiUTYvHmzWCZrCxs3bnzDXSOit8VJI4mIiOiDERkZiXv37onfFxYW4tatWzh27BgKCwvxww8/wMjICACwa9cuCIIAPz8/VKtWTdzH0NAQw4cPx4wZM3Dw4EEMHDhQ3GZjY6MwOeP+/fuhp6eH7777DmpqamK5l5cXnj59iiZNmojnq1GjhsIqGRYWFujatSv27t2L69evy02E2
LlzZ7mhAyYmJmjcuDGuXr2KFy9eoGrVqqXeh+LiYnz33XfQ0tJSWA1BNtGfbAK9zMxMHD9+HLa2tujatatYT0tLC1OmTEH//v3l9n+X63jVlStXoKuri7p16yps+/bbb2FnZ4cLFy6gXr166NixI6pUqYLg4GBkZGRg1KhRyMvLw4wZM/DXX3+hatWq8PLywsSJE+Xuv0zjxo1x4cIF5Ofnv/PqBq/zX1fB2L17NwRBwMSJE6GrqyuWt2/fHo6Ojjh58qTCPtra2hg6dKhcWYcOHXDkyBFxWEtaWhpOnDgBFxcXtG/fXq6ul5cX1q1bh7CwMPj5+clte7UHQXkVFxcDKOkZ0Lt3b3EIxKRJkzB69OhyT2p69uxZnD17VqFcW1sbBQUFyMvLEz+vR48eRWZmJvz8/GBiYiLWVVdXx3fffYe//voLISEhcHFxeatriYuLw7Vr1zBw4ECYm5vLbZswYQKCg4MRGhoq97sBePd7R0RlY8KBiIiIPhhHjhzBkSNHxO81NTVhYGAAR0dHDBw4EE5OTuK2xMREAMDhw4dx7NgxueM8fPgQABTG2b/8UAOUzN1w+/Zt2NnZKTz86+rq4vvvvwcA5OTk4NatW6hVqxZWrlypEPe///4rnu/lB/UGDRoo1K1evTqAknkUyko4qKuro1OnTgBKut9fv34dd+7cwY0bN8T5AF5+QCwuLoalpaXCcaysrFClyv/9ufeu1/GqjIwMGBoalrndysoKVlZW4vfPnz/HypUrMWDAANSuXRuLFy/G8ePHsWDBAjx9+hTz58+HqalpqasXGBoaQhAEPH78WG44jbIkJSWVuS0wMPCNCYnLly8DQKn338bGptSEw+eff66QPJHdT9kwiStXrkAQBGRlZcnN6yGjqamJBw8e4NGjR3L35dU2Xl5dunTBihUrEBwcjAMHDsDJyQnt2rWDi4vLW62g4uvrK7dKRV5eHu7evYt169Zh7dq1SEhIwMaNGwEACQkJAEracGnXqKGhgatXr771tch+N9y5c6fU4+rq6iIpKQmCIIhJLk1NzQppX0SVHRMORERE9MFYsGBBqctilubZs2cAgNWrV5dZ58mTJ3Lfv/qA//TpUwAQJzUsi2weh/T09Nc+gL56vtLeyMsecISXJsQrTVJSEubNmye+LdbU1ETjxo1hbm6OlJQUcf/Hjx8DgEJPCKDkgU3WI+S/XMersrOzSz1fWYKDg5Gbm4vhw4cDKOll4e7uLq5wcfr0aWzZsqXUhIPsbfjTp0/LfCAs7aHSzc1NHM9fkR4/fgwdHR253g0ytWvXLnWfshJNwP+1C1nbvHTpEi5dulRm/aysLLn7oq2tXZ6wFdSpUwe7du3CypUrceTIEezduxd79+6FpqYm+vbtixkzZrxTD5Nq1aqhadOmmD9/Pq5fv44zZ84gOjoaTk5O4mf4dXN0vKktlkZ276KiohAVFVVmvZycHPGz/673jYhejwkHIiIi+ijp6OhAQ0MDcXFx0NTUfOdjACUPHqXJzc2Fjo6OWM/W1hbBwcHvFvBbyM7OxtChQ/Hs2TNMnToVbdu2RaNGjaClpYW4uDjs27dPrCt7YHp11QKZl69NWdehr68vPiy+SXZ2NoKCguDj4wMjIyM8fvwYWVlZckNNGjRoUOpKDsD/JZZe90BYWvLE2Nj4vSQc9PT0cPfuXRQUFCi0w7J+JuUh+1mNGTPmtatNKJOpqSnmz5+PoqIiJCQkICoqCqGhodi+fTuqV68u9vh5F2pqarC3t8fly5dx9epVODk5ide4YcMGtGnTRlmXIR73l19+KTWJRUTvD1epICIioo+SVCpFUVGRwrAJoOStsL+/P86fP//aY1SvXh2ff/45/vnnH+Tn58tty8/Ph6OjI4YOHYrq1aujXr16uHHjBp4/f65wnPDwcAQGBpa6rOG7iImJwb///ouBAwdi6NChaNasmfh2OTk5GcD/vQk3MzODmpoa4uPjFY5z48YNuYSDsq6jV
q1aCqtflGXDhg0oLi7GsGHDAEBcHeDlVQJevHhR6vwNQEkPAnV19dd2d09KSlL4Km9Pmf/KzMwMRUVFYjf+l8XFxb3zcWXLpcqGHbwqICAAq1evVmi37+rIkSOYPXs2srOzoaGhASsrK/j6+oqJKdkSsv+FrOeBbFjR664xKysLv/zyi9xqM6W1kdLKXnfcgoICLFy4EJs2bXqHKyCit8WEAxEREX2U+vTpA6BkScKX3yRnZ2dj9uzZCAoKUlj6rjQ9e/bEs2fPsGLFCrnyjRs3Ijc3V3zz2qdPH2RlZcHf31+cPwEoeaifO3cu1q9fDwMDg3e6Fk1NTbllB2Vd7mUTQ8rcv39ffJtfWFgIoKQrvKOjI06dOoXjx4+LdfPz87F48WKFcynjOpo2bYrnz5+LExyWJSsrCxs2bBCTNgBQs2ZN6Ovryw0TiIuLK3W+i+LiYiQnJ6Nhw4YVMmGkMsgSG0uXLkVeXp5YHhMTg8jIyHc+rqmpKezs7HDixAkcOnRIblt4eDhWrFiBqKgopd2XmzdvYuvWrdi6datcuWwS13r16v2n46empuLQoUPQ1NREu3btAACdOnWCnp4e1qxZg1u3bsnVX7x4MTZu3CguhwtAnI/k5c9KaWV2dnYwMTHBrl27cPHiRbnjrl69GuvXry81QUREyschFURERPRRat26Nby9vbFp0yZ89dVXcHFxgZaWFiIjI/HgwQP069dPXNHhdUaOHIljx47hjz/+wLlz52BlZYWbN2/i2LFjsLS0xKBBgwAAI0aMQHR0NDZt2oTY2FjY29vj6dOnOHToEPLy8uDv7//GuSDKUrt2bdy8eRM//fQTXFxc0Lp1axgbG2P37t14/PgxmjVrhgcPHuDIkSOoWrUq1NTU5HoY/Pjjj/D09MTo0aPh5uaGOnXq4OTJk8jMzARQMgmljDKuo3379tizZw9iY2NhampaZr01a9ZAU1MTPj4+Ypmamhr69u2LDRs2QENDA0+ePEFcXBz8/f0V9r927Rqys7PF5NKHyNraGv369cO2bdvQu3dvODs7IyMjA4cPH0b16tXx+PFjccWHtzV37lwMHDgQEyZMQLt27dC0aVNx1RYDAwP89NNPSrsODw8P7NixA/7+/jh79iykUikyMjJw6NAh6OjoYMSIEeU6ztmzZ+Xm1CguLkZqaioiIyORl5eHiRMn4vPPPwcA1KhRA/PmzcOUKVPQp08fuLm5oXbt2jh37hzi4+NhYWEht5qHrJfLwoUL0bZtW/j6+sLQ0BBaWlo4c+YMFixYgE6dOsHW1hb/+9//MHz4cHh5eaFjx44wNTVFQkICYmJiYGJigsmTJyvt3hFR2ZhwICIioo/WjBkzYGFhga1bt2LPnj3Q0NBAw4YNMW7cuHI/pOrq6mLLli34/fffcejQIcTFxcHQ0FBcqlH2BllbWxsbN27EmjVrcODAAWzZsgXVq1eHjY0NRo4cCXt7+3e+jlmzZmHevHkICQlBYWEhXF1dsX79evj7+yM2Nhbnz5/H559/jp49e2Ls2LEYMWIEzp8/j5ycHOjq6qJRo0bYunUrfv31V5w6dQqFhYVo3bo1li5dip49e8otG6qM63B2doaWlhaio6PRu3fvUuv8+++/2Lx5MyZMmCCOqZeZPHky8vLysGfPHmhra2PixIniBJIvi46OBoBSt31IZs2ahfr162PHjh3Ytm0b6tSpg++//x7p6elYs2bNO09I2KhRI4SGhuL333/H8ePHcfr0adSuXRu9evXC2LFjX5vseVv6+vrYvHkzVq5ciZMnTyImJgZ6enpo164dfH19X7tqycteXRazSpUqMDAwgJ2dHTw9PeHm5iZX/8svv0TdunWxatUqREVFIS8vD8bGxhgzZgyGDRsmNxnngAEDcOHCBZw/fx7JyckYMmQIdHV1MWvWLAQEBIht2dbWFra2tti5cydWrlyJ06dP4++//0bdunXh7e2NUaNGvdWkp0T07tSEN02RTERER
EQfLNkb5Hr16ilMWpiamgo3Nzf0798fs2fPVup5Z82ahd27d+PkyZPv3LPjTbp16wYDAwNs2bKlQo6vDOnp6eLyra+aOnUqwsPDcerUKdSsWfP9B0dEpGKcw4GIiIjoI6ampobevXujR48eChMIrl27FgDKNbTkbY0YMQKFhYVyk/opU2xsLJKTkzFq1KgKOb6y7NmzBw4ODggLC5Mrv3PnDiIiItCkSRMmG4io0mIPByIiIqKP3P/+9z+sW7cODRs2RLt27aChoYELFy7g0qVLcHJywpo1a8pcBeK/8Pf3x969e3H48GFxoktlGTRoEKpWrYrVq1cr9bjK9vDhQ/To0QN5eXno2LEj6tevj3///ReHDx9Gfn4+goKC0Lp1a1WHSUSkEkw4EBEREX3kiouLERISgp07d+LWrVsoLCyEiYkJevTogSFDhigMtVCW/Px89O7dG3369MHw4cOVdtyoqChMmjQJ+/fvf+1ymB+K27dvY9WqVYiJiUF6ejpq1KiBVq1aYeTIkTAzM1N1eEREKsOEAxEREREREREpHedwICIiIiIiIiKlY8KBiIiIiIiIiJSOCQciIiIiIiIiUjomHIiIiIiIiIhI6ZhwICIiIiIiIiKlY8KBiIiIiIiIiJSOCQciIiIiIiIiUjomHIiIiIiIiIhI6ZhwICIiIiIiIiKl+3+z6gavCAHHswAAAABJRU5ErkJggg==", + "text/plain": [ + "
" + ] + }, + "metadata": {}, + "output_type": "display_data" + } + ], + "source": [ + "def plot_correctness(df, answer_columns, labels):\n", + " error_categories = [\"Expected and Right\", \"Expected but IDK\", \"Expected but Wrong\"]\n", + " annotations = [\"Correct\", \"I don't know\", \"Wrong\"]\n", + " cat_map = dict(zip(error_categories, annotations)) \n", + " # Prepare data for Seaborn\n", + " data = []\n", + " for i, col in enumerate(answer_columns):\n", + " results = evaluate_model(df, col)\n", + " for category in error_categories:\n", + " matrix_error = float(results.loc[category].replace(\"%\", \"\"))\n", + " data.append([labels[i], cat_map[category], matrix_error])\n", "\n", - "# Initialize the evaluator\n", - "evaluator = ConfusionMatrixEvaluator(\n", - " df, answers_column=\"ft_generated_answer_few_shot\"\n", - ")\n", - "evaluator.evaluate_answers()\n", - "error_categories = evaluator.generate_matrices(use_percentages=True)\n", - "error_categories" + " df_plot = pd.DataFrame(data, columns=[\"Model\", \"Error Category\", \"Percentage\"])\n", + " \n", + " # Create the plot\n", + " g = sns.catplot(x=\"Percentage\", y=\"Model\", hue=\"Error Category\", data=df_plot, kind=\"bar\", height=6, aspect=1.5, palette=\"icefire\")\n", + " \n", + " ax = g.facet_axis(0, 0)\n", + " \n", + " # Add annotations\n", + " for i, p in enumerate(ax.patches):\n", + " ax.annotate(f\"{p.get_width():.0f}%\", (p.get_width(), p.get_y() + p.get_height() / 2),\n", + " ha=\"left\", va=\"center\")\n", + " plt.tight_layout()\n", + " plt.xlabel(\"Percentage (%) - Higher is Better\")\n", + " plt.title(\"When Expecting an Answer\")\n", + " plt.show()\n", + "\n", + "plot_correctness(df, answer_columns=[\"generated_answer\", \"ft_generated_answer\", \"ft_generated_answer_few_shot\"], labels=[\"Base Model\", \"Fine-Tuned Model\", \"Few Shot Fine-Tuned Model with Qdrant\"])" ] } ], From 5260c4f026dff889032cbb7cd05a2f3f33f8546e Mon Sep 17 00:00:00 2001 From: NirantK Date: Thu, 7 Sep 2023 
19:07:23 +0530 Subject: [PATCH 20/38] * chore(ModelFinetune.ipynb): add introduction and table of contents to the blog post * feat(ModelFinetune.ipynb): add section on why to read the blog post --- examples/fine-tuned-RAG/ModelFinetune.ipynb | 6 ++++++ 1 file changed, 6 insertions(+) diff --git a/examples/fine-tuned-RAG/ModelFinetune.ipynb b/examples/fine-tuned-RAG/ModelFinetune.ipynb index 78f2c91a28..61c643ede6 100644 --- a/examples/fine-tuned-RAG/ModelFinetune.ipynb +++ b/examples/fine-tuned-RAG/ModelFinetune.ipynb @@ -9,6 +9,12 @@ "\n", "The aim of this blog is to walk through a comprehensive example of how to fine-tune OpenAI models for Retrieval Augmented Generation (RAG). We will also be integrating Qdrant and Few-Shot Learning to boost the model's performance and reduce hallucinations. This could serve as a practical guide for ML practitioners, data scientists, and researchers interested in leveraging the power of OpenAI models for specific use-cases. 🤩\n", "\n", + "## Why should you read this blog?\n", + "\n", + "- You want to learn how to fine-tune OpenAI models for specific use-cases\n", + "- You want to learn how to use Qdrant to improve the performance of your RAG model\n", + "- You want to learn how to use fine-tune to improve the correctness of your RAG model\n", + "\n", "To begin, we've selected a dataset where we've a guarantee that the retrieval is perfect. We've selected a subset of the [SQuAD](https://rajpurkar.github.io/SQuAD-explorer/) dataset, which is a collection of questions and answers about Wikipedia articles. 
We've also included samples where the answer is not present in the context, to demonstrate how RAG handles this case.\n", "\n", "## Table of Contents\n", From 2a61773d6f3abd8e9356112a1fe86962701049ea Mon Sep 17 00:00:00 2001 From: NirantK Date: Thu, 7 Sep 2023 19:08:14 +0530 Subject: [PATCH 21/38] * chore(ModelFinetune.ipynb): update bullet points in the introduction section * feat(ModelFinetune.ipynb): add information about reducing hallucinations in the introduction section --- examples/fine-tuned-RAG/ModelFinetune.ipynb | 7 ++++--- 1 file changed, 4 insertions(+), 3 deletions(-) diff --git a/examples/fine-tuned-RAG/ModelFinetune.ipynb b/examples/fine-tuned-RAG/ModelFinetune.ipynb index 61c643ede6..faf5883d32 100644 --- a/examples/fine-tuned-RAG/ModelFinetune.ipynb +++ b/examples/fine-tuned-RAG/ModelFinetune.ipynb @@ -11,9 +11,10 @@ "\n", "## Why should you read this blog?\n", "\n", - "- You want to learn how to fine-tune OpenAI models for specific use-cases\n", - "- You want to learn how to use Qdrant to improve the performance of your RAG model\n", - "- You want to learn how to use fine-tune to improve the correctness of your RAG model\n", + "You want to learn how to \n", + "- Fine-tune OpenAI models for specific use-cases\n", + "- Use Qdrant to improve the performance of your RAG model\n", + "- Use fine-tuning to improve the correctness of your RAG model and reduce hallucinations\n", "\n", "To begin, we've selected a dataset where we've a guarantee that the retrieval is perfect. We've selected a subset of the [SQuAD](https://rajpurkar.github.io/SQuAD-explorer/) dataset, which is a collection of questions and answers about Wikipedia articles. 
We've also included samples where the answer is not present in the context, to demonstrate how RAG handles this case.\n", "\n", From 448a0595b84ced3bebc9a1568b625e748f9c1d60 Mon Sep 17 00:00:00 2001 From: NirantK Date: Thu, 7 Sep 2023 19:08:49 +0530 Subject: [PATCH 22/38] * docs(ModelFinetune.ipynb): update introduction to clarify the aim of the notebook and target audience --- examples/fine-tuned-RAG/ModelFinetune.ipynb | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) diff --git a/examples/fine-tuned-RAG/ModelFinetune.ipynb b/examples/fine-tuned-RAG/ModelFinetune.ipynb index faf5883d32..c7f10010b8 100644 --- a/examples/fine-tuned-RAG/ModelFinetune.ipynb +++ b/examples/fine-tuned-RAG/ModelFinetune.ipynb @@ -7,7 +7,9 @@ "source": [ "# Fine-Tuning OpenAI Models for Retrieval Augmented Generation (RAG) with Qdrant and Few-Shot Learning\n", "\n", - "The aim of this blog is to walk through a comprehensive example of how to fine-tune OpenAI models for Retrieval Augmented Generation (RAG). We will also be integrating Qdrant and Few-Shot Learning to boost the model's performance and reduce hallucinations. This could serve as a practical guide for ML practitioners, data scientists, and researchers interested in leveraging the power of OpenAI models for specific use-cases. 🤩\n", + "The aim of this notebook is to walk through a comprehensive example of how to fine-tune OpenAI models for Retrieval Augmented Generation (RAG). \n", + "\n", + "We will also be integrating Qdrant and Few-Shot Learning to boost the model's performance and reduce hallucinations. This could serve as a practical guide for ML practitioners, data scientists, and AI Engineers interested in leveraging the power of OpenAI models for specific use-cases. 
🤩\n", "\n", "## Why should you read this blog?\n", "\n", From 5c6ae43cd0ba1edd64734d43479a2066e00a0f7b Mon Sep 17 00:00:00 2001 From: NirantK Date: Thu, 7 Sep 2023 19:23:51 +0530 Subject: [PATCH 23/38] Replace the data verification with a link --- examples/fine-tuned-RAG/ModelFinetune.ipynb | 180 +------------------- 1 file changed, 1 insertion(+), 179 deletions(-) diff --git a/examples/fine-tuned-RAG/ModelFinetune.ipynb b/examples/fine-tuned-RAG/ModelFinetune.ipynb index c7f10010b8..f39ee5e06a 100644 --- a/examples/fine-tuned-RAG/ModelFinetune.ipynb +++ b/examples/fine-tuned-RAG/ModelFinetune.ipynb @@ -300,185 +300,7 @@ "source": [ "#### [Optional] Verify the Fine-Tuning Data\n", "\n", - "The script below will verify that the data is in the format that OpenAI expects." - ] - }, - { - "cell_type": "code", - "execution_count": 10, - "metadata": {}, - "outputs": [ - { - "name": "stdout", - "output_type": "stream", - "text": [ - "Num examples: 100\n", - "First example:\n", - "{'role': 'system', 'content': 'You are a helpful assistant.'}\n", - "{'role': 'user', 'content': \"Answer the following Question based on the Context only. Only answer from the Context. If you don't know the answer, say 'I don't know'.\\n Question: What is a cirque?\\n\\n\\n Context: Glaciers form where the accumulation of snow and ice exceeds ablation. The area in which a glacier forms is called a cirque (corrie or cwm) - a typically armchair-shaped geological feature (such as a depression between mountains enclosed by arêtes) - which collects and compresses through gravity the snow which falls into it. This snow collects and is compacted by the weight of the snow falling above it forming névé. Further crushing of the individual snowflakes and squeezing the air from the snow turns it into 'glacial ice'. This glacial ice will fill the cirque until it 'overflows' through a geological weakness or vacancy, such as the gap between two mountains. 
When the mass of snow and ice is sufficiently thick, it begins to move due to a combination of surface slope, gravity and pressure. On steeper slopes, this can occur with as little as 15 m (50 ft) of snow-ice.\\n\\n\\n Answer:\\n\"}\n", - "{'role': 'assistant', 'content': 'The area in which a glacier forms'}\n", - "No errors found\n", - "Num examples missing system message: 0\n", - "Num examples missing user message: 0\n", - "\n", - "#### Distribution of num_messages_per_example:\n", - "min / max: 3, 3\n", - "mean / median: 3.0, 3.0\n", - "p5 / p95: 3.0, 3.0\n", - "\n", - "#### Distribution of num_total_tokens_per_example:\n", - "min / max: 114, 689\n", - "mean / median: 236.88, 217.0\n", - "p5 / p95: 166.7, 321.3\n", - "\n", - "#### Distribution of num_assistant_tokens_per_example:\n", - "min / max: 1, 13\n", - "mean / median: 3.81, 4.0\n", - "p5 / p95: 1.0, 5.0\n", - "\n", - "0 examples may be over the 4096 token limit, they will be truncated during fine-tuning\n", - "Dataset has ~23688 tokens that will be charged for during training\n", - "By default, you'll train for 3 epochs on this dataset\n", - "By default, you'll be charged for ~71064 tokens\n", - "See pricing page to estimate total costs\n" - ] - } - ], - "source": [ - "# Specify the data path and open the JSONL file\n", - "\n", - "data_path = \"local_cache/100_train.jsonl\"\n", - "\n", - "# Load dataset\n", - "with open(data_path) as f:\n", - " dataset = [json.loads(line) for line in f]\n", - "\n", - "# We can inspect the data quickly by checking the number of examples and the first item\n", - "\n", - "# Initial dataset stats\n", - "print(\"Num examples:\", len(dataset))\n", - "print(\"First example:\")\n", - "for message in dataset[0][\"messages\"]:\n", - " print(message)\n", - "\n", - "# Now that we have a sense of the data, we need to go through all the different examples and check to make sure the formatting is correct and matches the Chat completions message structure\n", - "\n", - "# Format error 
checks\n", - "format_errors = defaultdict(int)\n", - "\n", - "for ex in dataset:\n", - " if not isinstance(ex, dict):\n", - " format_errors[\"data_type\"] += 1\n", - " continue\n", - "\n", - " messages = ex.get(\"messages\", None)\n", - " if not messages:\n", - " format_errors[\"missing_messages_list\"] += 1\n", - " continue\n", - "\n", - " for message in messages:\n", - " if \"role\" not in message or \"content\" not in message:\n", - " format_errors[\"message_missing_key\"] += 1\n", - "\n", - " if any(k not in (\"role\", \"content\", \"name\") for k in message):\n", - " format_errors[\"message_unrecognized_key\"] += 1\n", - "\n", - " if message.get(\"role\", None) not in (\"system\", \"user\", \"assistant\"):\n", - " format_errors[\"unrecognized_role\"] += 1\n", - "\n", - " content = message.get(\"content\", None)\n", - " if not content or not isinstance(content, str):\n", - " format_errors[\"missing_content\"] += 1\n", - "\n", - " if not any(message.get(\"role\", None) == \"assistant\" for message in messages):\n", - " format_errors[\"example_missing_assistant_message\"] += 1\n", - "\n", - "if format_errors:\n", - " print(\"Found errors:\")\n", - " for k, v in format_errors.items():\n", - " print(f\"{k}: {v}\")\n", - "else:\n", - " print(\"No errors found\")\n", - "\n", - "# Beyond the structure of the message, we also need to ensure that the length does not exceed the 4096 token limit.\n", - "\n", - "# Token counting functions\n", - "encoding = tiktoken.get_encoding(\"cl100k_base\")\n", - "\n", - "# not exact!\n", - "# simplified from https://github.com/openai/openai-cookbook/blob/main/examples/How_to_count_tokens_with_tiktoken.ipynb\n", - "def num_tokens_from_messages(messages, tokens_per_message=3, tokens_per_name=1):\n", - " num_tokens = 0\n", - " for message in messages:\n", - " num_tokens += tokens_per_message\n", - " for key, value in message.items():\n", - " num_tokens += len(encoding.encode(value))\n", - " if key == \"name\":\n", - " num_tokens += 
tokens_per_name\n", - " num_tokens += 3\n", - " return num_tokens\n", - "\n", - "def num_assistant_tokens_from_messages(messages):\n", - " num_tokens = 0\n", - " for message in messages:\n", - " if message[\"role\"] == \"assistant\":\n", - " num_tokens += len(encoding.encode(message[\"content\"]))\n", - " return num_tokens\n", - "\n", - "def print_distribution(values, name):\n", - " print(f\"\\n#### Distribution of {name}:\")\n", - " print(f\"min / max: {min(values)}, {max(values)}\")\n", - " print(f\"mean / median: {np.mean(values)}, {np.median(values)}\")\n", - " print(f\"p5 / p95: {np.quantile(values, 0.1)}, {np.quantile(values, 0.9)}\")\n", - "\n", - "# Last, we can look at the results of the different formatting operations before proceeding with creating a fine-tuning job:\n", - "\n", - "# Warnings and tokens counts\n", - "n_missing_system = 0\n", - "n_missing_user = 0\n", - "n_messages = []\n", - "convo_lens = []\n", - "assistant_message_lens = []\n", - "\n", - "for ex in dataset:\n", - " messages = ex[\"messages\"]\n", - " if not any(message[\"role\"] == \"system\" for message in messages):\n", - " n_missing_system += 1\n", - " if not any(message[\"role\"] == \"user\" for message in messages):\n", - " n_missing_user += 1\n", - " n_messages.append(len(messages))\n", - " convo_lens.append(num_tokens_from_messages(messages))\n", - " assistant_message_lens.append(num_assistant_tokens_from_messages(messages))\n", - "\n", - "print(\"Num examples missing system message:\", n_missing_system)\n", - "print(\"Num examples missing user message:\", n_missing_user)\n", - "print_distribution(n_messages, \"num_messages_per_example\")\n", - "print_distribution(convo_lens, \"num_total_tokens_per_example\")\n", - "print_distribution(assistant_message_lens, \"num_assistant_tokens_per_example\")\n", - "n_too_long = sum(l > 4096 for l in convo_lens)\n", - "print(f\"\\n{n_too_long} examples may be over the 4096 token limit, they will be truncated during fine-tuning\")\n", - "\n", 
- "# Pricing and default n_epochs estimate\n", - "MAX_TOKENS_PER_EXAMPLE = 4096\n", - "\n", - "MIN_TARGET_EXAMPLES = 100\n", - "MAX_TARGET_EXAMPLES = 25000\n", - "TARGET_EPOCHS = 3\n", - "MIN_EPOCHS = 1\n", - "MAX_EPOCHS = 25\n", - "\n", - "n_epochs = TARGET_EPOCHS\n", - "n_train_examples = len(dataset)\n", - "if n_train_examples * TARGET_EPOCHS < MIN_TARGET_EXAMPLES:\n", - " n_epochs = min(MAX_EPOCHS, MIN_TARGET_EXAMPLES // n_train_examples)\n", - "elif n_train_examples * TARGET_EPOCHS > MAX_TARGET_EXAMPLES:\n", - " n_epochs = max(MIN_EPOCHS, MAX_TARGET_EXAMPLES // n_train_examples)\n", - "\n", - "n_billing_tokens_in_dataset = sum(min(MAX_TOKENS_PER_EXAMPLE, length) for length in convo_lens)\n", - "print(f\"Dataset has ~{n_billing_tokens_in_dataset} tokens that will be charged for during training\")\n", - "print(f\"By default, you'll train for {n_epochs} epochs on this dataset\")\n", - "print(f\"By default, you'll be charged for ~{n_epochs * n_billing_tokens_in_dataset} tokens\")\n", - "print(\"See pricing page to estimate total costs\")" + "You can see this [cookbook](https://github.com/openai/openai-cookbook/blob/main/examples/Chat_finetuning_data_prep.ipynb) for more details on how to prepare the data for fine-tuning." 
] }, { From 4c74a97c631cfaf168857c20738a89046e1d362d Mon Sep 17 00:00:00 2001 From: NirantK Date: Thu, 7 Sep 2023 19:26:23 +0530 Subject: [PATCH 24/38] * feat(ModelFinetune.ipynb): refactor code to use OpenAIFineTuner class for fine-tuning the OpenAI model --- examples/fine-tuned-RAG/ModelFinetune.ipynb | 285 ++++---------------- 1 file changed, 54 insertions(+), 231 deletions(-) diff --git a/examples/fine-tuned-RAG/ModelFinetune.ipynb b/examples/fine-tuned-RAG/ModelFinetune.ipynb index f39ee5e06a..28b66badfd 100644 --- a/examples/fine-tuned-RAG/ModelFinetune.ipynb +++ b/examples/fine-tuned-RAG/ModelFinetune.ipynb @@ -307,199 +307,74 @@ "cell_type": "markdown", "metadata": {}, "source": [ - "### Upload the Fine-Tuning Data to OpenAI" + "## Fine-Tune OpenAI Model" ] }, { "cell_type": "code", - "execution_count": 11, + "execution_count": 1, "metadata": {}, "outputs": [], "source": [ - "file_object = openai.File.create(\n", - " file=open(\"local_cache/100_train.jsonl\", \"r\"),\n", - " purpose=\"fine-tune\",\n", - ")" - ] - }, - { - "cell_type": "markdown", - "metadata": {}, - "source": [ - "💡 Wait: For file to be uploaded and then processed by OpenAI." 
- ] - }, - { - "cell_type": "code", - "execution_count": 12, - "metadata": {}, - "outputs": [ - { - "data": { - "text/plain": [ - " JSON: {\n", - " \"object\": \"file\",\n", - " \"id\": \"file-X0QsHJXqT3DrG1OaXHKMnwXg\",\n", - " \"purpose\": \"fine-tune\",\n", - " \"filename\": \"file\",\n", - " \"bytes\": 120415,\n", - " \"created_at\": 1694085592,\n", - " \"status\": \"processed\",\n", - " \"status_details\": null\n", - "}" - ] - }, - "execution_count": 12, - "metadata": {}, - "output_type": "execute_result" - } - ], - "source": [ - "while file_object.status!='processed':\n", - " time.sleep(5)\n", - " file_object.refresh()\n", - "file_object" - ] - }, - { - "cell_type": "markdown", - "metadata": {}, - "source": [ - "### Create a Fine Tuning Job" + "class OpenAIFineTuner:\n", + " def __init__(self, training_file_path, model_name, suffix):\n", + " self.training_file_path = training_file_path\n", + " self.model_name = model_name\n", + " self.suffix = suffix\n", + " self.file_object = None\n", + " self.fine_tuning_job = None\n", + " self.model_id = None\n", + "\n", + " def create_openai_file(self):\n", + " self.file_object = openai.File.create(\n", + " file=open(self.training_file_path, \"r\"),\n", + " purpose=\"fine-tune\",\n", + " )\n", + "\n", + " def wait_for_file_processing(self, sleep_time=20):\n", + " while self.file_object.status != 'processed':\n", + " time.sleep(sleep_time)\n", + " self.file_object.refresh()\n", + " print(\"File Status: \", self.file_object.status)\n", + "\n", + " def create_fine_tuning_job(self):\n", + " self.fine_tuning_job = openai.FineTuningJob.create(\n", + " training_file=self.file_object[\"id\"],\n", + " model=self.model_name,\n", + " suffix=self.suffix,\n", + " )\n", + "\n", + " def wait_for_fine_tuning(self, sleep_time=45):\n", + " while self.fine_tuning_job.status != 'succeeded':\n", + " time.sleep(sleep_time)\n", + " self.fine_tuning_job.refresh()\n", + " print(\"Job Status: \", self.fine_tuning_job.status)\n", + "\n", + " def 
retrieve_fine_tuned_model(self):\n", + " self.model_id = openai.FineTuningJob.retrieve(self.fine_tuning_job[\"id\"]).fine_tuned_model\n", + " return self.model_id\n", + "\n", + " def fine_tune_model(self):\n", + " self.create_openai_file()\n", + " self.wait_for_file_processing()\n", + " self.create_fine_tuning_job()\n", + " self.wait_for_fine_tuning()\n", + " return self.retrieve_fine_tuned_model()\n", + "\n", + "fine_tuner = OpenAIFineTuner(\n", + " training_file_path=\"local_cache/100_train.jsonl\",\n", + " model_name=\"gpt-3.5-turbo\",\n", + " suffix=\"100trn20230907\"\n", + " )" ] }, { "cell_type": "code", - "execution_count": 13, + "execution_count": 11, "metadata": {}, "outputs": [], "source": [ - "ft_job = openai.FineTuningJob.create(\n", - " training_file=file_object[\"id\"], model=\"gpt-3.5-turbo\", suffix=\"100train20230906\"\n", - ")" - ] - }, - { - "cell_type": "markdown", - "metadata": {}, - "source": [ - "💡 Wait: For the fine-tuning job to complete and status to be \"succeeded\"." 
- ] - }, - { - "cell_type": "code", - "execution_count": 14, - "metadata": {}, - "outputs": [ - { - "name": "stdout", - "output_type": "stream", - "text": [ - "Status: running\n", - "Status: running\n", - "Status: running\n", - "Status: running\n", - "Status: running\n", - "Status: running\n", - "Status: running\n", - "Status: running\n", - "Status: running\n", - "Status: running\n", - "Status: running\n", - "Status: running\n", - "Status: running\n", - "Status: running\n", - "Status: running\n", - "Status: running\n", - "Status: running\n", - "Status: running\n", - "Status: running\n", - "Status: running\n", - "Status: running\n", - "Status: running\n", - "Status: running\n", - "Status: running\n", - "Status: running\n", - "Status: running\n", - "Status: running\n", - "Status: running\n", - "Status: running\n", - "Status: running\n", - "Status: running\n", - "Status: running\n", - "Status: running\n", - "Status: running\n", - "Status: running\n", - "Status: running\n", - "Status: running\n", - "Status: running\n", - "Status: running\n", - "Status: running\n", - "Status: running\n", - "Status: running\n", - "Status: running\n", - "Status: running\n", - "Status: running\n", - "Status: running\n", - "Status: running\n", - "Status: running\n", - "Status: running\n", - "Status: running\n", - "Status: running\n", - "Status: running\n", - "Status: running\n", - "Status: running\n", - "Status: running\n", - "Status: running\n", - "Status: running\n", - "Status: running\n", - "Status: running\n", - "Status: running\n", - "Status: running\n", - "Status: running\n", - "Status: running\n", - "Status: running\n", - "Status: running\n", - "Status: running\n", - "Status: running\n", - "Status: running\n", - "Status: running\n", - "Status: running\n", - "Status: running\n", - "Status: running\n", - "Status: running\n", - "Status: running\n", - "Status: running\n", - "Status: running\n", - "Status: succeeded\n" - ] - } - ], - "source": [ - "while ft_job.status!='succeeded':\n", - 
" time.sleep(15)\n", - " ft_job.refresh()\n", - " print(\"Status: \", ft_job.status)" - ] - }, - { - "cell_type": "code", - "execution_count": 15, - "metadata": {}, - "outputs": [ - { - "data": { - "text/plain": [ - "'ft:gpt-3.5-turbo-0613:qdrant:100train20230906:7w7eYRbu'" - ] - }, - "execution_count": 15, - "metadata": {}, - "output_type": "execute_result" - } - ], - "source": [ - "model_id = openai.FineTuningJob.retrieve(ft_job[\"id\"]).fine_tuned_model\n", + "model_id = fine_tuner.fine_tune_model()\n", "model_id" ] }, @@ -1092,58 +967,6 @@ " f.write(dataframe_to_jsonl(train_sample))" ] }, - { - "cell_type": "code", - "execution_count": 64, - "metadata": {}, - "outputs": [], - "source": [ - "class OpenAIFineTuner:\n", - " def __init__(self, training_file_path, model_name, suffix):\n", - " self.training_file_path = training_file_path\n", - " self.model_name = model_name\n", - " self.suffix = suffix\n", - " self.file_object = None\n", - " self.fine_tuning_job = None\n", - " self.model_id = None\n", - "\n", - " def create_openai_file(self):\n", - " self.file_object = openai.File.create(\n", - " file=open(self.training_file_path, \"r\"),\n", - " purpose=\"fine-tune\",\n", - " )\n", - "\n", - " def wait_for_file_processing(self, sleep_time=20):\n", - " while self.file_object.status != 'processed':\n", - " time.sleep(sleep_time)\n", - " self.file_object.refresh()\n", - " print(\"File Status: \", self.file_object.status)\n", - "\n", - " def create_fine_tuning_job(self):\n", - " self.fine_tuning_job = openai.FineTuningJob.create(\n", - " training_file=self.file_object[\"id\"],\n", - " model=self.model_name,\n", - " suffix=self.suffix,\n", - " )\n", - "\n", - " def wait_for_fine_tuning(self, sleep_time=45):\n", - " while self.fine_tuning_job.status != 'succeeded':\n", - " time.sleep(sleep_time)\n", - " self.fine_tuning_job.refresh()\n", - " print(\"Job Status: \", self.fine_tuning_job.status)\n", - "\n", - " def retrieve_fine_tuned_model(self):\n", - " self.model_id = 
openai.FineTuningJob.retrieve(self.fine_tuning_job[\"id\"]).fine_tuned_model\n", - " return self.model_id\n", - "\n", - " def fine_tune_model(self):\n", - " self.create_openai_file()\n", - " self.wait_for_file_processing()\n", - " self.create_fine_tuning_job()\n", - " self.wait_for_fine_tuning()\n", - " return self.retrieve_fine_tuned_model()" - ] - }, { "cell_type": "code", "execution_count": 65, From 4fa2b68a7ca7e1fb16090a34552914cb27ecc5fb Mon Sep 17 00:00:00 2001 From: NirantK Date: Thu, 7 Sep 2023 19:30:45 +0530 Subject: [PATCH 25/38] * chore(ModelFinetune.ipynb): update error category descriptions in evaluation section --- examples/fine-tuned-RAG/ModelFinetune.ipynb | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/examples/fine-tuned-RAG/ModelFinetune.ipynb b/examples/fine-tuned-RAG/ModelFinetune.ipynb index 28b66badfd..14ea4d8d97 100644 --- a/examples/fine-tuned-RAG/ModelFinetune.ipynb +++ b/examples/fine-tuned-RAG/ModelFinetune.ipynb @@ -452,7 +452,7 @@ "To evaluate the model's performance, compare the predicted answer to the actual answers -- if any of the actual answers are present in the predicted answer, then it's a match. We've also created error categories to help you understand where the model is struggling.\n", "\n", "1. Expected and Right: The model responsded the correct answer. It may have also included other answers that were not in the context.\n", - "2. Expected but \"IDK\": The model responded with \"I don't know\" (IDK) while the answer was present in the context. *This is a model error* and better than giving the wrong answer. We exclude this from the overall error rate.\n", + "2. Expected but \"IDK\": The model responded with \"I don't know\" (IDK) while the answer was present in the context. *This is a model error* and better than giving the wrong answer. It's better for the model say \"I don't know\" than giving a hallucinated or made up and wrong answer. 
In our design, we know that a true answer exists and hence we're able to measure it -- this is not always the case. We exclude this from the overall error rate. \n", "3. Expected but Wrong: The model responded with an incorrect answer. *This is a model ERROR.*\n", "4. Hallucination: The model responded with an answer, when \"I don't know\" was expected. **This is a model error.** \n", "5. Did not expect and IDK: The model responded with \"I don't know\" (IDK) and the answer was not present in the context. *This is a model WIN.*" From 3d1311450c6f07fefdd8f76126cfe69d728962d6 Mon Sep 17 00:00:00 2001 From: NirantK Date: Thu, 7 Sep 2023 19:31:45 +0530 Subject: [PATCH 26/38] * docs(ModelFinetune.ipynb): add link to OpenAI Cookbook guide for fine-tuning chat models --- examples/fine-tuned-RAG/ModelFinetune.ipynb | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) diff --git a/examples/fine-tuned-RAG/ModelFinetune.ipynb b/examples/fine-tuned-RAG/ModelFinetune.ipynb index 14ea4d8d97..f5eb297639 100644 --- a/examples/fine-tuned-RAG/ModelFinetune.ipynb +++ b/examples/fine-tuned-RAG/ModelFinetune.ipynb @@ -307,7 +307,9 @@ "cell_type": "markdown", "metadata": {}, "source": [ - "## Fine-Tune OpenAI Model" + "## Fine-Tune OpenAI Model\n", + "\n", + "If you're new to OpenAI Model Fine-Tuning, please refer to the [How to finetune Chat models](https://github.com/openai/openai-cookbook/blob/448a0595b84ced3bebc9a1568b625e748f9c1d60/examples/How_to_finetune_chat_models.ipynb)." 
] }, { From b9d633e9390b6354b009a54ec5b1fb1649ee9c57 Mon Sep 17 00:00:00 2001 From: NirantK Date: Thu, 7 Sep 2023 19:32:21 +0530 Subject: [PATCH 27/38] * docs(ModelFinetune.ipynb): update link to OpenAI Fine-Tuning Docs and add more details in the comment --- examples/fine-tuned-RAG/ModelFinetune.ipynb | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/examples/fine-tuned-RAG/ModelFinetune.ipynb b/examples/fine-tuned-RAG/ModelFinetune.ipynb index f5eb297639..2bc25359af 100644 --- a/examples/fine-tuned-RAG/ModelFinetune.ipynb +++ b/examples/fine-tuned-RAG/ModelFinetune.ipynb @@ -309,7 +309,7 @@ "source": [ "## Fine-Tune OpenAI Model\n", "\n", - "If you're new to OpenAI Model Fine-Tuning, please refer to the [How to finetune Chat models](https://github.com/openai/openai-cookbook/blob/448a0595b84ced3bebc9a1568b625e748f9c1d60/examples/How_to_finetune_chat_models.ipynb)." + "If you're new to OpenAI Model Fine-Tuning, please refer to the [How to finetune Chat models](https://github.com/openai/openai-cookbook/blob/448a0595b84ced3bebc9a1568b625e748f9c1d60/examples/How_to_finetune_chat_models.ipynb) notebook. You can also refer to the [OpenAI Fine-Tuning Docs](platform.openai.com/docs/guides/fine-tuning/use-a-fine-tuned-model) for more details." 
] }, { From 1c9ea7eb72333de6802a514d794cbc8d7605f370 Mon Sep 17 00:00:00 2001 From: NirantK Date: Fri, 8 Sep 2023 13:11:52 +0530 Subject: [PATCH 28/38] * chore(fine-tuned-RAG): remove .gitignore file for *.parquet --- examples/fine-tuned-RAG/.gitignore | 1 - 1 file changed, 1 deletion(-) delete mode 100644 examples/fine-tuned-RAG/.gitignore diff --git a/examples/fine-tuned-RAG/.gitignore b/examples/fine-tuned-RAG/.gitignore deleted file mode 100644 index bccc1450f2..0000000000 --- a/examples/fine-tuned-RAG/.gitignore +++ /dev/null @@ -1 +0,0 @@ -*.parquet \ No newline at end of file From f347a9c283fb38f8dfa93f08cb145582724de44d Mon Sep 17 00:00:00 2001 From: NirantK Date: Fri, 8 Sep 2023 13:19:53 +0530 Subject: [PATCH 29/38] Move nbs --- .../ft_retrieval_augmented_generation.ipynb} | 104 +++++++----------- 1 file changed, 41 insertions(+), 63 deletions(-) rename examples/{fine-tuned-RAG/ModelFinetune.ipynb => fine-tuned_qa/ft_retrieval_augmented_generation.ipynb} (63%) diff --git a/examples/fine-tuned-RAG/ModelFinetune.ipynb b/examples/fine-tuned_qa/ft_retrieval_augmented_generation.ipynb similarity index 63% rename from examples/fine-tuned-RAG/ModelFinetune.ipynb rename to examples/fine-tuned_qa/ft_retrieval_augmented_generation.ipynb index 2bc25359af..8489e364a6 100644 --- a/examples/fine-tuned-RAG/ModelFinetune.ipynb +++ b/examples/fine-tuned_qa/ft_retrieval_augmented_generation.ipynb @@ -55,7 +55,7 @@ }, { "cell_type": "code", - "execution_count": 78, + "execution_count": 2, "metadata": {}, "outputs": [], "source": [ @@ -631,7 +631,7 @@ " plt.show()\n", "\n", "# Plot only the overall error\n", - "plot_overall_error(df, answer_columns=[\"generated_answer\", \"ft_generated_answer\"], labels=[\"Base gpt-3.5-turbo-0613 Model\", \"Fine-Tuned Model\"])\n" + "plot_overall_error(df, answer_columns=[\"generated_answer\", \"ft_generated_answer\"], labels=[\"Base gpt-3.5-turbo-0613 Model\", \"Fine-Tuned Model\"])" ] }, { @@ -923,7 +923,7 @@ " rag_prompt = [{\"role\": 
\"system\", \"content\": instruction}] + rag_prompt\n", " return rag_prompt\n", "\n", - "train_sample[\"few_shot_prompt_1K\"] = train_sample.progress_apply(get_few_shot_prompt, axis=1)" + "train_sample[\"few_shot_prompt\"] = train_sample.progress_apply(get_few_shot_prompt, axis=1)" ] }, { @@ -959,7 +959,7 @@ "# Prepare the OpenAI File format i.e. JSONL from train_sample\n", "def dataframe_to_jsonl(df):\n", " def create_jsonl_entry(row):\n", - " messages = row[\"few_shot_prompt_1K\"]\n", + " messages = row[\"few_shot_prompt\"]\n", " return json.dumps({\"messages\": messages})\n", "\n", " jsonl_output = df.progress_apply(create_jsonl_entry, axis=1)\n", @@ -971,48 +971,19 @@ }, { "cell_type": "code", - "execution_count": 65, + "execution_count": 7, "metadata": {}, "outputs": [ { - "name": "stdout", - "output_type": "stream", - "text": [ - "File Status: uploaded\n", - "File Status: uploaded\n", - "File Status: processed\n", - "Job Status: running\n", - "Job Status: running\n", - "Job Status: running\n", - "Job Status: running\n", - "Job Status: running\n", - "Job Status: running\n", - "Job Status: running\n", - "Job Status: running\n", - "Job Status: running\n", - "Job Status: running\n", - "Job Status: running\n", - "Job Status: running\n", - "Job Status: running\n", - "Job Status: running\n", - "Job Status: running\n", - "Job Status: running\n", - "Job Status: running\n", - "Job Status: running\n", - "Job Status: running\n", - "Job Status: running\n", - "Job Status: succeeded\n" + "ename": "NameError", + "evalue": "name 'OpenAIFineTuner' is not defined", + "output_type": "error", + "traceback": [ + "\u001b[0;31m---------------------------------------------------------------------------\u001b[0m", + "\u001b[0;31mNameError\u001b[0m Traceback (most recent call last)", + "Cell \u001b[0;32mIn[7], line 1\u001b[0m\n\u001b[0;32m----> 1\u001b[0m fine_tuner \u001b[39m=\u001b[39m OpenAIFineTuner(\n\u001b[1;32m 2\u001b[0m 
training_file_path\u001b[39m=\u001b[39m\u001b[39m\"\u001b[39m\u001b[39mlocal_cache/100_train_few_shot.jsonl\u001b[39m\u001b[39m\"\u001b[39m,\n\u001b[1;32m 3\u001b[0m model_name\u001b[39m=\u001b[39m\u001b[39m\"\u001b[39m\u001b[39mgpt-3.5-turbo\u001b[39m\u001b[39m\"\u001b[39m,\n\u001b[1;32m 4\u001b[0m suffix\u001b[39m=\u001b[39m\u001b[39m\"\u001b[39m\u001b[39mtrnfewshot20230907\u001b[39m\u001b[39m\"\u001b[39m\n\u001b[1;32m 5\u001b[0m )\n\u001b[1;32m 7\u001b[0m model_id \u001b[39m=\u001b[39m fine_tuner\u001b[39m.\u001b[39mfine_tune_model()\n\u001b[1;32m 8\u001b[0m model_id\n", + "\u001b[0;31mNameError\u001b[0m: name 'OpenAIFineTuner' is not defined" ] - }, - { - "data": { - "text/plain": [ - "'ft:gpt-3.5-turbo-0613:qdrant:trnfewshot20230907:7w8Fhrg6'" - ] - }, - "execution_count": 65, - "metadata": {}, - "output_type": "execute_result" } ], "source": [ @@ -1050,6 +1021,15 @@ "df[\"ft_generated_answer_few_shot\"] = df.progress_apply(answer_question, model=model_id, prompt_func=get_few_shot_prompt, axis=1)" ] }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "df.to_json(\"local_cache/100_val_ft_few_shot.json\", orient=\"records\", lines=True)" + ] + }, { "cell_type": "markdown", "metadata": {}, @@ -1104,28 +1084,19 @@ }, { "cell_type": "code", - "execution_count": 141, + "execution_count": 1, "metadata": {}, "outputs": [ { - "name": "stderr", - "output_type": "stream", - "text": [ - "/opt/homebrew/Caskroom/miniconda/base/envs/fst/lib/python3.9/site-packages/seaborn/axisgrid.py:118: UserWarning: The figure layout has changed to tight\n", - " self._figure.tight_layout(*args, **kwargs)\n", - "/var/folders/b4/grpbcmrd36gc7q5_11whbn540000gn/T/ipykernel_3534/765969223.py:24: UserWarning: The figure layout has changed to tight\n", - " plt.tight_layout()\n" + "ename": "NameError", + "evalue": "name 'df' is not defined", + "output_type": "error", + "traceback": [ + 
"\u001b[0;31m---------------------------------------------------------------------------\u001b[0m", + "\u001b[0;31mNameError\u001b[0m Traceback (most recent call last)", + "Cell \u001b[0;32mIn[1], line 29\u001b[0m\n\u001b[1;32m 26\u001b[0m plt\u001b[39m.\u001b[39mtitle(\u001b[39m\"\u001b[39m\u001b[39mWhen Expecting an Answer\u001b[39m\u001b[39m\"\u001b[39m)\n\u001b[1;32m 27\u001b[0m plt\u001b[39m.\u001b[39mshow()\n\u001b[0;32m---> 29\u001b[0m plot_correctness(df, answer_columns\u001b[39m=\u001b[39m[\u001b[39m\"\u001b[39m\u001b[39mgenerated_answer\u001b[39m\u001b[39m\"\u001b[39m, \u001b[39m\"\u001b[39m\u001b[39mft_generated_answer\u001b[39m\u001b[39m\"\u001b[39m, \u001b[39m\"\u001b[39m\u001b[39mft_generated_answer_few_shot\u001b[39m\u001b[39m\"\u001b[39m], labels\u001b[39m=\u001b[39m[\u001b[39m\"\u001b[39m\u001b[39mBase Model\u001b[39m\u001b[39m\"\u001b[39m, \u001b[39m\"\u001b[39m\u001b[39mFine-Tuned Model\u001b[39m\u001b[39m\"\u001b[39m, \u001b[39m\"\u001b[39m\u001b[39mFew Shot Fine-Tuned Model with Qdrant\u001b[39m\u001b[39m\"\u001b[39m])\n", + "\u001b[0;31mNameError\u001b[0m: name 'df' is not defined" ] - }, - { - "data": { - "image/png": "", - "text/plain": [ - "
" - ] - }, - "metadata": {}, - "output_type": "display_data" } ], "source": [ @@ -1141,10 +1112,10 @@ " matrix_error = float(results.loc[category].replace(\"%\", \"\"))\n", " data.append([labels[i], cat_map[category], matrix_error])\n", "\n", - " df_plot = pd.DataFrame(data, columns=[\"Model\", \"Error Category\", \"Percentage\"])\n", + " df_plot = pd.DataFrame(data, columns=[\"Model\", \"Response\", \"Percentage\"])\n", " \n", " # Create the plot\n", - " g = sns.catplot(x=\"Percentage\", y=\"Model\", hue=\"Error Category\", data=df_plot, kind=\"bar\", height=6, aspect=1.5, palette=\"icefire\")\n", + " g = sns.catplot(x=\"Percentage\", y=\"Model\", hue=\"Response\", data=df_plot, kind=\"bar\", height=6, aspect=1.5, palette=\"icefire\")\n", " \n", " ax = g.facet_axis(0, 0)\n", " \n", @@ -1153,12 +1124,19 @@ " ax.annotate(f\"{p.get_width():.0f}%\", (p.get_width(), p.get_y() + p.get_height() / 2),\n", " ha=\"left\", va=\"center\")\n", " plt.tight_layout()\n", - " plt.xlabel(\"Percentage (%) - Higher is Better\")\n", + " plt.xlabel(\"Percentage (%)\")\n", " plt.title(\"When Expecting an Answer\")\n", " plt.show()\n", "\n", "plot_correctness(df, answer_columns=[\"generated_answer\", \"ft_generated_answer\", \"ft_generated_answer_few_shot\"], labels=[\"Base Model\", \"Fine-Tuned Model\", \"Few Shot Fine-Tuned Model with Qdrant\"])" ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [] } ], "metadata": { From faca10b112dbb68ce4aaf014f9bbdbe3619d9497 Mon Sep 17 00:00:00 2001 From: NirantK Date: Fri, 8 Sep 2023 16:22:40 +0530 Subject: [PATCH 30/38] * chore(.gitignore): update ignored directory path for fine-tuned_qa/local_cache/ --- .gitignore | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/.gitignore b/.gitignore index e4a66a7b25..1ecd08687b 100644 --- a/.gitignore +++ b/.gitignore @@ -133,4 +133,4 @@ dmypy.json /examples/data/transactions* *.DS_Store tmp_* -examples/fine-tuned-RAG/local_cache/* 
+examples/fine-tuned_qa/local_cache/* From d1c0f4eb97ba44503a7a8f24142811b9c9fbe393 Mon Sep 17 00:00:00 2001 From: NirantK Date: Fri, 8 Sep 2023 18:56:56 +0530 Subject: [PATCH 31/38] Better numbering, easier to read, renamed error categories --- .../ft_retrieval_augmented_generation.ipynb | 714 +++++++++++++----- 1 file changed, 507 insertions(+), 207 deletions(-) diff --git a/examples/fine-tuned_qa/ft_retrieval_augmented_generation.ipynb b/examples/fine-tuned_qa/ft_retrieval_augmented_generation.ipynb index 8489e364a6..a7385e5184 100644 --- a/examples/fine-tuned_qa/ft_retrieval_augmented_generation.ipynb +++ b/examples/fine-tuned_qa/ft_retrieval_augmented_generation.ipynb @@ -14,8 +14,8 @@ "## Why should you read this blog?\n", "\n", "You want to learn how to \n", - "- Fine-tune OpenAI models for specific use-cases\n", - "- Use Qdrant to improve the performance of your RAG model\n", + "- [Fine-tune OpenAI models](https://platform.openai.com/docs/guides/fine-tuning/) for specific use-cases\n", + "- Use [Qdrant](https://qdrant.tech/documentation/) to improve the performance of your RAG model\n", "- Use fine-tuning to improve the correctness of your RAG model and reduce hallucinations\n", "\n", "To begin, we've selected a dataset where we've a guarantee that the retrieval is perfect. We've selected a subset of the [SQuAD](https://rajpurkar.github.io/SQuAD-explorer/) dataset, which is a collection of questions and answers about Wikipedia articles. We've also included samples where the answer is not present in the context, to demonstrate how RAG handles this case.\n", @@ -24,9 +24,10 @@ "1. Setting up the Environment\n", "\n", "### Section A: Zero-Shot Learning\n", - "2. Data Preparation\n", - "3. OpenAI Model Fine-Tuning\n", - "4. Baseline Results\n", + "2. Data Preparation: SQuADv2 Dataset\n", + "3. Answering using Base gpt-3.5-turbo-0613 model\n", + "4. Fine-tuning and Answering using Fine-tuned model\n", + "5. 
**Evaluation**: How well does the model perform?\n", "\n", "### Section B: Few-Shot Learning\n", "5. Using Qdrant to Improve RAG Prompt\n", @@ -39,7 +40,7 @@ "cell_type": "markdown", "metadata": {}, "source": [ - "## 1. Setting Up\n", + "## 1. Setting Up the Environment\n", "\n", "### Install and Import Dependencies" ] @@ -79,20 +80,13 @@ "tqdm.pandas()" ] }, - { - "cell_type": "code", - "execution_count": 3, - "metadata": {}, - "outputs": [], - "source": [ - "openai.api_key = os.environ[\"OPENAI_API_KEY\"]" - ] - }, { "attachments": {}, "cell_type": "markdown", "metadata": {}, "source": [ + "## Section A\n", + "\n", "## 2. Data Preparation: SQuADv2 Data Subsets\n", "\n", "For the purpose of demonstration, we'll make small slices from the train and validation splits of the [SQuADv2](https://rajpurkar.github.io/SQuAD-explorer/) dataset. This dataset has questions and contexts where the answer is not present in the context, to help us evaluate how LLM handles this case.\n", @@ -104,25 +98,54 @@ }, { "cell_type": "code", - "execution_count": 4, + "execution_count": 6, "metadata": {}, - "outputs": [], + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "--2023-09-08 16:40:51-- https://rajpurkar.github.io/SQuAD-explorer/dataset/train-v2.0.json\n", + "Resolving rajpurkar.github.io (rajpurkar.github.io)... 185.199.111.153, 185.199.108.153, 185.199.109.153, ...\n", + "Connecting to rajpurkar.github.io (rajpurkar.github.io)|185.199.111.153|:443... connected.\n", + "HTTP request sent, awaiting response... 
200 OK\n", + "Length: 42123633 (40M) [application/json]\n", + "Saving to: ‘local_cache/train.json’\n", + "\n", + "local_cache/train.j 100%[===================>] 40.17M 36.1MB/s in 1.1s \n", + "\n", + "2023-09-08 16:40:55 (36.1 MB/s) - ‘local_cache/train.json’ saved [42123633/42123633]\n", + "\n", + "--2023-09-08 16:40:55-- https://rajpurkar.github.io/SQuAD-explorer/dataset/dev-v2.0.json\n", + "Resolving rajpurkar.github.io (rajpurkar.github.io)... 185.199.108.153, 185.199.109.153, 185.199.110.153, ...\n", + "Connecting to rajpurkar.github.io (rajpurkar.github.io)|185.199.108.153|:443... connected.\n", + "HTTP request sent, awaiting response... 200 OK\n", + "Length: 4370528 (4.2M) [application/json]\n", + "Saving to: ‘local_cache/dev.json’\n", + "\n", + "local_cache/dev.jso 100%[===================>] 4.17M 14.0MB/s in 0.3s \n", + "\n", + "2023-09-08 16:40:56 (14.0 MB/s) - ‘local_cache/dev.json’ saved [4370528/4370528]\n", + "\n" + ] + } + ], "source": [ - "# !mkdir -p local_cache\n", - "# !wget https://rajpurkar.github.io/SQuAD-explorer/dataset/train-v2.0.json -O local_cache/train.json\n", - "# !wget https://rajpurkar.github.io/SQuAD-explorer/dataset/dev-v2.0.json -O local_cache/dev.json" + "!mkdir -p local_cache\n", + "!wget https://rajpurkar.github.io/SQuAD-explorer/dataset/train-v2.0.json -O local_cache/train.json\n", + "!wget https://rajpurkar.github.io/SQuAD-explorer/dataset/dev-v2.0.json -O local_cache/dev.json" ] }, { "cell_type": "markdown", "metadata": {}, "source": [ - "## Read JSON to DataFrame" + "### Read JSON to DataFrame" ] }, { "cell_type": "code", - "execution_count": 5, + "execution_count": 7, "metadata": {}, "outputs": [], "source": [ @@ -171,14 +194,14 @@ "cell_type": "markdown", "metadata": {}, "source": [ - "## Setting up Baseline Model Performance\n", + "## 3. 
Answering using Base gpt-3.5-turbo-0613 model\n", "\n", - "### Utility Functions: Zero Shot Prompt, API Call to OpenAI" + "### 3.1 Zero Shot Prompt" ] }, { "cell_type": "code", - "execution_count": 6, + "execution_count": 8, "metadata": {}, "outputs": [], "source": [ @@ -193,9 +216,22 @@ " Context: {row.context}\\n\\n\n", " Answer:\\n\"\"\",\n", " },\n", - " ]\n", - "\n", - "\n", + " ]" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "### 3.2 Answering using Zero Shot Prompt" + ] + }, + { + "cell_type": "code", + "execution_count": 11, + "metadata": {}, + "outputs": [], + "source": [ "# Function with tenacity for retries\n", "@retry(wait=wait_exponential(multiplier=1, min=2, max=6))\n", "def api_call(messages, model):\n", @@ -215,16 +251,23 @@ " return response[\"choices\"][0][\"message\"][\"content\"]" ] }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "⏰ **Time to run: ~3 min**" + ] + }, { "cell_type": "code", - "execution_count": 7, + "execution_count": 12, "metadata": {}, "outputs": [ { "name": "stderr", "output_type": "stream", "text": [ - "100%|██████████| 100/100 [02:21<00:00, 1.41s/it]\n" + "100%|██████████| 100/100 [01:28<00:00, 1.13it/s]\n" ] } ], @@ -235,7 +278,7 @@ }, { "cell_type": "code", - "execution_count": 8, + "execution_count": 13, "metadata": {}, "outputs": [], "source": [ @@ -246,25 +289,25 @@ "cell_type": "markdown", "metadata": {}, "source": [ - "## Fine-Tuning the OpenAI Model\n", + "## 4. Fine-tuning and Answering using Fine-tuned model\n", "\n", "For the complete fine-tuning process, please refer to the [OpenAI Fine-Tuning Docs](https://platform.openai.com/docs/guides/fine-tuning/use-a-fine-tuned-model).\n", "\n", - "### Prepare the Fine-Tuning Data\n", + "### 4.1 Prepare the Fine-Tuning Data\n", "\n", "We need to prepare the data for fine-tuning. We'll use a few samples from train split of same dataset as before, but we'll add the answer to the context. 
This will help the model learn to retrieve the answer from the context." ] }, { "cell_type": "code", - "execution_count": 9, + "execution_count": 14, "metadata": {}, "outputs": [ { "name": "stderr", "output_type": "stream", "text": [ - "100%|██████████| 100/100 [00:00<00:00, 68067.25it/s]\n" + "100%|██████████| 100/100 [00:00<00:00, 65659.11it/s]\n" ] } ], @@ -298,23 +341,18 @@ "cell_type": "markdown", "metadata": {}, "source": [ - "#### [Optional] Verify the Fine-Tuning Data\n", + "**Tip: 💡 Verify the Fine-Tuning Data**\n", "\n", - "You can see this [cookbook](https://github.com/openai/openai-cookbook/blob/main/examples/Chat_finetuning_data_prep.ipynb) for more details on how to prepare the data for fine-tuning." - ] - }, - { - "cell_type": "markdown", - "metadata": {}, - "source": [ - "## Fine-Tune OpenAI Model\n", + "You can see this [cookbook](https://github.com/openai/openai-cookbook/blob/main/examples/Chat_finetuning_data_prep.ipynb) for more details on how to prepare the data for fine-tuning.\n", + "\n", + "### 4.2 Fine-Tune OpenAI Model\n", "\n", "If you're new to OpenAI Model Fine-Tuning, please refer to the [How to finetune Chat models](https://github.com/openai/openai-cookbook/blob/448a0595b84ced3bebc9a1568b625e748f9c1d60/examples/How_to_finetune_chat_models.ipynb) notebook. You can also refer to the [OpenAI Fine-Tuning Docs](platform.openai.com/docs/guides/fine-tuning/use-a-fine-tuned-model) for more details." 
] }, { "cell_type": "code", - "execution_count": 1, + "execution_count": 15, "metadata": {}, "outputs": [], "source": [ @@ -370,11 +408,58 @@ " )" ] }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "⏰ **Time to run: ~10-20 minutes**" + ] + }, { "cell_type": "code", - "execution_count": 11, + "execution_count": 16, "metadata": {}, - "outputs": [], + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "File Status: uploaded\n", + "File Status: processed\n", + "Job Status: running\n", + "Job Status: running\n", + "Job Status: running\n", + "Job Status: running\n", + "Job Status: running\n", + "Job Status: running\n", + "Job Status: running\n", + "Job Status: running\n", + "Job Status: running\n", + "Job Status: running\n", + "Job Status: running\n", + "Job Status: running\n", + "Job Status: running\n", + "Job Status: running\n", + "Job Status: running\n", + "Job Status: running\n", + "Job Status: running\n", + "Job Status: running\n", + "Job Status: running\n", + "Job Status: running\n", + "Job Status: succeeded\n" + ] + }, + { + "data": { + "text/plain": [ + "'ft:gpt-3.5-turbo-0613:qdrant:100trn20230907:7wU85Xwb'" + ] + }, + "execution_count": 16, + "metadata": {}, + "output_type": "execute_result" + } + ], "source": [ "model_id = fine_tuner.fine_tune_model()\n", "model_id" @@ -384,12 +469,12 @@ "cell_type": "markdown", "metadata": {}, "source": [ - "### Try out the Fine-Tuned Model" + "#### 4.2.1 Try out the Fine-Tuned Model" ] }, { "cell_type": "code", - "execution_count": 16, + "execution_count": 17, "metadata": {}, "outputs": [ { @@ -424,19 +509,26 @@ "cell_type": "markdown", "metadata": {}, "source": [ - "### Using the Fine-Tuned Model" + "### 4.3 Answer Using the Fine-Tuned Model" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "⏰ **Time to run: ~5 min**" ] }, { "cell_type": "code", - "execution_count": 17, + "execution_count": 18, "metadata": {}, "outputs": [ { "name": "stderr", 
"output_type": "stream", "text": [ - "100%|██████████| 100/100 [04:54<00:00, 2.94s/it]\n" + "100%|██████████| 100/100 [05:09<00:00, 3.10s/it]\n" ] } ], @@ -449,20 +541,25 @@ "cell_type": "markdown", "metadata": {}, "source": [ - "## Evaluate Baseline Model Performance\n", + "## 5. Evaluation: How well does the model perform?\n", "\n", "To evaluate the model's performance, compare the predicted answer to the actual answers -- if any of the actual answers are present in the predicted answer, then it's a match. We've also created error categories to help you understand where the model is struggling.\n", "\n", - "1. Expected and Right: The model responsded the correct answer. It may have also included other answers that were not in the context.\n", - "2. Expected but \"IDK\": The model responded with \"I don't know\" (IDK) while the answer was present in the context. *This is a model error* and better than giving the wrong answer. It's better for the model say \"I don't know\" than giving a hallucinated or made up and wrong answer. In our design, we know that a true answer exists and hence we're able to measure it -- this is not always the case. We exclude this from the overall error rate. \n", - "3. Expected but Wrong: The model responded with an incorrect answer. *This is a model ERROR.*\n", - "4. Hallucination: The model responded with an answer, when \"I don't know\" was expected. **This is a model error.** \n", - "5. Did not expect and IDK: The model responded with \"I don't know\" (IDK) and the answer was not present in the context. *This is a model WIN.*" + "When we know that a correct answer exists in the context, we can measure the model's performance, there are 3 possible outcomes:\n", + "\n", + "1. ✅ **Answered Correctly**: The model responsded the correct answer. It may have also included other answers that were not in the context.\n", + "2. ❎ **Skipped**: The model responded with \"I don't know\" (IDK) while the answer was present in the context. 
It's better than giving the wrong answer. It's better for the model say \"I don't know\" than giving the wrong answer. In our design, we know that a true answer exists and hence we're able to measure it -- this is not always the case. *This is a model error*. We exclude this from the overall error rate. \n", + "3. ❌ **Wrong**: The model responded with an incorrect answer. **This is a model ERROR.**\n", + "\n", + "When we know that a correct answer does not exist in the context, we can measure the model's performance, there are 2 possible outcomes:\n", + "\n", + "4. ❌ **Hallucination**: The model responded with an answer, when \"I don't know\" was expected. **This is a model ERROR.** \n", + "5. ✅ **I don't know**: The model responded with \"I don't know\" (IDK) and the answer was not present in the context. **This is a model WIN.**" ] }, { "cell_type": "code", - "execution_count": 18, + "execution_count": 30, "metadata": {}, "outputs": [], "source": [ @@ -471,11 +568,11 @@ " self.df = df\n", " self.y_pred = []\n", " self.labels = [\n", - " \"Expected and Right\",\n", - " \"Expected but IDK\",\n", - " \"Expected but Wrong\",\n", - " \"Hallucination\",\n", - " \"Did not Expect and IDK\",\n", + " \"✅ Answered Correctly\",\n", + " \"❎ Skipped\",\n", + " \"❌ Wrong Answer\", \n", + " \"❌ Hallucination\", \n", + " \"✅ I don't know\"\n", " ]\n", " self.answers_column = answers_column\n", "\n", @@ -485,16 +582,16 @@ " actual_answers = [ans.lower() for ans in row[\"answers\"]]\n", "\n", " y_pred = (\n", - " \"Expected and Right\"\n", + " \"✅ Answered Correctly\"\n", " if not is_impossible\n", " and any(ans in generated_answer for ans in actual_answers)\n", - " else \"Expected but IDK\"\n", + " else \"❎ Skipped\"\n", " if not is_impossible and generated_answer == \"i don't know\"\n", - " else \"Expected but Wrong\"\n", + " else \"❌ Wrong Answer\"\n", " if not is_impossible and generated_answer not in actual_answers\n", - " else \"Hallucination\"\n", + " else \"❌ 
Hallucination\"\n", " if is_impossible and generated_answer != \"i don't know\"\n", - " else \"Did not Expect and IDK\"\n", + " else \"✅ I don't know\"\n", " )\n", " return y_pred\n", "\n", @@ -522,21 +619,21 @@ }, { "cell_type": "code", - "execution_count": 19, + "execution_count": 31, "metadata": {}, "outputs": [ { "data": { "text/plain": [ - "Expected and Right 43.00%\n", - "Expected but IDK 0.00%\n", - "Expected but Wrong 10.00%\n", - "Hallucination 47.00%\n", - "Did not Expect and IDK 0.00%\n", + "✅ Answered Correctly 44.00%\n", + "❎ Skipped 0.00%\n", + "❌ Wrong Answer 9.00%\n", + "❌ Hallucination 47.00%\n", + "✅ I don't know 0.00%\n", "Name: count, dtype: object" ] }, - "execution_count": 19, + "execution_count": 31, "metadata": {}, "output_type": "execute_result" } @@ -547,21 +644,21 @@ }, { "cell_type": "code", - "execution_count": 20, + "execution_count": 32, "metadata": {}, "outputs": [ { "data": { "text/plain": [ - "Expected and Right 34.00%\n", - "Expected but IDK 17.00%\n", - "Expected but Wrong 2.00%\n", - "Hallucination 8.00%\n", - "Did not Expect and IDK 39.00%\n", + "✅ Answered Correctly 32.00%\n", + "❎ Skipped 18.00%\n", + "❌ Wrong Answer 3.00%\n", + "❌ Hallucination 7.00%\n", + "✅ I don't know 40.00%\n", "Name: count, dtype: object" ] }, - "execution_count": 20, + "execution_count": 32, "metadata": {}, "output_type": "execute_result" } @@ -572,7 +669,7 @@ }, { "cell_type": "code", - "execution_count": 21, + "execution_count": 33, "metadata": {}, "outputs": [], "source": [ @@ -580,6 +677,15 @@ "df.to_json(\"local_cache/100_val_ft.json\", orient=\"records\", lines=True)" ] }, + { + "cell_type": "code", + "execution_count": 34, + "metadata": {}, + "outputs": [], + "source": [ + "df = pd.read_json(\"local_cache/100_val_ft.json\", orient=\"records\", lines=True)" + ] + }, { "attachments": {}, "cell_type": "markdown", @@ -590,12 +696,24 @@ }, { "cell_type": "code", - "execution_count": 144, + "execution_count": 36, "metadata": {}, "outputs": [ + { + 
"name": "stderr", + "output_type": "stream", + "text": [ + "/opt/homebrew/Caskroom/miniconda/base/envs/fst/lib/python3.9/site-packages/seaborn/_oldcore.py:1498: FutureWarning: is_categorical_dtype is deprecated and will be removed in a future version. Use isinstance(dtype, CategoricalDtype) instead\n", + " if pd.api.types.is_categorical_dtype(vector):\n", + "/opt/homebrew/Caskroom/miniconda/base/envs/fst/lib/python3.9/site-packages/seaborn/_oldcore.py:1498: FutureWarning: is_categorical_dtype is deprecated and will be removed in a future version. Use isinstance(dtype, CategoricalDtype) instead\n", + " if pd.api.types.is_categorical_dtype(vector):\n", + "/opt/homebrew/Caskroom/miniconda/base/envs/fst/lib/python3.9/site-packages/seaborn/_oldcore.py:1498: FutureWarning: is_categorical_dtype is deprecated and will be removed in a future version. Use isinstance(dtype, CategoricalDtype) instead\n", + " if pd.api.types.is_categorical_dtype(vector):\n" + ] + }, { "data": { - "image/png": "", + "image/png": "", "text/plain": [ "
" ] @@ -606,7 +724,7 @@ ], "source": [ "def plot_overall_error(df, answer_columns, labels):\n", - " error_categories = [\"Expected but Wrong\", \"Hallucination\"]\n", + " error_categories = [\"❌ Hallucination\", \"❌ Wrong Answer\"]\n", " \n", " # Prepare data for Seaborn\n", " data = []\n", @@ -634,19 +752,114 @@ "plot_overall_error(df, answer_columns=[\"generated_answer\", \"ft_generated_answer\"], labels=[\"Base gpt-3.5-turbo-0613 Model\", \"Fine-Tuned Model\"])" ] }, + { + "cell_type": "code", + "execution_count": 51, + "metadata": {}, + "outputs": [ + { + "name": "stderr", + "output_type": "stream", + "text": [ + "/opt/homebrew/Caskroom/miniconda/base/envs/fst/lib/python3.9/site-packages/seaborn/_oldcore.py:1498: FutureWarning: is_categorical_dtype is deprecated and will be removed in a future version. Use isinstance(dtype, CategoricalDtype) instead\n", + " if pd.api.types.is_categorical_dtype(vector):\n", + "/opt/homebrew/Caskroom/miniconda/base/envs/fst/lib/python3.9/site-packages/seaborn/_oldcore.py:1498: FutureWarning: is_categorical_dtype is deprecated and will be removed in a future version. Use isinstance(dtype, CategoricalDtype) instead\n", + " if pd.api.types.is_categorical_dtype(vector):\n", + "/opt/homebrew/Caskroom/miniconda/base/envs/fst/lib/python3.9/site-packages/seaborn/_oldcore.py:1498: FutureWarning: is_categorical_dtype is deprecated and will be removed in a future version. Use isinstance(dtype, CategoricalDtype) instead\n", + " if pd.api.types.is_categorical_dtype(vector):\n", + "/opt/homebrew/Caskroom/miniconda/base/envs/fst/lib/python3.9/site-packages/seaborn/_oldcore.py:1498: FutureWarning: is_categorical_dtype is deprecated and will be removed in a future version. 
Use isinstance(dtype, CategoricalDtype) instead\n", + " if pd.api.types.is_categorical_dtype(vector):\n", + "/opt/homebrew/Caskroom/miniconda/base/envs/fst/lib/python3.9/site-packages/seaborn/_oldcore.py:1498: FutureWarning: is_categorical_dtype is deprecated and will be removed in a future version. Use isinstance(dtype, CategoricalDtype) instead\n", + " if pd.api.types.is_categorical_dtype(vector):\n", + "/opt/homebrew/Caskroom/miniconda/base/envs/fst/lib/python3.9/site-packages/seaborn/_oldcore.py:1498: FutureWarning: is_categorical_dtype is deprecated and will be removed in a future version. Use isinstance(dtype, CategoricalDtype) instead\n", + " if pd.api.types.is_categorical_dtype(vector):\n", + "/opt/homebrew/Caskroom/miniconda/base/envs/fst/lib/python3.9/site-packages/seaborn/axisgrid.py:118: UserWarning: The figure layout has changed to tight\n", + " self._figure.tight_layout(*args, **kwargs)\n", + "/opt/homebrew/Caskroom/miniconda/base/envs/fst/lib/python3.9/site-packages/seaborn/utils.py:80: UserWarning: Glyph 9989 (\\N{WHITE HEAVY CHECK MARK}) missing from current font.\n", + " fig.canvas.draw()\n", + "/opt/homebrew/Caskroom/miniconda/base/envs/fst/lib/python3.9/site-packages/seaborn/utils.py:80: UserWarning: Glyph 10062 (\\N{NEGATIVE SQUARED CROSS MARK}) missing from current font.\n", + " fig.canvas.draw()\n", + "/opt/homebrew/Caskroom/miniconda/base/envs/fst/lib/python3.9/site-packages/seaborn/utils.py:80: UserWarning: Glyph 10060 (\\N{CROSS MARK}) missing from current font.\n", + " fig.canvas.draw()\n", + "/opt/homebrew/Caskroom/miniconda/base/envs/fst/lib/python3.9/site-packages/IPython/core/pylabtools.py:152: UserWarning: Glyph 9989 (\\N{WHITE HEAVY CHECK MARK}) missing from current font.\n", + " fig.canvas.print_figure(bytes_io, **kw)\n", + "/opt/homebrew/Caskroom/miniconda/base/envs/fst/lib/python3.9/site-packages/IPython/core/pylabtools.py:152: UserWarning: Glyph 10062 (\\N{NEGATIVE SQUARED CROSS MARK}) missing from current font.\n", + " 
fig.canvas.print_figure(bytes_io, **kw)\n", + "/opt/homebrew/Caskroom/miniconda/base/envs/fst/lib/python3.9/site-packages/IPython/core/pylabtools.py:152: UserWarning: Glyph 10060 (\\N{CROSS MARK}) missing from current font.\n", + " fig.canvas.print_figure(bytes_io, **kw)\n" + ] + }, + { + "data": { + "image/png": "", + "text/plain": [ + "
" + ] + }, + "metadata": {}, + "output_type": "display_data" + } + ], + "source": [ + "def plot_correctness(df, answer_columns, labels):\n", + " error_categories = [\n", + " \"✅ Answered Correctly\",\n", + " \"❎ Skipped\",\n", + " \"❌ Wrong Answer\", \n", + " \"❌ Hallucination\", \n", + " \"✅ I don't know\"\n", + " ]\n", + " annotations = [\n", + " \"✅ Answered Correctly\",\n", + " \"❎ Skipped\",\n", + " \"❌ Wrong Answer\", \n", + " \"❌ Hallucination\", \n", + " \"✅ I don't know\"\n", + " ]\n", + " cat_map = dict(zip(error_categories, annotations)) \n", + " # Prepare data for Seaborn\n", + " data = []\n", + " for i, col in enumerate(answer_columns):\n", + " results = evaluate_model(df, col)\n", + " for category in error_categories:\n", + " matrix_error = float(results.loc[category].replace(\"%\", \"\"))\n", + " data.append([labels[i], cat_map[category], matrix_error])\n", + "\n", + " df_plot = pd.DataFrame(data, columns=[\"Model\", \"Response\", \"Percentage\"])\n", + " \n", + " # Create the plot\n", + " g = sns.catplot(x=\"Percentage\", y=\"Model\", hue=\"Response\", data=df_plot, kind=\"bar\", height=6, aspect=1.5, palette=\"icefire\")\n", + " \n", + " ax = g.facet_axis(0, 0)\n", + " \n", + " # Add annotations\n", + " for i, p in enumerate(ax.patches):\n", + " ax.annotate(f\"{p.get_width():.0f}%\", (p.get_width(), p.get_y() + p.get_height() / 2),\n", + " ha=\"left\", va=\"center\")\n", + " plt.xlabel(\"Percentage (%)\")\n", + " plt.title(\"Answer Distribution\")\n", + " plt.show()\n", + "\n", + "plot_correctness(df, answer_columns=[\"generated_answer\", \"ft_generated_answer\"], labels=[\"Base Model\", \"Fine-Tuned Model\"])" + ] + }, { "attachments": {}, "cell_type": "markdown", "metadata": {}, "source": [ - "# Few Shot Learning\n", + "# Section B: Few Shot Learning\n", "\n", "\n", "We'll select a few examples from the dataset, including cases where the answer is not present in the context. 
We'll then use these examples to create a prompt that we can use to fine-tune the model. We'll then measure the performance of the fine-tuned model.\n", "\n", "## 5. Fine-Tuning OpenAI Model with Qdrant\n", "\n", - "So far, we've been using the OpenAI model to answer questions where the answer is present in the context. But what if we want to answer questions where the answer is not present in the context? This is where few-shot learning comes in. Few-shot learning is a type of transfer learning that allows us to answer questions where the answer is not present in the context. We can do this by providing a few examples of the answer we're looking for, and the model will learn to answer questions where the answer is not present in the context." + "So far, we've been using the OpenAI model to answer questions without using examples of the answer. The previous step made it work better on in-context examples, while this one helps it generalize to unseen data, and attempt to learn when to say \"I don't know\" and when to give an answer.\n", + "\n", + "This is where few-shot learning comes in!\n", + "\n", + "Few-shot learning is a type of transfer learning that allows us to answer questions where the answer is not present in the context. We can do this by providing a few examples of the answer we're looking for, and the model will learn to answer questions where the answer is not present in the context." ] }, { @@ -654,14 +867,14 @@ "cell_type": "markdown", "metadata": {}, "source": [ - "### Embed the Training Data\n", + "### 5.1 Embed the Training Data\n", "\n", "Embeddings are a way to represent sentences as an array of floats. We'll use the embeddings to find the most similar questions to the ones we're looking for." 
] }, { "cell_type": "code", - "execution_count": null, + "execution_count": 38, "metadata": {}, "outputs": [], "source": [ @@ -670,7 +883,7 @@ }, { "cell_type": "code", - "execution_count": 51, + "execution_count": 39, "metadata": {}, "outputs": [], "source": [ @@ -690,7 +903,7 @@ }, { "cell_type": "code", - "execution_count": 57, + "execution_count": 40, "metadata": {}, "outputs": [], "source": [ @@ -709,9 +922,17 @@ }, { "cell_type": "code", - "execution_count": 58, + "execution_count": 41, "metadata": {}, - "outputs": [], + "outputs": [ + { + "name": "stderr", + "output_type": "stream", + "text": [ + "100%|██████████| 77.7M/77.7M [00:07<00:00, 10.9MiB/s]\n" + ] + } + ], "source": [ "from fastembed.embedding import DefaultEmbedding\n", "from typing import List\n", @@ -728,16 +949,16 @@ "cell_type": "markdown", "metadata": {}, "source": [ - "## Embedding the Questions\n", + "### 5.3 Embedding the Questions\n", "\n", "We embed the entire training set questions. We'll use the question to question similarity to find the most similar questions to the question we're looking for. This is a workflow which is used in RAG to leverage the OpenAI model ability of incontext learning with more examples. This is what we call Few Shot Learning here.\n", "\n", - "### ❗️ Important Note: This step can take upto 3 hours to complete. Please be patient. If you see Out of Memory errors or Kernel Crashes, please reduce the batch size to 32, restart the kernel and run the notebook again. This code needs to be run only ONCE. " + "**❗️⏰ Important Note: This step can take upto 3 hours to complete. Please be patient. If you see Out of Memory errors or Kernel Crashes, please reduce the batch size to 32, restart the kernel and run the notebook again. 
This code needs to be run only ONCE.**" ] }, { "cell_type": "code", - "execution_count": 59, + "execution_count": 42, "metadata": {}, "outputs": [], "source": [ @@ -794,14 +1015,14 @@ "cell_type": "markdown", "metadata": {}, "source": [ - "### Upload the Embeddings to Qdrant\n", + "#### Upload the Embeddings to Qdrant\n", "\n", "Note that configuring Qdrant is outside the scope of this notebook. Please refer to the [Qdrant](https://qdrant.tech) for more information. We used a timeout of 600 seconds for the upload, and grpc compression to speed up the upload." ] }, { "cell_type": "code", - "execution_count": 60, + "execution_count": 43, "metadata": {}, "outputs": [], "source": [ @@ -815,31 +1036,24 @@ "cell_type": "markdown", "metadata": {}, "source": [ - "# Using Qdrant to Improve RAG Prompt\n", + "## 6. Using Qdrant to Improve RAG Prompt\n", "\n", - "Now that we've uploaded the embeddings to Qdrant, we can use Qdrant to find the most similar questions to the question we're looking for. We'll use the top 5 most similar questions to create a prompt that we can use to fine-tune the model. We'll then measure the performance of the fine-tuned model on the same validation set, but with few shot prompting!" + "Now that we've uploaded the embeddings to Qdrant, we can use Qdrant to find the most similar questions to the question we're looking for. We'll use the top 5 most similar questions to create a prompt that we can use to fine-tune the model. We'll then measure the performance of the fine-tuned model on the same validation set, but with few shot prompting!\n", + "\n", + "Our main function `get_few_shot_prompt` serves as the workhorse for generating prompts for few-shot learning. It does this by retrieving similar questions from Qdrant - a vector search engine, using an embeddings model. Here is the high-level workflow:\n", + "\n", + "1. Retrieve similar questions from Qdrant where the **answer is present** in the context\n", + "2. 
Retrieve similar questions from Qdrant where the **answer is IMPOSSIBLE** i.e. the expected answer is \"I don't know\" to find in the context\n", + "3. Create a prompt using the retrieved questions\n", + "4. Fine-tune the model using the prompt\n", + "5. Evaluate the fine-tuned model on the validation set with the same prompting technique" ] }, { "cell_type": "code", - "execution_count": 62, + "execution_count": 75, "metadata": {}, - "outputs": [ - { - "data": { - "application/vnd.jupyter.widget-view+json": { - "model_id": "ca88ff9dbfe24e9592ca03fc5c33c80e", - "version_major": 2, - "version_minor": 0 - }, - "text/plain": [ - " 0%| | 0/100 [00:00= 1:\n", - " rag_prompt += q_to_prompt(q2[1])\n", " if len(q1) >= 1:\n", " rag_prompt += q_to_prompt(q1[1])\n", - " rag_prompt += q_to_prompt(q1[2])\n", " if len(q2) >= 1:\n", - " rag_prompt += q_to_prompt(q2[2])\n", + " rag_prompt += q_to_prompt(q2[1])\n", + " # if len(q1) >= 1:\n", + " # rag_prompt += q_to_prompt(q1[2])\n", + " # if len(q2) >= 1:\n", + " # rag_prompt += q_to_prompt(q2[2])\n", " \n", " \n", "\n", @@ -921,8 +1135,31 @@ " ]\n", "\n", " rag_prompt = [{\"role\": \"system\", \"content\": instruction}] + rag_prompt\n", - " return rag_prompt\n", - "\n", + " return rag_prompt" + ] + }, + { + "cell_type": "code", + "execution_count": 76, + "metadata": {}, + "outputs": [ + { + "data": { + "application/vnd.jupyter.widget-view+json": { + "model_id": "c23b4225fb4647a2aef13fa524c16e76", + "version_major": 2, + "version_minor": 0 + }, + "text/plain": [ + " 0%| | 0/100 [00:00 1\u001b[0m fine_tuner \u001b[39m=\u001b[39m OpenAIFineTuner(\n\u001b[1;32m 2\u001b[0m training_file_path\u001b[39m=\u001b[39m\u001b[39m\"\u001b[39m\u001b[39mlocal_cache/100_train_few_shot.jsonl\u001b[39m\u001b[39m\"\u001b[39m,\n\u001b[1;32m 3\u001b[0m model_name\u001b[39m=\u001b[39m\u001b[39m\"\u001b[39m\u001b[39mgpt-3.5-turbo\u001b[39m\u001b[39m\"\u001b[39m,\n\u001b[1;32m 4\u001b[0m 
suffix\u001b[39m=\u001b[39m\u001b[39m\"\u001b[39m\u001b[39mtrnfewshot20230907\u001b[39m\u001b[39m\"\u001b[39m\n\u001b[1;32m 5\u001b[0m )\n\u001b[1;32m 7\u001b[0m model_id \u001b[39m=\u001b[39m fine_tuner\u001b[39m.\u001b[39mfine_tune_model()\n\u001b[1;32m 8\u001b[0m model_id\n", - "\u001b[0;31mNameError\u001b[0m: name 'OpenAIFineTuner' is not defined" + "name": "stdout", + "output_type": "stream", + "text": [ + "File Status: uploaded\n", + "File Status: uploaded\n", + "File Status: uploaded\n", + "File Status: uploaded\n", + "File Status: uploaded\n", + "File Status: uploaded\n", + "File Status: uploaded\n", + "File Status: uploaded\n", + "File Status: processed\n", + "Job Status: running\n", + "Job Status: running\n", + "Job Status: running\n", + "Job Status: running\n", + "Job Status: running\n", + "Job Status: running\n", + "Job Status: running\n", + "Job Status: running\n", + "Job Status: running\n", + "Job Status: running\n", + "Job Status: running\n" ] } ], @@ -999,13 +1258,78 @@ }, { "cell_type": "code", - "execution_count": 66, + "execution_count": null, + "metadata": {}, + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "Correct Answer: Rajasthan\n", + "Model Answer:\n", + "{\n", + " \"role\": \"assistant\",\n", + " \"content\": \"Rajasthan\"\n", + "}\n" + ] + } + ], + "source": [ + "# Let's try this out\n", + "completion = openai.ChatCompletion.create(\n", + " model=model_id,\n", + " messages=[\n", + " {\"role\": \"system\", \"content\": \"You are a helpful assistant.\"},\n", + " {\n", + " \"role\": \"user\",\n", + " \"content\": \"Can you answer the following question based on the given context? If not, say, I don't know:\\n\\nQuestion: What is the capital of France?\\n\\nContext: The capital of Mars is Gaia. 
Answer:\",\n", + " },\n", + " {\n", + " \"role\": \"assistant\",\n", + " \"content\": \"I don't know\",\n", + " },\n", + " {\n", + " \"role\": \"user\",\n", + " \"content\": \"Question: Where did Maharana Pratap die?\\n\\nContext: Rana Pratap's defiance of the mighty Mughal empire, almost alone and unaided by the other Rajput states, constitute a glorious saga of Rajput valour and the spirit of self sacrifice for cherished principles. Rana Pratap's methods of guerrilla warfare was later elaborated further by Malik Ambar, the Deccani general, and by Emperor Shivaji.\\nAnswer:\",\n", + " },\n", + " {\n", + " \"role\": \"assistant\",\n", + " \"content\": \"I don't know\",\n", + " },\n", + " {\n", + " \"role\": \"user\",\n", + " \"content\": \"Question: Who did Rana Pratap fight against?\\n\\nContext: In stark contrast to other Rajput rulers who accommodated and formed alliances with the various Muslim dynasties in the subcontinent, by the time Pratap ascended to the throne, Mewar was going through a long standing conflict with the Mughals which started with the defeat of his grandfather Rana Sanga in the Battle of Khanwa in 1527 and continued with the defeat of his father Udai Singh II in Siege of Chittorgarh in 1568. Pratap Singh, gained distinction for his refusal to form any political alliance with the Mughal Empire and his resistance to Muslim domination. The conflicts between Pratap Singh and Akbar led to the Battle of Haldighati. Answer:\",\n", + " },\n", + " {\n", + " \"role\": \"assistant\",\n", + " \"content\": \"Akbar\",\n", + " },\n", + " {\n", + " \"role\": \"user\",\n", + " \"content\": \"Question: Which state is Chittorgarh in?\\n\\nContext: Chittorgarh, located in the southern part of the state of Rajasthan, 233 km (144.8 mi) from Ajmer, midway between Delhi and Mumbai on the National Highway 8 (India) in the road network of Golden Quadrilateral. Chittorgarh is situated where National Highways No. 76 & 79 intersect. 
Answer:\",\n", + " },\n", + " ],\n", + ")\n", + "print(\"Correct Answer: Rajasthan\\nModel Answer:\")\n", + "print(completion.choices[0].message)" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "⏰ **Time to run: 5-15 min**" + ] + }, + { + "cell_type": "code", + "execution_count": null, "metadata": {}, "outputs": [ { "data": { "application/vnd.jupyter.widget-view+json": { - "model_id": "418ad403b26e43eeba085657e063046e", + "model_id": "455a302d772e4dd4ac4927193db90214", "version_major": 2, "version_minor": 0 }, @@ -1039,21 +1363,21 @@ }, { "cell_type": "code", - "execution_count": 67, + "execution_count": null, "metadata": {}, "outputs": [ { "data": { "text/plain": [ - "Expected and Right 46.00%\n", - "Expected but IDK 5.00%\n", - "Expected but Wrong 2.00%\n", - "Hallucination 25.00%\n", - "Did not Expect and IDK 22.00%\n", + "✅ Answered Correctly 46.00%\n", + "❎ Skipped 0.00%\n", + "❌ Wrong Answer 7.00%\n", + "❌ Hallucination 45.00%\n", + "✅ I don't know 2.00%\n", "Name: count, dtype: object" ] }, - "execution_count": 67, + "execution_count": 73, "metadata": {}, "output_type": "execute_result" } @@ -1064,14 +1388,46 @@ }, { "cell_type": "code", - "execution_count": 140, + "execution_count": null, "metadata": {}, "outputs": [ + { + "name": "stderr", + "output_type": "stream", + "text": [ + "/opt/homebrew/Caskroom/miniconda/base/envs/fst/lib/python3.9/site-packages/seaborn/_oldcore.py:1498: FutureWarning: is_categorical_dtype is deprecated and will be removed in a future version. Use isinstance(dtype, CategoricalDtype) instead\n", + " if pd.api.types.is_categorical_dtype(vector):\n", + "/opt/homebrew/Caskroom/miniconda/base/envs/fst/lib/python3.9/site-packages/seaborn/_oldcore.py:1498: FutureWarning: is_categorical_dtype is deprecated and will be removed in a future version. 
Use isinstance(dtype, CategoricalDtype) instead\n", + " if pd.api.types.is_categorical_dtype(vector):\n", + "/opt/homebrew/Caskroom/miniconda/base/envs/fst/lib/python3.9/site-packages/seaborn/_oldcore.py:1498: FutureWarning: is_categorical_dtype is deprecated and will be removed in a future version. Use isinstance(dtype, CategoricalDtype) instead\n", + " if pd.api.types.is_categorical_dtype(vector):\n", + "/opt/homebrew/Caskroom/miniconda/base/envs/fst/lib/python3.9/site-packages/seaborn/_oldcore.py:1498: FutureWarning: is_categorical_dtype is deprecated and will be removed in a future version. Use isinstance(dtype, CategoricalDtype) instead\n", + " if pd.api.types.is_categorical_dtype(vector):\n", + "/opt/homebrew/Caskroom/miniconda/base/envs/fst/lib/python3.9/site-packages/seaborn/_oldcore.py:1498: FutureWarning: is_categorical_dtype is deprecated and will be removed in a future version. Use isinstance(dtype, CategoricalDtype) instead\n", + " if pd.api.types.is_categorical_dtype(vector):\n", + "/opt/homebrew/Caskroom/miniconda/base/envs/fst/lib/python3.9/site-packages/seaborn/_oldcore.py:1498: FutureWarning: is_categorical_dtype is deprecated and will be removed in a future version. 
Use isinstance(dtype, CategoricalDtype) instead\n", + " if pd.api.types.is_categorical_dtype(vector):\n", + "/opt/homebrew/Caskroom/miniconda/base/envs/fst/lib/python3.9/site-packages/seaborn/axisgrid.py:118: UserWarning: The figure layout has changed to tight\n", + " self._figure.tight_layout(*args, **kwargs)\n", + "/opt/homebrew/Caskroom/miniconda/base/envs/fst/lib/python3.9/site-packages/seaborn/utils.py:80: UserWarning: Glyph 9989 (\\N{WHITE HEAVY CHECK MARK}) missing from current font.\n", + " fig.canvas.draw()\n", + "/opt/homebrew/Caskroom/miniconda/base/envs/fst/lib/python3.9/site-packages/seaborn/utils.py:80: UserWarning: Glyph 10062 (\\N{NEGATIVE SQUARED CROSS MARK}) missing from current font.\n", + " fig.canvas.draw()\n", + "/opt/homebrew/Caskroom/miniconda/base/envs/fst/lib/python3.9/site-packages/seaborn/utils.py:80: UserWarning: Glyph 10060 (\\N{CROSS MARK}) missing from current font.\n", + " fig.canvas.draw()\n", + "/opt/homebrew/Caskroom/miniconda/base/envs/fst/lib/python3.9/site-packages/IPython/core/pylabtools.py:152: UserWarning: Glyph 9989 (\\N{WHITE HEAVY CHECK MARK}) missing from current font.\n", + " fig.canvas.print_figure(bytes_io, **kw)\n", + "/opt/homebrew/Caskroom/miniconda/base/envs/fst/lib/python3.9/site-packages/IPython/core/pylabtools.py:152: UserWarning: Glyph 10062 (\\N{NEGATIVE SQUARED CROSS MARK}) missing from current font.\n", + " fig.canvas.print_figure(bytes_io, **kw)\n", + "/opt/homebrew/Caskroom/miniconda/base/envs/fst/lib/python3.9/site-packages/IPython/core/pylabtools.py:152: UserWarning: Glyph 10060 (\\N{CROSS MARK}) missing from current font.\n", + " fig.canvas.print_figure(bytes_io, **kw)\n" + ] + }, { "data": { - "image/png": "", + "image/png": "", "text/plain": [ - "
" + "
" ] }, "metadata": {}, @@ -1079,64 +1435,8 @@ } ], "source": [ - "plot_overall_error(df, answer_columns=[\"generated_answer\", \"ft_generated_answer\", \"ft_generated_answer_few_shot\"], labels=[\"Base Model\", \"Fine-Tuned Model\", \"Fine-Tuned Model with Few-Shot\"])" - ] - }, - { - "cell_type": "code", - "execution_count": 1, - "metadata": {}, - "outputs": [ - { - "ename": "NameError", - "evalue": "name 'df' is not defined", - "output_type": "error", - "traceback": [ - "\u001b[0;31m---------------------------------------------------------------------------\u001b[0m", - "\u001b[0;31mNameError\u001b[0m Traceback (most recent call last)", - "Cell \u001b[0;32mIn[1], line 29\u001b[0m\n\u001b[1;32m 26\u001b[0m plt\u001b[39m.\u001b[39mtitle(\u001b[39m\"\u001b[39m\u001b[39mWhen Expecting an Answer\u001b[39m\u001b[39m\"\u001b[39m)\n\u001b[1;32m 27\u001b[0m plt\u001b[39m.\u001b[39mshow()\n\u001b[0;32m---> 29\u001b[0m plot_correctness(df, answer_columns\u001b[39m=\u001b[39m[\u001b[39m\"\u001b[39m\u001b[39mgenerated_answer\u001b[39m\u001b[39m\"\u001b[39m, \u001b[39m\"\u001b[39m\u001b[39mft_generated_answer\u001b[39m\u001b[39m\"\u001b[39m, \u001b[39m\"\u001b[39m\u001b[39mft_generated_answer_few_shot\u001b[39m\u001b[39m\"\u001b[39m], labels\u001b[39m=\u001b[39m[\u001b[39m\"\u001b[39m\u001b[39mBase Model\u001b[39m\u001b[39m\"\u001b[39m, \u001b[39m\"\u001b[39m\u001b[39mFine-Tuned Model\u001b[39m\u001b[39m\"\u001b[39m, \u001b[39m\"\u001b[39m\u001b[39mFew Shot Fine-Tuned Model with Qdrant\u001b[39m\u001b[39m\"\u001b[39m])\n", - "\u001b[0;31mNameError\u001b[0m: name 'df' is not defined" - ] - } - ], - "source": [ - "def plot_correctness(df, answer_columns, labels):\n", - " error_categories = [\"Expected and Right\", \"Expected but IDK\", \"Expected but Wrong\"]\n", - " annotations = [\"Correct\", \"I don't know\", \"Wrong\"]\n", - " cat_map = dict(zip(error_categories, annotations)) \n", - " # Prepare data for Seaborn\n", - " data = []\n", - " for i, col in 
enumerate(answer_columns):\n", - " results = evaluate_model(df, col)\n", - " for category in error_categories:\n", - " matrix_error = float(results.loc[category].replace(\"%\", \"\"))\n", - " data.append([labels[i], cat_map[category], matrix_error])\n", - "\n", - " df_plot = pd.DataFrame(data, columns=[\"Model\", \"Response\", \"Percentage\"])\n", - " \n", - " # Create the plot\n", - " g = sns.catplot(x=\"Percentage\", y=\"Model\", hue=\"Response\", data=df_plot, kind=\"bar\", height=6, aspect=1.5, palette=\"icefire\")\n", - " \n", - " ax = g.facet_axis(0, 0)\n", - " \n", - " # Add annotations\n", - " for i, p in enumerate(ax.patches):\n", - " ax.annotate(f\"{p.get_width():.0f}%\", (p.get_width(), p.get_y() + p.get_height() / 2),\n", - " ha=\"left\", va=\"center\")\n", - " plt.tight_layout()\n", - " plt.xlabel(\"Percentage (%)\")\n", - " plt.title(\"When Expecting an Answer\")\n", - " plt.show()\n", - "\n", "plot_correctness(df, answer_columns=[\"generated_answer\", \"ft_generated_answer\", \"ft_generated_answer_few_shot\"], labels=[\"Base Model\", \"Fine-Tuned Model\", \"Few Shot Fine-Tuned Model with Qdrant\"])" ] - }, - { - "cell_type": "code", - "execution_count": null, - "metadata": {}, - "outputs": [], - "source": [] } ], "metadata": { From 9db7a32cd32faab09a597559c2b04af743b69c96 Mon Sep 17 00:00:00 2001 From: NirantK Date: Fri, 8 Sep 2023 20:16:40 +0530 Subject: [PATCH 32/38] Add clean observations and results --- .../ft_retrieval_augmented_generation.ipynb | 120 +++++++++++++----- 1 file changed, 90 insertions(+), 30 deletions(-) diff --git a/examples/fine-tuned_qa/ft_retrieval_augmented_generation.ipynb b/examples/fine-tuned_qa/ft_retrieval_augmented_generation.ipynb index a7385e5184..a5a35c446d 100644 --- a/examples/fine-tuned_qa/ft_retrieval_augmented_generation.ipynb +++ b/examples/fine-tuned_qa/ft_retrieval_augmented_generation.ipynb @@ -843,6 +843,21 @@ "plot_correctness(df, answer_columns=[\"generated_answer\", \"ft_generated_answer\"], 
labels=[\"Base Model\", \"Fine-Tuned Model\"])" ] }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "### Observations\n", + "\n", + "1. The fine-tuned model is better at saying \"I don't know\"\n", + "2. Hallucinations drop from 47% to 7% with fine-tuning\n", + "3. Wrong answers drop from 9% to 3% with fine-tuning\n", + "\n", + "**Correct answers also drop from 44% to 32% with fine-tuning** - this is because the fine-tuned model is **more conservative** and says \"I don't know\" more often. This is a good thing because it's better to say \"I don't know\" than to give a wrong answer.\n", + "\n", + "That said, we want to improve the correctness of the model, even if that increases the hallucinations. We'll use Qdrant and Few-Shot Learning to achieve this." + ] + }, { "attachments": {}, "cell_type": "markdown", @@ -1051,7 +1066,7 @@ }, { "cell_type": "code", - "execution_count": 75, + "execution_count": 84, "metadata": {}, "outputs": [], "source": [ @@ -1116,14 +1131,13 @@ " ]\n", "\n", " rag_prompt = []\n", + " \n", " if len(q1) >= 1:\n", " rag_prompt += q_to_prompt(q1[1])\n", " if len(q2) >= 1:\n", " rag_prompt += q_to_prompt(q2[1])\n", - " # if len(q1) >= 1:\n", - " # rag_prompt += q_to_prompt(q1[2])\n", - " # if len(q2) >= 1:\n", - " # rag_prompt += q_to_prompt(q2[2])\n", + " if len(q1) >= 1:\n", + " rag_prompt += q_to_prompt(q1[2])\n", " \n", " \n", "\n", @@ -1140,13 +1154,13 @@ }, { "cell_type": "code", - "execution_count": 76, + "execution_count": 85, "metadata": {}, "outputs": [ { "data": { "application/vnd.jupyter.widget-view+json": { - "model_id": "c23b4225fb4647a2aef13fa524c16e76", + "model_id": "2d5cd9516e6e44a2b4088c444813d631", "version_major": 2, "version_minor": 0 }, @@ -1174,13 +1188,13 @@ }, { "cell_type": "code", - "execution_count": 77, + "execution_count": 86, "metadata": {}, "outputs": [ { "data": { "application/vnd.jupyter.widget-view+json": { - "model_id": "e2bb01e078ad49269f6b7ce9dec47fe2", + "model_id": 
"fd872956e92846948c16735391394c63", "version_major": 2, "version_minor": 0 }, @@ -1215,19 +1229,13 @@ }, { "cell_type": "code", - "execution_count": 78, + "execution_count": 87, "metadata": {}, "outputs": [ { "name": "stdout", "output_type": "stream", "text": [ - "File Status: uploaded\n", - "File Status: uploaded\n", - "File Status: uploaded\n", - "File Status: uploaded\n", - "File Status: uploaded\n", - "File Status: uploaded\n", "File Status: uploaded\n", "File Status: uploaded\n", "File Status: processed\n", @@ -1241,8 +1249,18 @@ "Job Status: running\n", "Job Status: running\n", "Job Status: running\n", - "Job Status: running\n" + "Job Status: succeeded\n" ] + }, + { + "data": { + "text/plain": [ + "'ft:gpt-3.5-turbo-0613:qdrant:trnfewshot20230907:7wWa4owB'" + ] + }, + "execution_count": 87, + "metadata": {}, + "output_type": "execute_result" } ], "source": [ @@ -1258,7 +1276,7 @@ }, { "cell_type": "code", - "execution_count": null, + "execution_count": 88, "metadata": {}, "outputs": [ { @@ -1323,13 +1341,13 @@ }, { "cell_type": "code", - "execution_count": null, + "execution_count": 89, "metadata": {}, "outputs": [ { "data": { "application/vnd.jupyter.widget-view+json": { - "model_id": "455a302d772e4dd4ac4927193db90214", + "model_id": "f3132d336156412dae3494cec0c5c226", "version_major": 2, "version_minor": 0 }, @@ -1347,7 +1365,7 @@ }, { "cell_type": "code", - "execution_count": null, + "execution_count": 90, "metadata": {}, "outputs": [], "source": [ @@ -1363,21 +1381,21 @@ }, { "cell_type": "code", - "execution_count": null, + "execution_count": 91, "metadata": {}, "outputs": [ { "data": { "text/plain": [ - "✅ Answered Correctly 46.00%\n", - "❎ Skipped 0.00%\n", - "❌ Wrong Answer 7.00%\n", - "❌ Hallucination 45.00%\n", - "✅ I don't know 2.00%\n", + "✅ Answered Correctly 44.00%\n", + "❎ Skipped 5.00%\n", + "❌ Wrong Answer 4.00%\n", + "❌ Hallucination 25.00%\n", + "✅ I don't know 22.00%\n", "Name: count, dtype: object" ] }, - "execution_count": 73, + 
"execution_count": 91, "metadata": {}, "output_type": "execute_result" } @@ -1388,7 +1406,7 @@ }, { "cell_type": "code", - "execution_count": null, + "execution_count": 92, "metadata": {}, "outputs": [ { @@ -1425,7 +1443,7 @@ }, { "data": { - "image/png": "", + "image/png": "", "text/plain": [ "
" ] @@ -1437,6 +1455,48 @@ "source": [ "plot_correctness(df, answer_columns=[\"generated_answer\", \"ft_generated_answer\", \"ft_generated_answer_few_shot\"], labels=[\"Base Model\", \"Fine-Tuned Model\", \"Few Shot Fine-Tuned Model with Qdrant\"])" ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "### Results\n", + "\n", + "Category | Base | Fine-Tuned | Fine-Tuned with Qdrant |\n", + "| --- | --- | --- | --- |\n", + "| Correct | 44% | 32% | 44% |\n", + "| Skipped | 0% | 18% | 5% |\n", + "| Wrong | 9% | 3% | 4% |\n", + "| Hallucination | 47% | 7% | 25% |\n", + "| I don't know | 0% | 40% | 22% |" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "### Observations\n", + "\n", + "#### Compared to base model\n", + "1. The few shot fine-tuned with Qdrant model is as good as the base model at answering questions where the answer is present in the context. 44% of the questions are answered correctly in both cases.\n", + "2. The few shot fine-tuned with Qdrant model is better at saying \"I don't know\" when the answer is not present in the context. 22% of the questions are answered with \"I don't know\" vs 0% for the base model.\n", + "3. The few shot fine-tuned with Qdrant model is better at reducing hallucinations. 25% of the questions are answered with hallucinations vs 47% for the base model.\n", + "\n", + "\n", + "#### Compared to fine-tuned model\n", + "1. The few shot fine-tuned with Qdrant model gets more correct answers than the fine-tuned model: **44% of the questions are answered correctly vs 32%** for the fine-tuned model\n", + "2. The few shot fine-tuned with Qdrant model is better at deciding when to say \"I don't know\" when the answer is not present in the context. **22% of the questions are answered with \"I don't know\" vs 40%** for the fine-tuned model.\n", + "\n", + "Few Shot Fine-Tuning with Qdrant is a great way to control and steer the performance of your RAG system. 
\n", + "\n", + "Here, we made the model less conservative and more confident by using Qdrant to find similar questions. \n", + "\n", + "One can also use Qdrant to make the model more conservative. We did this by giving examples of questions where the answer is not present in the context. \n", + "This is biasing the model to say \"I don't know\" more often. \n", + "\n", + "Similarly, one can also use Qdrant to make the model more confident by giving examples of questions where the answer is present in the context. \n", + "This biases the model to give an answer more often. " + ] } ], "metadata": { From b4adbcc6aa98d6033d1378bbd53e7f1bcd1339df Mon Sep 17 00:00:00 2001 From: NirantK Date: Fri, 8 Sep 2023 20:19:42 +0530 Subject: [PATCH 33/38] * chore(ft_retrieval_augmented_generation.ipynb): update markdown heading for plotting the results --- examples/fine-tuned_qa/ft_retrieval_augmented_generation.ipynb | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/examples/fine-tuned_qa/ft_retrieval_augmented_generation.ipynb b/examples/fine-tuned_qa/ft_retrieval_augmented_generation.ipynb index a5a35c446d..87194ceb28 100644 --- a/examples/fine-tuned_qa/ft_retrieval_augmented_generation.ipynb +++ b/examples/fine-tuned_qa/ft_retrieval_augmented_generation.ipynb @@ -691,7 +691,7 @@ "cell_type": "markdown", "metadata": {}, "source": [ - "### Plotting the Results" + "#### Plotting the Results" ] }, { From 1ae556e2534a9c7bf8828ca785907b2974e6e3d9 Mon Sep 17 00:00:00 2001 From: NirantK Date: Mon, 11 Sep 2023 18:16:50 +0530 Subject: [PATCH 34/38] Crisp story --- .../ft_retrieval_augmented_generation.ipynb | 1113 ++++++++--------- 1 file changed, 503 insertions(+), 610 deletions(-) diff --git a/examples/fine-tuned_qa/ft_retrieval_augmented_generation.ipynb b/examples/fine-tuned_qa/ft_retrieval_augmented_generation.ipynb index 87194ceb28..c4fa1269a2 100644 --- a/examples/fine-tuned_qa/ft_retrieval_augmented_generation.ipynb +++ 
b/examples/fine-tuned_qa/ft_retrieval_augmented_generation.ipynb @@ -35,6 +35,28 @@ "7. Comparison and Results" ] }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "## Terms, Definitions, and References\n", + "\n", + "**Retrieval Augmented Generation (RAG)?**\n", + "The phrase Retrieval Augmented Generation (RAG) comes from a [recent paper](https://arxiv.org/abs/2005.11401) by Lewis et al. from Facebook AI. The idea is to use a pre-trained language model (LM) to generate text, but to use a separate retrieval system to find relevant documents to condition the LM on. \n", + "\n", + "**What is Qdrant?**\n", + "Qdrant is an open-source vector search engine that allows you to search for similar vectors in a large dataset. It is built in Rust and here we'll use the Python client to interact with it. This is the Retrieval part of RAG.\n", + "\n", + "**What is Few-Shot Learning?**\n", + "Few-shot learning is a type of machine learning where the model is \"improved\" via training or fine-tuning on a small amount of data. In this case, we'll use it to fine-tune the RAG model on a small number of examples from the SQuAD dataset. This is the Augmented part of RAG.\n", + "\n", + "**What is Zero-Shot Learning?**\n", + "Zero-shot learning is a type of machine learning where the model is \"improved\" via training or fine-tuning without any dataset specific information. \n", + "\n", + "**What is Fine-Tuning?**\n", + "Fine-tuning is a type of machine learning where the model is \"improved\" via training or fine-tuning on a small amount of data. In this case, we'll use it to fine-tune the RAG model on a small number of examples from the SQuAD dataset. The LLM is what makes the Generation part of RAG." 
+ ] + }, { "attachments": {}, "cell_type": "markdown", @@ -47,7 +69,7 @@ }, { "cell_type": "code", - "execution_count": 76, + "execution_count": 1, "metadata": {}, "outputs": [], "source": [ @@ -76,6 +98,8 @@ "import numpy as np\n", "from sklearn.metrics import confusion_matrix\n", "\n", + "import warnings\n", + "warnings.filterwarnings('ignore')\n", "\n", "tqdm.pandas()" ] @@ -98,42 +122,13 @@ }, { "cell_type": "code", - "execution_count": 6, + "execution_count": 3, "metadata": {}, - "outputs": [ - { - "name": "stdout", - "output_type": "stream", - "text": [ - "--2023-09-08 16:40:51-- https://rajpurkar.github.io/SQuAD-explorer/dataset/train-v2.0.json\n", - "Resolving rajpurkar.github.io (rajpurkar.github.io)... 185.199.111.153, 185.199.108.153, 185.199.109.153, ...\n", - "Connecting to rajpurkar.github.io (rajpurkar.github.io)|185.199.111.153|:443... connected.\n", - "HTTP request sent, awaiting response... 200 OK\n", - "Length: 42123633 (40M) [application/json]\n", - "Saving to: ‘local_cache/train.json’\n", - "\n", - "local_cache/train.j 100%[===================>] 40.17M 36.1MB/s in 1.1s \n", - "\n", - "2023-09-08 16:40:55 (36.1 MB/s) - ‘local_cache/train.json’ saved [42123633/42123633]\n", - "\n", - "--2023-09-08 16:40:55-- https://rajpurkar.github.io/SQuAD-explorer/dataset/dev-v2.0.json\n", - "Resolving rajpurkar.github.io (rajpurkar.github.io)... 185.199.108.153, 185.199.109.153, 185.199.110.153, ...\n", - "Connecting to rajpurkar.github.io (rajpurkar.github.io)|185.199.108.153|:443... connected.\n", - "HTTP request sent, awaiting response... 
200 OK\n", - "Length: 4370528 (4.2M) [application/json]\n", - "Saving to: ‘local_cache/dev.json’\n", - "\n", - "local_cache/dev.jso 100%[===================>] 4.17M 14.0MB/s in 0.3s \n", - "\n", - "2023-09-08 16:40:56 (14.0 MB/s) - ‘local_cache/dev.json’ saved [4370528/4370528]\n", - "\n" - ] - } - ], + "outputs": [], "source": [ - "!mkdir -p local_cache\n", - "!wget https://rajpurkar.github.io/SQuAD-explorer/dataset/train-v2.0.json -O local_cache/train.json\n", - "!wget https://rajpurkar.github.io/SQuAD-explorer/dataset/dev-v2.0.json -O local_cache/dev.json" + "# !mkdir -p local_cache\n", + "# !wget https://rajpurkar.github.io/SQuAD-explorer/dataset/train-v2.0.json -O local_cache/train.json\n", + "# !wget https://rajpurkar.github.io/SQuAD-explorer/dataset/dev-v2.0.json -O local_cache/dev.json" ] }, { @@ -145,7 +140,7 @@ }, { "cell_type": "code", - "execution_count": 7, + "execution_count": 4, "metadata": {}, "outputs": [], "source": [ @@ -174,6 +169,9 @@ " return df\n", "\n", "def get_diverse_sample(df, sample_size=100, random_state=42):\n", + " \"\"\"\n", + " Get a diverse sample of the dataframe by sampling from each title\n", + " \"\"\"\n", " sample_df = df.groupby(['title', 'is_impossible']).apply(lambda x: x.sample(min(len(x), max(1, sample_size // 50)), random_state=random_state)).reset_index(drop=True)\n", " \n", " if len(sample_df) < sample_size:\n", @@ -196,12 +194,18 @@ "source": [ "## 3. Answering using Base gpt-3.5-turbo-0613 model\n", "\n", - "### 3.1 Zero Shot Prompt" + "### 3.1 Zero Shot Prompt\n", + "\n", + "Let's start by using the base gpt-3.5-turbo-0613 model to answer the questions. This prompt is a simple concatenation of the question and context, with a separator token in between: `\\n\\n`. We've a simple instruction part of the prompt: \n", + "\n", + "> Answer the following Question based on the Context only. Only answer from the Context. 
If you don't know the answer, say 'I don't know'.\n", + "\n", + "Other prompts are possible, but this is a good starting point. We'll use this prompt to answer the questions in the validation set. " ] }, { "cell_type": "code", - "execution_count": 8, + "execution_count": 5, "metadata": {}, "outputs": [], "source": [ @@ -223,12 +227,14 @@ "cell_type": "markdown", "metadata": {}, "source": [ - "### 3.2 Answering using Zero Shot Prompt" + "### 3.2 Answering using Zero Shot Prompt\n", + "\n", + "Next, you'll need some re-usable functions which make an OpenAI API Call and return the answer. You'll use the `ChatCompletion.create` endpoint of the API, which takes a prompt and returns the completed text." ] }, { "cell_type": "code", - "execution_count": 11, + "execution_count": 6, "metadata": {}, "outputs": [], "source": [ @@ -255,34 +261,198 @@ "cell_type": "markdown", "metadata": {}, "source": [ - "⏰ **Time to run: ~3 min**" + "⏰ **Time to run: ~3 min**, 🛜 Needs Internet Connection" ] }, { "cell_type": "code", - "execution_count": 12, + "execution_count": 7, "metadata": {}, - "outputs": [ - { - "name": "stderr", - "output_type": "stream", - "text": [ - "100%|██████████| 100/100 [01:28<00:00, 1.13it/s]\n" - ] - } - ], + "outputs": [], "source": [ "# Use progress_apply with tqdm for progress bar\n", - "df[\"generated_answer\"] = df.progress_apply(answer_question, axis=1)" + "df[\"generated_answer\"] = df.progress_apply(answer_question, axis=1)\n", + "df.to_json(\"local_cache/100_val.json\", orient=\"records\", lines=True)\n", + "df = pd.read_json(\"local_cache/100_val.json\", orient=\"records\", lines=True)" ] }, { "cell_type": "code", - "execution_count": 13, + "execution_count": 8, "metadata": {}, - "outputs": [], + "outputs": [ + { + "data": { + "text/html": [ + "
\n", + "\n", + "\n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + "
titlequestioncontextis_impossibleanswers
0Scottish_ParliamentWhat consequence of establishing the Scottish ...A procedural consequence of the establishment ...False[able to vote on domestic legislation that app...
1ImperialismImperialism is less often associated with whic...The principles of imperialism are often genera...True[]
2Economic_inequalityWhat issues can't prevent women from working o...When a person’s capabilities are lowered, they...True[]
3Southern_CaliforniaWhat county are Los Angeles, Orange, San Diego...Its counties of Los Angeles, Orange, San Diego...True[]
4French_and_Indian_WarWhen was the deportation of Canadians?Britain gained control of French Canada and Ac...True[]
..................
95GeologyIn the layered Earth model, what is the inner ...Seismologists can use the arrival times of sei...True[]
96Prime_numberWhat type of value would the Basel function ha...The zeta function is closely related to prime ...True[]
97Fresno,_CaliforniaWhat does the San Joaquin Valley Railroad cros...Passenger rail service is provided by Amtrak S...True[]
98Victoria_(Australia)What party rules in Melbourne's inner regions?The centre-left Australian Labor Party (ALP), ...False[The Greens, Australian Greens, Greens]
99Immune_systemThe speed of the killing response of the human...In humans, this response is activated by compl...False[signal amplification, signal amplification, s...
\n", + "

100 rows × 5 columns

\n", + "
" + ], + "text/plain": [ + " title question \\\n", + "0 Scottish_Parliament What consequence of establishing the Scottish ... \n", + "1 Imperialism Imperialism is less often associated with whic... \n", + "2 Economic_inequality What issues can't prevent women from working o... \n", + "3 Southern_California What county are Los Angeles, Orange, San Diego... \n", + "4 French_and_Indian_War When was the deportation of Canadians? \n", + ".. ... ... \n", + "95 Geology In the layered Earth model, what is the inner ... \n", + "96 Prime_number What type of value would the Basel function ha... \n", + "97 Fresno,_California What does the San Joaquin Valley Railroad cros... \n", + "98 Victoria_(Australia) What party rules in Melbourne's inner regions? \n", + "99 Immune_system The speed of the killing response of the human... \n", + "\n", + " context is_impossible \\\n", + "0 A procedural consequence of the establishment ... False \n", + "1 The principles of imperialism are often genera... True \n", + "2 When a person’s capabilities are lowered, they... True \n", + "3 Its counties of Los Angeles, Orange, San Diego... True \n", + "4 Britain gained control of French Canada and Ac... True \n", + ".. ... ... \n", + "95 Seismologists can use the arrival times of sei... True \n", + "96 The zeta function is closely related to prime ... True \n", + "97 Passenger rail service is provided by Amtrak S... True \n", + "98 The centre-left Australian Labor Party (ALP), ... False \n", + "99 In humans, this response is activated by compl... False \n", + "\n", + " answers \n", + "0 [able to vote on domestic legislation that app... \n", + "1 [] \n", + "2 [] \n", + "3 [] \n", + "4 [] \n", + ".. ... \n", + "95 [] \n", + "96 [] \n", + "97 [] \n", + "98 [The Greens, Australian Greens, Greens] \n", + "99 [signal amplification, signal amplification, s... 
\n", + "\n", + "[100 rows x 5 columns]" + ] + }, + "execution_count": 8, + "metadata": {}, + "output_type": "execute_result" + } + ], "source": [ - "df.to_json(\"local_cache/100_val.json\", orient=\"records\", lines=True)" + "df" ] }, { @@ -295,22 +465,16 @@ "\n", "### 4.1 Prepare the Fine-Tuning Data\n", "\n", - "We need to prepare the data for fine-tuning. We'll use a few samples from train split of same dataset as before, but we'll add the answer to the context. This will help the model learn to retrieve the answer from the context." + "We need to prepare the data for fine-tuning. We'll use a few samples from train split of same dataset as before, but we'll add the answer to the context. This will help the model learn to retrieve the answer from the context. \n", + "\n", + "Our instruction prompt is the same as before, and so is the system prompt. " ] }, { "cell_type": "code", - "execution_count": 14, + "execution_count": 9, "metadata": {}, - "outputs": [ - { - "name": "stderr", - "output_type": "stream", - "text": [ - "100%|██████████| 100/100 [00:00<00:00, 65659.11it/s]\n" - ] - } - ], + "outputs": [], "source": [ "def dataframe_to_jsonl(df):\n", " def create_jsonl_entry(row):\n", @@ -328,7 +492,7 @@ " ]\n", " return json.dumps({\"messages\": messages})\n", "\n", - " jsonl_output = df.progress_apply(create_jsonl_entry, axis=1)\n", + " jsonl_output = df.apply(create_jsonl_entry, axis=1)\n", " return \"\\n\".join(jsonl_output)\n", "\n", "train_sample = get_diverse_sample(train_df, sample_size=100, random_state=42)\n", @@ -352,11 +516,14 @@ }, { "cell_type": "code", - "execution_count": 15, + "execution_count": 10, "metadata": {}, "outputs": [], "source": [ "class OpenAIFineTuner:\n", + " \"\"\"\n", + " Class to fine tune OpenAI models\n", + " \"\"\"\n", " def __init__(self, training_file_path, model_name, suffix):\n", " self.training_file_path = training_file_path\n", " self.model_name = model_name\n", @@ -412,54 +579,14 @@ "cell_type": "markdown", "metadata": 
{}, "source": [ - "⏰ **Time to run: ~10-20 minutes**" + "⏰ **Time to run: ~10-20 minutes**, 🛜 Needs Internet Connection" ] }, { "cell_type": "code", - "execution_count": 16, + "execution_count": 11, "metadata": {}, - "outputs": [ - { - "name": "stdout", - "output_type": "stream", - "text": [ - "File Status: uploaded\n", - "File Status: processed\n", - "Job Status: running\n", - "Job Status: running\n", - "Job Status: running\n", - "Job Status: running\n", - "Job Status: running\n", - "Job Status: running\n", - "Job Status: running\n", - "Job Status: running\n", - "Job Status: running\n", - "Job Status: running\n", - "Job Status: running\n", - "Job Status: running\n", - "Job Status: running\n", - "Job Status: running\n", - "Job Status: running\n", - "Job Status: running\n", - "Job Status: running\n", - "Job Status: running\n", - "Job Status: running\n", - "Job Status: running\n", - "Job Status: succeeded\n" - ] - }, - { - "data": { - "text/plain": [ - "'ft:gpt-3.5-turbo-0613:qdrant:100trn20230907:7wU85Xwb'" - ] - }, - "execution_count": 16, - "metadata": {}, - "output_type": "execute_result" - } - ], + "outputs": [], "source": [ "model_id = fine_tuner.fine_tune_model()\n", "model_id" @@ -469,25 +596,16 @@ "cell_type": "markdown", "metadata": {}, "source": [ - "#### 4.2.1 Try out the Fine-Tuned Model" + "#### 4.2.1 Try out the Fine-Tuned Model\n", + "\n", + "Let's try out the fine-tuned model on the same validation set as before. You'll use the same prompt as before, but you will use the fine-tuned model instead of the base model. Before you do that, you can make a simple call to get a sense of how the fine-tuned model is doing." 
] }, { "cell_type": "code", - "execution_count": 17, + "execution_count": 12, "metadata": {}, - "outputs": [ - { - "name": "stdout", - "output_type": "stream", - "text": [ - "{\n", - " \"role\": \"assistant\",\n", - " \"content\": \"I don't know\"\n", - "}\n" - ] - } - ], + "outputs": [], "source": [ "completion = openai.ChatCompletion.create(\n", " model=model_id,\n", @@ -509,29 +627,18 @@ "cell_type": "markdown", "metadata": {}, "source": [ - "### 4.3 Answer Using the Fine-Tuned Model" - ] - }, - { - "cell_type": "markdown", - "metadata": {}, - "source": [ - "⏰ **Time to run: ~5 min**" + "### 4.3 Answer Using the Fine-Tuned Model\n", + "\n", + "This is the same as before, but you'll use the fine-tuned model instead of the base model.\n", + "\n", + "⏰ **Time to run: ~5 min**, 🛜 Needs Internet Connection" ] }, { "cell_type": "code", - "execution_count": 18, + "execution_count": 13, "metadata": {}, - "outputs": [ - { - "name": "stderr", - "output_type": "stream", - "text": [ - "100%|██████████| 100/100 [05:09<00:00, 3.10s/it]\n" - ] - } - ], + "outputs": [], "source": [ "df[\"ft_generated_answer\"] = df.progress_apply(answer_question, model=model_id, axis=1)" ] @@ -559,163 +666,168 @@ }, { "cell_type": "code", - "execution_count": 30, + "execution_count": 193, "metadata": {}, "outputs": [], "source": [ - "class ConfusionMatrixEvaluator:\n", - " def __init__(self, df, answers_column=\"generated_answer\"):\n", + "import pandas as pd\n", + "import seaborn as sns\n", + "import matplotlib.pyplot as plt\n", + "\n", + "class Evaluator:\n", + " def __init__(self, df):\n", " self.df = df\n", - " self.y_pred = []\n", - " self.labels = [\n", - " \"✅ Answered Correctly\",\n", - " \"❎ Skipped\",\n", - " \"❌ Wrong Answer\", \n", - " \"❌ Hallucination\", \n", - " \"✅ I don't know\"\n", - " ]\n", - " self.answers_column = answers_column\n", + " self.y_pred = pd.Series() # Initialize as empty Series\n", + " self.labels_answer_expected = [\"✅ Answered Correctly\", \"❎ Skipped\", \"❌ 
Wrong Answer\"]\n", + " self.labels_idk_expected = [\"❌ Hallucination\", \"✅ I don't know\"]\n", "\n", - " def _evaluate_single_row(self, row):\n", - " is_impossible = row[\"is_impossible\"]\n", - " generated_answer = row[self.answers_column].lower()\n", + " def _evaluate_answer_expected(self, row, answers_column):\n", + " generated_answer = row[answers_column].lower()\n", " actual_answers = [ans.lower() for ans in row[\"answers\"]]\n", - "\n", - " y_pred = (\n", - " \"✅ Answered Correctly\"\n", - " if not is_impossible\n", - " and any(ans in generated_answer for ans in actual_answers)\n", - " else \"❎ Skipped\"\n", - " if not is_impossible and generated_answer == \"i don't know\"\n", + " return (\n", + " \"✅ Answered Correctly\" if any(ans in generated_answer for ans in actual_answers)\n", + " else \"❎ Skipped\" if generated_answer == \"i don't know\"\n", " else \"❌ Wrong Answer\"\n", - " if not is_impossible and generated_answer not in actual_answers\n", - " else \"❌ Hallucination\"\n", - " if is_impossible and generated_answer != \"i don't know\"\n", - " else \"✅ I don't know\"\n", " )\n", - " return y_pred\n", "\n", - " def evaluate_answers(self):\n", - " self.y_pred = self.df.apply(self._evaluate_single_row, axis=1)\n", - "\n", - " def generate_matrices(self, use_percentages=False):\n", - " # Using value_counts to create a Series of frequencies, then reindexing to include missing labels with count 0\n", - " freq_series = self.y_pred.value_counts().reindex(self.labels, fill_value=0)\n", - " if use_percentages:\n", - " total = freq_series.sum()\n", - " freq_series = (freq_series / total * 100).apply(\"{0:.2f}%\".format)\n", - " return freq_series\n", + " def _evaluate_idk_expected(self, row, answers_column):\n", + " generated_answer = row[answers_column].lower()\n", + " return (\n", + " \"❌ Hallucination\" if generated_answer != \"i don't know\"\n", + " else \"✅ I don't know\"\n", + " )\n", "\n", + " def _evaluate_single_row(self, row, answers_column):\n", + " 
is_impossible = row[\"is_impossible\"]\n", + " return (\n", + " self._evaluate_answer_expected(row, answers_column) if not is_impossible\n", + " else self._evaluate_idk_expected(row, answers_column)\n", + " )\n", "\n", - "def evaluate_model(df, answers_column):\n", - " \"\"\"\n", - " Evaluate the confusion matrix for a given DataFrame and answer column.\n", - " \"\"\"\n", - " evaluator = ConfusionMatrixEvaluator(df, answers_column=answers_column)\n", - " evaluator.evaluate_answers()\n", - " error_categories = evaluator.generate_matrices(use_percentages=True)\n", - " return error_categories" + " def evaluate_model(self, answers_column=\"generated_answer\"):\n", + " self.y_pred = pd.Series(self.df.apply(self._evaluate_single_row, answers_column=answers_column, axis=1))\n", + " freq_series = self.y_pred.value_counts()\n", + " \n", + " # Counting rows for each scenario\n", + " total_answer_expected = len(self.df[self.df['is_impossible'] == False])\n", + " total_idk_expected = len(self.df[self.df['is_impossible'] == True])\n", + " \n", + " freq_answer_expected = (freq_series / total_answer_expected * 100).round(2).reindex(self.labels_answer_expected, fill_value=0)\n", + " freq_idk_expected = (freq_series / total_idk_expected * 100).round(2).reindex(self.labels_idk_expected, fill_value=0)\n", + " return freq_answer_expected.to_dict(), freq_idk_expected.to_dict()\n", + "\n", + " def print_eval(self):\n", + " answer_columns=[\"generated_answer\", \"ft_generated_answer\"]\n", + " baseline_correctness, baseline_idk = self.evaluate_model()\n", + " ft_correctness, ft_idk = self.evaluate_model(self.df, answer_columns[1])\n", + " print(\"When the model should answer correctly:\")\n", + " eval_df = pd.merge(\n", + " baseline_correctness.rename(\"Baseline\"),\n", + " ft_correctness.rename(\"Fine-Tuned\"),\n", + " left_index=True,\n", + " right_index=True,\n", + " )\n", + " print(eval_df)\n", + " print(\"\\n\\n\\nWhen the model should say 'I don't know':\")\n", + " eval_df = 
pd.merge(\n", + " baseline_idk.rename(\"Baseline\"),\n", + " ft_idk.rename(\"Fine-Tuned\"),\n", + " left_index=True,\n", + " right_index=True,\n", + " )\n", + " print(eval_df)\n", + " \n", + " def plot_model_comparison(self, answer_columns=[\"generated_answer\", \"ft_generated_answer\"], scenario=\"answer_expected\", nice_names=[\"Baseline\", \"Fine-Tuned\"]):\n", + " \n", + " results = []\n", + " for col in answer_columns:\n", + " answer_expected, idk_expected = self.evaluate_model(col)\n", + " if scenario == \"answer_expected\":\n", + " results.append(answer_expected)\n", + " elif scenario == \"idk_expected\":\n", + " results.append(idk_expected)\n", + " else:\n", + " raise ValueError(\"Invalid scenario\")\n", + " \n", + " \n", + " results_df = pd.DataFrame(results, index=nice_names)\n", + " if scenario == \"answer_expected\":\n", + " results_df = results_df.reindex(self.labels_answer_expected, axis=1)\n", + " elif scenario == \"idk_expected\":\n", + " results_df = results_df.reindex(self.labels_idk_expected, axis=1)\n", + " \n", + " melted_df = results_df.reset_index().melt(id_vars='index', var_name='Status', value_name='Frequency')\n", + " sns.set_theme(style=\"whitegrid\", palette=\"icefire\")\n", + " g = sns.catplot(data=melted_df, x='Frequency', y='index', hue='Status', kind='bar', height=5, aspect=2)\n", + "\n", + " # Annotating each bar\n", + " for p in g.ax.patches:\n", + " g.ax.annotate(f\"{p.get_width():.0f}%\", (p.get_width()+5, p.get_y() + p.get_height() / 2),\n", + " textcoords=\"offset points\",\n", + " xytext=(0, 0),\n", + " ha='center', va='center')\n", + " plt.ylabel(\"Model\")\n", + " plt.xlabel(\"Percentage\")\n", + " plt.xlim(0, 100)\n", + " plt.tight_layout()\n", + " plt.title(scenario.replace(\"_\", \" \").title())\n", + " plt.show()\n", + "\n", + "\n", + "# Compare the results by merging into one dataframe\n", + "evaluator = Evaluator(df)\n", + "# evaluator.evaluate_model(answers_column=\"ft_generated_answer\")\n", + "# 
evaluator.plot_model_comparison([\"generated_answer\", \"ft_generated_answer\"], scenario=\"answer_expected\", nice_names=[\"Baseline\", \"Fine-Tuned\"])" ] }, { "cell_type": "code", - "execution_count": 31, + "execution_count": 98, "metadata": {}, - "outputs": [ - { - "data": { - "text/plain": [ - "✅ Answered Correctly 44.00%\n", - "❎ Skipped 0.00%\n", - "❌ Wrong Answer 9.00%\n", - "❌ Hallucination 47.00%\n", - "✅ I don't know 0.00%\n", - "Name: count, dtype: object" - ] - }, - "execution_count": 31, - "metadata": {}, - "output_type": "execute_result" - } - ], + "outputs": [], "source": [ - "evaluate_model(df, \"generated_answer\")" + "# Optionally, save the results to a JSON file\n", + "df.to_json(\"local_cache/100_val_ft.json\", orient=\"records\", lines=True)\n", + "df = pd.read_json(\"local_cache/100_val_ft.json\", orient=\"records\", lines=True)" ] }, { "cell_type": "code", - "execution_count": 32, + "execution_count": 194, "metadata": {}, "outputs": [ { "data": { + "image/png": "", "text/plain": [ - "✅ Answered Correctly 32.00%\n", - "❎ Skipped 18.00%\n", - "❌ Wrong Answer 3.00%\n", - "❌ Hallucination 7.00%\n", - "✅ I don't know 40.00%\n", - "Name: count, dtype: object" + "
" ] }, - "execution_count": 32, "metadata": {}, - "output_type": "execute_result" + "output_type": "display_data" } ], "source": [ - "evaluate_model(df, \"ft_generated_answer\")" + "evaluator.plot_model_comparison([\"generated_answer\", \"ft_generated_answer\"], scenario=\"answer_expected\", nice_names=[\"Baseline\", \"Fine-Tuned\"])" ] }, { - "cell_type": "code", - "execution_count": 33, - "metadata": {}, - "outputs": [], - "source": [ - "# Optionally, save the results to a JSON file\n", - "df.to_json(\"local_cache/100_val_ft.json\", orient=\"records\", lines=True)" - ] - }, - { - "cell_type": "code", - "execution_count": 34, - "metadata": {}, - "outputs": [], - "source": [ - "df = pd.read_json(\"local_cache/100_val_ft.json\", orient=\"records\", lines=True)" - ] - }, - { - "attachments": {}, "cell_type": "markdown", "metadata": {}, "source": [ - "#### Plotting the Results" + "Notice that the fine-tuned model skips questions more often -- and makes fewer mistakes. This is because the fine-tuned model is more conservative and skips questions when it's not sure." ] }, { "cell_type": "code", - "execution_count": 36, + "execution_count": 195, "metadata": {}, "outputs": [ - { - "name": "stderr", - "output_type": "stream", - "text": [ - "/opt/homebrew/Caskroom/miniconda/base/envs/fst/lib/python3.9/site-packages/seaborn/_oldcore.py:1498: FutureWarning: is_categorical_dtype is deprecated and will be removed in a future version. Use isinstance(dtype, CategoricalDtype) instead\n", - " if pd.api.types.is_categorical_dtype(vector):\n", - "/opt/homebrew/Caskroom/miniconda/base/envs/fst/lib/python3.9/site-packages/seaborn/_oldcore.py:1498: FutureWarning: is_categorical_dtype is deprecated and will be removed in a future version. 
Use isinstance(dtype, CategoricalDtype) instead\n", - " if pd.api.types.is_categorical_dtype(vector):\n", - "/opt/homebrew/Caskroom/miniconda/base/envs/fst/lib/python3.9/site-packages/seaborn/_oldcore.py:1498: FutureWarning: is_categorical_dtype is deprecated and will be removed in a future version. Use isinstance(dtype, CategoricalDtype) instead\n", - " if pd.api.types.is_categorical_dtype(vector):\n" - ] - }, { "data": { - "image/png": "", + "image/png": "", "text/plain": [ - "
" + "
" ] }, "metadata": {}, @@ -723,139 +835,24 @@ } ], "source": [ - "def plot_overall_error(df, answer_columns, labels):\n", - " error_categories = [\"❌ Hallucination\", \"❌ Wrong Answer\"]\n", - " \n", - " # Prepare data for Seaborn\n", - " data = []\n", - " for i, col in enumerate(answer_columns):\n", - " results = evaluate_model(df, col)\n", - " matrix_error = sum([float(results.loc[cat].replace(\"%\", \"\")) for cat in error_categories])\n", - " data.append([labels[i], matrix_error])\n", - "\n", - " df_plot = pd.DataFrame(data, columns=[\"Model\", \"Overall Error\"])\n", - " \n", - " # Create the plot\n", - " plt.figure(figsize=(10, 6))\n", - " ax = sns.barplot(x=\"Overall Error\", y=\"Model\", data=df_plot, palette=\"icefire\")\n", - " \n", - " # Add annotations\n", - " for i, p in enumerate(ax.patches):\n", - " ax.text(p.get_width() - 6, p.get_y() + p.get_height() / 2, f\"{p.get_width():.0f}%\", \n", - " va='center', color='white', fontweight='bold')\n", - "\n", - " plt.xlabel(\"Error (%) - Lower is Better\")\n", - " plt.title(\"Total Error Comparison\")\n", - " plt.show()\n", - "\n", - "# Plot only the overall error\n", - "plot_overall_error(df, answer_columns=[\"generated_answer\", \"ft_generated_answer\"], labels=[\"Base gpt-3.5-turbo-0613 Model\", \"Fine-Tuned Model\"])" - ] - }, - { - "cell_type": "code", - "execution_count": 51, - "metadata": {}, - "outputs": [ - { - "name": "stderr", - "output_type": "stream", - "text": [ - "/opt/homebrew/Caskroom/miniconda/base/envs/fst/lib/python3.9/site-packages/seaborn/_oldcore.py:1498: FutureWarning: is_categorical_dtype is deprecated and will be removed in a future version. Use isinstance(dtype, CategoricalDtype) instead\n", - " if pd.api.types.is_categorical_dtype(vector):\n", - "/opt/homebrew/Caskroom/miniconda/base/envs/fst/lib/python3.9/site-packages/seaborn/_oldcore.py:1498: FutureWarning: is_categorical_dtype is deprecated and will be removed in a future version. 
Use isinstance(dtype, CategoricalDtype) instead\n", - " if pd.api.types.is_categorical_dtype(vector):\n", - "/opt/homebrew/Caskroom/miniconda/base/envs/fst/lib/python3.9/site-packages/seaborn/_oldcore.py:1498: FutureWarning: is_categorical_dtype is deprecated and will be removed in a future version. Use isinstance(dtype, CategoricalDtype) instead\n", - " if pd.api.types.is_categorical_dtype(vector):\n", - "/opt/homebrew/Caskroom/miniconda/base/envs/fst/lib/python3.9/site-packages/seaborn/_oldcore.py:1498: FutureWarning: is_categorical_dtype is deprecated and will be removed in a future version. Use isinstance(dtype, CategoricalDtype) instead\n", - " if pd.api.types.is_categorical_dtype(vector):\n", - "/opt/homebrew/Caskroom/miniconda/base/envs/fst/lib/python3.9/site-packages/seaborn/_oldcore.py:1498: FutureWarning: is_categorical_dtype is deprecated and will be removed in a future version. Use isinstance(dtype, CategoricalDtype) instead\n", - " if pd.api.types.is_categorical_dtype(vector):\n", - "/opt/homebrew/Caskroom/miniconda/base/envs/fst/lib/python3.9/site-packages/seaborn/_oldcore.py:1498: FutureWarning: is_categorical_dtype is deprecated and will be removed in a future version. 
Use isinstance(dtype, CategoricalDtype) instead\n", - " if pd.api.types.is_categorical_dtype(vector):\n", - "/opt/homebrew/Caskroom/miniconda/base/envs/fst/lib/python3.9/site-packages/seaborn/axisgrid.py:118: UserWarning: The figure layout has changed to tight\n", - " self._figure.tight_layout(*args, **kwargs)\n", - "/opt/homebrew/Caskroom/miniconda/base/envs/fst/lib/python3.9/site-packages/seaborn/utils.py:80: UserWarning: Glyph 9989 (\\N{WHITE HEAVY CHECK MARK}) missing from current font.\n", - " fig.canvas.draw()\n", - "/opt/homebrew/Caskroom/miniconda/base/envs/fst/lib/python3.9/site-packages/seaborn/utils.py:80: UserWarning: Glyph 10062 (\\N{NEGATIVE SQUARED CROSS MARK}) missing from current font.\n", - " fig.canvas.draw()\n", - "/opt/homebrew/Caskroom/miniconda/base/envs/fst/lib/python3.9/site-packages/seaborn/utils.py:80: UserWarning: Glyph 10060 (\\N{CROSS MARK}) missing from current font.\n", - " fig.canvas.draw()\n", - "/opt/homebrew/Caskroom/miniconda/base/envs/fst/lib/python3.9/site-packages/IPython/core/pylabtools.py:152: UserWarning: Glyph 9989 (\\N{WHITE HEAVY CHECK MARK}) missing from current font.\n", - " fig.canvas.print_figure(bytes_io, **kw)\n", - "/opt/homebrew/Caskroom/miniconda/base/envs/fst/lib/python3.9/site-packages/IPython/core/pylabtools.py:152: UserWarning: Glyph 10062 (\\N{NEGATIVE SQUARED CROSS MARK}) missing from current font.\n", - " fig.canvas.print_figure(bytes_io, **kw)\n", - "/opt/homebrew/Caskroom/miniconda/base/envs/fst/lib/python3.9/site-packages/IPython/core/pylabtools.py:152: UserWarning: Glyph 10060 (\\N{CROSS MARK}) missing from current font.\n", - " fig.canvas.print_figure(bytes_io, **kw)\n" - ] - }, - { - "data": { - "image/png": "", - "text/plain": [ - "
" - ] - }, - "metadata": {}, - "output_type": "display_data" - } - ], - "source": [ - "def plot_correctness(df, answer_columns, labels):\n", - " error_categories = [\n", - " \"✅ Answered Correctly\",\n", - " \"❎ Skipped\",\n", - " \"❌ Wrong Answer\", \n", - " \"❌ Hallucination\", \n", - " \"✅ I don't know\"\n", - " ]\n", - " annotations = [\n", - " \"✅ Answered Correctly\",\n", - " \"❎ Skipped\",\n", - " \"❌ Wrong Answer\", \n", - " \"❌ Hallucination\", \n", - " \"✅ I don't know\"\n", - " ]\n", - " cat_map = dict(zip(error_categories, annotations)) \n", - " # Prepare data for Seaborn\n", - " data = []\n", - " for i, col in enumerate(answer_columns):\n", - " results = evaluate_model(df, col)\n", - " for category in error_categories:\n", - " matrix_error = float(results.loc[category].replace(\"%\", \"\"))\n", - " data.append([labels[i], cat_map[category], matrix_error])\n", - "\n", - " df_plot = pd.DataFrame(data, columns=[\"Model\", \"Response\", \"Percentage\"])\n", - " \n", - " # Create the plot\n", - " g = sns.catplot(x=\"Percentage\", y=\"Model\", hue=\"Response\", data=df_plot, kind=\"bar\", height=6, aspect=1.5, palette=\"icefire\")\n", - " \n", - " ax = g.facet_axis(0, 0)\n", - " \n", - " # Add annotations\n", - " for i, p in enumerate(ax.patches):\n", - " ax.annotate(f\"{p.get_width():.0f}%\", (p.get_width(), p.get_y() + p.get_height() / 2),\n", - " ha=\"left\", va=\"center\")\n", - " plt.xlabel(\"Percentage (%)\")\n", - " plt.title(\"Answer Distribution\")\n", - " plt.show()\n", - "\n", - "plot_correctness(df, answer_columns=[\"generated_answer\", \"ft_generated_answer\"], labels=[\"Base Model\", \"Fine-Tuned Model\"])" + "evaluator.plot_model_comparison([\"generated_answer\", \"ft_generated_answer\"], scenario=\"idk_expected\", nice_names=[\"Baseline\", \"Fine-Tuned\"])" ] }, { "cell_type": "markdown", "metadata": {}, "source": [ + "Notice that the fine-tuned model has learnt to say \"I don't know\" a lot better than the prompt. 
Or, the model has gotten good at skipping questions.\n", + "\n", "### Observations\n", "\n", "1. The fine-tuned model is better at saying \"I don't know\"\n", - "2. Hallucinations drop from 47% to 7% with fine-tuning\n", - "3. Wrong answers drop from 9% to 3% with fine-tuning\n", + "2. Hallucinations drop from 100% to 0% with fine-tuning\n", + "3. Wrong answers drop from 17% to 6% with fine-tuning\n", "\n", - "**Correct answers also drop from 44% to 32% with fine-tuning** - this is because the fine-tuned model is **more conservative** and says \"I don't know\" more often. This is a good thing because it's better to say \"I don't know\" than to give a wrong answer.\n", + "**Correct answers also drop from 83% to 60% with fine-tuning** - this is because the fine-tuned model is **more conservative** and says \"I don't know\" more often. This is a good thing because it's better to say \"I don't know\" than to give a wrong answer.\n", "\n", - "That said, we want to improve the correctness of the model, even if that increases the hallucinations. We'll use Qdrant and Few-Shot Learning to achieve this." + "That said, we want to improve the correctness of the model, even if that increases the hallucinations. We're looking for a model that is both correct and conservative, striking a balance between the two. We'll use Qdrant and Few-Shot Learning to achieve this." ] }, { @@ -863,11 +860,21 @@ "cell_type": "markdown", "metadata": {}, "source": [ - "# Section B: Few Shot Learning\n", + "**💪 You're 2/3rds of the way there! Keep reading!**\n", "\n", + "# Section B: Few Shot Learning\n", "\n", "We'll select a few examples from the dataset, including cases where the answer is not present in the context. We'll then use these examples to create a prompt that we can use to fine-tune the model. We'll then measure the performance of the fine-tuned model.\n", "\n", + "**What is next?**\n", + "\n", + "5. 
Fine-Tuning OpenAI Model with Qdrant\n", + " 5.1 Embed the Fine-Tuning Data\n", + " 5.2 Embedding the Questions\n", + "6. Using Qdrant to Improve RAG Prompt\n", + "7. Comparison and Results\n", + "\n", + "\n", "## 5. Fine-Tuning OpenAI Model with Qdrant\n", "\n", "So far, we've been using the OpenAI model to answer questions without using examples of the answer. The previous step made it work better on in-context examples, while this one helps it generalize to unseen data, and attempt to learn when to say \"I don't know\" and when to give an answer.\n", @@ -889,16 +896,7 @@ }, { "cell_type": "code", - "execution_count": 38, - "metadata": {}, - "outputs": [], - "source": [ - "# !pip install fastembed" - ] - }, - { - "cell_type": "code", - "execution_count": 39, + "execution_count": null, "metadata": {}, "outputs": [], "source": [ @@ -918,7 +916,7 @@ }, { "cell_type": "code", - "execution_count": 40, + "execution_count": null, "metadata": {}, "outputs": [], "source": [ @@ -928,7 +926,7 @@ "\n", "collection_name = \"squadv2-cookbook\"\n", "\n", - "# # Create the collection\n", + "# # Create the collection, run this only once\n", "# qdrant_client.recreate_collection(\n", "# collection_name=collection_name,\n", "# vectors_config=VectorParams(size=384, distance=Distance.COSINE),\n", @@ -937,17 +935,9 @@ }, { "cell_type": "code", - "execution_count": 41, + "execution_count": null, "metadata": {}, - "outputs": [ - { - "name": "stderr", - "output_type": "stream", - "text": [ - "100%|██████████| 77.7M/77.7M [00:07<00:00, 10.9MiB/s]\n" - ] - } - ], + "outputs": [], "source": [ "from fastembed.embedding import DefaultEmbedding\n", "from typing import List\n", @@ -964,66 +954,76 @@ "cell_type": "markdown", "metadata": {}, "source": [ - "### 5.3 Embedding the Questions\n", + "### 5.2 Embedding the Questions\n", + "\n", + "Next, you'll embed the entire training set questions. 
You'll use the question-to-question similarity to find the most similar questions to the question we're looking for. This is a workflow which is used in RAG to leverage the OpenAI model's ability of in-context learning with more examples. This is what we call Few Shot Learning here.\n", + "\n", + "**❗️⏰ Important Note: This step can take up to 3 hours to complete. Please be patient. If you see Out of Memory errors or Kernel Crashes, please reduce the batch size to 32, restart the kernel and run the notebook again. This code needs to be run only ONCE.**\n", + "\n", + "## Function Breakdown for `generate_points_from_dataframe`\n", "\n", - "We embed the entire training set questions. We'll use the question to question similarity to find the most similar questions to the question we're looking for. This is a workflow which is used in RAG to leverage the OpenAI model ability of incontext learning with more examples. This is what we call Few Shot Learning here.\n", + "\n", + "1. **Initialization**: `batch_size = 512` and `total_batches` set the stage for how many questions will be processed in one go. This is to prevent memory issues. If your machine can handle more, feel free to increase the batch size. If your kernel crashes, reduce the batch size to 32 and try again.\n", + "2. **Progress Bar**: `tqdm` gives you a nice progress bar so you don't fall asleep.\n", + "3. **Batch Loop**: The for-loop iterates through batches. `start_idx` and `end_idx` define the slice of the DataFrame to process.\n", + "4. **Generate Embeddings**: `batch_embeddings = embedding_model.embed(batch, batch_size=batch_size)` - This is where the magic happens. Your questions get turned into embeddings.\n", + "5. **PointStruct Generation**: Using `.progress_apply`, it turns each row into a `PointStruct` object. This includes an ID, the embedding vector, and other metadata.\n", "\n", - "**❗️⏰ Important Note: This step can take upto 3 hours to complete. Please be patient. 
If you see Out of Memory errors or Kernel Crashes, please reduce the batch size to 32, restart the kernel and run the notebook again. This code needs to be run only ONCE.**" + "Returns the list of `PointStruct` objects, which can be used to create a collection in Qdrant." ] }, { "cell_type": "code", - "execution_count": 42, + "execution_count": null, "metadata": {}, "outputs": [], "source": [ - "# def generate_points_from_dataframe(df: pd.DataFrame) -> List[PointStruct]:\n", - "# batch_size = 512\n", - "# questions = df[\"question\"].tolist()\n", - "# total_batches = len(questions) // batch_size + 1\n", + "def generate_points_from_dataframe(df: pd.DataFrame) -> List[PointStruct]:\n", + " batch_size = 512\n", + " questions = df[\"question\"].tolist()\n", + " total_batches = len(questions) // batch_size + 1\n", " \n", - "# pbar = tqdm(total=len(questions), desc=\"Generating embeddings\")\n", + " pbar = tqdm(total=len(questions), desc=\"Generating embeddings\")\n", " \n", - "# # Generate embeddings in batches to improve performance\n", - "# embeddings = []\n", - "# for i in range(total_batches):\n", - "# start_idx = i * batch_size\n", - "# end_idx = min((i + 1) * batch_size, len(questions))\n", - "# batch = questions[start_idx:end_idx]\n", + " # Generate embeddings in batches to improve performance\n", + " embeddings = []\n", + " for i in range(total_batches):\n", + " start_idx = i * batch_size\n", + " end_idx = min((i + 1) * batch_size, len(questions))\n", + " batch = questions[start_idx:end_idx]\n", " \n", - "# batch_embeddings = embedding_model.embed(batch, batch_size=batch_size)\n", - "# embeddings.extend(batch_embeddings)\n", - "# pbar.update(len(batch))\n", + " batch_embeddings = embedding_model.embed(batch, batch_size=batch_size)\n", + " embeddings.extend(batch_embeddings)\n", + " pbar.update(len(batch))\n", " \n", - "# pbar.close()\n", + " pbar.close()\n", " \n", - "# # Convert embeddings to list of lists\n", - "# embeddings_list = [embedding.tolist() for 
embedding in embeddings]\n", + " # Convert embeddings to list of lists\n", + " embeddings_list = [embedding.tolist() for embedding in embeddings]\n", " \n", - "# # Create a temporary DataFrame to hold the embeddings and existing DataFrame columns\n", - "# temp_df = df.copy()\n", - "# temp_df[\"embeddings\"] = embeddings_list\n", - "# temp_df[\"id\"] = temp_df.index\n", + " # Create a temporary DataFrame to hold the embeddings and existing DataFrame columns\n", + " temp_df = df.copy()\n", + " temp_df[\"embeddings\"] = embeddings_list\n", + " temp_df[\"id\"] = temp_df.index\n", " \n", - "# # Generate PointStruct objects using DataFrame apply method\n", - "# points = temp_df.progress_apply(\n", - "# lambda row: PointStruct(\n", - "# id=row[\"id\"],\n", - "# vector=row[\"embeddings\"],\n", - "# payload={\n", - "# \"question\": row[\"question\"],\n", - "# \"title\": row[\"title\"],\n", - "# \"context\": row[\"context\"],\n", - "# \"is_impossible\": row[\"is_impossible\"],\n", - "# \"answers\": row[\"answers\"],\n", - "# },\n", - "# ),\n", - "# axis=1,\n", - "# ).tolist()\n", - "\n", - "# return points\n", - "\n", - "# points = generate_points_from_dataframe(train_df)" + " # Generate PointStruct objects using DataFrame apply method\n", + " points = temp_df.progress_apply(\n", + " lambda row: PointStruct(\n", + " id=row[\"id\"],\n", + " vector=row[\"embeddings\"],\n", + " payload={\n", + " \"question\": row[\"question\"],\n", + " \"title\": row[\"title\"],\n", + " \"context\": row[\"context\"],\n", + " \"is_impossible\": row[\"is_impossible\"],\n", + " \"answers\": row[\"answers\"],\n", + " },\n", + " ),\n", + " axis=1,\n", + " ).tolist()\n", + "\n", + " return points\n", + "\n", + "points = generate_points_from_dataframe(train_df)" ] }, { @@ -1037,14 +1037,14 @@ }, { "cell_type": "code", - "execution_count": 43, + "execution_count": null, "metadata": {}, "outputs": [], "source": [ - "# operation_info = qdrant_client.upsert(\n", - "# collection_name=collection_name, 
wait=True, points=points\n", - "# )\n", - "# print(operation_info)" + "operation_info = qdrant_client.upsert(\n", + " collection_name=collection_name, wait=True, points=points\n", + ")\n", + "print(operation_info)" ] }, { @@ -1066,7 +1066,7 @@ }, { "cell_type": "code", - "execution_count": 84, + "execution_count": null, "metadata": {}, "outputs": [], "source": [ @@ -1154,24 +1154,9 @@ }, { "cell_type": "code", - "execution_count": 85, + "execution_count": null, "metadata": {}, - "outputs": [ - { - "data": { - "application/vnd.jupyter.widget-view+json": { - "model_id": "2d5cd9516e6e44a2b4088c444813d631", - "version_major": 2, - "version_minor": 0 - }, - "text/plain": [ - " 0%| | 0/100 [00:00" ] }, - "execution_count": 91, "metadata": {}, - "output_type": "execute_result" + "output_type": "display_data" } ], "source": [ - "evaluate_model(df, \"ft_generated_answer_few_shot\")" + "evaluator = Evaluator(df)\n", + "evaluator.plot_model_comparison([\"generated_answer\", \"ft_generated_answer\", \"ft_generated_answer_few_shot\"], scenario=\"answer_expected\", nice_names=[\"Baseline\", \"Fine-Tuned\", \"Fine-Tuned with Few-Shot\"])" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "This is quite amazing -- we're able to get the best of both worlds! We're able to get the model to be both correct and conservative: \n", + "\n", + "1. The model is correct 83% of the time -- this is the same as the base model\n", + "2. The model gives the wrong answer only 8% of the time -- down from 17% with the base model" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "Next, let's look at the hallucinations. We want to reduce the hallucinations, but not at the cost of correctness. We want to strike a balance between the two. We've struck a good balance here:\n", + "\n", + "1. The model hallucinates 53% of the time -- down from 100% with the base model\n", + "2. 
The model says \"I don't know\" 47% of the time -- up from NEVER with the base model" ] }, { "cell_type": "code", - "execution_count": 92, + "execution_count": 202, "metadata": {}, "outputs": [ - { - "name": "stderr", - "output_type": "stream", - "text": [ - "/opt/homebrew/Caskroom/miniconda/base/envs/fst/lib/python3.9/site-packages/seaborn/_oldcore.py:1498: FutureWarning: is_categorical_dtype is deprecated and will be removed in a future version. Use isinstance(dtype, CategoricalDtype) instead\n", - " if pd.api.types.is_categorical_dtype(vector):\n", - "/opt/homebrew/Caskroom/miniconda/base/envs/fst/lib/python3.9/site-packages/seaborn/_oldcore.py:1498: FutureWarning: is_categorical_dtype is deprecated and will be removed in a future version. Use isinstance(dtype, CategoricalDtype) instead\n", - " if pd.api.types.is_categorical_dtype(vector):\n", - "/opt/homebrew/Caskroom/miniconda/base/envs/fst/lib/python3.9/site-packages/seaborn/_oldcore.py:1498: FutureWarning: is_categorical_dtype is deprecated and will be removed in a future version. Use isinstance(dtype, CategoricalDtype) instead\n", - " if pd.api.types.is_categorical_dtype(vector):\n", - "/opt/homebrew/Caskroom/miniconda/base/envs/fst/lib/python3.9/site-packages/seaborn/_oldcore.py:1498: FutureWarning: is_categorical_dtype is deprecated and will be removed in a future version. Use isinstance(dtype, CategoricalDtype) instead\n", - " if pd.api.types.is_categorical_dtype(vector):\n", - "/opt/homebrew/Caskroom/miniconda/base/envs/fst/lib/python3.9/site-packages/seaborn/_oldcore.py:1498: FutureWarning: is_categorical_dtype is deprecated and will be removed in a future version. Use isinstance(dtype, CategoricalDtype) instead\n", - " if pd.api.types.is_categorical_dtype(vector):\n", - "/opt/homebrew/Caskroom/miniconda/base/envs/fst/lib/python3.9/site-packages/seaborn/_oldcore.py:1498: FutureWarning: is_categorical_dtype is deprecated and will be removed in a future version. 
Use isinstance(dtype, CategoricalDtype) instead\n", - " if pd.api.types.is_categorical_dtype(vector):\n", - "/opt/homebrew/Caskroom/miniconda/base/envs/fst/lib/python3.9/site-packages/seaborn/axisgrid.py:118: UserWarning: The figure layout has changed to tight\n", - " self._figure.tight_layout(*args, **kwargs)\n", - "/opt/homebrew/Caskroom/miniconda/base/envs/fst/lib/python3.9/site-packages/seaborn/utils.py:80: UserWarning: Glyph 9989 (\\N{WHITE HEAVY CHECK MARK}) missing from current font.\n", - " fig.canvas.draw()\n", - "/opt/homebrew/Caskroom/miniconda/base/envs/fst/lib/python3.9/site-packages/seaborn/utils.py:80: UserWarning: Glyph 10062 (\\N{NEGATIVE SQUARED CROSS MARK}) missing from current font.\n", - " fig.canvas.draw()\n", - "/opt/homebrew/Caskroom/miniconda/base/envs/fst/lib/python3.9/site-packages/seaborn/utils.py:80: UserWarning: Glyph 10060 (\\N{CROSS MARK}) missing from current font.\n", - " fig.canvas.draw()\n", - "/opt/homebrew/Caskroom/miniconda/base/envs/fst/lib/python3.9/site-packages/IPython/core/pylabtools.py:152: UserWarning: Glyph 9989 (\\N{WHITE HEAVY CHECK MARK}) missing from current font.\n", - " fig.canvas.print_figure(bytes_io, **kw)\n", - "/opt/homebrew/Caskroom/miniconda/base/envs/fst/lib/python3.9/site-packages/IPython/core/pylabtools.py:152: UserWarning: Glyph 10062 (\\N{NEGATIVE SQUARED CROSS MARK}) missing from current font.\n", - " fig.canvas.print_figure(bytes_io, **kw)\n", - "/opt/homebrew/Caskroom/miniconda/base/envs/fst/lib/python3.9/site-packages/IPython/core/pylabtools.py:152: UserWarning: Glyph 10060 (\\N{CROSS MARK}) missing from current font.\n", - " fig.canvas.print_figure(bytes_io, **kw)\n" - ] - }, { "data": { - "image/png": "", + "image/png": "", "text/plain": [ - "
" + "
" ] }, "metadata": {}, @@ -1453,14 +1344,16 @@ } ], "source": [ - "plot_correctness(df, answer_columns=[\"generated_answer\", \"ft_generated_answer\", \"ft_generated_answer_few_shot\"], labels=[\"Base Model\", \"Fine-Tuned Model\", \"Few Shot Fine-Tuned Model with Qdrant\"])" + "evaluator.plot_model_comparison([\"generated_answer\", \"ft_generated_answer\", \"ft_generated_answer_few_shot\"], scenario=\"idk_expected\", nice_names=[\"Baseline\", \"Fine-Tuned\", \"Fine-Tuned with Few-Shot\"])" ] }, { "cell_type": "markdown", "metadata": {}, "source": [ - "### Results\n", + "### Results Breakdown\n", + "\n", + "So far, we've looked at the results for each scenario separately, i.e. each scenario summed to 100. Let's look at the results as an aggregate to get a broader sense of how the model is performing:\n", "\n", "Category | Base | Fine-Tuned | Fine-Tuned with Qdrant |\n", "| --- | --- | --- | --- |\n", @@ -1478,14 +1371,14 @@ "### Observations\n", "\n", "#### Compared to base model\n", - "1. The few shot fine-tuned with Qdrant model is as good as the base model at answering questions where the answer is present in the context. 44% of the questions are answered correctly in both cases.\n", - "2. The few shot fine-tuned with Qdrant model is better at saying \"I don't know\" when the answer is not present in the context. 22% of the questions are answered with \"I don't know\" vs 0% for the base model.\n", - "3. The few shot fine-tuned with Qdrant model is better at reducing hallucinations. 25% of the questions are answered with hallucinations vs 47% for the base model.\n", + "1. The few shot fine-tuned with Qdrant model is as good as the base model at answering questions where the answer is present in the context. \n", + "2. The few shot fine-tuned with Qdrant model is better at saying \"I don't know\" when the answer is not present in the context.\n", + "3. 
The few shot fine-tuned with Qdrant model is better at reducing hallucinations.\n", "\n", "\n", "#### Compared to fine-tuned model\n", - "1. The few shot fine-tuned with Qdrant model gets more correct answers than the fine-tuned model: **44% of the questions are answered correctly vs 32%** for the fine-tuned model\n", - "2. The few shot fine-tuned with Qdrant model is better at deciding when to say \"I don't know\" when the answer is not present in the context. **22% of the questions are answered with \"I don't know\" vs 40%** for the fine-tuned model.\n", + "1. The few shot fine-tuned with Qdrant model gets more correct answers than the fine-tuned model: **83% of the questions are answered correctly vs 60%** for the fine-tuned model\n", + "2. The few shot fine-tuned with Qdrant model is better at deciding when to say \"I don't know\" when the answer is not present in the context. **34% skip rate for the plain fine-tuning mode, vs 9% for the few shot fine-tuned with Qdrant model**\n", "\n", "Few Shot Fine-Tuning with Qdrant is a great way to control and steer the performance of your RAG system. 
\n", "\n", From 1fdd15a4de359808c2365a324968ddf4962ee7e1 Mon Sep 17 00:00:00 2001 From: NirantK Date: Tue, 12 Sep 2023 13:10:55 +0530 Subject: [PATCH 35/38] * chore(ft_retrieval_augmented_generation.ipynb): update markdown content * docs(ft_retrieval_augmented_generation.ipynb): add instructions and insights to the results breakdown --- .../ft_retrieval_augmented_generation.ipynb | 22 ++++++++++++------- 1 file changed, 14 insertions(+), 8 deletions(-) diff --git a/examples/fine-tuned_qa/ft_retrieval_augmented_generation.ipynb b/examples/fine-tuned_qa/ft_retrieval_augmented_generation.ipynb index c4fa1269a2..eb9fbdd80a 100644 --- a/examples/fine-tuned_qa/ft_retrieval_augmented_generation.ipynb +++ b/examples/fine-tuned_qa/ft_retrieval_augmented_generation.ipynb @@ -1351,11 +1351,20 @@ "cell_type": "markdown", "metadata": {}, "source": [ + "Few Shot Fine-Tuning with Qdrant is a great way to control and steer the performance of your RAG system. Here, we made the model less conservative compared to zero shot and more confident by using Qdrant to find similar questions. \n", + "\n", + "You can also use Qdrant to make the model more conservative. We did this by giving examples of questions where the answer is not present in the context. \n", + "This is biasing the model to say \"I don't know\" more often. \n", + "\n", + "Similarly, one can also use Qdrant to make the model more confident by giving examples of questions where the answer is present in the context. This biases the model to give an answer more often. The trade-off is that the model will also hallucinate more often.\n", + "\n", + "You can make this trade off by adjusting the training data: distribution of questions and examples, as well as the kind and number of examples you retrieve from Qdrant.\n", + "\n", "### Results Breakdown\n", "\n", "So far, we've looked at the results for each scenario separately, i.e. each scenario summed to 100. 
Let's look at the results as an aggregate to get a broader sense of how the model is performing:\n", "\n", - "Category | Base | Fine-Tuned | Fine-Tuned with Qdrant |\n", + "| Category | Base | Fine-Tuned | Fine-Tuned with Qdrant |\n", "| --- | --- | --- | --- |\n", "| Correct | 44% | 32% | 44% |\n", "| Skipped | 0% | 18% | 5% |\n", @@ -1380,15 +1389,12 @@ "1. The few shot fine-tuned with Qdrant model gets more correct answers than the fine-tuned model: **83% of the questions are answered correctly vs 60%** for the fine-tuned model\n", "2. The few shot fine-tuned with Qdrant model is better at deciding when to say \"I don't know\" when the answer is not present in the context. **34% skip rate for the plain fine-tuning mode, vs 9% for the few shot fine-tuned with Qdrant model**\n", "\n", - "Few Shot Fine-Tuning with Qdrant is a great way to control and steer the performance of your RAG system. \n", - "\n", - "Here, we made the model less conservative and more confident by using Qdrant to find similar questions. \n", "\n", - "One can also use Qdrant to make the model more conservative. We did this by giving examples of questions where the answer is not present in the context. \n", - "This is biasing the model to say \"I don't know\" more often. \n", + "Now, you should be able to:\n", "\n", - "Similarly, one can also use Qdrant to make the model more confident by giving examples of questions where the answer is present in the context. \n", - "This biases the model to give an answer more often. " + "1. Notice the trade-offs between number of correct answers and hallucinations -- and how training dataset choice influences that!\n", + "2. Fine-tune OpenAI models for specific use-cases and use Qdrant to improve the performance of your RAG model\n", + "3. 
Get started on how to evaluate the performance of your RAG model" ] } ], From 12f800cd5d7b211a7e04fe57bdd0ca0b4edb49a7 Mon Sep 17 00:00:00 2001 From: NirantK Date: Tue, 12 Sep 2023 13:25:18 +0530 Subject: [PATCH 36/38] * chore(ft_retrieval_augmented_generation.ipynb): reorganize sections and update section numbering * feat(ft_retrieval_augmented_generation.ipynb): add new section for evaluation * fix(ft_retrieval_augmented_generation.ipynb): fix section numbering and update section --- .../ft_retrieval_augmented_generation.ipynb | 35 ++++++++++++------- 1 file changed, 22 insertions(+), 13 deletions(-) diff --git a/examples/fine-tuned_qa/ft_retrieval_augmented_generation.ipynb b/examples/fine-tuned_qa/ft_retrieval_augmented_generation.ipynb index eb9fbdd80a..be80855d58 100644 --- a/examples/fine-tuned_qa/ft_retrieval_augmented_generation.ipynb +++ b/examples/fine-tuned_qa/ft_retrieval_augmented_generation.ipynb @@ -30,9 +30,14 @@ "5. **Evaluation**: How well does the model perform?\n", "\n", "### Section B: Few-Shot Learning\n", - "5. Using Qdrant to Improve RAG Prompt\n", - "6. Fine-Tuning OpenAI Model with Qdrant\n", - "7. Comparison and Results" + "\n", + "6. Using Qdrant to Improve RAG Prompt\n", + "7. Fine-Tuning OpenAI Model with Qdrant\n", + "8. Evaluation\n", + "\n", + "9. **Conclusion**\n", + " - Aggregate Results\n", + " - Observations" ] }, { @@ -868,14 +873,14 @@ "\n", "**What is next?**\n", "\n", - "5. Fine-Tuning OpenAI Model with Qdrant\n", - " 5.1 Embed the Fine-Tuning Data\n", - " 5.2 Embedding the Questions\n", - "6. Using Qdrant to Improve RAG Prompt\n", - "7. Comparison and Results\n", + "6. Fine-Tuning OpenAI Model with Qdrant\n", + " 6.1 Embed the Fine-Tuning Data\n", + " 6.2 Embedding the Questions\n", + "7. Using Qdrant to Improve RAG Prompt\n", + "8. Evaluation\n", "\n", "\n", - "## 6.
Fine-Tuning OpenAI Model with Qdrant\n", "\n", "So far, we've been using the OpenAI model to answer questions without using examples of the answer. The previous step made it work better on in-context examples, while this one helps it generalize to unseen data, and attempt to learn when to say \"I don't know\" and when to give an answer.\n", "\n", @@ -1194,6 +1199,8 @@ "cell_type": "markdown", "metadata": {}, "source": [ + "### 7.2 Fine-Tune the Model\n", + "\n", "⏰ **Time to run: ~15-30 minutes**" ] }, @@ -1279,9 +1286,7 @@ "cell_type": "markdown", "metadata": {}, "source": [ - "## Conclusion\n", - "\n", - "In this notebook, we've demonstrated how to fine-tune OpenAI models for specific use-cases. We've also demonstrated how to use Qdrant and Few-Shot Learning to improve the performance of the model.\n", + "## 8. Evaluation\n", "\n", "But how well does the model perform? Let's compare the results from the 3 different models we've looked at so far:" ] @@ -1360,7 +1365,11 @@ "\n", "You can make this trade off by adjusting the training data: distribution of questions and examples, as well as the kind and number of examples you retrieve from Qdrant.\n", "\n", - "### Results Breakdown\n", + "## 9. Conclusion\n", + "\n", + "In this notebook, we've demonstrated how to fine-tune OpenAI models for specific use-cases. We've also demonstrated how to use Qdrant and Few-Shot Learning to improve the performance of the model.\n", + "\n", + "### Aggregate Results\n", "\n", "So far, we've looked at the results for each scenario separately, i.e. each scenario summed to 100. 
Let's look at the results as an aggregate to get a broader sense of how the model is performing:\n", "\n", From 49a01468a11fee3011efba9218ab9fc28ba9a56e Mon Sep 17 00:00:00 2001 From: NirantK Date: Tue, 12 Sep 2023 13:34:05 +0530 Subject: [PATCH 37/38] * chore(examples): rename ft_retrieval_augmented_generation.ipynb to ft_retrieval_augmented_generation_qdrant.ipynb --- ...ation.ipynb => ft_retrieval_augmented_generation_qdrant.ipynb} | 0 1 file changed, 0 insertions(+), 0 deletions(-) rename examples/fine-tuned_qa/{ft_retrieval_augmented_generation.ipynb => ft_retrieval_augmented_generation_qdrant.ipynb} (100%) diff --git a/examples/fine-tuned_qa/ft_retrieval_augmented_generation.ipynb b/examples/fine-tuned_qa/ft_retrieval_augmented_generation_qdrant.ipynb similarity index 100% rename from examples/fine-tuned_qa/ft_retrieval_augmented_generation.ipynb rename to examples/fine-tuned_qa/ft_retrieval_augmented_generation_qdrant.ipynb From 21fa0d9399b5e52143de30d4e69c26b88ac5dbbe Mon Sep 17 00:00:00 2001 From: NirantK Date: Tue, 12 Sep 2023 13:39:09 +0530 Subject: [PATCH 38/38] * chore(ft_retrieval_augmented_generation_qdrant.ipynb): fix pip install command * feat(ft_retrieval_augmented_generation_qdrant.ipynb): add cell to set OpenAI and Qdrant keys --- ...etrieval_augmented_generation_qdrant.ipynb | 21 ++++++++++++++++++- 1 file changed, 20 insertions(+), 1 deletion(-) diff --git a/examples/fine-tuned_qa/ft_retrieval_augmented_generation_qdrant.ipynb b/examples/fine-tuned_qa/ft_retrieval_augmented_generation_qdrant.ipynb index be80855d58..25b911bc03 100644 --- a/examples/fine-tuned_qa/ft_retrieval_augmented_generation_qdrant.ipynb +++ b/examples/fine-tuned_qa/ft_retrieval_augmented_generation_qdrant.ipynb @@ -78,7 +78,7 @@ "metadata": {}, "outputs": [], "source": [ - "# !pip install pandas openai tqdm tenacity scikit-learn tiktoken python-dotenv seaborn --upgrade --quiet" + "!pip install pandas openai tqdm tenacity scikit-learn tiktoken python-dotenv seaborn 
--upgrade --quiet" ] }, { @@ -109,6 +109,25 @@ "tqdm.pandas()" ] }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "### Set your keys\n", + "Get your OpenAI keys [here](https://platform.openai.com/account/api-keys) and Qdrant keys after making a free cluster [here](https://cloud.qdrant.io/login)." + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "openai.api_key = \"sk-xxx\"\n", + "os.environ[\"QDRANT_URL\"] = \"https://xxx.cloud.qdrant.io:6333\"\n", + "os.environ[\"QDRANT_API_KEY\"] = \"xxx\"" + ] + }, { "attachments": {}, "cell_type": "markdown",