From 004e0593906c343c9f54e5aa2ea3e23f310ceb27 Mon Sep 17 00:00:00 2001
From: Josh XT <josh@devxt.com>
Date: Tue, 10 Sep 2024 07:05:09 -0400
Subject: [PATCH 1/3] Rewrite Azure OpenAI Provider

---
 agixt/providers/azure.py           | 114 ++++++++++++++++++-----------
 docs/3-Providers/2-Azure OpenAI.md |  16 ++--
 2 files changed, 82 insertions(+), 48 deletions(-)

diff --git a/agixt/providers/azure.py b/agixt/providers/azure.py
index 47277f3a833e..ce9a8b672a45 100644
--- a/agixt/providers/azure.py
+++ b/agixt/providers/azure.py
@@ -1,13 +1,5 @@
 from time import time
-
-try:
-    import openai
-except ImportError:
-    import sys
-    import subprocess
-
-    subprocess.check_call([sys.executable, "-m", "pip", "install", "openai"])
-    import openai
+from openai import AzureOpenAI
 import logging
 
 
@@ -15,47 +7,87 @@ class AzureProvider:
     def __init__(
         self,
         AZURE_API_KEY: str = "",
-        AZURE_OPENAI_ENDPOINT: str = "",
-        DEPLOYMENT_ID: str = "",
-        AZURE_EMBEDDER_DEPLOYMENT_ID: str = "",
-        AI_MODEL: str = "gpt-35-turbo",
+        AZURE_OPENAI_ENDPOINT: str = "https://your-endpoint.openai.azure.com",
+        AZURE_DEPLOYMENT_NAME: str = "gpt-4o",
         AI_TEMPERATURE: float = 0.7,
         AI_TOP_P: float = 0.7,
-        MAX_TOKENS: int = 4096,
+        MAX_TOKENS: int = 120000,
+        WAIT_BETWEEN_REQUESTS: int = 1,
+        WAIT_AFTER_FAILURE: int = 3,
         **kwargs,
     ):
-        openai.api_type = "azure"
-        openai.base_url = AZURE_OPENAI_ENDPOINT
-        openai.api_version = "2023-05-15"
-        openai.api_key = AZURE_API_KEY
         self.requirements = ["openai"]
-        self.DEPLOYMENT_ID = DEPLOYMENT_ID
         self.AZURE_API_KEY = AZURE_API_KEY
-        self.AI_MODEL = AI_MODEL if AI_MODEL else "gpt-35-turbo"
+        self.AZURE_OPENAI_ENDPOINT = AZURE_OPENAI_ENDPOINT
+        self.AI_MODEL = AZURE_DEPLOYMENT_NAME
         self.AI_TEMPERATURE = AI_TEMPERATURE if AI_TEMPERATURE else 0.7
         self.AI_TOP_P = AI_TOP_P if AI_TOP_P else 0.7
-        self.MAX_TOKENS = MAX_TOKENS if MAX_TOKENS else 4096
-        self.AZURE_EMBEDDER_DEPLOYMENT_ID = AZURE_EMBEDDER_DEPLOYMENT_ID
+        self.MAX_TOKENS = MAX_TOKENS if MAX_TOKENS else 120000
+        self.WAIT_AFTER_FAILURE = WAIT_AFTER_FAILURE if WAIT_AFTER_FAILURE else 3
+        self.WAIT_BETWEEN_REQUESTS = (
+            WAIT_BETWEEN_REQUESTS if WAIT_BETWEEN_REQUESTS else 1
+        )
 
     @staticmethod
     def services():
-        return ["llm"]
-
-    async def inference(self, prompt: str, tokens: int = 0, images: list = []) -> str:
-        num_retries = 3
-        messages = [{"role": "system", "content": prompt}]
-        for _ in range(num_retries):
-            try:
-                resp = openai.chat.completions.create(
-                    engine=self.AI_MODEL,
-                    messages=messages,
-                    max_tokens=int(self.MAX_TOKENS),
-                    temperature=float(self.AI_TEMPERATURE),
-                    top_p=float(self.AI_TOP_P),
-                )
-                return resp.choices[0].message.content
+        return ["llm", "vision"]
 
-            except:
-                logging.info("Rate limit exceeded. Retrying after 20 seconds.")
-                time.sleep(20)
-                continue
+    async def inference(self, prompt, tokens: int = 0, images: list = []):
+        if not self.AZURE_OPENAI_ENDPOINT.endswith("/"):
+            self.AZURE_OPENAI_ENDPOINT += "/"
+        openai = AzureOpenAI(
+            api_key=self.AZURE_API_KEY,
+            api_version="2024-02-01",
+            azure_endpoint=self.AZURE_OPENAI_ENDPOINT,
+        )
+        if self.AZURE_API_KEY == "" or self.AZURE_API_KEY == "YOUR_API_KEY":
+            if self.AZURE_OPENAI_ENDPOINT == "https://your-endpoint.openai.azure.com":
+                return "Please go to the Agent Management page to set your Azure OpenAI API key."
+        messages = []
+        if len(images) > 0:
+            messages.append(
+                {"role": "user", "content": [{"type": "text", "text": prompt}]}
+            )
+            for image in images:
+                if image.startswith("http"):
+                    messages[0]["content"].append(
+                        {
+                            "type": "image_url",
+                            "image_url": {
+                                "url": image,
+                            },
+                        }
+                    )
+                else:
+                    file_type = image.split(".")[-1]
+                    with open(image, "rb") as f:
+                        image_base64 = f.read()
+                    messages[0]["content"].append(
+                        {
+                            "type": "image_url",
+                            "image_url": {
+                                "url": f"data:image/{file_type};base64,{image_base64}"
+                            },
+                        }
+                    )
+        else:
+            messages.append({"role": "user", "content": prompt})
+        if int(self.WAIT_BETWEEN_REQUESTS) > 0:
+            time.sleep(int(self.WAIT_BETWEEN_REQUESTS))
+        try:
+            response = openai.chat.completions.create(
+                model=self.AI_MODEL,
+                messages=messages,
+                temperature=float(self.AI_TEMPERATURE),
+                max_tokens=4096,
+                top_p=float(self.AI_TOP_P),
+                n=1,
+                stream=False,
+            )
+            return response.choices[0].message.content
+        except Exception as e:
+            logging.warning(f"Azure OpenAI API Error: {e}")
+            if int(self.WAIT_AFTER_FAILURE) > 0:
+                time.sleep(int(self.WAIT_AFTER_FAILURE))
+                return await self.inference(prompt=prompt, tokens=tokens)
+            return str(response)
diff --git a/docs/3-Providers/2-Azure OpenAI.md b/docs/3-Providers/2-Azure OpenAI.md
index 321bdbca7c45..0a3485b83f04 100644
--- a/docs/3-Providers/2-Azure OpenAI.md	
+++ b/docs/3-Providers/2-Azure OpenAI.md	
@@ -1,17 +1,19 @@
 # Azure OpenAI
+
 - [Azure OpenAI](https://learn.microsoft.com/en-us/azure/cognitive-services/openai/concepts/models)
 - [AGiXT](https://github.com/Josh-XT/AGiXT)
 
 ⚠️ **Please note that using some AI providers, such as OpenAI's API, can be expensive. Monitor your usage carefully to avoid incurring unexpected costs. We're NOT responsible for your usage under any circumstance.**
 
 ## Quick Start Guide
+
+- [Instructions for setting up Azure OpenAI .](https://learn.microsoft.com/en-us/azure/ai-services/openai/how-to/create-resource?pivots=web-portal)
+
 ### Update your agent settings
+
 1. Set `AI_PROVIDER` to `azure`.
 2. Set `AZURE_API_KEY` to your Azure OpenAI API key.
-3. Set `DEPLOYMENT_ID` to your Azure OpenAI deployment ID for your primary model.
-4. Set `EMBEDDER_DEPLOYMENT_ID` to your Azure OpenAI deployment ID for your embedder model.
-5. Set `AZURE_OPENAI_ENDPOINT` to your Azure OpenAI endpoint.
-6. Choose your `AI_MODEL`.  Enter `gpt-3.5-turbo`, `gpt-4`, `gpt-4-32k`, or any other model you may have access to.
-7. Set `AI_TEMPERATURE` to a value between 0 and 1. The higher the value, the more creative the output.
-8. Set `MAX_TOKENS` to the maximum number of tokens to generate. The higher the value, the longer the output.  The maximum for `gpt-3.5-turbo` is 4096, `gpt-4` is 8192, `gpt-4-32k` is 32768, `gpt-3.5-turbo-16k` is 16000.
-
+3. Set `AZURE_OPENAI_ENDPOINT` to your Azure OpenAI endpoint.
+4. Set `AZURE_DEPLOYMENT_NAME` to your Azure OpenAI deployment ID for your primary model.
+5. Set `AI_TEMPERATURE` to a value between 0 and 1. The higher the value, the more creative the output.
+6. Set `MAX_TOKENS` to the maximum number of input tokens. `gpt-4o` allows up to `120,000` input tokens.

From ae88410bb552c3517df7cce9ee77eb190db9893d Mon Sep 17 00:00:00 2001
From: Josh XT <josh@devxt.com>
Date: Tue, 10 Sep 2024 07:23:49 -0400
Subject: [PATCH 2/3] handle potential failures to prevent infinite loop

---
 agixt/providers/azure.py  | 5 +++++
 agixt/providers/openai.py | 4 ++++
 2 files changed, 9 insertions(+)

diff --git a/agixt/providers/azure.py b/agixt/providers/azure.py
index ce9a8b672a45..8e803ca14eeb 100644
--- a/agixt/providers/azure.py
+++ b/agixt/providers/azure.py
@@ -27,6 +27,7 @@ def __init__(
         self.WAIT_BETWEEN_REQUESTS = (
             WAIT_BETWEEN_REQUESTS if WAIT_BETWEEN_REQUESTS else 1
         )
+        self.failures = 0
 
     @staticmethod
     def services():
@@ -39,6 +40,7 @@ async def inference(self, prompt, tokens: int = 0, images: list = []):
             api_key=self.AZURE_API_KEY,
             api_version="2024-02-01",
             azure_endpoint=self.AZURE_OPENAI_ENDPOINT,
+            azure_deployment=self.AI_MODEL,
         )
         if self.AZURE_API_KEY == "" or self.AZURE_API_KEY == "YOUR_API_KEY":
             if self.AZURE_OPENAI_ENDPOINT == "https://your-endpoint.openai.azure.com":
@@ -87,6 +89,9 @@ async def inference(self, prompt, tokens: int = 0, images: list = []):
             return response.choices[0].message.content
         except Exception as e:
             logging.warning(f"Azure OpenAI API Error: {e}")
+            self.failures += 1
+            if self.failures > 3:
+                return "Azure OpenAI API Error: Too many failures."
             if int(self.WAIT_AFTER_FAILURE) > 0:
                 time.sleep(int(self.WAIT_AFTER_FAILURE))
                 return await self.inference(prompt=prompt, tokens=tokens)
diff --git a/agixt/providers/openai.py b/agixt/providers/openai.py
index 2973f05de7eb..e6d138e86eb3 100644
--- a/agixt/providers/openai.py
+++ b/agixt/providers/openai.py
@@ -50,6 +50,7 @@ def __init__(
             TRANSCRIPTION_MODEL if TRANSCRIPTION_MODEL else "whisper-1"
         )
         self.FAILURES = []
+        self.failures = 0
         try:
             self.embedder = OpenAIEmbeddingFunction(
                 model_name="text-embedding-3-small",
@@ -142,6 +143,9 @@ async def inference(self, prompt, tokens: int = 0, images: list = []):
             return response.choices[0].message.content
         except Exception as e:
             logging.info(f"OpenAI API Error: {e}")
+            self.failures += 1
+            if self.failures > 3:
+                return "OpenAI API Error: Too many failures."
             if "," in self.API_URI:
                 self.rotate_uri()
             if int(self.WAIT_AFTER_FAILURE) > 0:

From 231b810db976a8f3187ebd915f66d013833308ff Mon Sep 17 00:00:00 2001
From: Josh XT <josh@devxt.com>
Date: Tue, 10 Sep 2024 07:28:13 -0400
Subject: [PATCH 3/3] Update all docs for providers

---
 docs/3-Providers/0-ezLocalai.md        | 2 +-
 docs/3-Providers/1-Anthropic Claude.md | 7 +++++--
 docs/3-Providers/2-Azure OpenAI.md     | 4 ++--
 docs/3-Providers/3-Google.md           | 1 +
 docs/3-Providers/4-GPT4Free.md         | 2 +-
 docs/3-Providers/5-Hugging Face.md     | 2 +-
 docs/3-Providers/6-OpenAI.md           | 6 ++++--
 7 files changed, 15 insertions(+), 9 deletions(-)

diff --git a/docs/3-Providers/0-ezLocalai.md b/docs/3-Providers/0-ezLocalai.md
index 0fc741e76a84..496573d78336 100644
--- a/docs/3-Providers/0-ezLocalai.md
+++ b/docs/3-Providers/0-ezLocalai.md
@@ -20,7 +20,7 @@ Follow the instructions for setting up ezLocalai at <https://github.com/DevXT-LL
 2. Set `EZLOCALAI_API_KEY` to your API key that you set up with ezLocalai.
 3. Set `EZLOCALAI_API_URL` to the URL that you set up with ezLocalai. The default is `http://YOUR LOCAL IP:8091`.
 4. Set `AI_MODEL` to whichever model you are running with `ezlocalai`.
-5. Set `MAX_TOKENS` to the maximum number of tokens you want the model to generate.
+5. Set `MAX_TOKENS` to the maximum number of input tokens.
 6. Set `AI_TEMPERATURE` to the temperature you want to use for generation. This is a float value between 0 and 1. The default is `1.33`.
 7. Set `AI_TOP_P` to the top_p value you want to use for generation. This is a float value between 0 and 1. The default is `0.95`.
 8. Set `VOICE` to the voice you want to use for the generated audio. The default is `DukeNukem`. You can add cloning TTS voices to `ezlocalai` by putting any ~10 second wav file in the `voices` directory of the `ezlocalai` repository and then setting the `VOICE` variable to the name of the file without the `.wav` extension.
diff --git a/docs/3-Providers/1-Anthropic Claude.md b/docs/3-Providers/1-Anthropic Claude.md
index f580a164ac43..1fae18d44a2d 100644
--- a/docs/3-Providers/1-Anthropic Claude.md	
+++ b/docs/3-Providers/1-Anthropic Claude.md	
@@ -5,8 +5,11 @@
 
 ## Quick Start Guide
 
+- Get your Anthropic API key at <https://console.anthropic.com/settings/keys>.
+
 ### Update your agent settings
 
 1. Set `AI_PROVIDER` to `claude`.
-2. Set `ANTHROPIC_API_KEY` to your API key. Get your Anthroic API key at <https://console.anthropic.com/settings/keys>
-3. Set `AI_MODEL` to `claude-3-opus-20240229`. List of models available at <https://docs.anthropic.com/claude/docs/models-overview> .
+2. Set `ANTHROPIC_API_KEY` to your API key.
+3. Set `AI_MODEL` to `claude-3-5-sonnet-20240620`. List of models available at <https://docs.anthropic.com/claude/docs/models-overview>.
+4. Set `MAX_TOKENS` to the maximum number of input tokens. Default is `200000` for Claude.
diff --git a/docs/3-Providers/2-Azure OpenAI.md b/docs/3-Providers/2-Azure OpenAI.md
index 0a3485b83f04..1ea188ceafc8 100644
--- a/docs/3-Providers/2-Azure OpenAI.md	
+++ b/docs/3-Providers/2-Azure OpenAI.md	
@@ -7,7 +7,7 @@
 
 ## Quick Start Guide
 
-- [Instructions for setting up Azure OpenAI .](https://learn.microsoft.com/en-us/azure/ai-services/openai/how-to/create-resource?pivots=web-portal)
+- Instructions for setting up an Azure OpenAI Deployment can be found [here](https://learn.microsoft.com/en-us/azure/ai-services/openai/how-to/create-resource?pivots=web-portal).
 
 ### Update your agent settings
 
@@ -16,4 +16,4 @@
 3. Set `AZURE_OPENAI_ENDPOINT` to your Azure OpenAI endpoint.
 4. Set `AZURE_DEPLOYMENT_NAME` to your Azure OpenAI deployment ID for your primary model.
 5. Set `AI_TEMPERATURE` to a value between 0 and 1. The higher the value, the more creative the output.
-6. Set `MAX_TOKENS` to the maximum number of input tokens. `gpt-4o` allows up to `120,000` input tokens.
+6. Set `MAX_TOKENS` to the maximum number of input tokens. `gpt-4o` allows up to `120000` input tokens.
diff --git a/docs/3-Providers/3-Google.md b/docs/3-Providers/3-Google.md
index cdfb294df8c1..e4c35dc829fc 100644
--- a/docs/3-Providers/3-Google.md
+++ b/docs/3-Providers/3-Google.md
@@ -10,3 +10,4 @@
 1. Set `AI_PROVIDER` to `google`.
 2. Set `GOOGLE_API_KEY` to your Google API key.
 3. Set your `AI_MODEL` to `gemini-1.0-pro` or whichever version you want to use.
+4. Set `MAX_TOKENS` to the maximum number of input tokens.
diff --git a/docs/3-Providers/4-GPT4Free.md b/docs/3-Providers/4-GPT4Free.md
index f256224ec2e4..b559997e2d66 100644
--- a/docs/3-Providers/4-GPT4Free.md
+++ b/docs/3-Providers/4-GPT4Free.md
@@ -14,4 +14,4 @@ We do not know where your data goes when you use GPT4Free. We do not know if it
 1. Set `AI_PROVIDER` to `gpt4free`
 2. Set `AI_MODEL` to `gpt-4` or `gpt-3.5-turbo`.
 3. Set `AI_TEMPERATURE` to a value between 0 and 1. The higher the value, the more creative the output.
-4. Set `MAX_TOKENS` to the maximum number of tokens to generate. The higher the value, the longer the output.
+4. Set `MAX_TOKENS` to the maximum number of input tokens.
diff --git a/docs/3-Providers/5-Hugging Face.md b/docs/3-Providers/5-Hugging Face.md
index 9ffb60c8fd7e..37d7a160ecaa 100644
--- a/docs/3-Providers/5-Hugging Face.md	
+++ b/docs/3-Providers/5-Hugging Face.md	
@@ -13,4 +13,4 @@ _Note: AI_MODEL should stay `default` unless there is a folder in `prompts` spec
 2. Set `MODEL_PATH` to the path of your llama.cpp model (for docker containers `models/` is mapped to `/model`)
 3. Set `AI_MODEL` to `default` or the name of the model from the `prompts` folder.
 4. Set `AI_TEMPERATURE` to a value between 0 and 1. The higher the value, the more creative the output.
-5. Set `MAX_TOKENS` to the maximum number of tokens to generate. The higher the value, the longer the output.
+5. Set `MAX_TOKENS` to the maximum number of input tokens.
diff --git a/docs/3-Providers/6-OpenAI.md b/docs/3-Providers/6-OpenAI.md
index 62fa764722ee..d4e817bc2a44 100644
--- a/docs/3-Providers/6-OpenAI.md
+++ b/docs/3-Providers/6-OpenAI.md
@@ -7,10 +7,12 @@
 
 ## Quick Start Guide
 
+- Instructions for setting up an OpenAI API key can be found [here](https://platform.openai.com/docs/quickstart).
+
 ### Update your agent settings
 
 1. Set `AI_PROVIDER` to `openai`.
 2. Set `OPENAI_API_KEY` to your OpenAI API key.
-3. Set `AI_MODEL` to `gpt-3.5-turbo` for ChatGPT.
+3. Set `AI_MODEL` to `gpt-4o` or your preferred OpenAI model.
 4. Set `AI_TEMPERATURE` to a value between 0 and 1. The higher the value, the more creative the output.
-5. Set `MAX_TOKENS` to the maximum number of tokens to generate. The higher the value, the longer the output.  The maximum for `gpt-3.5-turbo` is 4000, `gpt-4` is 8000, `gpt-3.5-turbo-16k` is 16000.
+5. Set `MAX_TOKENS` to the maximum number of input tokens. `gpt-4o` allows up to `120000` input tokens.