Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Rewrite Azure OpenAI Provider #1244

Merged
merged 3 commits into from
Sep 10, 2024
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
119 changes: 78 additions & 41 deletions agixt/providers/azure.py
Original file line number Diff line number Diff line change
@@ -1,61 +1,98 @@
import base64
import logging
from time import sleep, time

try:
    import openai
except ImportError:
    import subprocess
    import sys

    subprocess.check_call([sys.executable, "-m", "pip", "install", "openai"])
    import openai
from openai import AzureOpenAI


class AzureProvider:
    """LLM/vision provider backed by an Azure OpenAI chat-completion deployment.

    NOTE(review): reconstructed from a PR diff in which old and new lines were
    interleaved; this is the post-rewrite version with the defects fixed that
    are called out in inline comments below.
    """

    def __init__(
        self,
        AZURE_API_KEY: str = "",
        AZURE_OPENAI_ENDPOINT: str = "https://your-endpoint.openai.azure.com",
        AZURE_DEPLOYMENT_NAME: str = "gpt-4o",
        AI_TEMPERATURE: float = 0.7,
        AI_TOP_P: float = 0.7,
        MAX_TOKENS: int = 120000,
        WAIT_BETWEEN_REQUESTS: int = 1,
        WAIT_AFTER_FAILURE: int = 3,
        **kwargs,
    ):
        self.requirements = ["openai"]
        self.AZURE_API_KEY = AZURE_API_KEY
        self.AZURE_OPENAI_ENDPOINT = AZURE_OPENAI_ENDPOINT
        # The Azure deployment name doubles as the model identifier.
        self.AI_MODEL = AZURE_DEPLOYMENT_NAME
        # Falsy/empty settings fall back to the documented defaults.
        self.AI_TEMPERATURE = AI_TEMPERATURE if AI_TEMPERATURE else 0.7
        self.AI_TOP_P = AI_TOP_P if AI_TOP_P else 0.7
        # MAX_TOKENS is the *input* token budget; the completion cap is
        # hard-coded in inference() below.
        self.MAX_TOKENS = MAX_TOKENS if MAX_TOKENS else 120000
        self.WAIT_AFTER_FAILURE = WAIT_AFTER_FAILURE if WAIT_AFTER_FAILURE else 3
        self.WAIT_BETWEEN_REQUESTS = (
            WAIT_BETWEEN_REQUESTS if WAIT_BETWEEN_REQUESTS else 1
        )
        # Counts consecutive API failures across retries.
        self.failures = 0

    @staticmethod
    def services():
        """Capabilities this provider exposes to the framework."""
        return ["llm", "vision"]

    async def inference(self, prompt, tokens: int = 0, images: list = []):
        """Run a chat completion; `images` may hold URLs or local file paths.

        Returns the model's reply text, or a human-readable error string after
        repeated failures.
        """
        # BUG FIX: detect an unconfigured agent BEFORE normalizing the
        # endpoint. The original appended a trailing "/" first, so the
        # placeholder-endpoint comparison below could never match.
        if self.AZURE_API_KEY in ("", "YOUR_API_KEY") and (
            self.AZURE_OPENAI_ENDPOINT.rstrip("/")
            == "https://your-endpoint.openai.azure.com"
        ):
            return "Please go to the Agent Management page to set your Azure OpenAI API key."
        if not self.AZURE_OPENAI_ENDPOINT.endswith("/"):
            self.AZURE_OPENAI_ENDPOINT += "/"
        client = AzureOpenAI(
            api_key=self.AZURE_API_KEY,
            api_version="2024-02-01",
            azure_endpoint=self.AZURE_OPENAI_ENDPOINT,
            azure_deployment=self.AI_MODEL,
        )
        messages = []
        if images:
            messages.append(
                {"role": "user", "content": [{"type": "text", "text": prompt}]}
            )
            for image in images:
                if image.startswith("http"):
                    messages[0]["content"].append(
                        {"type": "image_url", "image_url": {"url": image}}
                    )
                else:
                    file_type = image.split(".")[-1]
                    # BUG FIX: the diff embedded raw bytes in the data URL
                    # (f-string of a bytes object); the API requires the
                    # base64 text encoding of the file contents.
                    with open(image, "rb") as f:
                        image_base64 = base64.b64encode(f.read()).decode("utf-8")
                    messages[0]["content"].append(
                        {
                            "type": "image_url",
                            "image_url": {
                                "url": f"data:image/{file_type};base64,{image_base64}"
                            },
                        }
                    )
        else:
            messages.append({"role": "user", "content": prompt})
        # BUG FIX: the file imports `from time import time`, so the original
        # `time.sleep(...)` raised AttributeError; use time.sleep via
        # `from time import sleep`.
        if int(self.WAIT_BETWEEN_REQUESTS) > 0:
            sleep(int(self.WAIT_BETWEEN_REQUESTS))
        try:
            response = client.chat.completions.create(
                model=self.AI_MODEL,
                messages=messages,
                temperature=float(self.AI_TEMPERATURE),
                max_tokens=4096,  # completion cap; MAX_TOKENS governs input size
                top_p=float(self.AI_TOP_P),
                n=1,
                stream=False,
            )
            # Reset so old transient errors don't count against future calls.
            self.failures = 0
            return response.choices[0].message.content
        except Exception as e:
            logging.warning(f"Azure OpenAI API Error: {e}")
            self.failures += 1
            if self.failures > 3:
                return "Azure OpenAI API Error: Too many failures."
            if int(self.WAIT_AFTER_FAILURE) > 0:
                sleep(int(self.WAIT_AFTER_FAILURE))
            # BUG FIX: the retry previously dropped the images argument.
            # (Unreachable trailing `return str(response)` removed — it
            # referenced a possibly-unbound name.)
            return await self.inference(prompt=prompt, tokens=tokens, images=images)
4 changes: 4 additions & 0 deletions agixt/providers/openai.py
Original file line number Diff line number Diff line change
Expand Up @@ -50,6 +50,7 @@ def __init__(
TRANSCRIPTION_MODEL if TRANSCRIPTION_MODEL else "whisper-1"
)
self.FAILURES = []
self.failures = 0
try:
self.embedder = OpenAIEmbeddingFunction(
model_name="text-embedding-3-small",
Expand Down Expand Up @@ -142,6 +143,9 @@ async def inference(self, prompt, tokens: int = 0, images: list = []):
return response.choices[0].message.content
except Exception as e:
logging.info(f"OpenAI API Error: {e}")
self.failures += 1
if self.failures > 3:
return "OpenAI API Error: Too many failures."
if "," in self.API_URI:
self.rotate_uri()
if int(self.WAIT_AFTER_FAILURE) > 0:
Expand Down
2 changes: 1 addition & 1 deletion docs/3-Providers/0-ezLocalai.md
Original file line number Diff line number Diff line change
Expand Up @@ -20,7 +20,7 @@ Follow the instructions for setting up ezLocalai at <https://github.com/DevXT-LL
2. Set `EZLOCALAI_API_KEY` to your API key that you set up with ezLocalai.
3. Set `EZLOCALAI_API_URL` to the URL that you set up with ezLocalai. The default is `http://YOUR LOCAL IP:8091`.
4. Set `AI_MODEL` to whichever model you are running with `ezlocalai`.
5. Set `MAX_TOKENS` to the maximum number of tokens you want the model to generate.
5. Set `MAX_TOKENS` to the maximum number of input tokens.
6. Set `AI_TEMPERATURE` to the temperature you want to use for generation. This is a float value between 0 and 1. The default is `1.33`.
7. Set `AI_TOP_P` to the top_p value you want to use for generation. This is a float value between 0 and 1. The default is `0.95`.
8. Set `VOICE` to the voice you want to use for the generated audio. The default is `DukeNukem`. You can add cloning TTS voices to `ezlocalai` by putting any ~10 second wav file in the `voices` directory of the `ezlocalai` repository and then setting the `VOICE` variable to the name of the file without the `.wav` extension.
7 changes: 5 additions & 2 deletions docs/3-Providers/1-Anthropic Claude.md
Original file line number Diff line number Diff line change
Expand Up @@ -5,8 +5,11 @@

## Quick Start Guide

- Get your Anthropic API key at <https://console.anthropic.com/settings/keys>.

### Update your agent settings

1. Set `AI_PROVIDER` to `claude`.
2. Set `ANTHROPIC_API_KEY` to your API key. Get your Anthroic API key at <https://console.anthropic.com/settings/keys>
3. Set `AI_MODEL` to `claude-3-opus-20240229`. List of models available at <https://docs.anthropic.com/claude/docs/models-overview> .
2. Set `ANTHROPIC_API_KEY` to your API key.
3. Set `AI_MODEL` to `claude-3-5-sonnet-20240620`. List of models available at <https://docs.anthropic.com/claude/docs/models-overview> .
4. Set `MAX_TOKENS` to the maximum number of input tokens. Default is `200000` for Claude.
16 changes: 9 additions & 7 deletions docs/3-Providers/2-Azure OpenAI.md
Original file line number Diff line number Diff line change
@@ -1,17 +1,19 @@
# Azure OpenAI

- [Azure OpenAI](https://learn.microsoft.com/en-us/azure/cognitive-services/openai/concepts/models)
- [AGiXT](https://github.com/Josh-XT/AGiXT)

⚠️ **Please note that using some AI providers, such as OpenAI's API, can be expensive. Monitor your usage carefully to avoid incurring unexpected costs. We're NOT responsible for your usage under any circumstance.**

## Quick Start Guide

- Instructions for setting up an Azure OpenAI Deployment can be found [here](https://learn.microsoft.com/en-us/azure/ai-services/openai/how-to/create-resource?pivots=web-portal).

### Update your agent settings

1. Set `AI_PROVIDER` to `azure`.
2. Set `AZURE_API_KEY` to your Azure OpenAI API key.
3. Set `DEPLOYMENT_ID` to your Azure OpenAI deployment ID for your primary model.
4. Set `EMBEDDER_DEPLOYMENT_ID` to your Azure OpenAI deployment ID for your embedder model.
5. Set `AZURE_OPENAI_ENDPOINT` to your Azure OpenAI endpoint.
6. Choose your `AI_MODEL`. Enter `gpt-3.5-turbo`, `gpt-4`, `gpt-4-32k`, or any other model you may have access to.
7. Set `AI_TEMPERATURE` to a value between 0 and 1. The higher the value, the more creative the output.
8. Set `MAX_TOKENS` to the maximum number of tokens to generate. The higher the value, the longer the output. The maximum for `gpt-3.5-turbo` is 4096, `gpt-4` is 8192, `gpt-4-32k` is 32768, `gpt-3.5-turbo-16k` is 16000.

3. Set `AZURE_OPENAI_ENDPOINT` to your Azure OpenAI endpoint.
4. Set `AZURE_DEPLOYMENT_NAME` to your Azure OpenAI deployment ID for your primary model.
5. Set `AI_TEMPERATURE` to a value between 0 and 1. The higher the value, the more creative the output.
6. Set `MAX_TOKENS` to the maximum number of input tokens. `gpt-4o` allows up to `120000` input tokens.
1 change: 1 addition & 0 deletions docs/3-Providers/3-Google.md
Original file line number Diff line number Diff line change
Expand Up @@ -10,3 +10,4 @@
1. Set `AI_PROVIDER` to `google`.
2. Set `GOOGLE_API_KEY` to your Google API key.
3. Set your `AI_MODEL` to `gemini-1.0-pro` or whichever version you want to use.
4. Set `MAX_TOKENS` to the maximum number of input tokens.
2 changes: 1 addition & 1 deletion docs/3-Providers/4-GPT4Free.md
Original file line number Diff line number Diff line change
Expand Up @@ -14,4 +14,4 @@ We do not know where your data goes when you use GPT4Free. We do not know if it
1. Set `AI_PROVIDER` to `gpt4free`
2. Set `AI_MODEL` to `gpt-4` or `gpt-3.5-turbo`.
3. Set `AI_TEMPERATURE` to a value between 0 and 1. The higher the value, the more creative the output.
4. Set `MAX_TOKENS` to the maximum number of tokens to generate. The higher the value, the longer the output.
4. Set `MAX_TOKENS` to the maximum number of input tokens.
2 changes: 1 addition & 1 deletion docs/3-Providers/5-Hugging Face.md
Original file line number Diff line number Diff line change
Expand Up @@ -13,4 +13,4 @@ _Note: AI_MODEL should stay `default` unless there is a folder in `prompts` spec
2. Set `MODEL_PATH` to the path of your llama.cpp model (for docker containers `models/` is mapped to `/model`)
3. Set `AI_MODEL` to `default` or the name of the model from the `prompts` folder.
4. Set `AI_TEMPERATURE` to a value between 0 and 1. The higher the value, the more creative the output.
5. Set `MAX_TOKENS` to the maximum number of tokens to generate. The higher the value, the longer the output.
5. Set `MAX_TOKENS` to the maximum number of input tokens.
6 changes: 4 additions & 2 deletions docs/3-Providers/6-OpenAI.md
Original file line number Diff line number Diff line change
Expand Up @@ -7,10 +7,12 @@

## Quick Start Guide

- Instructions for setting up an OpenAI API key can be found [here](https://platform.openai.com/docs/quickstart).

### Update your agent settings

1. Set `AI_PROVIDER` to `openai`.
2. Set `OPENAI_API_KEY` to your OpenAI API key.
3. Set `AI_MODEL` to `gpt-3.5-turbo` for ChatGPT.
3. Set `AI_MODEL` to `gpt-4o` or your preferred OpenAI model.
4. Set `AI_TEMPERATURE` to a value between 0 and 1. The higher the value, the more creative the output.
5. Set `MAX_TOKENS` to the maximum number of tokens to generate. The higher the value, the longer the output. The maximum for `gpt-3.5-turbo` is 4000, `gpt-4` is 8000, `gpt-3.5-turbo-16k` is 16000.
5. Set `MAX_TOKENS` to the maximum number of input tokens. `gpt-4o` allows up to `120000` input tokens.
Loading