Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Rewrite Azure OpenAI Provider #1244

Merged
merged 3 commits into from
Sep 10, 2024
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
119 changes: 78 additions & 41 deletions agixt/providers/azure.py
Original file line number Diff line number Diff line change
@@ -1,61 +1,98 @@
import base64
import logging
from time import sleep, time

try:
    import openai
except ImportError:
    import subprocess
    import sys

    subprocess.check_call([sys.executable, "-m", "pip", "install", "openai"])
    import openai
from openai import AzureOpenAI


class AzureProvider:
    """LLM/vision provider backed by an Azure OpenAI chat-completion deployment.

    NOTE(review): reconstructed from a PR diff in which old and new lines were
    interleaved; this is the post-rewrite version with the defects fixed that
    are called out in inline comments below.
    """

    def __init__(
        self,
        AZURE_API_KEY: str = "",
        AZURE_OPENAI_ENDPOINT: str = "https://your-endpoint.openai.azure.com",
        AZURE_DEPLOYMENT_NAME: str = "gpt-4o",
        AI_TEMPERATURE: float = 0.7,
        AI_TOP_P: float = 0.7,
        MAX_TOKENS: int = 120000,
        WAIT_BETWEEN_REQUESTS: int = 1,
        WAIT_AFTER_FAILURE: int = 3,
        **kwargs,
    ):
        self.requirements = ["openai"]
        self.AZURE_API_KEY = AZURE_API_KEY
        self.AZURE_OPENAI_ENDPOINT = AZURE_OPENAI_ENDPOINT
        # The Azure deployment name doubles as the model identifier.
        self.AI_MODEL = AZURE_DEPLOYMENT_NAME
        # Falsy/empty settings fall back to the documented defaults.
        self.AI_TEMPERATURE = AI_TEMPERATURE if AI_TEMPERATURE else 0.7
        self.AI_TOP_P = AI_TOP_P if AI_TOP_P else 0.7
        # MAX_TOKENS is the *input* token budget; the completion cap is
        # hard-coded in inference() below.
        self.MAX_TOKENS = MAX_TOKENS if MAX_TOKENS else 120000
        self.WAIT_AFTER_FAILURE = WAIT_AFTER_FAILURE if WAIT_AFTER_FAILURE else 3
        self.WAIT_BETWEEN_REQUESTS = (
            WAIT_BETWEEN_REQUESTS if WAIT_BETWEEN_REQUESTS else 1
        )
        # Counts consecutive API failures across retries.
        self.failures = 0

    @staticmethod
    def services():
        """Capabilities this provider exposes to the framework."""
        return ["llm", "vision"]

    async def inference(self, prompt, tokens: int = 0, images: list = []):
        """Run a chat completion; `images` may hold URLs or local file paths.

        Returns the model's reply text, or a human-readable error string after
        repeated failures.
        """
        # BUG FIX: detect an unconfigured agent BEFORE normalizing the
        # endpoint. The original appended a trailing "/" first, so the
        # placeholder-endpoint comparison below could never match.
        if self.AZURE_API_KEY in ("", "YOUR_API_KEY") and (
            self.AZURE_OPENAI_ENDPOINT.rstrip("/")
            == "https://your-endpoint.openai.azure.com"
        ):
            return "Please go to the Agent Management page to set your Azure OpenAI API key."
        if not self.AZURE_OPENAI_ENDPOINT.endswith("/"):
            self.AZURE_OPENAI_ENDPOINT += "/"
        client = AzureOpenAI(
            api_key=self.AZURE_API_KEY,
            api_version="2024-02-01",
            azure_endpoint=self.AZURE_OPENAI_ENDPOINT,
            azure_deployment=self.AI_MODEL,
        )
        messages = []
        if images:
            messages.append(
                {"role": "user", "content": [{"type": "text", "text": prompt}]}
            )
            for image in images:
                if image.startswith("http"):
                    messages[0]["content"].append(
                        {"type": "image_url", "image_url": {"url": image}}
                    )
                else:
                    file_type = image.split(".")[-1]
                    # BUG FIX: the diff embedded raw bytes in the data URL
                    # (f-string of a bytes object); the API requires the
                    # base64 text encoding of the file contents.
                    with open(image, "rb") as f:
                        image_base64 = base64.b64encode(f.read()).decode("utf-8")
                    messages[0]["content"].append(
                        {
                            "type": "image_url",
                            "image_url": {
                                "url": f"data:image/{file_type};base64,{image_base64}"
                            },
                        }
                    )
        else:
            messages.append({"role": "user", "content": prompt})
        # BUG FIX: the file imports `from time import time`, so the original
        # `time.sleep(...)` raised AttributeError; use time.sleep via
        # `from time import sleep`.
        if int(self.WAIT_BETWEEN_REQUESTS) > 0:
            sleep(int(self.WAIT_BETWEEN_REQUESTS))
        try:
            response = client.chat.completions.create(
                model=self.AI_MODEL,
                messages=messages,
                temperature=float(self.AI_TEMPERATURE),
                max_tokens=4096,  # completion cap; MAX_TOKENS governs input size
                top_p=float(self.AI_TOP_P),
                n=1,
                stream=False,
            )
            # Reset so old transient errors don't count against future calls.
            self.failures = 0
            return response.choices[0].message.content
        except Exception as e:
            logging.warning(f"Azure OpenAI API Error: {e}")
            self.failures += 1
            if self.failures > 3:
                return "Azure OpenAI API Error: Too many failures."
            if int(self.WAIT_AFTER_FAILURE) > 0:
                sleep(int(self.WAIT_AFTER_FAILURE))
            # BUG FIX: the retry previously dropped the images argument.
            # (Unreachable trailing `return str(response)` removed — it
            # referenced a possibly-unbound name.)
            return await self.inference(prompt=prompt, tokens=tokens, images=images)
4 changes: 4 additions & 0 deletions agixt/providers/openai.py
Original file line number Diff line number Diff line change
Expand Up @@ -50,6 +50,7 @@ def __init__(
TRANSCRIPTION_MODEL if TRANSCRIPTION_MODEL else "whisper-1"
)
self.FAILURES = []
self.failures = 0
try:
self.embedder = OpenAIEmbeddingFunction(
model_name="text-embedding-3-small",
Expand Down Expand Up @@ -142,6 +143,9 @@ async def inference(self, prompt, tokens: int = 0, images: list = []):
return response.choices[0].message.content
except Exception as e:
logging.info(f"OpenAI API Error: {e}")
self.failures += 1
if self.failures > 3:
return "OpenAI API Error: Too many failures."
if "," in self.API_URI:
self.rotate_uri()
if int(self.WAIT_AFTER_FAILURE) > 0:
Expand Down
2 changes: 1 addition & 1 deletion docs/3-Providers/0-ezLocalai.md
Original file line number Diff line number Diff line change
Expand Up @@ -20,7 +20,7 @@ Follow the instructions for setting up ezLocalai at <https://github.com/DevXT-LL
2. Set `EZLOCALAI_API_KEY` to your API key that you set up with ezLocalai.
3. Set `EZLOCALAI_API_URL` to the URL that you set up with ezLocalai. The default is `http://YOUR LOCAL IP:8091`.
4. Set `AI_MODEL` to whichever model you are running with `ezlocalai`.
5. Set `MAX_TOKENS` to the maximum number of tokens you want the model to generate.
5. Set `MAX_TOKENS` to the maximum number of input tokens.
6. Set `AI_TEMPERATURE` to the temperature you want to use for generation. This is a float value between 0 and 1. The default is `1.33`.
7. Set `AI_TOP_P` to the top_p value you want to use for generation. This is a float value between 0 and 1. The default is `0.95`.
8. Set `VOICE` to the voice you want to use for the generated audio. The default is `DukeNukem`. You can add cloning TTS voices to `ezlocalai` by putting any ~10 second wav file in the `voices` directory of the `ezlocalai` repository and then setting the `VOICE` variable to the name of the file without the `.wav` extension.
7 changes: 5 additions & 2 deletions docs/3-Providers/1-Anthropic Claude.md
Original file line number Diff line number Diff line change
Expand Up @@ -5,8 +5,11 @@

## Quick Start Guide

- Get your Anthropic API key at <https://console.anthropic.com/settings/keys>.

### Update your agent settings

1. Set `AI_PROVIDER` to `claude`.
2. Set `ANTHROPIC_API_KEY` to your API key. Get your Anthroic API key at <https://console.anthropic.com/settings/keys>
3. Set `AI_MODEL` to `claude-3-opus-20240229`. List of models available at <https://docs.anthropic.com/claude/docs/models-overview> .
2. Set `ANTHROPIC_API_KEY` to your API key.
3. Set `AI_MODEL` to `claude-3-5-sonnet-20240620`. List of models available at <https://docs.anthropic.com/claude/docs/models-overview> .
4. Set `MAX_TOKENS` to the maximum number of input tokens. Default is `200000` for Claude.
16 changes: 9 additions & 7 deletions docs/3-Providers/2-Azure OpenAI.md
Original file line number Diff line number Diff line change
@@ -1,17 +1,19 @@
# Azure OpenAI

- [Azure OpenAI](https://learn.microsoft.com/en-us/azure/cognitive-services/openai/concepts/models)
- [AGiXT](https://github.com/Josh-XT/AGiXT)

⚠️ **Please note that using some AI providers, such as OpenAI's API, can be expensive. Monitor your usage carefully to avoid incurring unexpected costs. We're NOT responsible for your usage under any circumstance.**

## Quick Start Guide

- Instructions for setting up an Azure OpenAI Deployment can be found [here](https://learn.microsoft.com/en-us/azure/ai-services/openai/how-to/create-resource?pivots=web-portal).

### Update your agent settings

1. Set `AI_PROVIDER` to `azure`.
2. Set `AZURE_API_KEY` to your Azure OpenAI API key.
3. Set `DEPLOYMENT_ID` to your Azure OpenAI deployment ID for your primary model.
4. Set `EMBEDDER_DEPLOYMENT_ID` to your Azure OpenAI deployment ID for your embedder model.
5. Set `AZURE_OPENAI_ENDPOINT` to your Azure OpenAI endpoint.
6. Choose your `AI_MODEL`. Enter `gpt-3.5-turbo`, `gpt-4`, `gpt-4-32k`, or any other model you may have access to.
7. Set `AI_TEMPERATURE` to a value between 0 and 1. The higher the value, the more creative the output.
8. Set `MAX_TOKENS` to the maximum number of tokens to generate. The higher the value, the longer the output. The maximum for `gpt-3.5-turbo` is 4096, `gpt-4` is 8192, `gpt-4-32k` is 32768, `gpt-3.5-turbo-16k` is 16000.

3. Set `AZURE_OPENAI_ENDPOINT` to your Azure OpenAI endpoint.
4. Set `AZURE_DEPLOYMENT_NAME` to your Azure OpenAI deployment ID for your primary model.
5. Set `AI_TEMPERATURE` to a value between 0 and 1. The higher the value, the more creative the output.
6. Set `MAX_TOKENS` to the maximum number of input tokens. `gpt-4o` allows up to `120000` input tokens.
1 change: 1 addition & 0 deletions docs/3-Providers/3-Google.md
Original file line number Diff line number Diff line change
Expand Up @@ -10,3 +10,4 @@
1. Set `AI_PROVIDER` to `google`.
2. Set `GOOGLE_API_KEY` to your Google API key.
3. Set your `AI_MODEL` to `gemini-1.0-pro` or whichever version you want to use.
4. Set `MAX_TOKENS` to the maximum number of input tokens.
2 changes: 1 addition & 1 deletion docs/3-Providers/4-GPT4Free.md
Original file line number Diff line number Diff line change
Expand Up @@ -14,4 +14,4 @@ We do not know where your data goes when you use GPT4Free. We do not know if it
1. Set `AI_PROVIDER` to `gpt4free`
2. Set `AI_MODEL` to `gpt-4` or `gpt-3.5-turbo`.
3. Set `AI_TEMPERATURE` to a value between 0 and 1. The higher the value, the more creative the output.
4. Set `MAX_TOKENS` to the maximum number of tokens to generate. The higher the value, the longer the output.
4. Set `MAX_TOKENS` to the maximum number of input tokens.
2 changes: 1 addition & 1 deletion docs/3-Providers/5-Hugging Face.md
Original file line number Diff line number Diff line change
Expand Up @@ -13,4 +13,4 @@ _Note: AI_MODEL should stay `default` unless there is a folder in `prompts` spec
2. Set `MODEL_PATH` to the path of your llama.cpp model (for docker containers `models/` is mapped to `/model`)
3. Set `AI_MODEL` to `default` or the name of the model from the `prompts` folder.
4. Set `AI_TEMPERATURE` to a value between 0 and 1. The higher the value, the more creative the output.
5. Set `MAX_TOKENS` to the maximum number of tokens to generate. The higher the value, the longer the output.
5. Set `MAX_TOKENS` to the maximum number of input tokens.
6 changes: 4 additions & 2 deletions docs/3-Providers/6-OpenAI.md
Original file line number Diff line number Diff line change
Expand Up @@ -7,10 +7,12 @@

## Quick Start Guide

- Instructions for setting up an OpenAI API key can be found [here](https://platform.openai.com/docs/quickstart).

### Update your agent settings

1. Set `AI_PROVIDER` to `openai`.
2. Set `OPENAI_API_KEY` to your OpenAI API key.
3. Set `AI_MODEL` to `gpt-3.5-turbo` for ChatGPT.
3. Set `AI_MODEL` to `gpt-4o` or your preferred OpenAI model.
4. Set `AI_TEMPERATURE` to a value between 0 and 1. The higher the value, the more creative the output.
5. Set `MAX_TOKENS` to the maximum number of tokens to generate. The higher the value, the longer the output. The maximum for `gpt-3.5-turbo` is 4000, `gpt-4` is 8000, `gpt-3.5-turbo-16k` is 16000.
5. Set `MAX_TOKENS` to the maximum number of input tokens. `gpt-4o` allows up to `120000` input tokens.
Loading