infiniflow · gaozhengzhou · Oct 3, 2024
diff --git a/api/apps/llm_app.py b/api/apps/llm_app.py
@@ -61,7 +61,7 @@ def set_api_key():
     for llm in LLMService.query(fid=factory)[:3]:
         if not embd_passed and llm.model_type == LLMType.EMBEDDING.value:
             mdl = EmbeddingModel[factory](
-                req["api_key"], llm.llm_name, base_url=req.get("base_url"))
+                req["api_key"], llm.llm_name, base_url=req.get("base_url"), api_version=llm.api_version)
             try:
                 arr, tc = mdl.encode(["Test if the api key is available"])
                 if len(arr[0]) == 0:
@@ -71,7 +71,7 @@ def set_api_key():
                 msg += f"\nFail to access embedding model({llm.llm_name}) using this api key." + str(e)
         elif not chat_passed and llm.model_type == LLMType.CHAT.value:
             mdl = ChatModel[factory](
-                req["api_key"], llm.llm_name, base_url=req.get("base_url"))
+                req["api_key"], llm.llm_name, base_url=req.get("base_url"), api_version=llm.api_version)
             try:
                 m, tc = mdl.chat(None, [{"role": "user", "content": "Hello! How are you doing!"}], 
                                  {"temperature": 0.9,'max_tokens':50})

diff --git a/api/db/db_models.py b/api/db/db_models.py
@@ -619,6 +619,11 @@ class LLM(DataBaseModel):
         help_text="is it validate(0: wasted，1: validate)",
         default="1",
         index=True)
+    api_version = CharField(
+        max_length=255,
+        null=True,
+        help_text="2023-05-15, 2024-02-01...",
+        index=True)
 
     def __str__(self):
         return self.llm_name
@@ -1052,4 +1057,11 @@ def migrate_db():
             )
         except Exception as e:
             pass
+        try:
+            migrate(
+                migrator.add_column('llm', 'api_version',
+                                    CharField(max_length=255, null=True, help_text="2023-05-15, 2024-02-01...", index=True))
+            )
+        except Exception as e:
+            pass
 
diff --git a/api/db/services/llm_service.py b/api/db/services/llm_service.py
@@ -89,9 +89,9 @@ def model_instance(cls, tenant_id, llm_type,
         fid = None if len(tmp) < 2 else tmp[1]
         mdlnm = tmp[0]
         if model_config: model_config = model_config.to_dict()
+        llm = LLMService.query(llm_name=mdlnm) if not fid else LLMService.query(llm_name=mdlnm, fid=fid)
         if not model_config:
             if llm_type in [LLMType.EMBEDDING, LLMType.RERANK]:
-                llm = LLMService.query(llm_name=mdlnm) if not fid else LLMService.query(llm_name=mdlnm, fid=fid)
                 if llm and llm[0].fid in ["Youdao", "FastEmbed", "BAAI"]:
                     model_config = {"llm_factory": llm[0].fid, "api_key":"", "llm_name": mdlnm, "api_base": ""}
             if not model_config:
@@ -103,46 +103,51 @@ def model_instance(cls, tenant_id, llm_type,
                         raise LookupError(f"Type of {llm_type} model is not set.")
                     raise LookupError("Model({}) not authorized".format(mdlnm))
 
+        if llm and llm[0].api_version:
+            model_config["api_version"] = llm[0].api_version
+        else:
+            model_config["api_version"] = ''
+
         if llm_type == LLMType.EMBEDDING.value:
             if model_config["llm_factory"] not in EmbeddingModel:
                 return
             return EmbeddingModel[model_config["llm_factory"]](
-                model_config["api_key"], model_config["llm_name"], base_url=model_config["api_base"])
+                model_config["api_key"], model_config["llm_name"], base_url=model_config["api_base"], api_version=model_config["api_version"])
 
         if llm_type == LLMType.RERANK:
             if model_config["llm_factory"] not in RerankModel:
                 return
             return RerankModel[model_config["llm_factory"]](
-                model_config["api_key"], model_config["llm_name"], base_url=model_config["api_base"])
+                model_config["api_key"], model_config["llm_name"], base_url=model_config["api_base"], api_version=model_config["api_version"])
 
         if llm_type == LLMType.IMAGE2TEXT.value:
             if model_config["llm_factory"] not in CvModel:
                 return
             return CvModel[model_config["llm_factory"]](
                 model_config["api_key"], model_config["llm_name"], lang,
-                base_url=model_config["api_base"]
+                base_url=model_config["api_base"], api_version=model_config["api_version"]
             )
 
         if llm_type == LLMType.CHAT.value:
             if model_config["llm_factory"] not in ChatModel:
                 return
             return ChatModel[model_config["llm_factory"]](
-                model_config["api_key"], model_config["llm_name"], base_url=model_config["api_base"])
+                model_config["api_key"], model_config["llm_name"], base_url=model_config["api_base"], api_version=model_config["api_version"])
 
         if llm_type == LLMType.SPEECH2TEXT:
             if model_config["llm_factory"] not in Seq2txtModel:
                 return
             return Seq2txtModel[model_config["llm_factory"]](
                 model_config["api_key"], model_config["llm_name"], lang,
-                base_url=model_config["api_base"]
+                base_url=model_config["api_base"], api_version=model_config["api_version"]
             )
         if llm_type == LLMType.TTS:
             if model_config["llm_factory"] not in TTSModel:
                 return
             return TTSModel[model_config["llm_factory"]](
                 model_config["api_key"],
                 model_config["llm_name"],
-                base_url=model_config["api_base"],
+                base_url=model_config["api_base"], api_version=model_config["api_version"]
             )
 
     @classmethod

diff --git a/conf/llm_factories.json b/conf/llm_factories.json
@@ -610,73 +610,85 @@
                     "llm_name": "gpt-4o-mini",
                     "tags": "LLM,CHAT,128K",
                     "max_tokens": 128000,
-                    "model_type": "image2text"
+                    "model_type": "image2text",
+                    "api_version": "2024-02-01"
                 },
                 {
                     "llm_name": "gpt-4o",
                     "tags": "LLM,CHAT,128K",
                     "max_tokens": 128000,
-                    "model_type": "chat,image2text"
+                    "model_type": "chat,image2text",
+                    "api_version": "2024-02-01"
                 },
                 {
                     "llm_name": "gpt-35-turbo",
                     "tags": "LLM,CHAT,4K",
                     "max_tokens": 4096,
-                    "model_type": "chat"
+                    "model_type": "chat",
+                    "api_version": "2024-02-01"
                 },
                 {
                     "llm_name": "gpt-35-turbo-16k",
                     "tags": "LLM,CHAT,16k",
                     "max_tokens": 16385,
-                    "model_type": "chat"
+                    "model_type": "chat",
+                    "api_version": "2024-02-01"
                 },
                 {
                     "llm_name": "text-embedding-ada-002",
                     "tags": "TEXT EMBEDDING,8K",
                     "max_tokens": 8191,
-                    "model_type": "embedding"
+                    "model_type": "embedding",
+                    "api_version": "2024-02-01"
                 },
                 {
                     "llm_name": "text-embedding-3-small",
                     "tags": "TEXT EMBEDDING,8K",
                     "max_tokens": 8191,
-                    "model_type": "embedding"
+                    "model_type": "embedding",
+                    "api_version": "2024-02-01"
                 },
                 {
                     "llm_name": "text-embedding-3-large",
                     "tags": "TEXT EMBEDDING,8K",
                     "max_tokens": 8191,
-                    "model_type": "embedding"
+                    "model_type": "embedding",
+                    "api_version": "2024-02-01"
                 },
                 {
                     "llm_name": "whisper-1",
                     "tags": "SPEECH2TEXT",
                     "max_tokens": 26214400,
-                    "model_type": "speech2text"
+                    "model_type": "speech2text",
+                    "api_version": "2024-02-01"
                 },
                 {
                     "llm_name": "gpt-4",
                     "tags": "LLM,CHAT,8K",
                     "max_tokens": 8191,
-                    "model_type": "chat"
+                    "model_type": "chat",
+                    "api_version": "2024-02-01"
                 },
                 {
                     "llm_name": "gpt-4-turbo",
                     "tags": "LLM,CHAT,8K",
                     "max_tokens": 8191,
-                    "model_type": "chat"
+                    "model_type": "chat",
+                    "api_version": "2024-02-01"
                 },
                 {
                     "llm_name": "gpt-4-32k",
                     "tags": "LLM,CHAT,32K",
                     "max_tokens": 32768,
-                    "model_type": "chat"
+                    "model_type": "chat",
+                    "api_version": "2024-02-01"
                 },
                 {
                     "llm_name": "gpt-4-vision-preview",
                     "tags": "LLM,CHAT,IMAGE2TEXT",
                     "max_tokens": 765,
-                    "model_type": "image2text"
+                    "model_type": "image2text",
+                    "api_version": "2024-02-01"
                 }
             ]
         },

diff --git a/rag/llm/chat_model.py b/rag/llm/chat_model.py
@@ -114,7 +114,9 @@ def __init__(self, key, model_name="deepseek-chat", base_url="https://api.deepse
 
 class AzureChat(Base):
     def __init__(self, key, model_name, **kwargs):
-        self.client = AzureOpenAI(api_key=key, azure_endpoint=kwargs["base_url"], api_version="2024-02-01")
+        # api_version may be missing, None (unset DB column) or '' (service-layer fallback); keep the previous default then.
+        api_version = kwargs.get("api_version") or "2024-02-01"
+        self.client = AzureOpenAI(api_key=key, azure_endpoint=kwargs["base_url"], api_version=api_version)
         self.model_name = model_name
 
 

diff --git a/rag/llm/embedding_model.py b/rag/llm/embedding_model.py
@@ -137,7 +137,9 @@ def encode_queries(self, text):
 
 class AzureEmbed(OpenAIEmbed):
     def __init__(self, key, model_name, **kwargs):
-        self.client = AzureOpenAI(api_key=key, azure_endpoint=kwargs["base_url"], api_version="2024-02-01")
+        # api_version may be missing, None (unset DB column) or '' (service-layer fallback); keep the previous default then.
+        api_version = kwargs.get("api_version") or "2024-02-01"
+        self.client = AzureOpenAI(api_key=key, azure_endpoint=kwargs["base_url"], api_version=api_version)
         self.model_name = model_name