
Commit

Merge remote-tracking branch 'upstream/main'
bentwnghk committed Jan 9, 2025
2 parents 2329ec6 + b2775b5 commit 5bbf074
Showing 8 changed files with 43 additions and 190 deletions.
1 change: 1 addition & 0 deletions next.config.ts
@@ -26,6 +26,7 @@ const nextConfig: NextConfig = {
'gpt-tokenizer',
],
webVitalsAttribution: ['CLS', 'LCP'],
webpackMemoryOptimizations: true,
},
async headers() {
return [
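The flag added above is Next.js 15's experimental switch for reducing webpack's memory footprint during builds. A minimal sketch of where the option sits, assuming the experimental config surface as of Next.js 15 (the rest of the repository's config is elided):

import type { NextConfig } from 'next';

const nextConfig: NextConfig = {
  experimental: {
    // Trades a little compile time for a smaller webpack memory footprint.
    webpackMemoryOptimizations: true,
  },
};

export default nextConfig;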
6 changes: 6 additions & 0 deletions package.json
@@ -321,12 +321,18 @@
"registry": "https://registry.npmjs.org"
},
"pnpm": {
"overrides": {
"mdast-util-gfm-autolink-literal": "2.0.0"
},
"packageExtensions": {
"@inkjs/ui": {
"dependencies": {
"react": "^18"
}
}
}
},
"overrides": {
"mdast-util-gfm-autolink-literal": "2.0.0"
}
}
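The same pin appears twice above because the two package managers read different fields: pnpm honors overrides declared under the pnpm key, while npm reads the top-level overrides field. A minimal sketch of the pattern; the remark-gfm dependency is illustrative, not taken from this diff:

{
  "name": "example",
  "version": "1.0.0",
  "dependencies": {
    "remark-gfm": "^4.0.0"
  },
  "overrides": {
    "mdast-util-gfm-autolink-literal": "2.0.0"
  },
  "pnpm": {
    "overrides": {
      "mdast-util-gfm-autolink-literal": "2.0.0"
    }
  }
}

Either field forces every transitive copy of mdast-util-gfm-autolink-literal to resolve to 2.0.0, regardless of what intermediate packages request.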
@@ -10,6 +10,7 @@ import { memo } from 'react';
import { useTranslation } from 'react-i18next';
import { FlexboxProps } from 'react-layout-kit';

import { isServerMode } from '@/const/version';
import { DiscoverProviderItem } from '@/types/discover';

const useStyles = createStyles(({ css }) => ({
@@ -25,13 +26,13 @@ interface ProviderConfigProps extends FlexboxProps {
identifier: string;
}

const ProviderConfig = memo<ProviderConfigProps>(({ data }) => {
const ProviderConfig = memo<ProviderConfigProps>(({ data, identifier }) => {
const { styles } = useStyles();
const { t } = useTranslation('discover');

const router = useRouter();
const openSettings = () => {
router.push('/settings/llm');
router.push(!isServerMode ? '/settings/llm' : `/settings/provider/${identifier}`);
};

const icon = <Icon icon={SquareArrowOutUpRight} size={{ fontSize: 16 }} />;
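The change above makes the settings entry point depend on the deployment mode: browser-mode deployments keep the single /settings/llm page, while server-mode deployments route to the per-provider page, which is why the component now consumes its identifier prop. A two-line sketch of the rule, with names taken from the diff and the surrounding component elided:

const settingsRoute = (identifier: string, isServerMode: boolean): string =>
  isServerMode ? `/settings/provider/${identifier}` : '/settings/llm';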
78 changes: 6 additions & 72 deletions src/config/aiModels/siliconcloud.ts
@@ -1,20 +1,6 @@
import { AIChatModelCard } from '@/types/aiModel';

const siliconcloudChatModels: AIChatModelCard[] = [
{
contextWindowTokens: 32_768,
description:
'Hunyuan-Large is the largest open-source Transformer-based MoE model in the industry, with 389 billion total parameters and 52 billion active parameters.',
displayName: 'Hunyuan A52B Instruct',
enabled: true,
id: 'Tencent/Hunyuan-A52B-Instruct',
pricing: {
currency: 'CNY',
input: 21,
output: 21,
},
type: 'chat',
},
{
abilities: {
functionCall: true,
@@ -32,19 +18,6 @@ const siliconcloudChatModels: AIChatModelCard[] = [
},
type: 'chat',
},
{
contextWindowTokens: 32_768,
description:
'DeepSeek-V2 is a powerful, cost-effective mixture-of-experts (MoE) language model. It was pretrained on a high-quality corpus of 8.1 trillion tokens and further strengthened through supervised fine-tuning (SFT) and reinforcement learning (RL). Compared with DeepSeek 67B, DeepSeek-V2 delivers stronger performance while saving 42.5% of training costs, reducing the KV cache by 93.3%, and raising maximum generation throughput by 5.76x. The model supports a 128k context length and performs strongly on standard benchmarks and open-ended generation evaluations',
displayName: 'DeepSeek V2 Chat',
id: 'deepseek-ai/DeepSeek-V2-Chat',
pricing: {
currency: 'CNY',
input: 1.33,
output: 1.33,
},
type: 'chat',
},
{
contextWindowTokens: 32_768,
description:
@@ -238,19 +211,6 @@ const siliconcloudChatModels: AIChatModelCard[] = [
},
type: 'chat',
},
{
contextWindowTokens: 4096,
description:
'Qwen2.5-Math-72B is one of the Qwen2.5-Math series of mathematical large language models released by Alibaba Cloud. It supports solving Chinese and English math problems with chain-of-thought (CoT) and tool-integrated reasoning (TIR). Compared with the previous Qwen2-Math series, the Qwen2.5-Math series achieves significant performance gains on Chinese and English math benchmarks. The model excels at precise computation, symbolic manipulation, and algorithmic operations, making it especially suitable for complex mathematical and algorithmic reasoning tasks',
displayName: 'Qwen2.5 Math 72B Instruct',
id: 'Qwen/Qwen2.5-Math-72B-Instruct',
pricing: {
currency: 'CNY',
input: 4.13,
output: 4.13,
},
type: 'chat',
},
{
contextWindowTokens: 32_768,
description:
@@ -303,19 +263,6 @@ const siliconcloudChatModels: AIChatModelCard[] = [
},
type: 'chat',
},
{
contextWindowTokens: 32_768,
description:
'Qwen2-72B-Instruct is the instruction-tuned large language model in the Qwen2 series, with 72B parameters. Based on the Transformer architecture, it adopts techniques such as the SwiGLU activation, attention QKV bias, and grouped-query attention, and it can handle large-scale inputs. The model performs strongly on benchmarks covering language understanding, generation, multilingual ability, coding, math, and reasoning, surpassing most open-source models and showing competitiveness with proprietary models on some tasks',
displayName: 'Qwen2 72B Instruct (Vendor-A)',
id: 'Vendor-A/Qwen/Qwen2-7B-Instruct',
pricing: {
currency: 'CNY',
input: 1,
output: 1,
},
type: 'chat',
},
{
abilities: {
vision: true,
@@ -414,22 +361,6 @@ const siliconcloudChatModels: AIChatModelCard[] = [
},
type: 'chat',
},
{
abilities: {
vision: true,
},
contextWindowTokens: 8192,
description:
'InternVL2-Llama3-76B is a large-scale multimodal model in the InternVL 2.0 series. It consists of the InternViT-6B-448px-V1-5 vision model, an MLP projection layer, and the Hermes-2-Theta-Llama-3-70B language model. It performs strongly on a range of vision-language tasks, including document and chart understanding, infographic question answering, scene-text understanding, and OCR. Trained with an 8K context window, it can process long text, multi-image, and video inputs, markedly improving its handling of such tasks and reaching or approaching state-of-the-art commercial models on several benchmarks',
displayName: 'InternVL2 Llama3 76B',
id: 'OpenGVLab/InternVL2-Llama3-76B',
pricing: {
currency: 'CNY',
input: 4.13,
output: 4.13,
},
type: 'chat',
},
{
abilities: {
functionCall: true,
@@ -617,12 +548,15 @@ const siliconcloudChatModels: AIChatModelCard[] = [
type: 'chat',
},
{
abilities: {
functionCall: true,
},
contextWindowTokens: 32_768,
description:
'Llama-3.1-Nemotron-70B-Instruct is a large language model customized by NVIDIA to improve how helpful LLM responses are to user queries. It performs strongly on benchmarks such as Arena Hard, AlpacaEval 2 LC, and GPT-4-Turbo MT-Bench, ranking first on all three automatic alignment benchmarks as of October 1, 2024. It was trained on top of Llama-3.1-70B-Instruct using RLHF (specifically REINFORCE), Llama-3.1-Nemotron-70B-Reward, and HelpSteer2-Preference prompts',
displayName: 'Llama 3.1 Nemotron 70B Instruct',
'Llama 3.3 is the most advanced multilingual open-source large language model in the Llama series, delivering performance comparable to a 405B model at very low cost. Based on the Transformer architecture, it improves helpfulness and safety through supervised fine-tuning (SFT) and reinforcement learning from human feedback (RLHF). Its instruction-tuned version is optimized for multilingual dialogue and outperforms many open-source and closed chat models on multiple industry benchmarks. Knowledge cutoff: December 2023',
displayName: 'Llama 3.3 70B Instruct',
enabled: true,
id: 'nvidia/Llama-3.1-Nemotron-70B-Instruct',
id: 'meta-llama/Llama-3.3-70B-Instruct',
pricing: {
currency: 'CNY',
input: 4.13,
69 changes: 4 additions & 65 deletions src/config/modelProviders/siliconcloud.ts
@@ -3,19 +3,6 @@ import { ModelProviderCard } from '@/types/llm';
// ref: https://siliconflow.cn/zh-cn/pricing
const SiliconCloud: ModelProviderCard = {
chatModels: [
{
contextWindowTokens: 32_768,
description:
'Hunyuan-Large is the largest open-source Transformer-based MoE model in the industry, with 389 billion total parameters and 52 billion active parameters.',
displayName: 'Hunyuan A52B Instruct',
enabled: true,
id: 'Tencent/Hunyuan-A52B-Instruct',
pricing: {
currency: 'CNY',
input: 21,
output: 21,
},
},
{
contextWindowTokens: 32_768,
description:
@@ -30,18 +17,6 @@ const SiliconCloud: ModelProviderCard = {
output: 1.33,
},
},
{
contextWindowTokens: 32_768,
description:
'DeepSeek-V2 is a powerful, cost-effective mixture-of-experts (MoE) language model. It was pretrained on a high-quality corpus of 8.1 trillion tokens and further strengthened through supervised fine-tuning (SFT) and reinforcement learning (RL). Compared with DeepSeek 67B, DeepSeek-V2 delivers stronger performance while saving 42.5% of training costs, reducing the KV cache by 93.3%, and raising maximum generation throughput by 5.76x. The model supports a 128k context length and performs strongly on standard benchmarks and open-ended generation evaluations',
displayName: 'DeepSeek V2 Chat',
id: 'deepseek-ai/DeepSeek-V2-Chat',
pricing: {
currency: 'CNY',
input: 1.33,
output: 1.33,
},
},
{
contextWindowTokens: 32_768,
description:
@@ -208,18 +183,6 @@ const SiliconCloud: ModelProviderCard = {
output: 1.26,
},
},
{
contextWindowTokens: 4096,
description:
'Qwen2.5-Math-72B is one of the Qwen2.5-Math series of mathematical large language models released by Alibaba Cloud. It supports solving Chinese and English math problems with chain-of-thought (CoT) and tool-integrated reasoning (TIR). Compared with the previous Qwen2-Math series, the Qwen2.5-Math series achieves significant performance gains on Chinese and English math benchmarks. The model excels at precise computation, symbolic manipulation, and algorithmic operations, making it especially suitable for complex mathematical and algorithmic reasoning tasks',
displayName: 'Qwen2.5 Math 72B Instruct',
id: 'Qwen/Qwen2.5-Math-72B-Instruct',
pricing: {
currency: 'CNY',
input: 4.13,
output: 4.13,
},
},
{
contextWindowTokens: 32_768,
description:
@@ -280,18 +243,6 @@ const SiliconCloud: ModelProviderCard = {
output: 4.13,
},
},
{
contextWindowTokens: 32_768,
description:
'Qwen2-72B-Instruct is the instruction-tuned large language model in the Qwen2 series, with 72B parameters. Based on the Transformer architecture, it adopts techniques such as the SwiGLU activation, attention QKV bias, and grouped-query attention, and it can handle large-scale inputs. The model performs strongly on benchmarks covering language understanding, generation, multilingual ability, coding, math, and reasoning, surpassing most open-source models and showing competitiveness with proprietary models on some tasks',
displayName: 'Qwen2 72B Instruct (Vendor-A)',
id: 'Vendor-A/Qwen/Qwen2-7B-Instruct',
pricing: {
currency: 'CNY',
input: 1,
output: 1,
},
},
{
contextWindowTokens: 32_768,
description:
@@ -372,19 +323,6 @@ const SiliconCloud: ModelProviderCard = {
},
vision: true,
},
{
contextWindowTokens: 8192,
description:
'InternVL2-Llama3-76B is a large-scale multimodal model in the InternVL 2.0 series. It consists of the InternViT-6B-448px-V1-5 vision model, an MLP projection layer, and the Hermes-2-Theta-Llama-3-70B language model. It performs strongly on a range of vision-language tasks, including document and chart understanding, infographic question answering, scene-text understanding, and OCR. Trained with an 8K context window, it can process long text, multi-image, and video inputs, markedly improving its handling of such tasks and reaching or approaching state-of-the-art commercial models on several benchmarks',
displayName: 'InternVL2 Llama3 76B',
id: 'OpenGVLab/InternVL2-Llama3-76B',
pricing: {
currency: 'CNY',
input: 4.13,
output: 4.13,
},
vision: true,
},
{
contextWindowTokens: 131_072,
description:
@@ -553,10 +491,11 @@ const SiliconCloud: ModelProviderCard = {
{
contextWindowTokens: 32_768,
description:
'Llama-3.1-Nemotron-70B-Instruct is a large language model customized by NVIDIA to improve how helpful LLM responses are to user queries. It performs strongly on benchmarks such as Arena Hard, AlpacaEval 2 LC, and GPT-4-Turbo MT-Bench, ranking first on all three automatic alignment benchmarks as of October 1, 2024. It was trained on top of Llama-3.1-70B-Instruct using RLHF (specifically REINFORCE), Llama-3.1-Nemotron-70B-Reward, and HelpSteer2-Preference prompts',
displayName: 'Llama 3.1 Nemotron 70B Instruct',
'Llama 3.3 is the most advanced multilingual open-source large language model in the Llama series, delivering performance comparable to a 405B model at very low cost. Based on the Transformer architecture, it improves helpfulness and safety through supervised fine-tuning (SFT) and reinforcement learning from human feedback (RLHF). Its instruction-tuned version is optimized for multilingual dialogue and outperforms many open-source and closed chat models on multiple industry benchmarks. Knowledge cutoff: December 2023',
displayName: 'Llama 3.3 70B Instruct',
enabled: true,
id: 'nvidia/Llama-3.1-Nemotron-70B-Instruct',
functionCall: true,
id: 'meta-llama/Llama-3.3-70B-Instruct',
pricing: {
currency: 'CNY',
input: 4.13,
13 changes: 8 additions & 5 deletions src/database/repositories/aiInfra/index.ts
@@ -81,13 +81,16 @@ export class AiInfraRepos {
.map<EnabledAiModel & { enabled?: boolean | null }>((item) => {
const user = allModels.find((m) => m.id === item.id && m.providerId === provider.id);

const enabled = !!user ? user.enabled : item.enabled;

return {
...item,
abilities: item.abilities || {},
enabled,
abilities: !!user ? user.abilities : item.abilities || {},
config: !!user ? user.config : item.config,
contextWindowTokens: !!user ? user.contextWindowTokens : item.contextWindowTokens,
displayName: user?.displayName || item.displayName,
enabled: !!user ? user.enabled : item.enabled,
id: item.id,
providerId: provider.id,
sort: !!user ? user.sort : undefined,
type: item.type,
};
})
.filter((i) => i.enabled);
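The rewritten mapping gives a user-saved record precedence over the builtin definition field by field, rather than only for enabled and displayName. A simplified sketch of that precedence rule, using stand-in types rather than the repository's real ones:

interface ModelRecord {
  abilities?: Record<string, boolean>;
  contextWindowTokens?: number;
  displayName?: string;
  enabled?: boolean | null;
}

// When the user has saved a copy of this model, their values win even if
// they are empty; only displayName falls back when the saved value is falsy.
const mergeModel = (builtin: ModelRecord, user?: ModelRecord): ModelRecord => ({
  abilities: user ? user.abilities : builtin.abilities || {},
  contextWindowTokens: user ? user.contextWindowTokens : builtin.contextWindowTokens,
  displayName: user?.displayName || builtin.displayName,
  enabled: user ? user.enabled : builtin.enabled,
});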
2 changes: 1 addition & 1 deletion src/database/server/models/__tests__/aiModel.test.ts
@@ -248,7 +248,7 @@ describe('AiModelModel', () => {

const allModels = await aiProviderModel.query();
expect(allModels).toHaveLength(2);
expect(allModels.find((m) => m.id === 'existing-model')?.displayName).toBe('Updated Name');
expect(allModels.find((m) => m.id === 'existing-model')?.displayName).toBe('Old Name');
expect(allModels.find((m) => m.id === 'new-model')?.displayName).toBe('New Model');
});
});
59 changes: 14 additions & 45 deletions src/database/server/models/aiModel.ts
@@ -1,5 +1,4 @@
import { and, asc, desc, eq, inArray } from 'drizzle-orm/expressions';
import pMap from 'p-map';

import { LobeChatDatabase } from '@/database/type';
import {
@@ -131,51 +130,21 @@ export class AiModelModel {
};

batchUpdateAiModels = async (providerId: string, models: AiProviderModelListItem[]) => {
return this.db.transaction(async (trx) => {
const records = models.map(({ id, ...model }) => ({
...model,
id,
providerId,
updatedAt: new Date(),
userId: this.userId,
}));
const records = models.map(({ id, ...model }) => ({
...model,
id,
providerId,
updatedAt: new Date(),
userId: this.userId,
}));

// Step 1: try to insert all records, ignoring conflicts
const insertedRecords = await trx
.insert(aiModels)
.values(records)
.onConflictDoNothing({
target: [aiModels.id, aiModels.userId, aiModels.providerId],
})
.returning();
// Step 2: find the records that need updating (i.e. the ones that conflicted on insert)
// Identify the records that could not be inserted (these are the ones to update)
const insertedIds = new Set(insertedRecords.map((r) => r.id));
const recordsToUpdate = records.filter((r) => !insertedIds.has(r.id));

// Step 3: update the records that already exist
if (recordsToUpdate.length > 0) {
await pMap(
recordsToUpdate,
async (record) => {
await trx
.update(aiModels)
.set({
...record,
updatedAt: new Date(),
})
.where(
and(
eq(aiModels.id, record.id),
eq(aiModels.userId, this.userId),
eq(aiModels.providerId, providerId),
),
);
},
{ concurrency: 10 }, // cap concurrency at 10
);
}
});
return this.db
.insert(aiModels)
.values(records)
.onConflictDoNothing({
target: [aiModels.id, aiModels.userId, aiModels.providerId],
})
.returning();
};

batchToggleAiModels = async (providerId: string, models: string[], enabled: boolean) => {
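With this rewrite, batchUpdateAiModels no longer upserts: rows that already exist for the user are left untouched instead of being overwritten, which is why the test above now expects 'Old Name' rather than 'Updated Name'. A self-contained sketch of the insert-or-ignore pattern in Drizzle, with an illustrative schema rather than the repository's real one:

import { drizzle } from 'drizzle-orm/node-postgres';
import { pgTable, primaryKey, text, timestamp } from 'drizzle-orm/pg-core';
import { Pool } from 'pg';

const aiModels = pgTable(
  'ai_models',
  {
    id: text('id').notNull(),
    userId: text('user_id').notNull(),
    providerId: text('provider_id').notNull(),
    displayName: text('display_name'),
    updatedAt: timestamp('updated_at'),
  },
  (table) => ({
    pk: primaryKey({ columns: [table.id, table.userId, table.providerId] }),
  }),
);

const db = drizzle(new Pool({ connectionString: process.env.DATABASE_URL }));

// Insert-or-ignore: rows whose (id, userId, providerId) already exist are
// skipped silently, so a re-import cannot clobber a user's edits, and only
// the genuinely new rows come back from .returning().
export const insertNewModels = (records: (typeof aiModels.$inferInsert)[]) =>
  db
    .insert(aiModels)
    .values(records)
    .onConflictDoNothing({
      target: [aiModels.id, aiModels.userId, aiModels.providerId],
    })
    .returning();

If overwriting existing rows were the goal, onConflictDoUpdate would be the matching Drizzle call; the deleted transaction emulated exactly that with its second update pass.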
