
Commit

Merge remote-tracking branch 'upstream/main'
bentwnghk committed Jan 9, 2025
2 parents 2329ec6 + b2775b5 commit 5bbf074
Showing 8 changed files with 43 additions and 190 deletions.
1 change: 1 addition & 0 deletions next.config.ts
@@ -26,6 +26,7 @@ const nextConfig: NextConfig = {
'gpt-tokenizer',
],
webVitalsAttribution: ['CLS', 'LCP'],
webpackMemoryOptimizations: true,
},
async headers() {
return [
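The flag added above is Next.js 15's experimental switch for reducing webpack's memory footprint during builds. A minimal sketch of where the option sits, assuming the experimental config surface as of Next.js 15 (the rest of the repository's config is elided):

import type { NextConfig } from 'next';

const nextConfig: NextConfig = {
  experimental: {
    // Trades a little compile time for a smaller webpack memory footprint.
    webpackMemoryOptimizations: true,
  },
};

export default nextConfig;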
6 changes: 6 additions & 0 deletions package.json
@@ -321,12 +321,18 @@
"registry": "https://registry.npmjs.org"
},
"pnpm": {
"overrides": {
"mdast-util-gfm-autolink-literal": "2.0.0"
},
"packageExtensions": {
"@inkjs/ui": {
"dependencies": {
"react": "^18"
}
}
}
},
"overrides": {
"mdast-util-gfm-autolink-literal": "2.0.0"
}
}
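The same pin appears twice above because the two package managers read different fields: pnpm honors overrides declared under the pnpm key, while npm reads the top-level overrides field. A minimal sketch of the pattern; the remark-gfm dependency is illustrative, not taken from this diff:

{
  "name": "example",
  "version": "1.0.0",
  "dependencies": {
    "remark-gfm": "^4.0.0"
  },
  "overrides": {
    "mdast-util-gfm-autolink-literal": "2.0.0"
  },
  "pnpm": {
    "overrides": {
      "mdast-util-gfm-autolink-literal": "2.0.0"
    }
  }
}

Either field forces every transitive copy of mdast-util-gfm-autolink-literal to resolve to 2.0.0, regardless of what intermediate packages request.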
@@ -10,6 +10,7 @@ import { memo } from 'react';
import { useTranslation } from 'react-i18next';
import { FlexboxProps } from 'react-layout-kit';

import { isServerMode } from '@/const/version';
import { DiscoverProviderItem } from '@/types/discover';

const useStyles = createStyles(({ css }) => ({
@@ -25,13 +26,13 @@ interface ProviderConfigProps extends FlexboxProps {
identifier: string;
}

const ProviderConfig = memo<ProviderConfigProps>(({ data }) => {
const ProviderConfig = memo<ProviderConfigProps>(({ data, identifier }) => {
const { styles } = useStyles();
const { t } = useTranslation('discover');

const router = useRouter();
const openSettings = () => {
router.push('/settings/llm');
router.push(!isServerMode ? '/settings/llm' : `/settings/provider/${identifier}`);
};

const icon = <Icon icon={SquareArrowOutUpRight} size={{ fontSize: 16 }} />;
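The change above makes the settings entry point depend on the deployment mode: browser-mode deployments keep the single /settings/llm page, while server-mode deployments route to the per-provider page, which is why the component now consumes its identifier prop. A two-line sketch of the rule, with names taken from the diff and the surrounding component elided:

const settingsRoute = (identifier: string, isServerMode: boolean): string =>
  isServerMode ? `/settings/provider/${identifier}` : '/settings/llm';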
78 changes: 6 additions & 72 deletions src/config/aiModels/siliconcloud.ts
@@ -1,20 +1,6 @@
import { AIChatModelCard } from '@/types/aiModel';

const siliconcloudChatModels: AIChatModelCard[] = [
{
contextWindowTokens: 32_768,
description:
'Hunyuan-Large is the largest open-source Transformer-based MoE model in the industry, with 389 billion total parameters and 52 billion active parameters.',
displayName: 'Hunyuan A52B Instruct',
enabled: true,
id: 'Tencent/Hunyuan-A52B-Instruct',
pricing: {
currency: 'CNY',
input: 21,
output: 21,
},
type: 'chat',
},
{
abilities: {
functionCall: true,
@@ -32,19 +18,6 @@ const siliconcloudChatModels: AIChatModelCard[] = [
},
type: 'chat',
},
{
contextWindowTokens: 32_768,
description:
'DeepSeek-V2 is a powerful, cost-effective mixture-of-experts (MoE) language model. It was pretrained on a high-quality corpus of 8.1 trillion tokens and further strengthened through supervised fine-tuning (SFT) and reinforcement learning (RL). Compared with DeepSeek 67B, DeepSeek-V2 delivers stronger performance while saving 42.5% of training costs, reducing the KV cache by 93.3%, and raising maximum generation throughput by 5.76x. The model supports a 128k context length and performs strongly on standard benchmarks and open-ended generation evaluations',
displayName: 'DeepSeek V2 Chat',
id: 'deepseek-ai/DeepSeek-V2-Chat',
pricing: {
currency: 'CNY',
input: 1.33,
output: 1.33,
},
type: 'chat',
},
{
contextWindowTokens: 32_768,
description:
@@ -238,19 +211,6 @@ const siliconcloudChatModels: AIChatModelCard[] = [
},
type: 'chat',
},
{
contextWindowTokens: 4096,
description:
'Qwen2.5-Math-72B is one of the Qwen2.5-Math series of mathematical large language models released by Alibaba Cloud. It supports solving Chinese and English math problems with chain-of-thought (CoT) and tool-integrated reasoning (TIR). Compared with the previous Qwen2-Math series, the Qwen2.5-Math series achieves significant performance gains on Chinese and English math benchmarks. The model excels at precise computation, symbolic manipulation, and algorithmic operations, making it especially suitable for complex mathematical and algorithmic reasoning tasks',
displayName: 'Qwen2.5 Math 72B Instruct',
id: 'Qwen/Qwen2.5-Math-72B-Instruct',
pricing: {
currency: 'CNY',
input: 4.13,
output: 4.13,
},
type: 'chat',
},
{
contextWindowTokens: 32_768,
description:
@@ -303,19 +263,6 @@ const siliconcloudChatModels: AIChatModelCard[] = [
},
type: 'chat',
},
{
contextWindowTokens: 32_768,
description:
'Qwen2-72B-Instruct is the instruction-tuned large language model in the Qwen2 series, with 72B parameters. Based on the Transformer architecture, it adopts techniques such as the SwiGLU activation, attention QKV bias, and grouped-query attention, and it can handle large-scale inputs. The model performs strongly on benchmarks covering language understanding, generation, multilingual ability, coding, math, and reasoning, surpassing most open-source models and showing competitiveness with proprietary models on some tasks',
displayName: 'Qwen2 72B Instruct (Vendor-A)',
id: 'Vendor-A/Qwen/Qwen2-7B-Instruct',
pricing: {
currency: 'CNY',
input: 1,
output: 1,
},
type: 'chat',
},
{
abilities: {
vision: true,
@@ -414,22 +361,6 @@ const siliconcloudChatModels: AIChatModelCard[] = [
},
type: 'chat',
},
{
abilities: {
vision: true,
},
contextWindowTokens: 8192,
description:
'InternVL2-Llama3-76B is a large-scale multimodal model in the InternVL 2.0 series. It consists of the InternViT-6B-448px-V1-5 vision model, an MLP projection layer, and the Hermes-2-Theta-Llama-3-70B language model. It performs strongly on a range of vision-language tasks, including document and chart understanding, infographic question answering, scene-text understanding, and OCR. Trained with an 8K context window, it can process long text, multi-image, and video inputs, markedly improving its handling of such tasks and reaching or approaching state-of-the-art commercial models on several benchmarks',
displayName: 'InternVL2 Llama3 76B',
id: 'OpenGVLab/InternVL2-Llama3-76B',
pricing: {
currency: 'CNY',
input: 4.13,
output: 4.13,
},
type: 'chat',
},
{
abilities: {
functionCall: true,
@@ -617,12 +548,15 @@ const siliconcloudChatModels: AIChatModelCard[] = [
type: 'chat',
},
{
abilities: {
functionCall: true,
},
contextWindowTokens: 32_768,
description:
'Llama-3.1-Nemotron-70B-Instruct is a large language model customized by NVIDIA to improve how helpful LLM responses are to user queries. It performs strongly on benchmarks such as Arena Hard, AlpacaEval 2 LC, and GPT-4-Turbo MT-Bench, ranking first on all three automatic alignment benchmarks as of October 1, 2024. It was trained on top of Llama-3.1-70B-Instruct using RLHF (specifically REINFORCE), Llama-3.1-Nemotron-70B-Reward, and HelpSteer2-Preference prompts',
displayName: 'Llama 3.1 Nemotron 70B Instruct',
'Llama 3.3 is the most advanced multilingual open-source large language model in the Llama series, delivering performance comparable to a 405B model at very low cost. Based on the Transformer architecture, it improves helpfulness and safety through supervised fine-tuning (SFT) and reinforcement learning from human feedback (RLHF). Its instruction-tuned version is optimized for multilingual dialogue and outperforms many open-source and closed chat models on multiple industry benchmarks. Knowledge cutoff: December 2023',
displayName: 'Llama 3.3 70B Instruct',
enabled: true,
id: 'nvidia/Llama-3.1-Nemotron-70B-Instruct',
id: 'meta-llama/Llama-3.3-70B-Instruct',
pricing: {
currency: 'CNY',
input: 4.13,
69 changes: 4 additions & 65 deletions src/config/modelProviders/siliconcloud.ts
@@ -3,19 +3,6 @@ import { ModelProviderCard } from '@/types/llm';
// ref: https://siliconflow.cn/zh-cn/pricing
const SiliconCloud: ModelProviderCard = {
chatModels: [
{
contextWindowTokens: 32_768,
description:
'Hunyuan-Large is the largest open-source Transformer-based MoE model in the industry, with 389 billion total parameters and 52 billion active parameters.',
displayName: 'Hunyuan A52B Instruct',
enabled: true,
id: 'Tencent/Hunyuan-A52B-Instruct',
pricing: {
currency: 'CNY',
input: 21,
output: 21,
},
},
{
contextWindowTokens: 32_768,
description:
@@ -30,18 +17,6 @@ const SiliconCloud: ModelProviderCard = {
output: 1.33,
},
},
{
contextWindowTokens: 32_768,
description:
'DeepSeek-V2 is a powerful, cost-effective mixture-of-experts (MoE) language model. It was pretrained on a high-quality corpus of 8.1 trillion tokens and further strengthened through supervised fine-tuning (SFT) and reinforcement learning (RL). Compared with DeepSeek 67B, DeepSeek-V2 delivers stronger performance while saving 42.5% of training costs, reducing the KV cache by 93.3%, and raising maximum generation throughput by 5.76x. The model supports a 128k context length and performs strongly on standard benchmarks and open-ended generation evaluations',
displayName: 'DeepSeek V2 Chat',
id: 'deepseek-ai/DeepSeek-V2-Chat',
pricing: {
currency: 'CNY',
input: 1.33,
output: 1.33,
},
},
{
contextWindowTokens: 32_768,
description:
@@ -208,18 +183,6 @@ const SiliconCloud: ModelProviderCard = {
output: 1.26,
},
},
{
contextWindowTokens: 4096,
description:
'Qwen2.5-Math-72B is one of the Qwen2.5-Math series of mathematical large language models released by Alibaba Cloud. It supports solving Chinese and English math problems with chain-of-thought (CoT) and tool-integrated reasoning (TIR). Compared with the previous Qwen2-Math series, the Qwen2.5-Math series achieves significant performance gains on Chinese and English math benchmarks. The model excels at precise computation, symbolic manipulation, and algorithmic operations, making it especially suitable for complex mathematical and algorithmic reasoning tasks',
displayName: 'Qwen2.5 Math 72B Instruct',
id: 'Qwen/Qwen2.5-Math-72B-Instruct',
pricing: {
currency: 'CNY',
input: 4.13,
output: 4.13,
},
},
{
contextWindowTokens: 32_768,
description:
@@ -280,18 +243,6 @@ const SiliconCloud: ModelProviderCard = {
output: 4.13,
},
},
{
contextWindowTokens: 32_768,
description:
'Qwen2-72B-Instruct is the instruction-tuned large language model in the Qwen2 series, with 72B parameters. Based on the Transformer architecture, it adopts techniques such as the SwiGLU activation, attention QKV bias, and grouped-query attention, and it can handle large-scale inputs. The model performs strongly on benchmarks covering language understanding, generation, multilingual ability, coding, math, and reasoning, surpassing most open-source models and showing competitiveness with proprietary models on some tasks',
displayName: 'Qwen2 72B Instruct (Vendor-A)',
id: 'Vendor-A/Qwen/Qwen2-7B-Instruct',
pricing: {
currency: 'CNY',
input: 1,
output: 1,
},
},
{
contextWindowTokens: 32_768,
description:
@@ -372,19 +323,6 @@ const SiliconCloud: ModelProviderCard = {
},
vision: true,
},
{
contextWindowTokens: 8192,
description:
'InternVL2-Llama3-76B is a large-scale multimodal model in the InternVL 2.0 series. It consists of the InternViT-6B-448px-V1-5 vision model, an MLP projection layer, and the Hermes-2-Theta-Llama-3-70B language model. It performs strongly on a range of vision-language tasks, including document and chart understanding, infographic question answering, scene-text understanding, and OCR. Trained with an 8K context window, it can process long text, multi-image, and video inputs, markedly improving its handling of such tasks and reaching or approaching state-of-the-art commercial models on several benchmarks',
displayName: 'InternVL2 Llama3 76B',
id: 'OpenGVLab/InternVL2-Llama3-76B',
pricing: {
currency: 'CNY',
input: 4.13,
output: 4.13,
},
vision: true,
},
{
contextWindowTokens: 131_072,
description:
@@ -553,10 +491,11 @@ const SiliconCloud: ModelProviderCard = {
{
contextWindowTokens: 32_768,
description:
'Llama-3.1-Nemotron-70B-Instruct is a large language model customized by NVIDIA to improve how helpful LLM responses are to user queries. It performs strongly on benchmarks such as Arena Hard, AlpacaEval 2 LC, and GPT-4-Turbo MT-Bench, ranking first on all three automatic alignment benchmarks as of October 1, 2024. It was trained on top of Llama-3.1-70B-Instruct using RLHF (specifically REINFORCE), Llama-3.1-Nemotron-70B-Reward, and HelpSteer2-Preference prompts',
displayName: 'Llama 3.1 Nemotron 70B Instruct',
'Llama 3.3 is the most advanced multilingual open-source large language model in the Llama series, delivering performance comparable to a 405B model at very low cost. Based on the Transformer architecture, it improves helpfulness and safety through supervised fine-tuning (SFT) and reinforcement learning from human feedback (RLHF). Its instruction-tuned version is optimized for multilingual dialogue and outperforms many open-source and closed chat models on multiple industry benchmarks. Knowledge cutoff: December 2023',
displayName: 'Llama 3.3 70B Instruct',
enabled: true,
id: 'nvidia/Llama-3.1-Nemotron-70B-Instruct',
functionCall: true,
id: 'meta-llama/Llama-3.3-70B-Instruct',
pricing: {
currency: 'CNY',
input: 4.13,
13 changes: 8 additions & 5 deletions src/database/repositories/aiInfra/index.ts
@@ -81,13 +81,16 @@ export class AiInfraRepos {
.map<EnabledAiModel & { enabled?: boolean | null }>((item) => {
const user = allModels.find((m) => m.id === item.id && m.providerId === provider.id);

const enabled = !!user ? user.enabled : item.enabled;

return {
...item,
abilities: item.abilities || {},
enabled,
abilities: !!user ? user.abilities : item.abilities || {},
config: !!user ? user.config : item.config,
contextWindowTokens: !!user ? user.contextWindowTokens : item.contextWindowTokens,
displayName: user?.displayName || item.displayName,
enabled: !!user ? user.enabled : item.enabled,
id: item.id,
providerId: provider.id,
sort: !!user ? user.sort : undefined,
type: item.type,
};
})
.filter((i) => i.enabled);
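The rewritten mapping gives a user-saved record precedence over the builtin definition field by field, rather than only for enabled and displayName. A simplified sketch of that precedence rule, using stand-in types rather than the repository's real ones:

interface ModelRecord {
  abilities?: Record<string, boolean>;
  contextWindowTokens?: number;
  displayName?: string;
  enabled?: boolean | null;
}

// When the user has saved a copy of this model, their values win even if
// they are empty; only displayName falls back when the saved value is falsy.
const mergeModel = (builtin: ModelRecord, user?: ModelRecord): ModelRecord => ({
  abilities: user ? user.abilities : builtin.abilities || {},
  contextWindowTokens: user ? user.contextWindowTokens : builtin.contextWindowTokens,
  displayName: user?.displayName || builtin.displayName,
  enabled: user ? user.enabled : builtin.enabled,
});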
2 changes: 1 addition & 1 deletion src/database/server/models/__tests__/aiModel.test.ts
@@ -248,7 +248,7 @@ describe('AiModelModel', () => {

const allModels = await aiProviderModel.query();
expect(allModels).toHaveLength(2);
expect(allModels.find((m) => m.id === 'existing-model')?.displayName).toBe('Updated Name');
expect(allModels.find((m) => m.id === 'existing-model')?.displayName).toBe('Old Name');
expect(allModels.find((m) => m.id === 'new-model')?.displayName).toBe('New Model');
});
});
59 changes: 14 additions & 45 deletions src/database/server/models/aiModel.ts
@@ -1,5 +1,4 @@
import { and, asc, desc, eq, inArray } from 'drizzle-orm/expressions';
import pMap from 'p-map';

import { LobeChatDatabase } from '@/database/type';
import {
@@ -131,51 +130,21 @@ export class AiModelModel {
};

batchUpdateAiModels = async (providerId: string, models: AiProviderModelListItem[]) => {
return this.db.transaction(async (trx) => {
const records = models.map(({ id, ...model }) => ({
...model,
id,
providerId,
updatedAt: new Date(),
userId: this.userId,
}));
const records = models.map(({ id, ...model }) => ({
...model,
id,
providerId,
updatedAt: new Date(),
userId: this.userId,
}));

// Step 1: try to insert all records, ignoring conflicts
const insertedRecords = await trx
.insert(aiModels)
.values(records)
.onConflictDoNothing({
target: [aiModels.id, aiModels.userId, aiModels.providerId],
})
.returning();
// Step 2: find the records that need updating (i.e. the ones that conflicted on insert)
// Identify the records that could not be inserted (these are the ones to update)
const insertedIds = new Set(insertedRecords.map((r) => r.id));
const recordsToUpdate = records.filter((r) => !insertedIds.has(r.id));

// Step 3: update the records that already exist
if (recordsToUpdate.length > 0) {
await pMap(
recordsToUpdate,
async (record) => {
await trx
.update(aiModels)
.set({
...record,
updatedAt: new Date(),
})
.where(
and(
eq(aiModels.id, record.id),
eq(aiModels.userId, this.userId),
eq(aiModels.providerId, providerId),
),
);
},
{ concurrency: 10 }, // cap concurrency at 10
);
}
});
return this.db
.insert(aiModels)
.values(records)
.onConflictDoNothing({
target: [aiModels.id, aiModels.userId, aiModels.providerId],
})
.returning();
};

batchToggleAiModels = async (providerId: string, models: string[], enabled: boolean) => {
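With this rewrite, batchUpdateAiModels no longer upserts: rows that already exist for the user are left untouched instead of being overwritten, which is why the test above now expects 'Old Name' rather than 'Updated Name'. A self-contained sketch of the insert-or-ignore pattern in Drizzle, with an illustrative schema rather than the repository's real one:

import { drizzle } from 'drizzle-orm/node-postgres';
import { pgTable, primaryKey, text, timestamp } from 'drizzle-orm/pg-core';
import { Pool } from 'pg';

const aiModels = pgTable(
  'ai_models',
  {
    id: text('id').notNull(),
    userId: text('user_id').notNull(),
    providerId: text('provider_id').notNull(),
    displayName: text('display_name'),
    updatedAt: timestamp('updated_at'),
  },
  (table) => ({
    pk: primaryKey({ columns: [table.id, table.userId, table.providerId] }),
  }),
);

const db = drizzle(new Pool({ connectionString: process.env.DATABASE_URL }));

// Insert-or-ignore: rows whose (id, userId, providerId) already exist are
// skipped silently, so a re-import cannot clobber a user's edits, and only
// the genuinely new rows come back from .returning().
export const insertNewModels = (records: (typeof aiModels.$inferInsert)[]) =>
  db
    .insert(aiModels)
    .values(records)
    .onConflictDoNothing({
      target: [aiModels.id, aiModels.userId, aiModels.providerId],
    })
    .returning();

If overwriting existing rows were the goal, onConflictDoUpdate would be the matching Drizzle call; the deleted transaction emulated exactly that with its second update pass.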
