Skip to content

Commit

Permalink
feat(config): 新增集群区分AI功能和HPC功能配置 (#1148)
Browse files Browse the repository at this point in the history
### 改动

集群配置文件新增如下配置项,配置为 true 时,才会在对应的门户系统中展示该集群选项:
```yaml
hpc:
  enabled: true

ai:
  enabled: false
```

AI 系统中的跳转链接文字由“门户”改为“SCOW HPC”。


![b473480d-8392-4108-a37d-84e1d27fecf0](https://github.com/PKUHPC/SCOW/assets/130351655/8a5a67d5-5253-428f-8b13-41dad0b031b0)


管理系统中,跳转门户的链接文字由“门户”改为“SCOW HPC”,跳转 AI 的链接文字为“SCOW AI”。


![cd0ab7ac-f5bc-4bb9-92ad-1e6f8ff67ee8](https://github.com/PKUHPC/SCOW/assets/130351655/4864336a-1055-40ae-9e4d-e8d99a6dfc3e)
  • Loading branch information
ZihanChen821 authored Mar 5, 2024
1 parent f02687a commit 02d6a18
Show file tree
Hide file tree
Showing 17 changed files with 92 additions and 14 deletions.
5 changes: 5 additions & 0 deletions .changeset/dry-spoons-sit.md
Original file line number Diff line number Diff line change
@@ -0,0 +1,5 @@
---
"@scow/config": patch
---

集群配置新增 hpc.enabled 和 ai.enabled 属性,用于区分 AI 集群、HPC 集群或融合集群
8 changes: 8 additions & 0 deletions .changeset/proud-planes-give.md
Original file line number Diff line number Diff line change
@@ -0,0 +1,8 @@
---
"@scow/portal-server": patch
"@scow/portal-web": patch
"@scow/mis-web": patch
"@scow/ai": patch
---

新增集群区分 AI 功能和 HPC 功能配置
4 changes: 3 additions & 1 deletion apps/ai/src/app/(auth)/jobs/[clusterId]/LaunchAppForm.tsx
Original file line number Diff line number Diff line change
Expand Up @@ -366,7 +366,9 @@ export const LaunchAppForm = (props: Props) => {
account: account,
partition: partition,
nodeCount: nodeCount,
coreCount: coreCount,
coreCount: gpuCount ?
gpuCount * Math.floor(currentPartitionInfo!.cores / currentPartitionInfo!.gpus) :
coreCount,
gpuCount: gpuCount,
maxTime: maxTime,
memory: memorySize,
Expand Down
2 changes: 1 addition & 1 deletion apps/ai/src/app/(auth)/layout.tsx
Original file line number Diff line number Diff line change
Expand Up @@ -95,7 +95,7 @@ export default function Layout(
icon={<DesktopOutlined style={{ paddingRight: 2 }} />}
link={publicConfig.PORTAL_URL}
// linkText={t("baseLayout.linkTextAI")}
linkText="门户"
linkText="SCOW HPC"
/>
{/* {
systemLanguageConfig.isUsingI18n ? (
Expand Down
2 changes: 1 addition & 1 deletion apps/ai/src/server/config/clusters.ts
Original file line number Diff line number Diff line change
Expand Up @@ -13,5 +13,5 @@
import { getClusterConfigs } from "@scow/config/build/cluster";
import { logger } from "src/server/utils/logger";

export const clusters = getClusterConfigs(undefined, logger);
export const clusters = getClusterConfigs(undefined, logger, ["ai"]);

2 changes: 1 addition & 1 deletion apps/ai/src/server/trpc/route/config.ts
Original file line number Diff line number Diff line change
Expand Up @@ -31,7 +31,7 @@ import { z } from "zod";


const configPath = USE_MOCK ? join(__dirname, "config") : undefined;
const clustersInit = getClusterConfigs(configPath, console);
const clustersInit = getClusterConfigs(configPath, console, ["ai"]);
Object.keys(clustersInit).map((id) => clustersInit[id].loginNodes = clustersInit[id].loginNodes.map(getLoginNode));

export const clusters = clustersInit;
Expand Down
8 changes: 8 additions & 0 deletions apps/cli/assets/init-full/config/clusters/hpc01.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -23,3 +23,11 @@ adapterUrl: localhost:8972

# # 是否在这个机器上自动配置nginx
# autoSetupNginx: true

# 集群在HPC中是否启用,默认为true
hpc:
enabled: true

# 集群在AI中是否启用,默认为false
ai:
enabled: false
2 changes: 1 addition & 1 deletion apps/mis-web/src/i18n/en.ts
Original file line number Diff line number Diff line change
Expand Up @@ -156,7 +156,7 @@ export default {
operationLog: "Operation Log",
statistic: "Statistic",
},
navLinkTextPortal: "Portal",
navLinkTextPortal: "SCOW HPC",
navLinkTextAI: "SCOW AI",
dashboard: "Dashboard",
user: {
Expand Down
2 changes: 1 addition & 1 deletion apps/mis-web/src/i18n/zh_cn.ts
Original file line number Diff line number Diff line change
Expand Up @@ -156,7 +156,7 @@ export default {
operationLog:"操作日志",
statistic: "平台数据统计",
},
navLinkTextPortal: "门户",
navLinkTextPortal: "SCOW HPC",
navLinkTextAI: "SCOW AI",
dashboard: "仪表盘",
user: {
Expand Down
2 changes: 1 addition & 1 deletion apps/portal-server/src/config/clusters.ts
Original file line number Diff line number Diff line change
Expand Up @@ -13,5 +13,5 @@
import { getClusterConfigs } from "@scow/config/build/cluster";
import { logger } from "src/utils/logger";

export const clusters = getClusterConfigs(undefined, logger);
export const clusters = getClusterConfigs(undefined, logger, ["hpc"]);

3 changes: 2 additions & 1 deletion apps/portal-server/src/utils/desktops.ts
Original file line number Diff line number Diff line change
Expand Up @@ -24,7 +24,8 @@ import { Logger } from "ts-log";

export function getDesktopConfig(cluster: string): LoginDeskopConfigSchema {

return { ...getPortalConfig().loginDesktop, ...getClusterConfigs()[cluster].loginDesktop };
return { ...getPortalConfig().loginDesktop,
...getClusterConfigs(undefined, undefined, ["hpc"])[cluster].loginDesktop };
}

export function ensureEnabled(cluster: string) {
Expand Down
2 changes: 1 addition & 1 deletion apps/portal-server/src/utils/turbovnc.ts
Original file line number Diff line number Diff line change
Expand Up @@ -22,7 +22,7 @@ export function getTurboVNCPath(cluster: string) {

const commonTurboVNCPath = getPortalConfig().turboVNCPath;

const clusterTurboVNCPath = getClusterConfigs()[cluster].turboVNCPath;
const clusterTurboVNCPath = getClusterConfigs(undefined, undefined, ["hpc"])[cluster].turboVNCPath;

return clusterTurboVNCPath || commonTurboVNCPath;

Expand Down
2 changes: 1 addition & 1 deletion apps/portal-web/config.js
Original file line number Diff line number Diff line change
Expand Up @@ -130,7 +130,7 @@ const buildRuntimeConfig = async (phase, basePath) => {

const configPath = mockEnv ? join(__dirname, "config") : undefined;

const clusters = getClusterConfigs(configPath, console);
const clusters = getClusterConfigs(configPath, console, ["hpc"]);

Object.keys(clusters).map((id) => clusters[id].loginNodes = clusters[id].loginNodes.map(getLoginNode));

Expand Down
9 changes: 8 additions & 1 deletion docs/docs/deploy/config/cluster-config.md
Original file line number Diff line number Diff line change
Expand Up @@ -55,7 +55,14 @@ crossClusterFileTransfer:
enabled: true
# 传输节点的地址(ip地址:端口号)
transferNode: localhost:22222


# 集群在HPC中是否启用,默认为true
hpc:
enabled: true

# 集群在AI中是否启用,默认为false
ai:
enabled: false
```
## 注意
Expand Down
3 changes: 3 additions & 0 deletions libs/config/package.json
Original file line number Diff line number Diff line change
Expand Up @@ -18,6 +18,9 @@
"@scow/lib-config": "workspace:*",
"@sinclair/typebox": "0.32.13"
},
"devDependencies": {
"ts-log": "2.2.5"
},
"volta": {
"extends": "../../package.json"
}
Expand Down
46 changes: 43 additions & 3 deletions libs/config/src/cluster.ts
Original file line number Diff line number Diff line change
Expand Up @@ -10,10 +10,11 @@
* See the Mulan PSL v2 for more details.
*/

import { GetConfigFn, getDirConfig } from "@scow/lib-config";
import { getDirConfig } from "@scow/lib-config";
import { Static, Type } from "@sinclair/typebox";
import { DEFAULT_CONFIG_BASE_PATH } from "src/constants";
import { createI18nStringSchema } from "src/i18n";
import { Logger } from "ts-log";

const CLUSTER_CONFIG_BASE_PATH = "clusters";

Expand Down Expand Up @@ -100,6 +101,15 @@ export const ClusterConfigSchema = Type.Object({
enabled: Type.Boolean({ description: "是否开启跨集群传输功能", default: false }),
transferNode: Type.Optional(Type.String({ description: "跨集群传输文件的节点" })),
})),

hpc: Type.Object({
enabled: Type.Boolean({ description: "是否在HPC中启用" }),
}, { description: "集群在HPC中是否启用, 默认启用", default: { enabled: true } }),

ai: Type.Object({
enabled: Type.Boolean({ description: "是否在AI中启用" }),
}, { description: "集群在AI中是否启用, 默认不启用", default: { enabled: false } }),

k8s: Type.Optional(Type.Object({
runtime: Type.Enum(k8sRuntime, { description: "k8s 集群运行时, ai系统的镜像功能的命令取决于该值, 可选 docker 或者 containerd",
default: "containerd" }),
Expand All @@ -109,8 +119,19 @@ export const ClusterConfigSchema = Type.Object({

export type ClusterConfigSchema = Static<typeof ClusterConfigSchema>;

export const getClusterConfigs: GetConfigFn<Record<string, ClusterConfigSchema>> =
(baseConfigPath, logger) => {

/**
 * Kinds of SCOW system a cluster can be enabled for.
 * Each value is also the key of the cluster config section whose `enabled` flag is checked.
 */
export type ClusterType = "hpc" | "ai";

/**
 * Signature of a function that loads cluster configs, optionally filtered by cluster type.
 *
 * @param baseConfigPath - Optional base path of the config files (presumably the config root; confirm against the loader).
 * @param logger - Optional logger.
 * @param type - Cluster types to keep. In `getClusterConfigs`, a cluster is kept when it is enabled
 *   for at least one of the listed types, e.g. `["hpc", "ai"]` keeps every cluster enabled for HPC or AI.
 *   When omitted, `getClusterConfigs` falls back to `["hpc", "ai"]`, so a cluster with both flags
 *   disabled is still filtered out.
 * @returns The loaded configuration, of type `T`.
 */
export type GetClusterConfigFn<T> = (baseConfigPath?: string, logger?: Logger, type?: ClusterType[]) => T;

export const getClusterConfigs: GetClusterConfigFn<Record<string, ClusterConfigSchema>> =
(baseConfigPath, logger, clusterType) => {

const types: ClusterType[] = clusterType ?? ["hpc", "ai"];

const config = getDirConfig(
ClusterConfigSchema,
Expand Down Expand Up @@ -144,5 +165,24 @@ export const getClusterConfigs: GetConfigFn<Record<string, ClusterConfigSchema>>
throw new Error("login node address must be unique across all clusters and all login nodes.");
}


for (const cluster in config) {
if (Object.hasOwnProperty.call(config, cluster)) {
const clusterInfo = config[cluster];
if (clusterInfo) {
let enabled = false;
for (const type of types) {
if (clusterInfo[type].enabled) {
enabled = true;
break;
}
}
if (!enabled) {
delete config[cluster];
}
}
}
}

return config;
};
4 changes: 4 additions & 0 deletions pnpm-lock.yaml

Some generated files are not rendered by default. Learn more about how customized files appear on GitHub.

0 comments on commit 02d6a18

Please sign in to comment.