Skip to content

Commit

Permalink
fix(portal): 作业提交页面修复并优化模板时间应用 (#1302)
Browse files Browse the repository at this point in the history
![image](https://github.com/PKUHPC/SCOW/assets/78541912/8b9ac8ed-6020-4a6c-8526-47b8d1296073)
### 要修复问题
1.从模板值中获取到的时间无法填入
2.因为新增了时间单位,而模板文件中只保存了分钟数,所以无法换算回分钟以外的单位
<img width="302" alt="1d0a79840fb94fee37610e85dc107bc"
src="https://github.com/PKUHPC/SCOW/assets/78541912/0f47fec2-a763-4c91-81a1-dbbe1d67a4ce">
3.最长运行时间为空时,提示不随中英文切换变动
### 修复方法
1.增加最长运行时间的时间单位

![image](https://github.com/PKUHPC/SCOW/assets/78541912/c180c218-ddcb-4f9a-8fa9-d138e077f0f2)
2.最长运行时间为空时,提示随中英文切换变动

![image](https://github.com/PKUHPC/SCOW/assets/78541912/5e740f49-51d8-4ee5-ad18-59d6fe55a33c)

---------

Co-authored-by: Chen Junda <[email protected]>
  • Loading branch information
cuvalign and ddadaal authored Jun 20, 2024
1 parent a0e9199 commit 3558bd4
Show file tree
Hide file tree
Showing 13 changed files with 99 additions and 54 deletions.
6 changes: 6 additions & 0 deletions .changeset/olive-rings-heal.md
Original file line number Diff line number Diff line change
@@ -0,0 +1,6 @@
---
"@scow/portal-server": patch
"@scow/portal-web": patch
---

提交作业并保存作业模板时,最长运行时间的单位也会一并保存到模板中
5 changes: 5 additions & 0 deletions .changeset/spotty-carpets-smile.md
Original file line number Diff line number Diff line change
@@ -0,0 +1,5 @@
---
"@scow/grpc-api": minor
---

JobTemplate 与 ListAllJobsResponse 增加 max_time_unit 可选字段
2 changes: 2 additions & 0 deletions apps/portal-server/src/clusterops/api/job.ts
Original file line number Diff line number Diff line change
Expand Up @@ -10,6 +10,7 @@
* See the Mulan PSL v2 for more details.
*/

import { TimeUnit } from "@scow/protos/build/portal/job";
import { Logger } from "ts-log";


Expand All @@ -30,6 +31,7 @@ export interface JobTemplate {
memory?: string;
comment?: string | undefined;
scriptOutput?: string | undefined;
maxTimeUnit?: TimeUnit | undefined;
}

export interface ListJobTemplatesRequest {
Expand Down
16 changes: 11 additions & 5 deletions apps/portal-server/src/services/job.ts
Original file line number Diff line number Diff line change
Expand Up @@ -17,7 +17,7 @@ import { Status } from "@grpc/grpc-js/build/src/constants";
import { jobInfoToPortalJobInfo, jobInfoToRunningjob } from "@scow/lib-scheduler-adapter";
import { checkSchedulerApiVersion } from "@scow/lib-server";
import { createDirectoriesRecursively, sftpReadFile, sftpStat, sftpWriteFile } from "@scow/lib-ssh";
import { AccountStatusFilter, JobServiceServer, JobServiceService } from "@scow/protos/build/portal/job";
import { AccountStatusFilter, JobServiceServer, JobServiceService, TimeUnit } from "@scow/protos/build/portal/job";
import { parseErrorDetails } from "@scow/rich-error-model";
import { ApiVersion } from "@scow/utils/build/version";
import path, { join } from "path";
Expand Down Expand Up @@ -228,8 +228,8 @@ export const jobServiceServer = plugin((server) => {
},

submitJob: async ({ request, logger }) => {
const { cluster, command, jobName, coreCount, gpuCount, maxTime, saveAsTemplate, userId,
nodeCount, partition, qos, account, comment, workingDirectory, output
const { cluster, command, jobName, coreCount, gpuCount, maxTime, maxTimeUnit = TimeUnit.MINUTES,
saveAsTemplate, userId, nodeCount, partition, qos, account, comment, workingDirectory, output
, errorOutput, memory, scriptOutput } = request;
await checkActivatedClusters({ clusterIds: cluster });

Expand All @@ -240,13 +240,18 @@ export const jobServiceServer = plugin((server) => {
const sftp = await ssh.requestSFTP();
await createDirectoriesRecursively(sftp, workingDirectory);
});

const timeUnitConversion = {
[TimeUnit.MINUTES]: 1,
[TimeUnit.HOURS]: 60,
[TimeUnit.DAYS]: 60 * 24,
};
const maxTimeConversion = maxTime * (timeUnitConversion[maxTimeUnit]);
const reply = await callOnOne(
cluster,
logger,
async (client) => await asyncClientCall(client.job, "submitJob", {
userId, jobName, account, partition: partition!, qos, nodeCount, gpuCount: gpuCount || 0,
memoryMb: Number(memory?.split("M")[0]), coreCount, timeLimitMinutes: maxTime,
memoryMb: Number(memory?.split("M")[0]), coreCount, timeLimitMinutes: maxTimeConversion,
script: command, workingDirectory, stdout: output, stderr: errorOutput, extraOptions: [],
}).catch((e) => {
const ex = e as ServiceError;
Expand Down Expand Up @@ -289,6 +294,7 @@ export const jobServiceServer = plugin((server) => {
errorOutput,
memory,
scriptOutput,
maxTimeUnit,
};


Expand Down
2 changes: 2 additions & 0 deletions apps/portal-web/src/apis/api.mock.ts
Original file line number Diff line number Diff line change
Expand Up @@ -15,6 +15,7 @@ import { ClusterActivationStatus } from "@scow/config/build/type";
import type { RunningJob } from "@scow/protos/build/common/job";
import { JobInfo } from "@scow/protos/build/portal/job";
import { api } from "src/apis/api";
import { TimeUnit } from "src/models/job";
export type MockApi<TApi extends Record<
string,
(...args: any[]) => JsonFetchResultPromiseLike<any>>
Expand Down Expand Up @@ -198,6 +199,7 @@ export const mockApi: MockApi<typeof api> = {
output: "job.%j.out",
errorOutput: "job.%j.err",
workingDirectory: "/nfs/jobs/123",
maxTimeUnit: TimeUnit.MINUTES,
},
}),

Expand Down
1 change: 1 addition & 0 deletions apps/portal-web/src/i18n/en.ts
Original file line number Diff line number Diff line change
Expand Up @@ -181,6 +181,7 @@ export default {
gpuCount: "Number of GPU Cards per Node",
coreCount: "Number of CPU Cores per Node",
maxTime: "Maximum Running Time",
requireMaxTime: "Please enter the maximum runtime",
minute: "Minutes",
hours:"Hours",
days: "Days",
Expand Down
1 change: 1 addition & 0 deletions apps/portal-web/src/i18n/zh_cn.ts
Original file line number Diff line number Diff line change
Expand Up @@ -181,6 +181,7 @@ export default {
gpuCount: "单节点GPU卡数",
coreCount: "单节点核心数",
maxTime: "最长运行时间",
requireMaxTime:"请输入最长运行时间",
minute: "分钟",
hours:"小时",
days: "天",
Expand Down
5 changes: 5 additions & 0 deletions apps/portal-web/src/models/job.ts
Original file line number Diff line number Diff line change
Expand Up @@ -86,3 +86,8 @@ export enum AccountStatusFilter {
UNBLOCKED_ONLY = 2,
};

/**
 * Unit in which a job's maximum running time is expressed.
 *
 * NOTE(review): the numeric values presumably mirror the `TimeUnit` enum
 * generated in `@scow/protos/build/portal/job`; confirm before changing or
 * reordering members.
 */
export enum TimeUnit {
/** The maximum running time is given in minutes. */
MINUTES = 0,
/** The maximum running time is given in hours. */
HOURS = 1,
/** The maximum running time is given in days. */
DAYS = 2,
}
84 changes: 45 additions & 39 deletions apps/portal-web/src/pageComponents/job/SubmitJobForm.tsx
Original file line number Diff line number Diff line change
Expand Up @@ -23,7 +23,7 @@ import { SingleClusterSelector } from "src/components/ClusterSelector";
import { CodeEditor } from "src/components/CodeEditor";
import { ClusterNotAvailablePage } from "src/components/errorPages/ClusterNotAvailablePage";
import { prefix, useI18nTranslateToString } from "src/i18n";
import { AccountStatusFilter } from "src/models/job";
import { AccountStatusFilter, TimeUnit } from "src/models/job";
import { FileSelectModal } from "src/pageComponents/job/FileSelectModal";
import { Partition } from "src/pages/api/cluster";
import { ClusterInfoStore } from "src/stores/ClusterInfoStore";
Expand All @@ -42,8 +42,8 @@ interface JobForm {
command: string;
jobName: string;
qos: string | undefined;
maxTimeValue: number;
maxTimeUnit: "minutes" | "hours";
maxTime: number;
maxTimeUnit: TimeUnit | undefined;
account: string;
comment: string;
workingDirectory: string;
Expand All @@ -70,13 +70,13 @@ const initialValues = {
nodeCount: 1,
coreCount: 1,
gpuCount: 1,
maxTimeValue: 30,
maxTimeUnit:"minutes",
maxTime: 30,
maxTimeUnit: TimeUnit.MINUTES,
output: "job.%j.out",
scriptOutput:"job.%j.sh",
scriptOutput: "job.%j.sh",
errorOutput: "job.%j.err",
save: false,
showScriptOutput:true,
showScriptOutput: true,
} as Partial<JobForm>;

interface Props {
Expand All @@ -96,24 +96,22 @@ export const SubmitJobForm: React.FC<Props> = ({ initial = initialValues, submit


const cluster = Form.useWatch("cluster", form) as Cluster | undefined;
const timeUnitConversion = {
minutes: 1,
hours: 60,
days: 60 * 24,
};
const submit = async () => {
const formValues = await form.validateFields();
const { cluster, command, jobName, coreCount, gpuCount, workingDirectory, output, errorOutput, save,
maxTimeValue, maxTimeUnit, nodeCount, partition, qos, account, comment, showScriptOutput } = formValues;
maxTime, maxTimeUnit, nodeCount, partition, qos, account, comment, showScriptOutput } = formValues;
const scriptOutput = showScriptOutput ? formValues.scriptOutput : "";
const maxTime = maxTimeValue * (timeUnitConversion[maxTimeUnit] || 1);
await api.submitJob({ body: {
cluster: cluster.id, command, jobName, account,
coreCount: gpuCount ? gpuCount * Math.floor(currentPartitionInfo!.cores / currentPartitionInfo!.gpus) : coreCount,
gpuCount,
maxTime, nodeCount, partition, qos, comment,
workingDirectory, save, memory, output, errorOutput, scriptOutput,
} })

await api.submitJob({
body: {
cluster: cluster.id, command, jobName, account,
coreCount: gpuCount ? gpuCount * Math.floor(currentPartitionInfo!.cores
/ currentPartitionInfo!.gpus) : coreCount,
gpuCount,
maxTime, maxTimeUnit, nodeCount, partition, qos, comment,
workingDirectory, save, memory, output, errorOutput, scriptOutput,
},
})
.httpError(500, (e) => {
if (e.code === "SCHEDULER_FAILED") {
modal.error({
Expand Down Expand Up @@ -162,7 +160,7 @@ export const SubmitJobForm: React.FC<Props> = ({ initial = initialValues, submit
// 获取集群信息
const clusterInfoQuery = useAsync({
promiseFn: useCallback(async () => cluster
? api.getClusterInfo({ query: { cluster: cluster?.id } })
? api.getClusterInfo({ query: { cluster: cluster?.id } })
: undefined, [cluster]),
onResolve: () => {
const jobInitialName = genJobName();
Expand Down Expand Up @@ -247,12 +245,14 @@ export const SubmitJobForm: React.FC<Props> = ({ initial = initialValues, submit
setAccountPartitionsCacheMap({});
handlePartitionCacheMap({});

return cluster ? await api.getAccounts({ query: {
cluster: cluster.id,
statusFilter: AccountStatusFilter.UNBLOCKED_ONLY,
} })
return cluster ? await api.getAccounts({
query: {
cluster: cluster.id,
statusFilter: AccountStatusFilter.UNBLOCKED_ONLY,
},
})
.httpError(404, (error) => { message.error(error.message); })
: { accounts: [] as string [] };
: { accounts: [] as string[] };
}, [cluster, accountsReloadTrigger]),
onResolve: (data) => {

Expand All @@ -279,10 +279,12 @@ export const SubmitJobForm: React.FC<Props> = ({ initial = initialValues, submit
const account = form.getFieldValue("account");
if (cluster && account && selectableAccounts.includes(account) && !accountPartitionsCacheMap[account]) {
const newPartitionsMap = { ...accountPartitionsCacheMap };
return await api.getAvailablePartitionsForCluster({ query: {
cluster: cluster?.id,
accountName: account,
} })
return await api.getAvailablePartitionsForCluster({
query: {
cluster: cluster?.id,
accountName: account,
},
})
.then((data) => {
newPartitionsMap[account] = data.partitions;
// 如果第一次请求时模板值中分区存在,则填入模板值的分区及qos
Expand Down Expand Up @@ -360,8 +362,8 @@ export const SubmitJobForm: React.FC<Props> = ({ initial = initialValues, submit

const memorySize = (currentPartitionInfo ?
currentPartitionInfo.gpus ? nodeCount * gpuCount
* Math.floor(currentPartitionInfo.cores / currentPartitionInfo.gpus)
* Math.floor(currentPartitionInfo.memMb / currentPartitionInfo.cores) :
* Math.floor(currentPartitionInfo.cores / currentPartitionInfo.gpus)
* Math.floor(currentPartitionInfo.memMb / currentPartitionInfo.cores) :
nodeCount * coreCount * Math.floor(currentPartitionInfo.memMb / currentPartitionInfo.cores) : 0);
const memory = memorySize + "MB";
const memoryDisplay = formatSize(memorySize, ["MB", "GB", "TB"]);
Expand Down Expand Up @@ -410,10 +412,10 @@ export const SubmitJobForm: React.FC<Props> = ({ initial = initialValues, submit
dependencies={["cluster"]}
>
{/* 加载完集群后再加载账户,保证initial值能被赋值成功 */}
{ cluster?.id && unblockedAccountsQuery?.data?.accounts &&
{cluster?.id && unblockedAccountsQuery?.data?.accounts &&
(
<AccountListSelector
selectableAccounts={ selectableAccounts ?? []}
selectableAccounts={selectableAccounts ?? []}
isLoading={unblockedAccountsQuery.isLoading}
onReload={handleAccountsReload}
onChange={handleAccountChange}
Expand Down Expand Up @@ -510,7 +512,11 @@ export const SubmitJobForm: React.FC<Props> = ({ initial = initialValues, submit
<Col span={24} sm={6}>
<Form.Item label={t(p("maxTime"))} required>
<Input.Group compact style={{ display: "flex", minWidth: "120px" }}>
<Form.Item name="maxTimeValue" rules={[{ required: true }]} noStyle>
<Form.Item
name="maxTime"
rules={[{ required: true, message: `${t(p("requireMaxTime"))}` }]}
noStyle
>
<InputNumber
min={1}
step={1}
Expand All @@ -523,9 +529,9 @@ export const SubmitJobForm: React.FC<Props> = ({ initial = initialValues, submit
popupMatchSelectWidth={false}
style={{ flex: "0 1 auto" }}
>
<Select.Option value="minutes">{t(p("minute"))}</Select.Option>
<Select.Option value="hours">{t(p("hours"))}</Select.Option>
<Select.Option value="days">{t(p("days"))}</Select.Option>
<Select.Option value={TimeUnit.MINUTES}>{t(p("minute"))}</Select.Option>
<Select.Option value={TimeUnit.HOURS}>{t(p("hours"))}</Select.Option>
<Select.Option value={TimeUnit.DAYS}>{t(p("days"))}</Select.Option>
</Select>
</Form.Item>
</Input.Group>
Expand Down
6 changes: 3 additions & 3 deletions apps/portal-web/src/pages/api/job/getJobTemplate.ts
Original file line number Diff line number Diff line change
Expand Up @@ -13,7 +13,7 @@
import { typeboxRouteSchema } from "@ddadaal/next-typed-api-routes-runtime";
import { asyncUnaryCall } from "@ddadaal/tsgrpc-client";
import { status } from "@grpc/grpc-js";
import { JobServiceClient } from "@scow/protos/build/portal/job";
import { JobServiceClient, TimeUnit } from "@scow/protos/build/portal/job";
import { Static, Type } from "@sinclair/typebox";
import { authenticate } from "src/auth/server";
import { getClient } from "src/utils/client";
Expand All @@ -29,14 +29,14 @@ export const JobTemplate = Type.Object({
nodeCount: Type.Number(),
coreCount: Type.Number(),
gpuCount: Type.Optional(Type.Number()),
/** in minutes */
maxTime: Type.Number(),
maxTime: Type.Number(), // 最长运行时间
command: Type.String(),
workingDirectory: Type.String(),
output: Type.Optional(Type.String()),
errorOutput: Type.Optional(Type.String()),
comment: Type.Optional(Type.String()),
scriptOutput:Type.Optional(Type.String()),
maxTimeUnit: Type.Optional(Type.Enum(TimeUnit)), // 最长运行时间单位
});
export type JobTemplate = Static<typeof JobTemplate>;

Expand Down
6 changes: 4 additions & 2 deletions apps/portal-web/src/pages/api/job/submitJob.ts
Original file line number Diff line number Diff line change
Expand Up @@ -13,7 +13,7 @@
import { typeboxRouteSchema } from "@ddadaal/next-typed-api-routes-runtime";
import { asyncUnaryCall } from "@ddadaal/tsgrpc-client";
import { status } from "@grpc/grpc-js";
import { JobServiceClient } from "@scow/protos/build/portal/job";
import { JobServiceClient, TimeUnit } from "@scow/protos/build/portal/job";
import { Static, Type } from "@sinclair/typebox";
import { authenticate } from "src/auth/server";
import { OperationResult, OperationType } from "src/models/operationLog";
Expand All @@ -40,6 +40,7 @@ export const SubmitJobInfo = Type.Object({
comment: Type.Optional(Type.String()),
save: Type.Boolean(),
scriptOutput:Type.Optional(Type.String()),
maxTimeUnit:Type.Optional(Type.Enum(TimeUnit)),
});

export type SubmitJobInfo = Static<typeof SubmitJobInfo>;
Expand Down Expand Up @@ -78,7 +79,7 @@ export default route(SubmitJobSchema, async (req, res) => {

if (!info) { return; }

const { cluster, command, jobName, coreCount, gpuCount, maxTime, save,
const { cluster, command, jobName, coreCount, gpuCount, maxTime, maxTimeUnit, save,
nodeCount, partition, qos, account, comment
, workingDirectory, output, errorOutput, scriptOutput, memory } = req.body;

Expand All @@ -99,6 +100,7 @@ export default route(SubmitJobSchema, async (req, res) => {
coreCount,
gpuCount,
maxTime,
maxTimeUnit,
nodeCount,
partition,
qos,
Expand Down
7 changes: 4 additions & 3 deletions apps/portal-web/src/pages/jobs/submit.tsx
Original file line number Diff line number Diff line change
Expand Up @@ -21,6 +21,7 @@ import { api } from "src/apis";
import { requireAuth } from "src/auth/requireAuth";
import { PageTitle } from "src/components/PageTitle";
import { useI18nTranslateToString } from "src/i18n";
import { TimeUnit } from "src/models/job";
import { SubmitJobForm } from "src/pageComponents/job/SubmitJobForm";
import { ClusterInfoStore } from "src/stores/ClusterInfoStore";
import { getServerI18nConfigText, publicConfig } from "src/utils/config";
Expand Down Expand Up @@ -62,13 +63,13 @@ export const SubmitJobPage: NextPage<Props> = requireAuth(() => true)(
output: template.output,
errorOutput: template.errorOutput,
save: false,
scriptOutput:template.scriptOutput,
scriptOutput: template.scriptOutput,
maxTimeUnit: template.maxTimeUnit || TimeUnit.MINUTES,
}));
} else {
return undefined;
}
},
[cluster, jobTemplateId]),
}, [cluster, jobTemplateId]),
});

const t = useI18nTranslateToString();
Expand Down
Loading

0 comments on commit 3558bd4

Please sign in to comment.