# train.yaml
---
# Single-run training Pod: runs train.py once in the nri-train container with
# one GPU and the "workdisk" PVC mounted at /workspace.
#
# metadata.generateName is used instead of metadata.name, so the API server
# appends a random suffix to the Pod name. Submit with `kubectl create -f`;
# `kubectl apply` needs a fixed metadata.name.
apiVersion: v1
kind: Pod
metadata:
  # NOTE(review): the prefix says seq10/pred2 but the arguments below pass
  # --seq_len=49 and --predicted_timesteps=10 — confirm which one is intended.
  generateName: aggrmean-seq10-pred2-
  labels:
    type: interactive
    research-group: rl-team
spec:
  # Shares the host's IPC namespace with this Pod.
  # NOTE(review): presumably needed for shared-memory data loading — confirm,
  # since it weakens isolation from other workloads on the node.
  hostIPC: true
  # Run to completion once; do not restart the container on exit or failure.
  restartPolicy: Never
  containers:
    - name: nri-train
      # The command is exec'd directly, without a shell, so each list item
      # reaches the process exactly as written. Every argument is quoted as a
      # whole; quotes placed inside an unquoted item (--name="...") would be
      # passed to train.py as literal characters.
      command:
        - "python"
        - "-u"
        - "train.py"
        - "--name=fulltraining_lr1e-7_sparsity"
        - "--epochs=200"
        - "--batch_size=2"
        - "--seq_len=49"
        - "--predicted_timesteps=10"
        - "--lr=1e-7"
        - "--batch_limit=0"
        - "--sparsity_prior"
      # Debug alternative: keep the container alive for an interactive shell.
      # command: ["sleep", "infinity"]
      image: amr-registry.caas.intel.com/aipg/mpettee-pytorch-geometric
      workingDir: /workspace/choreo-graph/
      resources:
        # Only limits are given; Kubernetes then uses the same values as the
        # requests.
        limits:
          nvidia.com/gpu: 1
          cpu: 4
          memory: 50Gi
      volumeMounts:
        - mountPath: "/workspace"
          name: workspace
      envFrom:
        # Imports every key of the proxy-config ConfigMap as an env variable.
        - configMapRef:
            name: proxy-config
      env:
        # Kubernetes expands only $(VAR) references, never shell-style $VAR,
        # so a "$LD_LIBRARY_PATH:" prefix would be stored as literal text and
        # would not extend the image's value. Setting this variable replaces
        # whatever the image defines; if the image's own library directories
        # are needed, list them here explicitly.
        - name: LD_LIBRARY_PATH
          value: "/.mujoco/mjpro150/bin"
        # Env values must be strings, hence the quoted number.
        - name: OMP_NUM_THREADS
          value: "1"
  # Pod-level security context: process UID and the group applied to mounted
  # volumes.
  securityContext:
    fsGroup: 17685
    runAsUser: 11964579
  volumes:
    - name: workspace
      persistentVolumeClaim:
        claimName: workdisk