Skip to content

Commit

Permalink
checkpoint
Browse files Browse the repository at this point in the history
  • Loading branch information
bdevcich committed May 8, 2024
1 parent dc7f968 commit 34fe873
Show file tree
Hide file tree
Showing 81 changed files with 3,294 additions and 126 deletions.
7 changes: 7 additions & 0 deletions allocation-bug-pending-mount.yaml
Original file line number Diff line number Diff line change
@@ -0,0 +1,7 @@
# Allocation request: one xfs allocation set on a single storage node.
spec:
  allocationSets:
    # 10737418240 bytes = 10 GiB (10 * 1024^3).
    - allocationSize: 10737418240
      label: xfs
      storage:
        # Two allocations of the size above on kind-worker2.
        - name: kind-worker2
          allocationCount: 2
3 changes: 3 additions & 0 deletions allocation-computes-bug-pending-mount.yaml
Original file line number Diff line number Diff line change
@@ -0,0 +1,3 @@
# Named entries under `data`.
# NOTE(review): presumably the compute nodes used to reproduce the
# pending-mount bug; confirm against the consumer of this file.
data:
  - name: 'compute-01'
  - name: 'compute-02'
16 changes: 10 additions & 6 deletions allocation-computes.yaml
Original file line number Diff line number Diff line change
@@ -1,7 +1,11 @@
data:
- name: "compute-01"
- name: "compute-02"
- name: "compute-03"
- name: "compute-04"
# - name: "compute-node-3"
#- name: "compute-node-5"
# kind
# - name: "compute-01"
# - name: "compute-02"
# - name: "compute-03"
# - name: "compute-04"
# # dpa1/htx
- name: 'rabbit-compute-2'
- name: 'rabbit-compute-3'
- name: 'rabbit-compute-4'
- name: 'rabbit-compute-5'
16 changes: 10 additions & 6 deletions allocation-gfs2.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -3,9 +3,13 @@ spec:
- allocationSize: 50000000000
label: gfs2
storage:
- allocationCount: 3
name: kind-worker2
- allocationCount: 1
name: kind-worker3
# - allocationCount: 1
# name: rabbit-node-2
- name: kind-worker2
allocationCount: 3
- name: kind-worker3
allocationCount: 1
#- name: rabbit-node-0
# allocationCount: 2
# - name: rabbit-node-1
# allocationCount: 2
# - name: rabbit-node-2
# allocationCount: 2
8 changes: 5 additions & 3 deletions allocation-lustre-global.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -5,10 +5,12 @@ spec:
storage:
- allocationCount: 1
# name: kind-worker2
name: rabbit-node-2
name: rabbit-node-1
- allocationSize: 50000000000
label: mgtmdt
# label: mgtmdt
# for external mgs
label: mdt
storage:
- allocationCount: 1
# name: kind-worker2
name: rabbit-node-2
name: rabbit-node-1
20 changes: 11 additions & 9 deletions allocation-lustre.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -3,14 +3,16 @@ spec:
- allocationSize: 50000000000
label: ost
storage:
- allocationCount: 1
name: kind-worker2
- allocationCount: 1
name: kind-worker3
- allocationCount: 2
# name: kind-worker2
# name: rabbit-node-1
name: rabbit-node-2
- allocationSize: 50000000000
label: mgtmdt
# Use mdt when using externalMgs
label: mdt
#label: mgtmdt
storage:
- allocationCount: 1
name: kind-worker2
- allocationCount: 1
name: kind-worker3
- allocationCount: 2
# name: kind-worker2
# name: rabbit-node-1
name: rabbit-node-2
21 changes: 15 additions & 6 deletions allocation-xfs.yaml
Original file line number Diff line number Diff line change
@@ -1,6 +1,15 @@
allocationSets:
- allocationSize: 50000000000
label: xfs
storage:
- allocationCount: 1
name: kind-worker2
spec:
allocationSets:
- allocationSize: 50000000000
label: xfs
storage:
# - name: kind-worker2
# allocationCount: 3
# - name: kind-worker3
# allocationCount: 1
# - name: rabbit-node-0
# allocationCount: 2
#- name: rabbit-node-1
# allocationCount: 2
- name: rabbit-node-2
allocationCount: 2
14 changes: 14 additions & 0 deletions bug-pending-mount.yaml
Original file line number Diff line number Diff line change
@@ -0,0 +1,14 @@
# Workflow that starts in the Proposal state and requests a 10GiB xfs job
# allocation named project1 (see the #DW jobdw directive).
apiVersion: dataworkflowservices.github.io/v1alpha2
kind: Workflow
metadata:
  name: fluxjob-456407384064
  namespace: default
spec:
  desiredState: Proposal
  dwDirectives:
    # Single-quoted so the leading '#' is part of the string, not a comment.
    - '#DW jobdw capacity=10GiB type=xfs name=project1'
  groupID: 31193
  hurry: false
  jobID: fCzNA9Hh
  userID: 31193
  wlmID: flux
15 changes: 15 additions & 0 deletions check_daemons.sh
Original file line number Diff line number Diff line change
@@ -0,0 +1,15 @@
#!/usr/bin/env bash
#
# Print the systemd status of the clientmount and nnf-dm daemons on
# compute-node-0 through compute-node-3, one ssh call per daemon per node.
#
# Every node and daemon is queried even if an earlier query fails; the script
# exits non-zero at the end if any status query returned a non-zero status.

set -e

# Overall result: flips to 1 as soon as any status query fails.
rc=0

for (( c=0; c<=3; c++ )); do
  node="compute-node-${c}"

  # Uncomment to restart clientmount instead of only inspecting it:
  # ssh "$node" systemctl stop clientmount
  # ssh "$node" systemctl daemon-reload
  # ssh "$node" systemctl start clientmount
  # `systemctl status` exits non-zero for a unit that is not running; record
  # the failure instead of letting `set -e` abort before the remaining checks.
  ssh "$node" systemctl status clientmount || rc=1

  # Uncomment to restart nnf-dm instead of only inspecting it:
  # ssh "$node" systemctl stop nnf-dm
  # ssh "$node" systemctl daemon-reload
  # ssh "$node" systemctl start nnf-dm
  ssh "$node" systemctl status nnf-dm || rc=1
done

exit "$rc"
16 changes: 16 additions & 0 deletions container-mpi-global.yaml
Original file line number Diff line number Diff line change
@@ -0,0 +1,16 @@
# Workflow that starts in the Proposal state with two directives: a 50GB gfs2
# job allocation, and a container using profile example-mpi that is passed
# that allocation plus the global lustre path /lus/global.
apiVersion: dataworkflowservices.github.io/v1alpha1
kind: Workflow
metadata:
  name: blake-container-mpi-global
  namespace: default
spec:
  desiredState: 'Proposal'
  dwDirectives:
    - '#DW jobdw name=blake-gfs2-mpi type=gfs2 capacity=50GB'
    # Folded scalar: the lines below are joined with single spaces into one
    # directive string, with no trailing newline.
    - >-
      #DW container name=blake-container-mpi-global profile=example-mpi
      DW_JOB_foo_local_storage=blake-gfs2-mpi
      DW_GLOBAL_foo_global_lustre=/lus/global
  wlmID: '5f239bd8-30db-450b-8c2c-a1a7c8631a1a'
  jobID: 900002
  userID: 1050
  groupID: 1051
17 changes: 17 additions & 0 deletions container-mpi.yaml
Original file line number Diff line number Diff line change
@@ -0,0 +1,17 @@
# Workflow that starts in the Proposal state with two directives: a 50GB gfs2
# job allocation, and a container using profile example-mpi-webserver that is
# passed that allocation.
apiVersion: dataworkflowservices.github.io/v1alpha2
kind: Workflow
metadata:
  name: blake-container-mpi
  namespace: default
spec:
  desiredState: 'Proposal'
  dwDirectives:
    - '#DW jobdw name=blake-gfs2-mpi type=gfs2 capacity=50GB'
    # - "#DW persistentdw name=blake-persistent"
    # Folded scalar: the lines below are joined with single spaces into one
    # directive string, with no trailing newline.
    - >-
      #DW container name=blake-container-mpi profile=example-mpi-webserver
      DW_JOB_foo_local_storage=blake-gfs2-mpi
    # Disabled persistent-storage argument for the container directive:
    # DW_PERSISTENT_foo-persistent-storage=blake-persistent"
  wlmID: '5f239bd8-30db-450b-8c2c-a1a7c8631a1a'
  jobID: 900002
  userID: 1500
  groupID: 1600
22 changes: 11 additions & 11 deletions container.yaml
Original file line number Diff line number Diff line change
@@ -1,18 +1,18 @@
apiVersion: dws.cray.hpe.com/v1alpha1
apiVersion: dataworkflowservices.github.io/v1alpha2
kind: Workflow
metadata:
name: blake-container
name: webserver2
namespace: default
spec:
desiredState: "Proposal"
desiredState: 'Proposal'
dwDirectives:
- "#DW jobdw name=blake-gfs2 type=gfs2 capacity=50GB"
#- "#DW jobdw name=blake-gfs2 type=gfs2 capacity=50GB"
# - "#DW persistentdw name=blake-persistent"
- "#DW container name=blake-container profile=example-forever \
DW_JOB_foo-local-storage=blake-gfs2"
# DW_PERSISTENT_foo-persistent-storage=blake-persistent"
wlmID: "5f239bd8-30db-450b-8c2c-a1a7c8631a1a"
- '#DW container name=webserver2 profile=example-mpi-webserver'
# DW_PERSISTENT_foo-persistent-storage=blake-persistent"
wlmID: '5f239bd8-30db-450b-8c2c-a1a7c8631a1a'
jobID: 900001
userID: 1041
groupID: 999

# userID: 1050
# groupID: 1200
userID: 1050
groupID: 1051
6 changes: 3 additions & 3 deletions containers-persistent-storage-destroy.yaml
Original file line number Diff line number Diff line change
@@ -1,14 +1,14 @@
apiVersion: dws.cray.hpe.com/v1alpha1
apiVersion: dataworkflowservices.github.io/v1alpha1
kind: Workflow
metadata:
name: containers-persistent-storage-destroy
namespace: default
spec:
desiredState: Proposal
dwDirectives:
- '#DW destroy_persistent name=containers-persistent-storage'
- '#DW destroy_persistent name=containers-persistent-storage'
groupID: 0
hurry: false
jobID: 2
userID: 0
wlmID: "2"
wlmID: '2'
19 changes: 9 additions & 10 deletions containerx2.yaml
Original file line number Diff line number Diff line change
@@ -1,21 +1,20 @@
apiVersion: dws.cray.hpe.com/v1alpha1
apiVersion: dataworkflowservices.github.io/v1alpha1
kind: Workflow
metadata:
name: blake-container
namespace: default
spec:
desiredState: "Proposal"
desiredState: 'Proposal'
dwDirectives:
- "#DW jobdw name=blake-gfs2 type=gfs2 capacity=50GB"
- "#DW persistentdw name=blake-persistent"
- '#DW jobdw name=blake-gfs2 type=gfs2 capacity=50GB'
- '#DW persistentdw name=blake-persistent'
- "#DW container name=blake-container profile=example-randomly-fail \
DW_JOB_foo-local-storage=blake-gfs2 \
DW_PERSISTENT_foo-persistent-storage=blake-persistent"
DW_JOB_foo-local-storage=blake-gfs2 \
DW_PERSISTENT_foo-persistent-storage=blake-persistent"
- "#DW container name=blake-container2 profile=example-success \
DW_JOB_foo-local-storage=blake-gfs2 \
DW_PERSISTENT_foo-persistent-storage=blake-persistent"
wlmID: "5f239bd8-30db-450b-8c2c-a1a7c8631a1a"
DW_JOB_foo-local-storage=blake-gfs2 \
DW_PERSISTENT_foo-persistent-storage=blake-persistent"
wlmID: '5f239bd8-30db-450b-8c2c-a1a7c8631a1a'
jobID: 900001
userID: 1041
groupID: 999

15 changes: 15 additions & 0 deletions copy-in-out.yaml
Original file line number Diff line number Diff line change
@@ -0,0 +1,15 @@
# Workflow that starts in the Proposal state and requests a 50GB gfs2 job
# allocation, with a copy_in from /lus/global/mpiuser/perl into the allocation
# and a copy_out of that data to /lus/global/mpiuser/perl2.
apiVersion: dataworkflowservices.github.io/v1alpha2
kind: Workflow
metadata:
  name: blake-gfs2-copy-in-out
  namespace: default
spec:
  desiredState: 'Proposal'
  dwDirectives:
    # Single quotes keep '#' and '$' literal inside each directive string.
    - '#DW jobdw type=gfs2 capacity=50GB name=blake-gfs2-copy-in-out'
    - '#DW copy_in source=/lus/global/mpiuser/perl destination=$DW_JOB_blake-gfs2-copy-in-out/'
    - '#DW copy_out source=$DW_JOB_blake-gfs2-copy-in-out/perl destination=/lus/global/mpiuser/perl2'
  wlmID: '5f239bd8-30db-450b-8c2c-a1a7c8631a1a'
  jobID: 900003
  userID: 1050
  groupID: 1051
14 changes: 14 additions & 0 deletions copy-xfs.yml
Original file line number Diff line number Diff line change
@@ -0,0 +1,14 @@
# Workflow that starts in the Proposal state and requests a 50GB xfs job
# allocation, with a copy_in of /lus/global/user/test.in into the allocation.
apiVersion: dataworkflowservices.github.io/v1alpha2
kind: Workflow
metadata:
  name: blake-xfs-copy-in
  namespace: default
spec:
  desiredState: 'Proposal'
  dwDirectives:
    # Single quotes keep '#' and '$' literal inside each directive string.
    - '#DW jobdw type=xfs capacity=50GB name=blake-xfs'
    - '#DW copy_in source=/lus/global/user/test.in destination=$DW_JOB_blake-xfs/'
  wlmID: '5f239bd8-30db-450b-8c2c-a1a7c8631a1a'
  jobID: 900003
  userID: 1010
  groupID: 1010
15 changes: 7 additions & 8 deletions copy.yaml
Original file line number Diff line number Diff line change
@@ -1,15 +1,14 @@
apiVersion: dws.cray.hpe.com/v1alpha1
apiVersion: dataworkflowservices.github.io/v1alpha2
kind: Workflow
metadata:
name: blake-gfs2-copy-in
namespace: default
spec:
desiredState: "Proposal"
desiredState: 'Proposal'
dwDirectives:
- "#DW jobdw type=gfs2 capacity=1GB name=blake-gfs2-src"
- "#DW jobdw type=gfs2 capacity=1GB name=blake-gfs2-dest"
- "#DW copy_in source=$DW_JOB_blake-gfs2-src destination=$DW_JOB_blake-gfs2-dest"
wlmID: "5f239bd8-30db-450b-8c2c-a1a7c8631a1a"
- '#DW jobdw type=gfs2 capacity=50GB name=blake-gfs2'
- '#DW copy_in source=/lus/global/mpiuser/perl destination=$DW_JOB_blake-gfs2/'
wlmID: '5f239bd8-30db-450b-8c2c-a1a7c8631a1a'
jobID: 900002
userID: 1041
groupID: 999
userID: 1050
groupID: 1051
10 changes: 10 additions & 0 deletions dm-env.sh
Original file line number Diff line number Diff line change
@@ -0,0 +1,10 @@
#!/usr/bin/env bash
#
# Print shell `export` statements for NNF_SRC and NNF_DEST, taken from the
# `.status.env.DW_JOB*` entries of the blake-gfs2 and blake-global-lustre
# workflows respectively.
#
# NOTE(review): presumably meant to be consumed with `eval "$(./dm-env.sh)"`;
# confirm against how callers use the output.

# Abort on any failure (including inside the kubectl | yq pipelines) so a
# missing workflow or cluster error is not turned into an empty export.
set -euo pipefail

# workflow_job_env NAME
# Print what yq selects with '.status.env.DW_JOB*' from workflow NAME.
workflow_job_env() {
  kubectl get workflow "$1" -oyaml | yq '.status.env.DW_JOB*'
}

src=$(workflow_job_env blake-gfs2)
# Example value: /mnt/nnf/34d5c01c-534d-4249-af5c-b1f233df37ce-0

dest=$(workflow_job_env blake-global-lustre)
# Example value: /mnt/nnf/8006ba28-f51f-426e-89fc-fe742c639556-0

# %q shell-quotes the value so the emitted line stays a single valid
# assignment even if the value contains whitespace or shell metacharacters.
printf 'export NNF_SRC=%q\n' "$src"
printf 'export NNF_DEST=%q\n' "$dest"
Loading

0 comments on commit 34fe873

Please sign in to comment.