Skip to content

Commit

Permalink
Jobs are waiting for previous jobs to finish
Browse files (browse the repository at this point in the history)
  • Loading branch information
daniel-unyi-42 authored Oct 16, 2024
1 parent cb79d9b commit ceeba81
Showing 1 changed file with 6 additions and 6 deletions.
12 changes: 6 additions & 6 deletions submit_job.sh
Original file line number | Diff line number | Diff line change
Expand Up @@ -41,8 +41,8 @@ SINGULARITY_IMAGE="segger_dev_latest.sif" # Path to the Singularity image

# Functions to run different pipelines
run_data_processing() {
-bsub -o "$OUTPUT_LOG_PREPROCESS" -n "$N_WORKERS_PREPROCESS" -R "rusage[mem=$RAM_PREPROCESS]" -q long \
-"singularity exec --bind $LOCAL_REPO_DIR:$CONTAINER_DIR \
+bsub -J "job_data_processing" -o "$OUTPUT_LOG_PREPROCESS" -n "$N_WORKERS_PREPROCESS" -R "rusage[mem=$RAM_PREPROCESS]" -q long \
+"singularity exec --bind $LOCAL_REPO_DIR:$CONTAINER_DIR --pwd $CONTAINER_DIR \
$SINGULARITY_IMAGE python3 src/segger/cli/create_dataset_fast.py \
--base_dir '$BASE_DIR' \
--data_dir '$DATA_DIR' \
Expand All @@ -53,8 +53,8 @@ run_data_processing() {
}

run_training() {
-bsub -o "$OUTPUT_LOG_TRAIN" -n "$N_WORKERS_TRAIN" -R "rusage[mem=$RAM_TRAIN]" -R "tensorcore" -gpu "num=$GPUS:j_exclusive=no:gmem=$GPU_MEM_TRAIN" -q gpu \
-"singularity exec --nv --bind $LOCAL_REPO_DIR:$CONTAINER_DIR \
+bsub -J "job_training" -w "done(job_data_processing)" -o "$OUTPUT_LOG_TRAIN" -n "$N_WORKERS_TRAIN" -R "rusage[mem=$RAM_TRAIN]" -R "tensorcore" -gpu "num=$GPUS:j_exclusive=no:gmem=$GPU_MEM_TRAIN" -q gpu \
+"singularity exec --nv --bind $LOCAL_REPO_DIR:$CONTAINER_DIR --pwd $CONTAINER_DIR \
$SINGULARITY_IMAGE python3 src/segger/cli/train_model.py \
--dataset_dir '$DATASET_DIR' \
--models_dir '$MODELS_DIR' \
Expand All @@ -64,8 +64,8 @@ run_training() {
}

run_prediction() {
-bsub -o "$OUTPUT_LOG_PREDICT" -n "$N_WORKERS_PREDICT" -R "rusage[mem=$RAM_PREDICT]" -R "tensorcore" -gpu "num=1:j_exclusive=no:gmem=$GPU_MEM_PREDICT" -q gpu \
-"singularity exec --nv --bind $LOCAL_REPO_DIR:$CONTAINER_DIR \
+bsub -J "job_prediction" -w "done(job_training)" -o "$OUTPUT_LOG_PREDICT" -n "$N_WORKERS_PREDICT" -R "rusage[mem=$RAM_PREDICT]" -R "tensorcore" -gpu "num=1:j_exclusive=no:gmem=$GPU_MEM_PREDICT" -q gpu \
+"singularity exec --nv --bind $LOCAL_REPO_DIR:$CONTAINER_DIR --pwd $CONTAINER_DIR \
$SINGULARITY_IMAGE python3 src/segger/cli/predict.py \
--segger_data_dir '$SEGGER_DATA_DIR' \
--models_dir '$MODELS_DIR' \
Expand Down

0 comments on commit ceeba81

Please sign in to comment.