Skip to content

Commit

Permalink
Fix DS version and opt issue
Browse files Browse the repository at this point in the history
  • Loading branch information
tjruwase committed Sep 13, 2023
1 parent c4f713b commit 2d9a6e6
Show file tree
Hide file tree
Showing 2 changed files with 39 additions and 31 deletions.
20 changes: 14 additions & 6 deletions inference/huggingface/zero_inference/run_model.py
Original file line number Diff line number Diff line change
@@ -26,7 +26,7 @@
from packaging import version


-assert version.parse(deepspeed.__version__) >= version.parse("0.10.2"), "ZeRO-Inference with weight quantization and kv cache offloading is available only in DeepSpeed 0.10.3+, please upgrade DeepSpeed"
+assert version.parse(deepspeed.__version__) >= version.parse("0.10.3"), "ZeRO-Inference with weight quantization and kv cache offloading is available only in DeepSpeed 0.10.3+, please upgrade DeepSpeed"

def get_model_config(model_name):
if "175b" in model_name:
@@ -161,11 +161,19 @@ def run_generation(
return_token_type_ids = True
padding_side = "left" if config.model_type in ["opt"] else "right"

-    tokenizer = AutoTokenizer.from_pretrained(
-        model_name,
-        return_token_type_ids=return_token_type_ids,
-        padding_side=padding_side
-    )
+    if config.model_type == "opt":
+        tokenizer = AutoTokenizer.from_pretrained(
+            model_name.replace("175b", "66b"),
+            return_token_type_ids=return_token_type_ids,
+            padding_side=padding_side
+        )
+    else:
+        tokenizer = AutoTokenizer.from_pretrained(
+            model_name,
+            return_token_type_ids=return_token_type_ids,
+            padding_side=padding_side
+        )


tokenizer.pad_token = tokenizer.eos_token

Expand Down
50 changes: 25 additions & 25 deletions inference/huggingface/zero_inference/run_opt175b_a6000.sh
Original file line number Diff line number Diff line change
@@ -12,38 +12,38 @@ BSZ=8
LOG_DIR=$BASE_LOG_DIR/${MODEL_NAME}_bs${BSZ}
mkdir -p $LOG_DIR

-deepspeed --num_gpus 1 run_model.py --dummy --model ${FULL_MODEL_NAME} --batch-size ${BSZ} --disk-offload --gen-len 32 --pin-memory 0 --offload-dir ${OFFLOAD_DIR} &> $LOG_DIR/ds_${MODEL_NAME}_bs${BSZ}_disk.txt
+# deepspeed --num_gpus 1 run_model.py --dummy --model ${FULL_MODEL_NAME} --batch-size ${BSZ} --disk-offload --gen-len 32 --pin-memory 0 --offload-dir ${OFFLOAD_DIR} &> $LOG_DIR/ds_${MODEL_NAME}_bs${BSZ}_disk.txt
deepspeed --num_gpus 1 run_model.py --dummy --model ${FULL_MODEL_NAME} --batch-size ${BSZ} --cpu-offload --gen-len 32 --pin-memory 0 --offload-dir ${OFFLOAD_DIR} --quant_bits ${QB} &> $LOG_DIR/ds_${MODEL_NAME}_bs${BSZ}_cpu_q${QB}.txt

-BSZ=32
-LOG_DIR=$BASE_LOG_DIR/${MODEL_NAME}_bs${BSZ}
-mkdir -p $LOG_DIR
-deepspeed --num_gpus 1 run_model.py --dummy --model ${FULL_MODEL_NAME} --batch-size ${BSZ} --disk-offload --gen-len 32 --pin-memory 0 --offload-dir ${OFFLOAD_DIR} --kv-offload &> $LOG_DIR/ds_${MODEL_NAME}_bs${BSZ}_disk_kv.txt
+# BSZ=32
+# LOG_DIR=$BASE_LOG_DIR/${MODEL_NAME}_bs${BSZ}
+# mkdir -p $LOG_DIR
+# deepspeed --num_gpus 1 run_model.py --dummy --model ${FULL_MODEL_NAME} --batch-size ${BSZ} --disk-offload --gen-len 32 --pin-memory 0 --offload-dir ${OFFLOAD_DIR} --kv-offload &> $LOG_DIR/ds_${MODEL_NAME}_bs${BSZ}_disk_kv.txt


-BSZ=24
-LOG_DIR=$BASE_LOG_DIR/${MODEL_NAME}_bs${BSZ}
-mkdir -p $LOG_DIR
-deepspeed --num_gpus 1 run_model.py --dummy --model ${FULL_MODEL_NAME} --batch-size ${BSZ} --cpu-offload --gen-len 32 --pin-memory 0 --offload-dir ${OFFLOAD_DIR} --quant_bits ${QB} --kv-offload &> $LOG_DIR/ds_${MODEL_NAME}_bs${BSZ}_cpu_q${QB}_kv.txt
+# BSZ=24
+# LOG_DIR=$BASE_LOG_DIR/${MODEL_NAME}_bs${BSZ}
+# mkdir -p $LOG_DIR
+# deepspeed --num_gpus 1 run_model.py --dummy --model ${FULL_MODEL_NAME} --batch-size ${BSZ} --cpu-offload --gen-len 32 --pin-memory 0 --offload-dir ${OFFLOAD_DIR} --quant_bits ${QB} --kv-offload &> $LOG_DIR/ds_${MODEL_NAME}_bs${BSZ}_cpu_q${QB}_kv.txt


-# flexgen
-OFFLOAD_DIR=/local_nvme/flexgen_offload
-mkdir -p $OFFLOAD_DIR
+# # flexgen
+# OFFLOAD_DIR=/local_nvme/flexgen_offload
+# mkdir -p $OFFLOAD_DIR

-BSZ=16
-LOG_DIR=$BASE_LOG_DIR/${MODEL_NAME}_bs${BSZ}
-mkdir -p $LOG_DIR
-python -m flexgen.flex_opt --model ${FULL_MODEL_NAME} --path _DUMMY_ --percent 0 0 100 0 100 0 --gpu-batch-size ${BSZ} --pin-weight 0 --num-gpu-batches 1 &> $LOG_DIR/fg_${MODEL_NAME}_bs${BSZ}_disk.txt
-python -m flexgen.flex_opt --model ${FULL_MODEL_NAME} --path _DUMMY_ --percent 0 100 100 0 100 0 --gpu-batch-size ${BSZ} --pin-weight 0 --num-gpu-batches 1 --compress-weight &> $LOG_DIR/fg_${MODEL_NAME}_bs${BSZ}_cpu_q4.txt
+# BSZ=16
+# LOG_DIR=$BASE_LOG_DIR/${MODEL_NAME}_bs${BSZ}
+# mkdir -p $LOG_DIR
+# python -m flexgen.flex_opt --model ${FULL_MODEL_NAME} --path _DUMMY_ --percent 0 0 100 0 100 0 --gpu-batch-size ${BSZ} --pin-weight 0 --num-gpu-batches 1 &> $LOG_DIR/fg_${MODEL_NAME}_bs${BSZ}_disk.txt
+# python -m flexgen.flex_opt --model ${FULL_MODEL_NAME} --path _DUMMY_ --percent 0 100 100 0 100 0 --gpu-batch-size ${BSZ} --pin-weight 0 --num-gpu-batches 1 --compress-weight &> $LOG_DIR/fg_${MODEL_NAME}_bs${BSZ}_cpu_q4.txt

-BSZ=64
-LOG_DIR=$BASE_LOG_DIR/${MODEL_NAME}_bs${BSZ}
-mkdir -p $LOG_DIR
-python -m flexgen.flex_opt --model ${FULL_MODEL_NAME} --path _DUMMY_ --percent 0 0 0 100 0 100 --gpu-batch-size ${BSZ} --offload-dir ${OFFLOAD_DIR} --pin-weight 0 --num-gpu-batches 1 &> $LOG_DIR/fg_${MODEL_NAME}_bs${BSZ}_cpu_disk.txt
+# BSZ=64
+# LOG_DIR=$BASE_LOG_DIR/${MODEL_NAME}_bs${BSZ}
+# mkdir -p $LOG_DIR
+# python -m flexgen.flex_opt --model ${FULL_MODEL_NAME} --path _DUMMY_ --percent 0 0 0 100 0 100 --gpu-batch-size ${BSZ} --offload-dir ${OFFLOAD_DIR} --pin-weight 0 --num-gpu-batches 1 &> $LOG_DIR/fg_${MODEL_NAME}_bs${BSZ}_cpu_disk.txt

-BSZ=40
-LOG_DIR=$BASE_LOG_DIR/${MODEL_NAME}_bs${BSZ}
-mkdir -p $LOG_DIR
-python -m flexgen.flex_opt --model ${FULL_MODEL_NAME} --path _DUMMY_ --percent 0 100 0 100 0 100 --gpu-batch-size ${BSZ} --pin-weight 0 --num-gpu-batches 1 --compress-weight &> $LOG_DIR/fg_${MODEL_NAME}_bs${BSZ}_cpu_q4.txt
+# BSZ=40
+# LOG_DIR=$BASE_LOG_DIR/${MODEL_NAME}_bs${BSZ}
+# mkdir -p $LOG_DIR
+# python -m flexgen.flex_opt --model ${FULL_MODEL_NAME} --path _DUMMY_ --percent 0 100 0 100 0 100 --gpu-batch-size ${BSZ} --pin-weight 0 --num-gpu-batches 1 --compress-weight &> $LOG_DIR/fg_${MODEL_NAME}_bs${BSZ}_cpu_q4.txt

0 comments on commit 2d9a6e6

Please sign in to comment.