Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

[DeepSpeed-Chat] feat: Support local path for actor, reward models. #485

Open
wants to merge 4 commits into
base: master
Choose a base branch
from
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
2 changes: 2 additions & 0 deletions .gitignore
Original file line number Diff line number Diff line change
Expand Up @@ -127,3 +127,5 @@ dmypy.json

# Pyre type checker
.pyre/

applications/DeepSpeed-Chat/output
6 changes: 3 additions & 3 deletions applications/DeepSpeed-Chat/train.py
Original file line number Diff line number Diff line change
Expand Up @@ -55,14 +55,14 @@ def parse_args():
)
parser.add_argument(
"--actor-model",
type=lambda x: x.replace("facebook/opt-", ""),
type=lambda pth: [fn.split("-")[-1] for fn in pth.split("/") if "opt" in fn][-1],
default="1.3b",
choices=("1.3b", "6.7b", "13b", "66b"),
choices=("350m", "1.3b", "6.7b", "13b", "66b"),
help="Which facebook/opt-* model to use for Actor (step 1)",
)
parser.add_argument(
"--reward-model",
type=lambda x: x.replace("facebook/opt-", ""),
type=lambda pth: [fn.split("-")[-1] for fn in pth.split("/") if "opt" in fn][-1],
default="350m",
choices=("350m"),
help="Which facebook/opt-* model to use for Reward (step 2)",
Expand Down
5 changes: 5 additions & 0 deletions applications/DeepSpeed-Chat/training/base.sh
Original file line number Diff line number Diff line change
@@ -0,0 +1,5 @@
#!/bin/bash
# Copyright (c) Microsoft Corporation.
# SPDX-License-Identifier: Apache-2.0

# Root directory that holds locally downloaded model weights.
# Consumers append the model id directly (e.g. "${MODEL_STORAGE}facebook/opt-350m"),
# so the value must keep its trailing slash.
#
# Allow the caller / CI to override via the environment instead of
# hard-coding one developer's personal path; the original value is
# kept as the fallback default for backward compatibility.
MODEL_STORAGE="${MODEL_STORAGE:-/data/yiminjiang/models/}"
Original file line number Diff line number Diff line change
Expand Up @@ -4,15 +4,16 @@

# DeepSpeed Team

# Get the directory path of the current script file
CURRENT_DIR="$(cd "$(dirname "${BASH_SOURCE[0]}")" && pwd)"

echo "The directory path of the current script file is: $CURRENT_DIR"

source $CURRENT_DIR/../../../base.sh

# Note that usually LoRA needs to use larger learning rate
OUTPUT=$1
ZERO_STAGE=$2
if [ "$OUTPUT" == "" ]; then
OUTPUT=./output
fi
if [ "$ZERO_STAGE" == "" ]; then
ZERO_STAGE=0
fi
OUTPUT=${1:-output}
ZERO_STAGE=${2:-0}
mkdir -p $OUTPUT

deepspeed --num_gpus 1 main.py --model_name_or_path facebook/opt-1.3b \
Expand Down
Original file line number Diff line number Diff line change
@@ -0,0 +1,26 @@
#!/bin/bash
# Copyright (c) Microsoft Corporation.
# SPDX-License-Identifier: Apache-2.0

# DeepSpeed Team

# Launch single-GPU reward-model (step 2) training of facebook/opt-350m.
# Usage: run_350m.sh [OUTPUT_DIR] [ZERO_STAGE]

# Resolve the directory of this script so it can be invoked from anywhere.
CURRENT_DIR="$(cd "$(dirname "${BASH_SOURCE[0]}")" && pwd)"

echo "The directory path of the current script file is: $CURRENT_DIR"

# Provides MODEL_STORAGE, the root of locally stored model weights
# (quoted so a checkout path containing spaces still works).
source "$CURRENT_DIR/../../../base.sh"

# Note that usually LoRA needs to use larger learning rate
OUTPUT=${1:-output}
ZERO_STAGE=${2:-0}
mkdir -p "$OUTPUT"

# Build the command as a string so it can be echoed for reproducibility
# before being eval'd; training output is captured in training.log.
run_cmd="deepspeed --num_gpus 1 main.py \
    --model_name_or_path ${MODEL_STORAGE}facebook/opt-350m \
    --gradient_accumulation_steps 8 \
    --lora_dim 128 --zero_stage $ZERO_STAGE \
    --deepspeed --output_dir $OUTPUT &> $OUTPUT/training.log"

echo ${run_cmd}
eval ${run_cmd}
Original file line number Diff line number Diff line change
Expand Up @@ -4,9 +4,17 @@

# DeepSpeed Team

# Get the directory path of the current script file
CURRENT_DIR="$(cd "$(dirname "${BASH_SOURCE[0]}")" && pwd)"

echo "The directory path of the current script file is: $CURRENT_DIR"

source $CURRENT_DIR/../../../base.sh

# Note that usually LoRA needs to use larger learning rate
OUTPUT_PATH=./output
mkdir -p $OUTPUT_PATH
OUTPUT=${1:-output}
ZERO_STAGE=${2:-0}
mkdir -p $OUTPUT

deepspeed --num_gpus 1 main.py \
--data_path Dahoas/rm-static Dahoas/full-hh-rlhf Dahoas/synthetic-instruct-gptj-pairwise yitingxie/rlhf-reward-datasets \
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -37,7 +37,7 @@ def create_hf_model(model_class,
model = model_class.from_pretrained(
model_name_or_path,
from_tf=bool(".ckpt" in model_name_or_path),
config=model_config)
config=model_config, trust_remote_code=True)

model.config.end_token_id = tokenizer.eos_token_id
model.config.pad_token_id = model.config.eos_token_id
Expand Down
5 changes: 5 additions & 0 deletions applications/DeepSpeed-Chat/training/utils/utils.py
Original file line number Diff line number Diff line change
Expand Up @@ -43,6 +43,11 @@ def update(self, num):


def load_hf_tokenizer(model_name_or_path, fast_tokenizer=True):
try:
return AutoTokenizer.from_pretrained(model_name_or_path,
fast_tokenizer=True)
except:
pass
if os.path.exists(model_name_or_path):
# Locally tokenizer loading has some issue, so we need to force download
model_json = os.path.join(model_name_or_path, "config.json")
Expand Down