Commit
scrap->validate_tokenizer. Remove profiler stuff. Remove unnecessary fields from config files
KevinMusgrave committed Feb 21, 2024
1 parent 7696c20 commit f700eda
Showing 5 changed files with 6 additions and 548 deletions.
15 changes: 3 additions & 12 deletions blog/llm-finetuning/deepspeed.yaml
@@ -1,19 +1,11 @@
-name: mistral deepspeed easy WarmupDecayLR batch size 1 bf16, genai image
+name: mistral deepspeed easy
 debug: false
-workspace: agnieszka
-project: llm-blog2
-profiling:
-  enabled: true
-  begin_on_batch: 100
-  end_after_batch: 1000
 environment:
   environment_variables:
     - NCCL_DEBUG=INFO
-    - HF_HOME=/nvmefs1/agnieszka.ciborowska/hf_cache
   image: determinedai/genai-train:latest
 resources:
   slots_per_trial: 2
-  resource_pool: A100
 searcher:
   name: single
   max_length:
@@ -22,13 +14,12 @@ searcher:
   smaller_is_better: false
 hyperparameters:
   model: "mistralai/Mistral-7B-Instruct-v0.2"
-  #model: "TinyLlama/TinyLlama-1.1B-Chat-v0.4"
   dataset_subset: "easy"
   lora: false
   training_args:
     output_dir: "/tmp/llm_finetuning"
     max_steps: 5000
-    per_device_train_batch_size: 1
+    per_device_train_batch_size: 2
     per_device_eval_batch_size: 4
     bf16: true
     evaluation_strategy: "steps"
@@ -41,5 +32,5 @@ hyperparameters:
     deepspeed: "ds_configs/ds_config_stage_3.json"
 entrypoint: >-
   python -m determined.launch.deepspeed
-  python finetune_with_profiling.py
+  python finetune.py
 max_restarts: 0
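
The hyperparameters block above is what the finetune.py entrypoint ultimately consumes. The script itself is not expanded in this diff, so the following is only a minimal sketch of how such a script could read those values through Determined's cluster-info API and hand the training_args mapping to Hugging Face's TrainingArguments; the function names and overall structure are assumptions, not the repository's actual code.

# Hedged sketch: one plausible way the finetune.py entrypoint could consume
# the hyperparameters defined in deepspeed.yaml. Names and structure here are
# assumptions; the real script is not shown in this commit view.
import determined as det
from transformers import AutoModelForCausalLM, AutoTokenizer, TrainingArguments


def load_hparams() -> dict:
    # On a Determined cluster, the experiment's `hyperparameters` section is
    # exposed through the cluster info object.
    info = det.get_cluster_info()
    assert info is not None, "expected to run inside a Determined trial"
    return info.trial.hparams


def main() -> None:
    hparams = load_hparams()
    model_name = hparams["model"]  # e.g. mistralai/Mistral-7B-Instruct-v0.2

    # The training_args mapping (output_dir, max_steps, bf16, deepspeed, ...)
    # lines up with keyword arguments accepted by TrainingArguments.
    training_args = TrainingArguments(**hparams["training_args"])

    tokenizer = AutoTokenizer.from_pretrained(model_name)
    model = AutoModelForCausalLM.from_pretrained(model_name)
    # ... build the dataset from hparams["dataset_subset"], then run a
    # Hugging Face Trainer with model, tokenizer, and training_args ...


if __name__ == "__main__":
    main()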
15 changes: 3 additions & 12 deletions blog/llm-finetuning/distributed.yaml
@@ -1,19 +1,11 @@
-name: mistral easy medium bf16
+name: mistral lora easy
 debug: false
-workspace: agnieszka
-project: llm-blog2
-profiling:
-  enabled: true
-  begin_on_batch: 100
-  end_after_batch: 1000
 environment:
   environment_variables:
     - NCCL_DEBUG=INFO
-    - HF_HOME=/nvmefs1/agnieszka.ciborowska/hf_cache
   image: determinedai/environments-dev:python-3.10-pytorch-2.0-deepspeed-0.10.0-smartsim
 resources:
   slots_per_trial: 2
-  resource_pool: A100
 searcher:
   name: single
   max_length:
@@ -22,13 +14,12 @@ searcher:
   smaller_is_better: false
 hyperparameters:
   model: "mistralai/Mistral-7B-Instruct-v0.2"
-  #model: "TinyLlama/TinyLlama-1.1B-Chat-v0.4"
   dataset_subset: "easy"
   lora: true
   training_args:
     output_dir: "/tmp/llm_finetuning"
     max_steps: 5000
-    per_device_train_batch_size: 1
+    per_device_train_batch_size: 8
     per_device_eval_batch_size: 4
     bf16: true
     evaluation_strategy: "steps"
@@ -40,5 +31,5 @@ hyperparameters:
     learning_rate: 1e-5
 entrypoint: >-
   python -m determined.launch.torch_distributed
-  python finetune_with_profiling.py
+  python finetune.py
 max_restarts: 0
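
This config flips lora to true and raises the per-device train batch size to 8, while deepspeed.yaml trains the full model at batch size 2. As a hedged illustration of what that flag likely gates, the sketch below wraps the model with a PEFT LoRA adapter when the flag is set; the peft usage and the rank/alpha/dropout values are assumptions for illustration, not taken from this commit.

# Hedged sketch: how a finetuning script might branch on the `lora`
# hyperparameter. The peft calls are standard, but the rank/alpha/dropout
# values are illustrative assumptions, not values from this repository.
from peft import LoraConfig, TaskType, get_peft_model
from transformers import AutoModelForCausalLM


def maybe_apply_lora(model, use_lora: bool):
    if not use_lora:
        return model  # full finetuning, as in deepspeed.yaml (lora: false)
    lora_config = LoraConfig(
        task_type=TaskType.CAUSAL_LM,
        r=8,             # illustrative adapter rank
        lora_alpha=16,   # illustrative scaling factor
        lora_dropout=0.05,
    )
    model = get_peft_model(model, lora_config)
    model.print_trainable_parameters()  # only the adapter weights train
    return model


model = AutoModelForCausalLM.from_pretrained("mistralai/Mistral-7B-Instruct-v0.2")
model = maybe_apply_lora(model, use_lora=True)  # lora: true in distributed.yaml

Because only the adapter weights carry gradients and optimizer state, the LoRA run can plausibly afford the larger per-device batch size on the same hardware.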
193 changes: 0 additions & 193 deletions blog/llm-finetuning/finetune_with_profiling.py

This file was deleted.
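
The remaining changed files, including the scrap -> validate_tokenizer rename named in the commit message, are not expanded in this view. Purely as a hedged guess at what a tokenizer validation script for this setup might check, the sketch below does an encode/decode round trip and prints the chat-template formatting for the configured instruct model; none of it is taken from the repository's actual validate_tokenizer code.

# Hedged sketch only: the renamed validate_tokenizer script is not shown in
# this commit view, so this is a guess at a quick tokenizer sanity check for
# the model used in the configs above.
from transformers import AutoTokenizer

MODEL = "mistralai/Mistral-7B-Instruct-v0.2"

tokenizer = AutoTokenizer.from_pretrained(MODEL)

# Round trip: encoding then decoding should give back (roughly) the same text.
sample = "Fine-tuning Mistral with Determined AI."
ids = tokenizer(sample, add_special_tokens=False)["input_ids"]
print("decoded:", tokenizer.decode(ids))

# Chat template: the instruct model wraps user turns in [INST] ... [/INST].
messages = [{"role": "user", "content": sample}]
prompt = tokenizer.apply_chat_template(messages, tokenize=False, add_generation_prompt=True)
print("prompt:", prompt)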
