olmo option

AkshitaB committed Oct 26, 2023
1 parent 212216e commit 742c9f0
Showing 9 changed files with 100,467 additions and 2 deletions.
1 change: 1 addition & 0 deletions llm_eval/steps/get_model.py
@@ -39,6 +39,7 @@ def get_model_path(
            checkpoint_dir=checkpoint_dir, local_dir=model_dir
        )
    except ImportError:
        raise ImportError("Package `hf_olmo` cannot be found on the PYTHONPATH.")
    model_name = os.path.basename(checkpoint_dir)
    local_model_path = os.path.join(model_dir, model_name)

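For context, the added line converts a bare ImportError from the optional `hf_olmo` package into an actionable message, rather than letting the original traceback surface unexplained. A minimal sketch of the same guarded-import pattern (the wrapper function and the placeholder comment are illustrative, not taken from this file):

    import os

    def localize_checkpoint(checkpoint_dir: str, model_dir: str) -> str:
        # Hypothetical wrapper showing the pattern used in get_model.py above.
        try:
            import hf_olmo  # noqa: F401  # optional dependency, provided by the `olmo` extra
        except ImportError:
            raise ImportError("Package `hf_olmo` cannot be found on the PYTHONPATH.")
        # ... checkpoint download/conversion via hf_olmo would happen here ...
        model_name = os.path.basename(checkpoint_dir)
        return os.path.join(model_dir, model_name)
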
5 changes: 5 additions & 0 deletions llm_eval/steps/run_catwalk.py
@@ -18,6 +18,11 @@
from catwalk.utils import guess_instance_id
from tango.step import Step

try:
    from hf_olmo import *  # noqa: F403
except ImportError:
    pass

logger = logging.getLogger(__name__)


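The wildcard import above is there purely for its side effect: importing `hf_olmo` registers the OLMo config, model, and tokenizer classes with the Hugging Face transformers Auto* machinery, so checkpoints whose config.json declares "model_type": "olmo" can be loaded through the usual Auto classes. Wrapping it in try/except keeps run_catwalk.py importable when the optional `olmo` extra is not installed. A hedged sketch of the effect (it assumes hf_olmo performs this registration on import, and uses a placeholder checkpoint path):

    try:
        from hf_olmo import *  # noqa: F403  # side effect: registers the "olmo" model type
    except ImportError:
        pass  # OLMo support is optional; other model types still work

    from transformers import AutoModelForCausalLM, AutoTokenizer

    def load_checkpoint(path: str):
        # For OLMo checkpoints this succeeds only if the import above worked;
        # otherwise transformers will not recognize model_type "olmo".
        model = AutoModelForCausalLM.from_pretrained(path)
        tokenizer = AutoTokenizer.from_pretrained(path)
        return model, tokenizer
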
7 changes: 5 additions & 2 deletions pyproject.toml
@@ -19,11 +19,10 @@ authors = [
]
requires-python = ">=3.10"

# TODO: update tango once the PR is merged, and new version released.
dependencies = [
    # Add your own dependencies here
    "ai2-catwalk @ git+https://github.com/allenai/catwalk.git@olmo-eval",
    "ai2-tango[torch,transformers,fairscale,beaker,wandb,gs] @ git+https://github.com/allenai/tango.git@gs-bug-fixes",
    "ai2-tango[torch,transformers,fairscale,beaker,wandb,gs]>=1.3.1",
    "pygsheets"
]
license = {file = "LICENSE"}
@@ -55,6 +54,10 @@ dev = [
"sphinx-autodoc-typehints==1.23.3",
"packaging"
]
olmo = [
"hf_olmo @ git+https://github.com/allenai/LLM.git@main#subdirectory=hf_olmo",
"omegaconf"
]

[tool.setuptools.packages.find]
exclude = [
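The new `olmo` extras group keeps OLMo-specific dependencies optional: a plain install of the package works without them, while users who want to evaluate OLMo checkpoints can pull them in from a checkout with something like `pip install -e ".[olmo]"`, the same way the existing `dev` extras are installed.
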
41 changes: 41 additions & 0 deletions test_fixtures/test-olmo-model/config.json
@@ -0,0 +1,41 @@
{
  "activation_type": "swiglu",
  "alibi": false,
  "alibi_bias_max": 8.0,
  "architectures": [
    "OlmoModelForCausalLM"
  ],
  "attention_dropout": 0.1,
  "attention_layer_norm": false,
  "attention_layer_norm_with_affine": true,
  "bias_for_layer_norm": null,
  "block_type": "sequential",
  "d_model": 32,
  "embedding_dropout": 0.1,
  "embedding_size": 50304,
  "eos_token_id": 50256,
  "flash_attention": false,
  "include_bias": true,
  "init_cutoff_factor": null,
  "init_device": null,
  "init_fn": "normal",
  "init_std": 0.02,
  "layer_norm_type": "default",
  "layer_norm_with_affine": true,
  "max_sequence_length": 1024,
  "mlp_hidden_size": null,
  "mlp_ratio": 4,
  "model_type": "olmo",
  "multi_query_attention": false,
  "n_heads": 1,
  "n_layers": 1,
  "pad_token_id": 50256,
  "precision": null,
  "residual_dropout": 0.1,
  "rope": false,
  "scale_logits": false,
  "transformers_version": "4.34.1",
  "use_cache": true,
  "vocab_size": 50257,
  "weight_tying": true
}
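This fixture describes a deliberately tiny OLMo model (d_model=32, one layer, one head, GPT-2 vocabulary) so tests can exercise the loading path quickly. A small sanity-check sketch over the fixture, assuming the path is relative to the repository root:

    import json

    with open("test_fixtures/test-olmo-model/config.json") as f:
        cfg = json.load(f)

    assert cfg["model_type"] == "olmo"
    assert cfg["d_model"] == 32 and cfg["n_layers"] == 1 and cfg["n_heads"] == 1
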
89 changes: 89 additions & 0 deletions test_fixtures/test-olmo-model/config.yaml
@@ -0,0 +1,89 @@
run_name: null
seed: 6198
dry_run: false
model:
  d_model: 32
  n_heads: 1
  n_layers: 1
  mlp_ratio: 4
  activation_type: swiglu
  block_type: sequential
  alibi: false
  alibi_bias_max: 8.0
  rope: false
  flash_attention: false
  attention_dropout: 0.1
  multi_query_attention: false
  attention_layer_norm: false
  residual_dropout: 0.1
  embedding_dropout: 0.1
  layer_norm_type: default
  max_sequence_length: 1024
  include_bias: true
  vocab_size: 50257
  embedding_size: 50304
  eos_token_id: 50256
  pad_token_id: 50256
  init_device: null
  init_std: 0.02
  precision: null
optimizer:
  name: lionw
  learning_rate: 0.0001
  weight_decay: 0.01
  betas:
  - 0.9
  - 0.95
  no_decay_norm_and_bias: true
scheduler:
  name: cosine_with_warmup
  t_warmup: 100
  t_max: null
  alpha_f: 0.1
data:
  paths: null
  datasets: null
  pad_direction: right
  num_workers: 0
  drop_last: false
  pin_memory: false
  prefetch_factor: null
  persistent_workers: false
  timeout: 0
restore_dataloader: true
fast_forward_batches: null
evaluators: []
eval_interval: 1000
tokenizer:
  identifier: gpt2
  truncate_direction: right
save_folder: ./
save_interval: 1000
save_interval_unsharded: null
save_num_checkpoints_to_keep: -1
save_num_unsharded_checkpoints_to_keep: -1
save_overwrite: false
force_save_unsharded: false
load_path: null
max_duration: 10000
global_train_batch_size: 512
device_train_batch_size: null
device_train_microbatch_size: 16
device_eval_batch_size: 16
eval_subset_num_batches: -1
eval_on_load: false
device_train_grad_accum: null
max_grad_norm: null
precision: null
wandb: null
speed_monitor:
  window_size: 100
  gpu_flops_available: null
console_log_interval: 1
compile: null
activation_checkpointing: false
fsdp:
  use_orig_params: true
  sharding_strategy: FULL_SHARD
softmax_auxiliary_loss: false
time_limit: 171000.0
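The YAML file mirrors the training configuration that produced the fixture checkpoint, and its model block matches the hyperparameters in config.json above. A short sketch of reading it back with omegaconf (which the new `olmo` extra also installs), again assuming a repository-relative path:

    from omegaconf import OmegaConf

    cfg = OmegaConf.load("test_fixtures/test-olmo-model/config.yaml")
    assert cfg.model.d_model == 32
    assert cfg.tokenizer.identifier == "gpt2"
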
Binary file added test_fixtures/test-olmo-model/model.pt
3 changes: 3 additions & 0 deletions test_fixtures/test-olmo-model/special_tokens_map.json
@@ -0,0 +1,3 @@
{
  "eos_token": "<|endoftext|>"
}