# hf_lora_eval.yml
max_seq_len: 2048
seed: 1
precision: amp_fp16
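# Example launch (assumed paths; run from the llm-foundry scripts/ directory):
#   composer eval/eval.py eval/yamls/hf_lora_eval.yml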
# If you are using one model, put it here:
model_name_or_path: EleutherAI/gpt-neo-125m
# If you are using a separate LoRA adapter, put it here:
# the LoRA weights must be compatible with the specified base model
lora_id_or_path: edbeeching/gpt-neo-125M-imdb-lora # example LoRA weights for gpt-neo-125m
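# Tip: a PEFT adapter's adapter_config.json typically records the base model it was trained on
# (base_model_name_or_path); it should match model_name_or_path above.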
# otherwise, write a block for each model you want to test in the `models` section
models:
-
  model_name: ${model_name_or_path}
  model:
    name: hf_causal_lm
    pretrained_model_name_or_path: ${model_name_or_path}
    init_device: mixed
    pretrained: true
    pretrained_lora_id_or_path: ${lora_id_or_path}
  tokenizer:
    name: ${model_name_or_path}
    kwargs:
      model_max_length: ${max_seq_len}
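# Note: init_device: mixed is assumed to load the pretrained weights on rank 0 only and
# meta-initialize the remaining ranks before FSDP sharding; init_device: cpu (as in the
# commented example below) is the simpler alternative.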
# # if you are evaluating more than one model, list them all as YAML blocks without variable interpolation
# -
#   model_name: mosaicml/mpt-7b
#   model:
#     name: hf_causal_lm
#     pretrained_model_name_or_path: mosaicml/mpt-7b
#     init_device: cpu
#     pretrained: true
#     config_overrides:
#       max_seq_len: ${max_seq_len}
#   tokenizer:
#     name: mosaicml/mpt-7b
#     kwargs:
#       model_max_length: ${max_seq_len}
device_eval_batch_size: 4
# FSDP config for model sharding
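# FULL_SHARD shards parameters, gradients, and optimizer state across ranks (ZeRO-3 style);
# mixed_precision: FULL is assumed to keep FSDP parameters and reductions in full precision.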
fsdp_config:
  sharding_strategy: FULL_SHARD
  mixed_precision: FULL
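# icl_tasks lists the in-context-learning evaluation tasks; eval_gauntlet defines how task
# scores are grouped and weighted into aggregate categories. Paths are assumed to be
# resolved relative to the directory the eval script is launched from.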
icl_tasks: 'eval/yamls/tasks_light.yaml'
eval_gauntlet: 'eval/yamls/eval_gauntlet.yaml'