Commit

fix bugs when no model gauntlet is provided
bmosaicml committed Jul 6, 2023
1 parent 4478fdb commit aa70f78
Showing 3 changed files with 13 additions and 27 deletions.
25 changes: 9 additions & 16 deletions mcli/mcli-hf-eval.yaml
@@ -1,7 +1,7 @@
 integrations:
 - integration_type: git_repo
   git_repo: mosaicml/llm-foundry
-  git_branch: remove_try_catch # v0.2.0
+  git_branch: v0.2.0
   # git_commit: # OR use your commit hash
   pip_install: -e ".[gpu]"
   ssh_clone: false # Should be true if using a private repo
@@ -11,10 +11,10 @@ command: |
   composer eval/eval.py /mnt/config/parameters.yaml
 
 # Mosaic Cloud will use run_name (with a unique suffix) to populate the env var $RUN_NAME
-run_name: docstring_eval
+run_name: all-eval
 gpu_num: 8
-gpu_type: a100_80gb
-cluster: r1z1 # replace with your cluster here!
+# gpu_type:
+# cluster: # replace with your cluster here!
 
 image: mosaicml/llm-foundry:2.0.1_cu118-latest
 
@@ -28,16 +28,16 @@ parameters:
 
   models:
   -
-    model_name: mosaicml/mpt-7b
+    model_name: mosaicml/mpt-7b-instruct
     # Tokenizer
     tokenizer:
-      name: mosaicml/mpt-7b
+      name: EleutherAI/gpt-neox-20b
      kwargs:
        model_max_length: ${max_seq_len}
 
     model:
       name: hf_causal_lm
-      pretrained_model_name_or_path: mosaicml/mpt-7b
+      pretrained_model_name_or_path: mosaicml/mpt-7b-instruct
      init_device: cpu
      pretrained: true
      use_auth_token: false
@@ -286,12 +286,5 @@ parameters:
     sharding_strategy: FULL_SHARD
     mixed_precision: FULL
 
-  icl_tasks:
-  -
-    label: composer_docstring_eval
-    dataset_uri: oci://mosaicml-internal-checkpoints/support-bot-demo/data/eval/composer_docstrings.jsonl
-    num_fewshot: [1, 3, 5]
-    icl_task_type: language_modeling
-    continuation_delimiter: "\nAnswer"
-    batch_size: 8
-    max_seq_len: ${max_seq_len}
+  icl_tasks: 'eval/yamls/tasks.yaml'
+  model_gauntlet: 'eval/yamls/model_gauntlet.yaml'
6 changes: 3 additions & 3 deletions scripts/eval/eval.py
@@ -199,7 +199,7 @@ def calculate_markdown_results(logger_keys, logger_data, benchmark_to_taxonomy,
                 subscores = results[num_shot][benchmark][metric]
                 if len(subscores) == 1:
                     row = {
-                        'Category': benchmark_to_taxonomy[benchmark],
+                        'Category': benchmark_to_taxonomy.get(benchmark, ""),
                         'Benchmark': benchmark,
                         'Subtask': None,
                         'Accuracy': subscores[0]['val'],
@@ -210,7 +210,7 @@ def calculate_markdown_results(logger_keys, logger_data, benchmark_to_taxonomy,
                 else:
                     row = {
                         'Category':
-                            benchmark_to_taxonomy[benchmark],
+                            benchmark_to_taxonomy.get(benchmark, ""),
                         'Benchmark':
                             benchmark,
                         'Subtask':
@@ -225,7 +225,7 @@ def calculate_markdown_results(logger_keys, logger_data, benchmark_to_taxonomy,
                     df = pd.concat([df, pd.DataFrame([row])], ignore_index=True)
                     for sub in subscores:
                         row = {
-                            'Category': benchmark_to_taxonomy[benchmark],
+                            'Category': benchmark_to_taxonomy.get(benchmark, ""),
                             'Benchmark': None,
                             'Subtask': sub['subcat'],
                             'Accuracy': sub['val'],
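The eval.py change swaps direct dictionary indexing for dict.get. When eval is run without a model_gauntlet config, the benchmark-to-taxonomy mapping presumably has no entry for the benchmark, so benchmark_to_taxonomy[benchmark] raises a KeyError; .get(benchmark, "") falls back to an empty category instead. A minimal sketch of the difference, using a hypothetical benchmark key:

# Stand-in for benchmark_to_taxonomy when no model gauntlet is provided (assumed empty here).
benchmark_to_taxonomy = {}
benchmark = 'lambada_openai'  # hypothetical benchmark key

# Old pattern: direct indexing raises KeyError when the key is missing.
try:
    category = benchmark_to_taxonomy[benchmark]
except KeyError:
    category = None

# New pattern: dict.get returns the supplied default ("") instead of raising.
category = benchmark_to_taxonomy.get(benchmark, "")
print(repr(category))  # -> ''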
9 changes: 1 addition & 8 deletions scripts/eval/yamls/hf_eval.yaml
@@ -41,12 +41,5 @@ fsdp_config:
   sharding_strategy: FULL_SHARD
   mixed_precision: FULL
 
-icl_tasks:
--
-  label: composer_docstring_eval
-  dataset_uri: oci://mosaicml-internal-checkpoints/support-bot-demo/data/eval/composer_docstrings.jsonl
-  num_fewshot: [1, 3, 5]
-  icl_task_type: language_modeling
-  continuation_delimiter: "\nAnswer"
-  batch_size: 8
+icl_tasks: 'eval/yamls/tasks_light.yaml'
 model_gauntlet: 'eval/yamls/model_gauntlet.yaml'
