support multi-cards magnitude pruning (#1202)
Signed-off-by: Lv, Kaokao <[email protected]>
Signed-off-by: Xinyu Ye <[email protected]>
Co-authored-by: Xinyu Ye <[email protected]>
lkk12014402 and XinyuYe-Intel authored Sep 1, 2023
1 parent a01ba8b commit 9096188
Showing 7 changed files with 1,998 additions and 0 deletions.
@@ -0,0 +1,107 @@
Step-by-Step
============

# Single GPU

```bash
export CUDA_VISIBLE_DEVICES=0
bash run.sh \
--model_name_or_path=facebook/opt-125m \
--dataset_name=NeelNanda/pile-10k \
--block_size=128 \
--output_dir=./test-clm \
--pruning_type=magnitude \
--pruning_pattern=4x1 \
--pruning_frequency=1000
```

# Multi GPU

We use `accelerate` and DeepSpeed ZeRO Stage-2 to conduct weight magnitude pruning across multiple cards.
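
Before the multi-card setup, a note on what magnitude pruning with a `4x1` pattern does: weights are grouped into small blocks, each block is scored by the magnitude of the weights it contains, and the lowest-scoring blocks are zeroed until the target sparsity is reached. The sketch below only illustrates that idea; the helper name, the block axis, and the thresholding are simplifying assumptions, and the actual implementation lives in `run_clm_no_trainer.py` and the pruning library it uses.

```python
# Illustrative 4x1 block-wise magnitude pruning of one 2-D weight matrix.
# Assumption: a block is 4 consecutive weights along dim 0; the real
# pattern/axis convention is defined by the pruning library.
import torch

def magnitude_prune_4x1(weight: torch.Tensor, target_sparsity: float) -> torch.Tensor:
    out_features, in_features = weight.shape
    assert out_features % 4 == 0, "sketch assumes dim 0 is divisible by 4"
    # Score every 4x1 block by the sum of absolute weights it contains.
    blocks = weight.abs().reshape(out_features // 4, 4, in_features)
    scores = blocks.sum(dim=1)  # shape: (out_features // 4, in_features)
    # Find the score below which `target_sparsity` of the blocks fall,
    # then keep only the blocks above that threshold.
    k = int(scores.numel() * target_sparsity)
    threshold = scores.flatten().kthvalue(k).values if k > 0 else scores.min() - 1
    keep = (scores > threshold).unsqueeze(1).expand_as(blocks).reshape_as(weight)
    return weight * keep

pruned = magnitude_prune_4x1(torch.randn(128, 64), target_sparsity=0.8)
print(f"sparsity: {(pruned == 0).float().mean().item():.2%}")  # roughly 80%
```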

### Accelerate DeepSpeed Plugin

On your machine(s) just run:
```bash
accelerate config
```

and answer the questions asked. It will ask whether you want to use a config file for DeepSpeed; in this example we do, pointing it to `config/zero_stage2_config.json`. Answering the remaining questions produces an accelerate config that is then used automatically to set the proper default options when you run `accelerate launch`.

For instance, the generated config might look like this:

```yaml
compute_environment: LOCAL_MACHINE
deepspeed_config:
  deepspeed_config_file: config/zero_stage2_config.json
  zero3_init_flag: true
distributed_type: DEEPSPEED
fsdp_config: {}
machine_rank: 0
main_process_ip: null
main_process_port: null
main_training_function: main
mixed_precision: fp16
num_machines: 1
num_processes: 2
use_cpu: false
```
with the contents of `config/zero_stage2_config.json` being (note that DeepSpeed requires `train_batch_size` = `train_micro_batch_size_per_gpu` × `gradient_accumulation_steps` × number of GPUs, i.e. 8 × 4 × 2 = 64 for the two-card run below):

```json
{
    "train_batch_size": 64,
    "train_micro_batch_size_per_gpu": 8,
    "gradient_accumulation_steps": 4,
    "fp16": {
        "enabled": true,
        "min_loss_scale": 1,
        "opt_level": "O2"
    },
    "zero_optimization": {
        "stage": 2,
        "offload_param": {
            "device": "cpu"
        },
        "offload_optimizer": {
            "device": "cpu"
        },
        "allgather_partitions": true,
        "allgather_bucket_size": 5e8,
        "contiguous_gradients": true
    },
    "optimizer": {
        "type": "AdamW",
        "params": {
            "lr": "auto",
            "torch_adam": true,
            "adam_w_mode": true
        }
    },
    "scheduler": {
        "type": "WarmupDecayLR",
        "params": {
            "warmup_min_lr": 0.0,
            "warmup_max_lr": "auto",
            "warmup_num_steps": "auto",
            "total_num_steps": "auto",
            "warmup_type": "cosine"
        }
    }
}
```

### Pruning

```bash
# example with 2 GPU cards
export CUDA_VISIBLE_DEVICES=0,1
bash run_ds.sh \
--model_name_or_path=facebook/opt-125m \
--dataset_name=NeelNanda/pile-10k \
--block_size=128 \
--output_dir=./test-clm \
--pruning_type=magnitude \
--pruning_pattern=4x1 \
--pruning_frequency=1000
```
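
When the run finishes, you can sanity-check the sparsity that was actually reached. A minimal sketch, assuming the pruned model was exported to `./test-clm` in the standard Hugging Face format (the output path and the restriction to 2-D weights are illustrative assumptions, not guarantees of what the script saves):

```python
# Rough check of the overall weight sparsity in the pruned checkpoint.
from transformers import AutoModelForCausalLM

model = AutoModelForCausalLM.from_pretrained("./test-clm")
zeros, total = 0, 0
for param in model.parameters():
    if param.dim() == 2:  # count only the 2-D weight matrices
        zeros += (param == 0).sum().item()
        total += param.numel()
print(f"overall sparsity of 2-D weights: {zeros / total:.2%}")
```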
@@ -0,0 +1,40 @@
{
    "train_batch_size": 64,
    "train_micro_batch_size_per_gpu": 8,
    "gradient_accumulation_steps": 4,
    "fp16": {
        "enabled": true,
        "min_loss_scale": 1,
        "opt_level": "O2"
    },
    "zero_optimization": {
        "stage": 2,
        "offload_param": {
            "device": "cpu"
        },
        "offload_optimizer": {
            "device": "cpu"
        },
        "allgather_partitions": true,
        "allgather_bucket_size": 5e8,
        "contiguous_gradients": true
    },
    "optimizer": {
        "type": "AdamW",
        "params": {
            "lr": "auto",
            "torch_adam": true,
            "adam_w_mode": true
        }
    },
    "scheduler": {
        "type": "WarmupDecayLR",
        "params": {
            "warmup_min_lr": 0.0,
            "warmup_max_lr": "auto",
            "warmup_num_steps": "auto",
            "total_num_steps": "auto",
            "warmup_type": "cosine"
        }
    }
}
@@ -0,0 +1,8 @@
accelerate
datasets
sentencepiece
transformers
torch
tqdm
optimum
einops
@@ -0,0 +1,87 @@
#!/bin/bash
set -x

function main {
  init_params "$@"
  run_pruning
}

# init params
function init_params {
  dataset_name="NeelNanda/pile-10k"
  model_name_or_path="facebook/opt-125m"
  output_dir="./test-clm"
  per_device_train_batch_size=8
  block_size=128
  gradient_accumulation_steps=4
  num_train_epochs=3
  target_sparsity=0.8
  pruning_type="magnitude"
  pruning_pattern="4x1"
  pruning_frequency=1000
  for var in "$@"
  do
    case $var in
      --dataset_name=*)
          dataset_name=$(echo $var |cut -f2 -d=)
      ;;
      --model_name_or_path=*)
          model_name_or_path=$(echo $var |cut -f2 -d=)
      ;;
      --output_dir=*)
          output_dir=$(echo $var |cut -f2 -d=)
      ;;
      --per_device_train_batch_size=*)
          per_device_train_batch_size=$(echo $var |cut -f2 -d=)
      ;;
      --block_size=*)
          block_size=$(echo $var |cut -f2 -d=)
      ;;
      --gradient_accumulation_steps=*)
          gradient_accumulation_steps=$(echo $var |cut -f2 -d=)
      ;;
      --num_train_epochs=*)
          num_train_epochs=$(echo $var |cut -f2 -d=)
      ;;
      --target_sparsity=*)
          target_sparsity=$(echo $var |cut -f2 -d=)
      ;;
      --pruning_type=*)
          pruning_type=$(echo $var |cut -f2 -d=)
      ;;
      --pruning_pattern=*)
          pruning_pattern=$(echo $var |cut -f2 -d=)
      ;;
      --pruning_frequency=*)
          pruning_frequency=$(echo $var |cut -f2 -d=)
      ;;
      *)
          echo "Error: No such parameter: ${var}"
          exit 1
      ;;
    esac
  done
}

# run pruning
function run_pruning {
  python run_clm_no_trainer.py \
      --dataset_name $dataset_name \
      --model_name_or_path $model_name_or_path \
      --block_size $block_size \
      --per_device_train_batch_size $per_device_train_batch_size \
      --gradient_accumulation_steps $gradient_accumulation_steps \
      --output_dir $output_dir \
      --do_prune \
      --pruning_type $pruning_type \
      --num_train_epochs $num_train_epochs \
      --target_sparsity $target_sparsity \
      --pruning_pattern $pruning_pattern \
      --pruning_frequency $pruning_frequency
}

main "$@"