From d6e8e23ddba8dc56480c5ca7a6dc228d7dde14af Mon Sep 17 00:00:00 2001
From: Abhinav Ramesh Kashyap
Date: Tue, 21 Sep 2021 11:21:59 +0800
Subject: [PATCH] Fix: Running the best model on the test (validation) dataset for GLUE

- Added TOKENIZERS_PARALLELISM=false to the .envrc file. For more information,
  see https://github.com/huggingface/tokenizers/pull/311
- orchestration/task_adapter.py - Added a model checkpointing callback to the
  trainer. It saves the best model according to the metric being tracked.
- models/task_adapter.py - Removed logging the loss at every test step. It is
  enough to log it once at the end of the test epoch.
- scripts/train_task/task_adapter.sh - Passes the number of workers to use in
  the dataloaders and the metric to track, among other options.
---
 .envrc                                        |  3 ++-
 domadapter/datamodules/glue_dm.py             | 16 ++++++++---
 domadapter/models/task_adapter.py             |  1 -
 domadapter/orchestration/task_adapter.py      | 27 ++++++++++++++++++-
 domadapter/scripts/train_task/task_adapter.sh | 14 ++++++----
 5 files changed, 49 insertions(+), 12 deletions(-)

diff --git a/.envrc b/.envrc
index acbf2c1..c3ab175 100644
--- a/.envrc
+++ b/.envrc
@@ -1,4 +1,5 @@
 export DATASET_CACHE_DIR=/ssd1/abhinav/domadapter/data/
 export PT_MODELS_CACHE_DIR="/ssd1/abhinav/domadapter/pretrained_models_cache/"
 export OUTPUT_DIR="/ssd1/abhinav/domadapter/experiments/"
-export PROJECT_ROOT=$PWD
\ No newline at end of file
+export PROJECT_ROOT=$PWD
+export TOKENIZERS_PARALLELISM=false
diff --git a/domadapter/datamodules/glue_dm.py b/domadapter/datamodules/glue_dm.py
index ca8e1a8..f4e2d8d 100644
--- a/domadapter/datamodules/glue_dm.py
+++ b/domadapter/datamodules/glue_dm.py
@@ -20,6 +20,7 @@ def __init__(
         pad_to_max_length: bool = True,
         max_seq_length: int = None,
         batch_size: int = 32,
+        num_workers: int = 8
     ):
         """Use the transformer datasets library to download GLUE tasks.
         We should use this later if we decide to do experiments
@@ -58,6 +59,9 @@ def __init__(
 
             batch_size: int
                 Batch size of inputs
+
+            num_workers: int
+                Number of workers to use for dataloaders
         """
         super(GlueDM, self).__init__()
         self.task_to_keys = {
@@ -100,6 +104,7 @@ def __init__(
         self.val_dataset = None
         self.test_dataset = None
         self.batch_size = batch_size
+        self.num_workers = num_workers
 
     def prepare_data(self):
         """Download the dataset for the task and store it in the
@@ -162,24 +167,27 @@ def setup(self, stage: Optional[str] = None):
 
         elif stage == "test":
             self.test_dataset = (
-                self.datasets["test_matched"] if self.task_name == "mnli" else self.datasets["test"]
+                self.datasets["validation_matched"] if self.task_name == "mnli" else self.datasets["validation"]
             )
         else:
             raise ValueError("stage can be on of [fit, val, test]")
 
     def train_dataloader(self):
         return DataLoader(
-            self.train_dataset, batch_size=self.batch_size, collate_fn=self.data_collator
+            self.train_dataset, batch_size=self.batch_size, collate_fn=self.data_collator,
+            num_workers=self.num_workers
         )
 
     def val_dataloader(self):
         return DataLoader(
-            self.val_dataset, batch_size=self.batch_size, collate_fn=self.data_collator
+            self.val_dataset, batch_size=self.batch_size, collate_fn=self.data_collator,
+            num_workers=self.num_workers
         )
 
     def test_dataloader(self):
         return DataLoader(
-            self.test_dataset, batch_size=self.batch_size, collate_fn=self.data_collator
+            self.test_dataset, batch_size=self.batch_size, collate_fn=self.data_collator,
+            num_workers=self.num_workers
         )
 
     def preprocess_function(self, examples):
diff --git a/domadapter/models/task_adapter.py b/domadapter/models/task_adapter.py
index ad5c4ba..71e7b20 100644
--- a/domadapter/models/task_adapter.py
+++ b/domadapter/models/task_adapter.py
@@ -343,7 +343,6 @@ def test_step(self, batch, batch_idx):
         predictions = (
             predictions.argmax(dim=-1) if not self.is_regression else predictions.squeeze()
         )
-        self.log("test/loss", loss.item())
         return {"loss": loss, "predictions": predictions, "labels": labels}
 
     def test_epoch_end(self, outputs):
diff --git a/domadapter/orchestration/task_adapter.py b/domadapter/orchestration/task_adapter.py
index 85ced2e..d19c631 100644
--- a/domadapter/orchestration/task_adapter.py
+++ b/domadapter/orchestration/task_adapter.py
@@ -12,6 +12,7 @@
 from pytorch_lightning.loggers import WandbLogger
 from pytorch_lightning import seed_everything
 from domadapter.models.task_adapter import TaskAdapterModel
+from pytorch_lightning.callbacks import ModelCheckpoint
 
 
 @dataclass
@@ -64,6 +65,10 @@ class DataTrainingArguments:
 
     batch_size: Optional[int] = field(default=32, metadata={"help": "Batch size of data"})
 
+    num_processes: Optional[int] = field(
+        default=8, metadata={"help": "Num of workers for Dataloader"}
+    )
+
     def __post_init__(self):
         self.task_name = self.task_name.lower()
 
@@ -95,7 +100,7 @@ class ModelArguments:
     )
 
     use_fast_tokenizer: bool = field(
-        default=True, metadata={"help": "Whether to use the Fast version of the tokenizer"}
+        default=False, metadata={"help": "Whether to use the Fast version of the tokenizer"}
     )
 
     adapter_name: str = field(
@@ -150,6 +155,10 @@ class TrainerArguments:
 
     gpus: str = field(metadata={"help": "GPU number to train on. Pass this as a string"})
 
+    monitor_metric: str = field(
+        metadata={"help": "This metric will be monitored for storing the best model"}
+    )
+
 
 def main():
     # MultiLingAdapterArguments extends from AdapterArguments.
@@ -190,11 +199,14 @@ def main():
         pad_to_max_length=data_args.pad_to_max_length,
         max_seq_length=data_args.max_seq_length,
         batch_size=data_args.batch_size,
+        num_workers=data_args.num_processes,
     )
     dm.prepare_data()
     dm.setup("fit")
     train_loader = dm.train_dataloader()
     val_loader = dm.val_dataloader()
+    dm.setup("test")
+    test_loader = dm.test_dataloader()
 
     model = TaskAdapterModel(
         adapter_name=model_args.adapter_name,
@@ -222,6 +234,17 @@ def main():
     logger.watch(model, log="gradients", log_freq=10)
     logger.log_hyperparams(hparams)
 
+    callbacks = []
+
+    checkpoints_dir = experiments_dir.joinpath("checkpoints")
+    checkpoint_callback = ModelCheckpoint(
+        dirpath=str(checkpoints_dir),
+        save_top_k=1,
+        mode="max",
+        monitor=f"dev/{trainer_args.monitor_metric}",
+    )
+    callbacks.append(checkpoint_callback)
+
     trainer = pl.Trainer(
         limit_train_batches=trainer_args.train_data_proportion,
         limit_val_batches=trainer_args.validation_data_proportion,
@@ -231,9 +254,11 @@ def main():
         max_epochs=trainer_args.num_epochs,
         gpus=trainer_args.gpus,
         logger=logger,
+        callbacks=callbacks,
     )
 
     trainer.fit(model, train_dataloaders=train_loader, val_dataloaders=val_loader)
+    trainer.test(dataloaders=test_loader)
 
 
 if __name__ == "__main__":
diff --git a/domadapter/scripts/train_task/task_adapter.sh b/domadapter/scripts/train_task/task_adapter.sh
index 733ac8c..a0edb3c 100755
--- a/domadapter/scripts/train_task/task_adapter.sh
+++ b/domadapter/scripts/train_task/task_adapter.sh
@@ -7,20 +7,22 @@
 MAX_SEQ_LENGTH=128
 MODEL_NAME="bert-base-uncased"
 BSZ=32
 ADAPTER_NAME="dummy_adapter"
-EXPERIMENT_NAME="[DEBUG_TASK_ADAPTERS]"
+EXPERIMENT_NAME="[SST_TASK_ADAPTERS]"
 WANDB_PROJ_NAME="ADAPTERS"
 SEED=1729
-TRAIN_PROPORTION=0.1
+TRAIN_PROPORTION=1
 VALIDATION_PROPORTION=1.0
 TEST_PROPORTION=1.0
 GRADIENT_CLIP_VAL=5.0
-EPOCHS=2
+EPOCHS=20
 ADAM_BETA1=0.99
 ADAM_BETA2=0.999
 ADAM_EPSILON=1e-8
 LEARNING_RATE=1e-4
-ADAPTER_REDUCTION_FACTOR=32
+ADAPTER_REDUCTION_FACTOR=64
 GPUS="0"
+NUM_PROCESSES=32
+MONITOR_METRIC="accuracy"
 
 python ${SCRIPT_FILE} \
@@ -46,5 +48,7 @@ python ${SCRIPT_FILE} \
 --adam_epsilon ${ADAM_EPSILON} \
 --learning_rate ${LEARNING_RATE} \
 --adapter_reduction_factor ${ADAPTER_REDUCTION_FACTOR} \
---gpus ${GPUS}
+--gpus ${GPUS} \
+--num_processes ${NUM_PROCESSES} \
+--monitor_metric ${MONITOR_METRIC}
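
Note on the checkpoint/test flow this patch wires up: ModelCheckpoint watches the "dev/<monitor_metric>" value logged during validation and keeps only the best weights, and trainer.test() is then run on the GLUE validation split that GlueDM exposes as the test stage. Below is a minimal, self-contained sketch of that flow, not code from this repository: ToyClassifier, make_loader, and the random tensors are hypothetical stand-ins for TaskAdapterModel and GlueDM, and the exact Trainer argument names may vary with the pytorch-lightning version.

import torch
import pytorch_lightning as pl
from torch.utils.data import DataLoader, TensorDataset
from pytorch_lightning.callbacks import ModelCheckpoint


class ToyClassifier(pl.LightningModule):
    # Hypothetical toy module standing in for TaskAdapterModel.
    def __init__(self):
        super().__init__()
        self.layer = torch.nn.Linear(8, 2)

    def forward(self, x):
        return self.layer(x)

    def training_step(self, batch, batch_idx):
        x, y = batch
        loss = torch.nn.functional.cross_entropy(self(x), y)
        self.log("train/loss", loss)
        return loss

    def validation_step(self, batch, batch_idx):
        x, y = batch
        acc = (self(x).argmax(dim=-1) == y).float().mean()
        # The key logged here must match ModelCheckpoint(monitor=...),
        # analogous to the patch's f"dev/{trainer_args.monitor_metric}".
        self.log("dev/accuracy", acc)

    def test_step(self, batch, batch_idx):
        x, y = batch
        acc = (self(x).argmax(dim=-1) == y).float().mean()
        # Log once per epoch, mirroring the patch's removal of per-step test logging.
        self.log("test/accuracy", acc, on_step=False, on_epoch=True)

    def configure_optimizers(self):
        return torch.optim.Adam(self.parameters(), lr=1e-3)


def make_loader(n=64):
    # Hypothetical random data standing in for the GLUE dataloaders from GlueDM.
    x = torch.randn(n, 8)
    y = torch.randint(0, 2, (n,))
    return DataLoader(TensorDataset(x, y), batch_size=16)


if __name__ == "__main__":
    checkpoint_callback = ModelCheckpoint(
        dirpath="checkpoints",
        save_top_k=1,        # keep only the single best checkpoint
        mode="max",          # "accuracy" is better when higher
        monitor="dev/accuracy",
    )
    trainer = pl.Trainer(max_epochs=2, callbacks=[checkpoint_callback], logger=False)
    model = ToyClassifier()
    trainer.fit(model, train_dataloaders=make_loader(), val_dataloaders=make_loader())
    # Evaluate the best checkpoint tracked by ModelCheckpoint; the patch relies on
    # the same behaviour when it calls trainer.test(dataloaders=test_loader).
    trainer.test(dataloaders=make_loader(), ckpt_path="best")

In the Lightning releases around the time of this patch, trainer.test() without an explicit ckpt_path should default to the best checkpoint as well, which is why the orchestration script above can call trainer.test(dataloaders=test_loader) directly after trainer.fit() and still report numbers for the best saved model rather than the last epoch.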