Fix: Running the best model on the test (validation) dataset for GLUE
- Added TOKENIZERS_PARALLELISM=false to the .envrc file so the fast tokenizers do not warn about (or deadlock on) forked DataLoader worker processes. For more information, see huggingface/tokenizers#311.
- orchestration/task_adapter.py - Added a model checkpointing callback to the trainer. It saves the best model according to the metric being monitored.
- models/task_adapter.py - Removed logging the loss at every test step; logging once at the end of the test epoch is enough.
- scripts/train_task/task_adapter.sh - Passes the new options, among them the number of workers to use in the dataloader and the metric to monitor.
abhinavkashyap92 committed Sep 21, 2021
1 parent 948f0e4 commit d6e8e23
Showing 5 changed files with 49 additions and 12 deletions.
3 changes: 2 additions & 1 deletion .envrc
@@ -1,4 +1,5 @@
export DATASET_CACHE_DIR=/ssd1/abhinav/domadapter/data/
export PT_MODELS_CACHE_DIR="/ssd1/abhinav/domadapter/pretrained_models_cache/"
export OUTPUT_DIR="/ssd1/abhinav/domadapter/experiments/"
export PROJECT_ROOT=$PWD
export PROJECT_ROOT=$PWD
export TOKENIZERS_PARALLELISM=false
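The same guard can also be set from Python, which helps when the code runs outside a direnv-managed shell. A minimal sketch (not part of this commit; placing it at the top of the entry point is an assumption):

    import os

    # Must run before the first fast tokenizer is created, otherwise the
    # tokenizers library may already have started its parallel threads and
    # will still warn when the DataLoader forks its worker processes.
    os.environ.setdefault("TOKENIZERS_PARALLELISM", "false")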
16 changes: 12 additions & 4 deletions domadapter/datamodules/glue_dm.py
@@ -20,6 +20,7 @@ def __init__(
pad_to_max_length: bool = True,
max_seq_length: int = None,
batch_size: int = 32,
num_workers: int = 8
):
"""Use the transformer datasets library to download
GLUE tasks. We should use this later if we decide to do experiments
@@ -58,6 +59,9 @@ def __init__(
batch_size: int
Batch size of inputs
num_workers: int
Number of workers to use for dataloaders
"""
super(GlueDM, self).__init__()
self.task_to_keys = {
@@ -100,6 +104,7 @@ def __init__(
self.val_dataset = None
self.test_dataset = None
self.batch_size = batch_size
self.num_workers = num_workers

def prepare_data(self):
"""Download the dataset for the task and store it in the
@@ -162,24 +167,27 @@ def setup(self, stage: Optional[str] = None):

elif stage == "test":
self.test_dataset = (
self.datasets["test_matched"] if self.task_name == "mnli" else self.datasets["test"]
self.datasets["validation_matched"] if self.task_name == "mnli" else self.datasets["validation"]
)
else:
raise ValueError("stage can be one of [fit, val, test]")

def train_dataloader(self):
return DataLoader(
self.train_dataset, batch_size=self.batch_size, collate_fn=self.data_collator
self.train_dataset, batch_size=self.batch_size, collate_fn=self.data_collator,
num_workers=self.num_workers
)

def val_dataloader(self):
return DataLoader(
self.val_dataset, batch_size=self.batch_size, collate_fn=self.data_collator
self.val_dataset, batch_size=self.batch_size, collate_fn=self.data_collator,
num_workers=self.num_workers
)

def test_dataloader(self):
return DataLoader(
self.test_dataset, batch_size=self.batch_size, collate_fn=self.data_collator
self.test_dataset, batch_size=self.batch_size, collate_fn=self.data_collator,
num_workers=self.num_workers
)

def preprocess_function(self, examples):
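For context, the GLUE test splits are distributed without labels, so the "test" stage above deliberately evaluates on the labelled validation split ("validation_matched" in the MNLI case), and every dataloader now forwards num_workers. A rough usage sketch; the real constructor takes more arguments than this diff shows (tokenizer and task settings are elided), so the argument list here is illustrative only:

    # hypothetical usage of the updated datamodule; elided arguments are guessed
    dm = GlueDM(
        task_name="sst2",
        pad_to_max_length=True,
        max_seq_length=128,
        batch_size=32,
        num_workers=8,
    )
    dm.prepare_data()
    dm.setup("test")                     # loads the labelled validation split
    test_loader = dm.test_dataloader()   # DataLoader built with num_workers=8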
1 change: 0 additions & 1 deletion domadapter/models/task_adapter.py
@@ -343,7 +343,6 @@ def test_step(self, batch, batch_idx):
predictions = (
predictions.argmax(dim=-1) if not self.is_regression else predictions.squeeze()
)
self.log("test/loss", loss.item())
return {"loss": loss, "predictions": predictions, "labels": labels}

def test_epoch_end(self, outputs):
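With the per-step call removed, the test loss is still available once per run by aggregating the step outputs in test_epoch_end. A minimal sketch of that pattern, assuming the hook (only partially shown in this diff) receives the dicts returned by test_step and that torch is imported in the module:

    def test_epoch_end(self, outputs):
        # outputs is the list of dicts returned by test_step above
        avg_loss = torch.stack([out["loss"] for out in outputs]).mean()
        self.log("test/loss", avg_loss)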
27 changes: 26 additions & 1 deletion domadapter/orchestration/task_adapter.py
@@ -12,6 +12,7 @@
from pytorch_lightning.loggers import WandbLogger
from pytorch_lightning import seed_everything
from domadapter.models.task_adapter import TaskAdapterModel
from pytorch_lightning.callbacks import ModelCheckpoint


@dataclass
@@ -64,6 +65,10 @@ class DataTrainingArguments:

batch_size: Optional[int] = field(default=32, metadata={"help": "Batch size of data"})

num_processes: Optional[int] = field(
default=8, metadata={"help": "Num of workers for Dataloader"}
)

def __post_init__(self):
self.task_name = self.task_name.lower()

@@ -95,7 +100,7 @@ class ModelArguments:
)

use_fast_tokenizer: bool = field(
default=True, metadata={"help": "Whether to use the Fast version of the tokenizer"}
default=False, metadata={"help": "Whether to use the Fast version of the tokenizer"}
)

adapter_name: str = field(
@@ -150,6 +155,10 @@ class TrainerArguments:

gpus: str = field(metadata={"help": "GPU number to train on. Pass this as a string"})

monitor_metric: str = field(
metadata={"help": "This metric will be monitored for storing the best model"}
)


def main():
# MultiLingAdapterArguments extends from AdapterArguments.
@@ -190,11 +199,14 @@ def main():
pad_to_max_length=data_args.pad_to_max_length,
max_seq_length=data_args.max_seq_length,
batch_size=data_args.batch_size,
num_workers=data_args.num_processes,
)
dm.prepare_data()
dm.setup("fit")
train_loader = dm.train_dataloader()
val_loader = dm.val_dataloader()
dm.setup("test")
test_loader = dm.test_dataloader()

model = TaskAdapterModel(
adapter_name=model_args.adapter_name,
@@ -222,6 +234,17 @@ def main():
logger.watch(model, log="gradients", log_freq=10)
logger.log_hyperparams(hparams)

callbacks = []

checkpoints_dir = experiments_dir.joinpath("checkpoints")
checkpoint_callback = ModelCheckpoint(
dirpath=str(checkpoints_dir),
save_top_k=1,
mode="max",
monitor=f"dev/{trainer_args.monitor_metric}",
)
callbacks.append(checkpoint_callback)

trainer = pl.Trainer(
limit_train_batches=trainer_args.train_data_proportion,
limit_val_batches=trainer_args.validation_data_proportion,
max_epochs=trainer_args.num_epochs,
gpus=trainer_args.gpus,
logger=logger,
callbacks=callbacks,
)

trainer.fit(model, train_dataloaders=train_loader, val_dataloaders=val_loader)
trainer.test(dataloaders=test_loader)


if __name__ == "__main__":
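Taken together, this is what makes the commit title work: ModelCheckpoint keeps the single best checkpoint ranked by the logged dev/<monitor_metric> value (the model is expected to log that key during validation), and in the PyTorch Lightning 1.x releases current at the time, trainer.test() called without a model defaults to ckpt_path="best", so the best saved weights are restored before the test loop runs. Spelled out explicitly, the call above is roughly equivalent to the following sketch (assuming the same 1.x Trainer API used elsewhere in this file):

    # evaluate the checkpoint that the callback ranked best on dev/<metric>
    trainer.test(
        dataloaders=test_loader,
        ckpt_path=checkpoint_callback.best_model_path,
    )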
14 changes: 9 additions & 5 deletions domadapter/scripts/train_task/task_adapter.sh
@@ -7,20 +7,22 @@ MAX_SEQ_LENGTH=128
MODEL_NAME="bert-base-uncased"
BSZ=32
ADAPTER_NAME="dummy_adapter"
EXPERIMENT_NAME="[DEBUG_TASK_ADAPTERS]"
EXPERIMENT_NAME="[SST_TASK_ADAPTERS]"
WANDB_PROJ_NAME="ADAPTERS"
SEED=1729
TRAIN_PROPORTION=0.1
TRAIN_PROPORTION=1
VALIDATION_PROPORTION=1.0
TEST_PROPORTION=1.0
GRADIENT_CLIP_VAL=5.0
EPOCHS=2
EPOCHS=20
ADAM_BETA1=0.99
ADAM_BETA2=0.999
ADAM_EPSILON=1e-8
LEARNING_RATE=1e-4
ADAPTER_REDUCTION_FACTOR=32
ADAPTER_REDUCTION_FACTOR=64
GPUS="0"
NUM_PROCESSES=32
MONITOR_METRIC="accuracy"


python ${SCRIPT_FILE} \
--adam_epsilon ${ADAM_EPSILON} \
--learning_rate ${LEARNING_RATE} \
--adapter_reduction_factor ${ADAPTER_REDUCTION_FACTOR} \
--gpus ${GPUS}
--gpus ${GPUS} \
--num_processes ${NUM_PROCESSES} \
--monitor_metric ${MONITOR_METRIC}
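The two new flags map onto the dataclass fields added above: --num_processes fills DataTrainingArguments.num_processes and --monitor_metric fills TrainerArguments.monitor_metric. Assuming the orchestration script parses its dataclasses with transformers' HfArgumentParser (the parsing code sits outside this diff), the round trip looks roughly like:

    # hypothetical parsing step; the real main() is not shown in this diff
    from transformers import HfArgumentParser

    parser = HfArgumentParser((DataTrainingArguments, ModelArguments, TrainerArguments))
    data_args, model_args, trainer_args = parser.parse_args_into_dataclasses()
    print(data_args.num_processes)       # 32, from --num_processes
    print(trainer_args.monitor_metric)   # "accuracy", from --monitor_metric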
