From f173ac2960bdf52fd009adc62fdac9920ef3aa05 Mon Sep 17 00:00:00 2001
From: Evelina Bakhturina
Date: Wed, 12 Feb 2020 13:12:21 -0800
Subject: [PATCH 1/3] load config updated

Signed-off-by: Evelina Bakhturina
---
 .../glue_benchmark_with_bert.py | 29 ++++++++++++++++---
 1 file changed, 25 insertions(+), 4 deletions(-)

diff --git a/examples/nlp/glue_benchmark/glue_benchmark_with_bert.py b/examples/nlp/glue_benchmark/glue_benchmark_with_bert.py
index a7d909d93247..d75f5ed82cad 100644
--- a/examples/nlp/glue_benchmark/glue_benchmark_with_bert.py
+++ b/examples/nlp/glue_benchmark/glue_benchmark_with_bert.py
@@ -64,6 +64,8 @@
 import json
 import os
 
+from transformers import BertConfig
+
 import nemo.collections.nlp as nemo_nlp
 import nemo.core as nemo_core
 from nemo import logging
@@ -187,6 +189,8 @@
     add_time_to_log_dir=True,
 )
 
+
+
 if args.bert_checkpoint is None:
     """ Use this if you're using a standard BERT model.
     To see the list of pretrained models, call:
@@ -205,14 +209,31 @@
         tokenizer = NemoBertTokenizer(args.pretrained_bert_model)
     else:
         raise ValueError(f"received unexpected tokenizer '{args.tokenizer}'")
+    
     if args.bert_config is not None:
-        with open(args.bert_config) as json_file:
-            config = json.load(json_file)
-        model = nemo_nlp.nm.trainables.huggingface.BERT(**config)
+        config = BertConfig.from_json_file(args.bert_config).to_dict()
+        args.vocab_size = config['vocab_size']
+        args.hidden_size = config['hidden_size']
+        args.num_hidden_layers = config['num_hidden_layers']
+        args.num_attention_heads = config['num_attention_heads']
+        args.intermediate_size = config['intermediate_size']
+        args.hidden_act = config['hidden_act']
+        args.max_seq_length = config['max_position_embeddings']
+
+        model = nemo_nlp.nm.trainables.huggingface.BERT(
+                vocab_size=args.vocab_size,
+                num_hidden_layers=args.num_hidden_layers,
+                hidden_size=args.hidden_size,
+                num_attention_heads=args.num_attention_heads,
+                intermediate_size=args.intermediate_size,
+                max_position_embeddings=args.max_seq_length,
+                hidden_act=args.hidden_act,
+                )
+        logging.info(f"using {args.bert_config}")
     else:
         model = nemo_nlp.nm.trainables.huggingface.BERT(pretrained_model_name=args.pretrained_bert_model)
 
-    model.restore_from(args.bert_checkpoint)
+    logging.info(f"model restored from {args.bert_checkpoint}")
 
 hidden_size = model.hidden_size
 

From f8bd75a32bc26d2cd72210d65045448b455ef2da Mon Sep 17 00:00:00 2001
From: Evelina Bakhturina
Date: Wed, 12 Feb 2020 13:23:36 -0800
Subject: [PATCH 2/3] style fix

Signed-off-by: Evelina Bakhturina
---
 .../glue_benchmark_with_bert.py | 19 +++++++++----------
 1 file changed, 9 insertions(+), 10 deletions(-)

diff --git a/examples/nlp/glue_benchmark/glue_benchmark_with_bert.py b/examples/nlp/glue_benchmark/glue_benchmark_with_bert.py
index d75f5ed82cad..5b0c1c6ba742 100644
--- a/examples/nlp/glue_benchmark/glue_benchmark_with_bert.py
+++ b/examples/nlp/glue_benchmark/glue_benchmark_with_bert.py
@@ -190,7 +190,6 @@
 )
 
 
-
 if args.bert_checkpoint is None:
     """ Use this if you're using a standard BERT model.
     To see the list of pretrained models, call:
@@ -209,7 +208,7 @@
         tokenizer = NemoBertTokenizer(args.pretrained_bert_model)
     else:
         raise ValueError(f"received unexpected tokenizer '{args.tokenizer}'")
-    
+
     if args.bert_config is not None:
         config = BertConfig.from_json_file(args.bert_config).to_dict()
         args.vocab_size = config['vocab_size']
@@ -221,14 +220,14 @@
         args.max_seq_length = config['max_position_embeddings']
 
         model = nemo_nlp.nm.trainables.huggingface.BERT(
-                vocab_size=args.vocab_size,
-                num_hidden_layers=args.num_hidden_layers,
-                hidden_size=args.hidden_size,
-                num_attention_heads=args.num_attention_heads,
-                intermediate_size=args.intermediate_size,
-                max_position_embeddings=args.max_seq_length,
-                hidden_act=args.hidden_act,
-                )
+            vocab_size=args.vocab_size,
+            num_hidden_layers=args.num_hidden_layers,
+            hidden_size=args.hidden_size,
+            num_attention_heads=args.num_attention_heads,
+            intermediate_size=args.intermediate_size,
+            max_position_embeddings=args.max_seq_length,
+            hidden_act=args.hidden_act,
+        )
         logging.info(f"using {args.bert_config}")
     else:
         model = nemo_nlp.nm.trainables.huggingface.BERT(pretrained_model_name=args.pretrained_bert_model)

From b43d64d51485c00c4dc02a431d8b61dcde28fb8d Mon Sep 17 00:00:00 2001
From: Evelina Bakhturina
Date: Thu, 13 Feb 2020 15:57:03 -0800
Subject: [PATCH 3/3] rm unused import

Signed-off-by: Evelina Bakhturina
---
 examples/nlp/glue_benchmark/glue_benchmark_with_bert.py | 1 -
 1 file changed, 1 deletion(-)

diff --git a/examples/nlp/glue_benchmark/glue_benchmark_with_bert.py b/examples/nlp/glue_benchmark/glue_benchmark_with_bert.py
index eeb3f3a8cca6..7b90c132a506 100644
--- a/examples/nlp/glue_benchmark/glue_benchmark_with_bert.py
+++ b/examples/nlp/glue_benchmark/glue_benchmark_with_bert.py
@@ -61,7 +61,6 @@
 """
 
 import argparse
-import json
 import os
 
 from transformers import BertConfig
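
For reference, the pattern PATCH 1/3 introduces is: parse the BERT config JSON with transformers.BertConfig and forward individual hyperparameters as explicit keyword arguments, rather than splatting the raw dict into the constructor. A minimal standalone sketch of that pattern follows; build_encoder is a hypothetical stand-in for nemo_nlp.nm.trainables.huggingface.BERT, and bert_config.json stands for any HuggingFace-style BERT config file. BertConfig.from_json_file and .to_dict() are real transformers APIs; everything else here is illustrative.

# Sketch of the config-loading pattern from PATCH 1/3.
# Assumes the `transformers` package is installed; `build_encoder` is a
# hypothetical stand-in for nemo_nlp.nm.trainables.huggingface.BERT.
from transformers import BertConfig

def build_encoder(**hparams):
    # Echo the hyperparameters the real NeMo module would receive.
    for name, value in sorted(hparams.items()):
        print(f"{name} = {value}")

# Parse the JSON into a BertConfig, then flatten it to a plain dict.
config = BertConfig.from_json_file("bert_config.json").to_dict()

# Forward only the fields the constructor actually needs, by name.
build_encoder(
    vocab_size=config['vocab_size'],
    num_hidden_layers=config['num_hidden_layers'],
    hidden_size=config['hidden_size'],
    num_attention_heads=config['num_attention_heads'],
    intermediate_size=config['intermediate_size'],
    max_position_embeddings=config['max_position_embeddings'],
    hidden_act=config['hidden_act'],
)

A likely motivation for selecting fields explicitly instead of calling BERT(**config): a HuggingFace config dict carries extra keys the NeMo module's constructor may not accept, so splatting the whole dict can fail on unknown arguments.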