From 0864e650b942547cf6477d309c8887e130cc9444 Mon Sep 17 00:00:00 2001
From: VahidooX
Date: Fri, 31 Jan 2020 13:09:11 -0800
Subject: [PATCH] Changed nemo.logging to logging

Signed-off-by: VahidooX
---
 .../callbacks/joint_intent_slot_callback.py   | 22 +++++++--------
 .../nlp/callbacks/lm_bert_callback.py         |  6 ++--
 .../nlp/callbacks/lm_transformer_callback.py  | 10 +++----
 .../punctuation_capitalization_callback.py    |  2 +-
 .../token_classification_callback.py          | 10 +++----
 .../nlp/data/datasets/lm_bert_dataset.py      |  4 +--
 .../punctuation_capitalization_dataset.py     | 28 +++++++++----------
 .../datasets/token_classification_dataset.py  |  3 +-
 .../collections/nlp/utils/common_nlp_utils.py | 10 +++----
 9 files changed, 47 insertions(+), 48 deletions(-)

diff --git a/nemo/collections/nlp/callbacks/joint_intent_slot_callback.py b/nemo/collections/nlp/callbacks/joint_intent_slot_callback.py
index b4020cc59b11..5accc209e80f 100644
--- a/nemo/collections/nlp/callbacks/joint_intent_slot_callback.py
+++ b/nemo/collections/nlp/callbacks/joint_intent_slot_callback.py
@@ -5,7 +5,7 @@
 import numpy as np
 from sklearn.metrics import classification_report
 
-import nemo
+from nemo import logging
 from nemo.collections.nlp.utils.callback_utils import list2str, plot_confusion_matrix, tensor2list
 
 __all__ = ['eval_iter_callback', 'eval_epochs_done_callback']
@@ -77,26 +77,26 @@ def eval_epochs_done_callback(global_vars, graph_fold):
 
     i = 0
     if intent_preds.shape[0] > sample_size + 1:
         i = random.randint(0, intent_preds.shape[0] - sample_size - 1)
-    nemo.logging.info("Sampled i_preds: [%s]" % list2str(intent_preds[i : i + sample_size]))
-    nemo.logging.info("Sampled intents: [%s]" % list2str(intent_labels[i : i + sample_size]))
-    nemo.logging.info("Sampled s_preds: [%s]" % list2str(slot_preds[i : i + sample_size]))
-    nemo.logging.info("Sampled slots: [%s]" % list2str(slot_labels[i : i + sample_size]))
+    logging.info("Sampled i_preds: [%s]" % list2str(intent_preds[i : i + sample_size]))
+    logging.info("Sampled intents: [%s]" % list2str(intent_labels[i : i + sample_size]))
+    logging.info("Sampled s_preds: [%s]" % list2str(slot_preds[i : i + sample_size]))
+    logging.info("Sampled slots: [%s]" % list2str(slot_labels[i : i + sample_size]))
     plot_confusion_matrix(intent_labels, intent_preds, graph_fold)
 
-    nemo.logging.info('Intent prediction results')
+    logging.info('Intent prediction results')
     correct_preds = sum(intent_labels == intent_preds)
     intent_accuracy = correct_preds / intent_labels.shape[0]
-    nemo.logging.info(f'Intent accuracy: {intent_accuracy}')
-    nemo.logging.info(
+    logging.info(f'Intent accuracy: {intent_accuracy}')
+    logging.info(
         f'Classification report:\n \
         {classification_report(intent_labels, intent_preds)}'
     )
 
-    nemo.logging.info('Slot prediction results')
+    logging.info('Slot prediction results')
     slot_accuracy = sum(slot_labels == slot_preds) / slot_labels.shape[0]
-    nemo.logging.info(f'Slot accuracy: {slot_accuracy}')
-    nemo.logging.info(
+    logging.info(f'Slot accuracy: {slot_accuracy}')
+    logging.info(
         f'Classification report:\n \
         {classification_report(slot_labels[:-2], slot_preds[:-2])}'
     )
diff --git a/nemo/collections/nlp/callbacks/lm_bert_callback.py b/nemo/collections/nlp/callbacks/lm_bert_callback.py
index baeaabe2d701..7b51a442a42c 100644
--- a/nemo/collections/nlp/callbacks/lm_bert_callback.py
+++ b/nemo/collections/nlp/callbacks/lm_bert_callback.py
@@ -3,7 +3,7 @@
 
 import numpy as np
 
-import nemo
+from nemo import logging
 
 
 def eval_iter_callback(tensors, global_vars):
@@ -24,14 +24,14 @@ def eval_iter_callback(tensors, global_vars):
 
 def eval_epochs_done_callback(global_vars):
     if 'dev_mlm_loss' in global_vars:
         mlm_loss = np.mean(global_vars["dev_mlm_loss"])
-        nemo.logging.info("Dev MLM perplexity: {0}".format(np.round(np.exp(mlm_loss), 3)))
+        logging.info("Dev MLM perplexity: {0}".format(np.round(np.exp(mlm_loss), 3)))
         global_vars["dev_mlm_loss"] = []
     else:
         mlm_loss = -123.0
     if 'dev_nsp_loss' in global_vars:
         nsp_loss = np.mean(global_vars["dev_nsp_loss"])
-        nemo.logging.info("Dev NSP perplexity: {0}".format(np.round(np.exp(nsp_loss), 3)))
+        logging.info("Dev NSP perplexity: {0}".format(np.round(np.exp(nsp_loss), 3)))
         global_vars["dev_nsp_loss"] = []
     else:
         nsp_loss = -123.0
diff --git a/nemo/collections/nlp/callbacks/lm_transformer_callback.py b/nemo/collections/nlp/callbacks/lm_transformer_callback.py
index daffe2c64d2d..f444042b264b 100644
--- a/nemo/collections/nlp/callbacks/lm_transformer_callback.py
+++ b/nemo/collections/nlp/callbacks/lm_transformer_callback.py
@@ -3,7 +3,7 @@
 
 import numpy as np
 
-import nemo
+from nemo import logging
 
 GLOBAL_KEYS = ["eval_loss", "sys"]
 
@@ -23,10 +23,10 @@ def eval_epochs_done_callback(global_vars):
     eval_loss = np.mean(global_vars["eval_loss"])
     eval_ppl = np.exp(eval_loss)
 
-    nemo.logging.info("------------------------------------------------------")
-    nemo.logging.info("Eval loss: {0}".format(np.round(eval_loss, 3)))
-    nemo.logging.info("Eval ppl: {0}".format(np.round(eval_ppl, 3)))
-    nemo.logging.info("------------------------------------------------------")
+    logging.info("------------------------------------------------------")
+    logging.info("Eval loss: {0}".format(np.round(eval_loss, 3)))
+    logging.info("Eval ppl: {0}".format(np.round(eval_ppl, 3)))
+    logging.info("------------------------------------------------------")
     for key in GLOBAL_KEYS:
         global_vars[key] = []
     return dict({"Eval_loss": eval_loss, "Eval_ppl": eval_ppl})
diff --git a/nemo/collections/nlp/callbacks/punctuation_capitalization_callback.py b/nemo/collections/nlp/callbacks/punctuation_capitalization_callback.py
index 25cc05faebb0..15dc6f9a5187 100644
--- a/nemo/collections/nlp/callbacks/punctuation_capitalization_callback.py
+++ b/nemo/collections/nlp/callbacks/punctuation_capitalization_callback.py
@@ -6,7 +6,7 @@
 import numpy as np
 from sklearn.metrics import classification_report
 
-import nemo
+from nemo import logging
 from nemo.collections.nlp.utils.callback_utils import list2str, plot_confusion_matrix, tensor2list
 
 
diff --git a/nemo/collections/nlp/callbacks/token_classification_callback.py b/nemo/collections/nlp/callbacks/token_classification_callback.py
index 5b0e42342bde..2701378c0733 100644
--- a/nemo/collections/nlp/callbacks/token_classification_callback.py
+++ b/nemo/collections/nlp/callbacks/token_classification_callback.py
@@ -6,7 +6,7 @@
 import numpy as np
 from sklearn.metrics import classification_report
 
-import nemo
+from nemo import logging
 from nemo.collections.nlp.utils.callback_utils import list2str, plot_confusion_matrix, tensor2list
 
 
@@ -51,21 +51,21 @@ def eval_epochs_done_callback(global_vars, label_ids, graph_fold=None, none_labe
 
     preds = preds[subtokens_mask]
     accuracy = sum(labels == preds) / labels.shape[0]
-    nemo.logging.info(f'Accuracy: {accuracy}')
+    logging.info(f'Accuracy: {accuracy}')
 
     # print predictions and labels for a small random subset of data
     sample_size = 20
     i = 0
     if preds.shape[0] > sample_size + 1:
         i = random.randint(0, preds.shape[0] - sample_size - 1)
-    nemo.logging.info("Sampled preds: [%s]" % list2str(preds[i : i + sample_size]))
-    nemo.logging.info("Sampled labels: [%s]" % list2str(labels[i : i + sample_size]))
+    logging.info("Sampled preds: [%s]" % list2str(preds[i : i + sample_size]))
+    logging.info("Sampled labels: [%s]" % list2str(labels[i : i + sample_size]))
 
     # remove labels from label_ids that don't appear in the dev set
     used_labels = set(labels) | set(preds)
     label_ids = {k: label_ids[k] for k, v in label_ids.items() if v in used_labels}
 
-    nemo.logging.info(classification_report(labels, preds, target_names=label_ids))
+    logging.info(classification_report(labels, preds, target_names=label_ids))
 
     # calculate and plot confusion_matrix
     if graph_fold:
diff --git a/nemo/collections/nlp/data/datasets/lm_bert_dataset.py b/nemo/collections/nlp/data/datasets/lm_bert_dataset.py
index 0b83c94d94e5..b6436be00766 100644
--- a/nemo/collections/nlp/data/datasets/lm_bert_dataset.py
+++ b/nemo/collections/nlp/data/datasets/lm_bert_dataset.py
@@ -26,7 +26,7 @@
 from torch.utils.data import Dataset
 from tqdm import tqdm
 
-import nemo
+from nemo import logging
 from nemo.collections.nlp.data.datasets.datasets_utils import download_wkt2
 from nemo.collections.nlp.data.datasets.lm_transformer_dataset import create_vocab_mlm
 
@@ -385,7 +385,7 @@ def __init__(self, dataset_name, data_dir, vocab_size, sample_size, special_toke
                 data_dir, vocab_size, sample_size, special_tokens, train_file
             )
         else:
-            nemo.logging.warning(
+            logging.warning(
                 "Looks like you passed a dataset name that isn't "
                 "already supported by NeMo. Please make sure that "
                 "you build the preprocessing method for it."
diff --git a/nemo/collections/nlp/data/datasets/punctuation_capitalization_dataset.py b/nemo/collections/nlp/data/datasets/punctuation_capitalization_dataset.py
index 36d643609c20..b8d8bfcd728b 100644
--- a/nemo/collections/nlp/data/datasets/punctuation_capitalization_dataset.py
+++ b/nemo/collections/nlp/data/datasets/punctuation_capitalization_dataset.py
@@ -29,8 +29,8 @@
 import numpy as np
 from torch.utils.data import Dataset
 
-import nemo
 import nemo.collections.nlp.data.datasets.datasets_utils as utils
+from nemo import logging
 
 
 def get_features(
@@ -162,12 +162,12 @@
             logging.info("*** Example ***")
             logging.info("i: %s" % (i))
             logging.info("subtokens: %s" % " ".join(list(map(str, all_subtokens[i]))))
-            nemo.logging.info("loss_mask: %s" % " ".join(list(map(str, all_loss_mask[i]))))
-            nemo.logging.info("input_mask: %s" % " ".join(list(map(str, all_input_mask[i]))))
-            nemo.logging.info("subtokens_mask: %s" % " ".join(list(map(str, all_subtokens_mask[i]))))
+            logging.info("loss_mask: %s" % " ".join(list(map(str, all_loss_mask[i]))))
+            logging.info("input_mask: %s" % " ".join(list(map(str, all_input_mask[i]))))
+            logging.info("subtokens_mask: %s" % " ".join(list(map(str, all_subtokens_mask[i]))))
             if with_label:
-                nemo.logging.info("punct_labels: %s" % " ".join(list(map(str, punct_all_labels[i]))))
-                nemo.logging.info("capit_labels: %s" % " ".join(list(map(str, capit_all_labels[i]))))
+                logging.info("punct_labels: %s" % " ".join(list(map(str, punct_all_labels[i]))))
+                logging.info("capit_labels: %s" % " ".join(list(map(str, capit_all_labels[i]))))
 
     return (
         all_input_ids,
@@ -247,7 +247,7 @@
         if use_cache and os.path.exists(features_pkl):
             # If text_file was already processed, load from pickle
             features = pickle.load(open(features_pkl, 'rb'))
-            nemo.logging.info(f'features restored from {features_pkl}')
+            logging.info(f'features restored from {features_pkl}')
         else:
             if num_samples == 0:
                 raise ValueError("num_samples has to be positive", num_samples)
@@ -290,16 +290,16 @@
             # for dev/test sets use label mapping from training set
             if punct_label_ids:
                 if len(punct_label_ids) != len(punct_unique_labels):
-                    nemo.logging.info(
+                    logging.info(
                         'Not all labels from the specified'
                         + 'label_ids dictionary are present in the'
                         + 'current dataset. Using the provided'
                         + 'label_ids dictionary.'
                     )
                 else:
-                    nemo.logging.info('Using the provided label_ids dictionary.')
+                    logging.info('Using the provided label_ids dictionary.')
             else:
-                nemo.logging.info(
+                logging.info(
                     'Creating a new label to label_id dictionary.'
                     + ' It\'s recommended to use label_ids generated'
                     + ' during training for dev/test sets to avoid'
@@ -334,7 +334,7 @@ def create_label_ids(unique_labels, pad_label=pad_label):
 
             if use_cache:
                 pickle.dump(features, open(features_pkl, "wb"))
-                nemo.logging.info(f'features saved to {features_pkl}')
+                logging.info(f'features saved to {features_pkl}')
 
         self.all_input_ids = features[0]
         self.all_segment_ids = features[1]
@@ -350,14 +350,14 @@ def create_label_ids(unique_labels, pad_label=pad_label):
 
         def get_stats_and_save(all_labels, label_ids, name):
             infold = text_file[: text_file.rfind('/')]
            merged_labels = itertools.chain.from_iterable(all_labels)
-            nemo.logging.info('Three most popular labels')
+            logging.info('Three most popular labels')
             _, label_frequencies = utils.get_label_stats(merged_labels, infold + '/label_count_' + name + '.tsv')
 
             out = open(os.path.join(infold, name + '_label_ids.csv'), 'w')
             labels, _ = zip(*sorted(label_ids.items(), key=lambda x: x[1]))
             out.write('\n'.join(labels))
-            nemo.logging.info(f'Labels: {label_ids}')
-            nemo.logging.info(f'Labels mapping saved to : {out.name}')
+            logging.info(f'Labels: {label_ids}')
+            logging.info(f'Labels mapping saved to : {out.name}')
 
             return label_frequencies
diff --git a/nemo/collections/nlp/data/datasets/token_classification_dataset.py b/nemo/collections/nlp/data/datasets/token_classification_dataset.py
index b0858a91985e..5a62d98be03c 100644
--- a/nemo/collections/nlp/data/datasets/token_classification_dataset.py
+++ b/nemo/collections/nlp/data/datasets/token_classification_dataset.py
@@ -27,9 +27,8 @@
 import numpy as np
 from torch.utils.data import Dataset
 
-import nemo
 import nemo.collections.nlp.data.datasets.datasets_utils as datasets_utils
-import nemo.collections.nlp.data.datasets.joint_intent_slot_dataset
+from nemo import logging
 
 
 __all__ = ['BertTokenClassificationDataset', 'BertTokenClassificationInferDataset']
diff --git a/nemo/collections/nlp/utils/common_nlp_utils.py b/nemo/collections/nlp/utils/common_nlp_utils.py
index bb05af8d950b..4de761dfec8a 100644
--- a/nemo/collections/nlp/utils/common_nlp_utils.py
+++ b/nemo/collections/nlp/utils/common_nlp_utils.py
@@ -4,7 +4,7 @@
 
 import numpy as np
 
-import nemo
+from nemo import logging
 
 
 def _is_whitespace(c):
@@ -27,11 +27,11 @@ def read_intent_slot_outputs(
     pred_slots = np.argmax(slot_logits, axis=2)
     slot_masks = slot_masks > 0.5
     for i, query in enumerate(queries):
-        nemo.logging.info(f'Query: {query}')
+        logging.info(f'Query: {query}')
         pred = pred_intents[i]
-        nemo.logging.info(f'Predicted intent:\t{pred}\t{intent_dict[pred]}')
+        logging.info(f'Predicted intent:\t{pred}\t{intent_dict[pred]}')
         if intents is not None:
-            nemo.logging.info(f'True intent:\t{intents[i]}\t{intent_dict[intents[i]]}')
+            logging.info(f'True intent:\t{intents[i]}\t{intent_dict[intents[i]]}')
 
         pred_slot = pred_slots[i][slot_masks[i]]
         tokens = query.strip().split()
@@ -43,7 +43,7 @@ def read_intent_slot_outputs(
             output = f'{token}\t{slot_dict[pred_slot[j]]}'
             if slots is not None:
                 output = f'{output}\t{slot_dict[slots[i][j]]}'
-            nemo.logging.info(output)
+            logging.info(output)
 
 
 def get_vocab(file):
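
The pattern the patch applies throughout, as a minimal standalone sketch. It assumes NeMo's top-level package exposes its logger object as nemo.logging (which the call sites above indicate); the eval_loss value is a placeholder for illustration:

    # Before: every call site reaches the logger through the package attribute.
    #   import nemo
    #   nemo.logging.info("Eval loss: 0.123")

    # After: bind the logger object once at import time.
    from nemo import logging  # NeMo's logger instance, not the stdlib module

    eval_loss = 0.123  # placeholder value for illustration
    logging.info(f"Eval loss: {eval_loss}")

Note that "from nemo import logging" shadows the standard-library logging name inside each module, which is why the rewritten call sites read like stdlib logging while still routing through NeMo's logger; any module that also needs the stdlib package would have to import it under an alias.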