From 78c804d67fdd9f1595a8b66d050a0df9527bde91 Mon Sep 17 00:00:00 2001 From: Shubh Vachher Date: Thu, 30 Mar 2017 22:47:41 +0530 Subject: [PATCH 1/2] Docs for word2vec.py forwarding functions and one more --- gensim/models/word2vec.py | 80 +++++++++++++++++++++++++++++++++++++++ 1 file changed, 80 insertions(+) diff --git a/gensim/models/word2vec.py b/gensim/models/word2vec.py index 000eee6976..5dd2a37700 100644 --- a/gensim/models/word2vec.py +++ b/gensim/models/word2vec.py @@ -355,6 +355,9 @@ class Word2Vec(utils.SaveLoad): """ Class for training, using and evaluating neural networks described in https://code.google.com/p/word2vec/ + If you're finished training a model (=no more updates, only querying) + then switch to the :class:`gensim.models.KeyedVectors` instance in `wv`. + The model can be stored/loaded via its `save()` and `load()` methods, or stored/loaded in a format compatible with the original word2vec implementation via `wv.save_word2vec_format()` and `KeyedVectors.load_word2vec_format()`. @@ -1076,6 +1079,11 @@ def worker_loop(): return sentence_scores[:sentence_count] def clear_sims(self): + """ + Removes all L2-normalized vectors for words from the model. + You will have to recompute them using the `init_sims()` method. + """ + self.wv.syn0norm = None def update_weights(self): @@ -1181,33 +1189,93 @@ def intersect_word2vec_format(self, fname, lockf=0.0, binary=False, encoding='ut logger.info("merged %d vectors into %s matrix from %s" % (overlap_count, self.wv.syn0.shape, fname)) def most_similar(self, positive=[], negative=[], topn=10, restrict_vocab=None, indexer=None): + """ + Please refer to the documentation for `gensim.models.KeyedVectors.most_similar` + This is just a forwarding function. 
+ In the future please use the `gensim.models.KeyedVectors` instance in wv + """ + return self.wv.most_similar(positive, negative, topn, restrict_vocab, indexer) def wmdistance(self, document1, document2): + """ + Please refer to the documentation for `gensim.models.KeyedVectors.wmdistance` + This is just a forwarding function. + In the future please use the `gensim.models.KeyedVectors` instance in wv + """ + return self.wv.wmdistance(document1, document2) def most_similar_cosmul(self, positive=[], negative=[], topn=10): + """ + Please refer to the documentation for `gensim.models.KeyedVectors.most_similar_cosmul` + This is just a forwarding function. + In the future please use the `gensim.models.KeyedVectors` instance in wv + """ + return self.wv.most_similar_cosmul(positive, negative, topn) def similar_by_word(self, word, topn=10, restrict_vocab=None): + """ + Please refer to the documentation for `gensim.models.KeyedVectors.similar_by_word` + This is just a forwarding function. + In the future please use the `gensim.models.KeyedVectors` instance in wv + """ + return self.wv.similar_by_word(word, topn, restrict_vocab) def similar_by_vector(self, vector, topn=10, restrict_vocab=None): + """ + Please refer to the documentation for `gensim.models.KeyedVectors.similar_by_vector` + This is just a forwarding function. + In the future please use the `gensim.models.KeyedVectors` instance in wv + """ + return self.wv.similar_by_vector(vector, topn, restrict_vocab) def doesnt_match(self, words): + """ + Please refer to the documentation for `gensim.models.KeyedVectors.doesnt_match` + This is just a forwarding function. + In the future please use the `gensim.models.KeyedVectors` instance in wv + """ + return self.wv.doesnt_match(words) def __getitem__(self, words): + """ + Please refer to the documentation for `gensim.models.KeyedVectors.__getitem__` + This is just a forwarding function. 
+ In the future please use the `gensim.models.KeyedVectors` instance in wv + """ + return self.wv.__getitem__(words) def __contains__(self, word): + """ + Please refer to the documentation for `gensim.models.KeyedVectors.__contains__` + This is just a forwarding function. + In the future please use the `gensim.models.KeyedVectors` instance in wv + """ + return self.wv.__contains__(word) def similarity(self, w1, w2): + """ + Please refer to the documentation for `gensim.models.KeyedVectors.similarity` + This is just a forwarding function. + In the future please use the `gensim.models.KeyedVectors` instance in wv + """ + return self.wv.similarity(w1, w2) def n_similarity(self, ws1, ws2): + """ + Please refer to the documentation for `gensim.models.KeyedVectors.n_similarity` + This is just a forwarding function. + In the future please use the `gensim.models.KeyedVectors` instance in wv + """ + return self.wv.n_similarity(ws1, ws2) def predict_output_word(self, context_words_list, topn=10): @@ -1270,9 +1338,21 @@ def accuracy(self, questions, restrict_vocab=30000, most_similar=None, case_inse @staticmethod def log_evaluate_word_pairs(pearson, spearman, oov, pairs): + """ + Please refer to the documentation for `gensim.models.KeyedVectors.log_evaluate_word_pairs` + This is just a forwarding function. + In the future please use the `gensim.models.KeyedVectors` instance in wv + """ + return KeyedVectors.log_evaluate_word_pairs(pearson, spearman, oov, pairs) def evaluate_word_pairs(self, pairs, delimiter='\t', restrict_vocab=300000, case_insensitive=True, dummy4unknown=False): + """ + Please refer to the documentation for `gensim.models.KeyedVectors.evaluate_word_pairs` + This is just a forwarding function. 
+ In the future please use the `gensim.models.KeyedVectors` instance in wv + """ + return self.wv.evaluate_word_pairs(pairs, delimiter, restrict_vocab, case_insensitive, dummy4unknown) def __str__(self): From 73396652986654e19e1ad6f415f00ce5324f5c73 Mon Sep 17 00:00:00 2001 From: Shubh Vachher Date: Mon, 10 Apr 2017 15:47:19 +0530 Subject: [PATCH 2/2] Fix Pep8 --- gensim/models/word2vec.py | 36 ++++++++++++++++++++++++------------ 1 file changed, 24 insertions(+), 12 deletions(-) diff --git a/gensim/models/word2vec.py b/gensim/models/word2vec.py index 5dd2a37700..a85d9b3e62 100644 --- a/gensim/models/word2vec.py +++ b/gensim/models/word2vec.py @@ -1190,7 +1190,8 @@ def intersect_word2vec_format(self, fname, lockf=0.0, binary=False, encoding='ut def most_similar(self, positive=[], negative=[], topn=10, restrict_vocab=None, indexer=None): """ - Please refer to the documentation for `gensim.models.KeyedVectors.most_similar` + Please refer to the documentation for + `gensim.models.KeyedVectors.most_similar` This is just a forwarding function. In the future please use the `gensim.models.KeyedVectors` instance in wv """ @@ -1199,7 +1200,8 @@ def most_similar(self, positive=[], negative=[], topn=10, restrict_vocab=None, i def wmdistance(self, document1, document2): """ - Please refer to the documentation for `gensim.models.KeyedVectors.wmdistance` + Please refer to the documentation for + `gensim.models.KeyedVectors.wmdistance` This is just a forwarding function. In the future please use the `gensim.models.KeyedVectors` instance in wv """ @@ -1208,7 +1210,8 @@ def wmdistance(self, document1, document2): def most_similar_cosmul(self, positive=[], negative=[], topn=10): """ - Please refer to the documentation for `gensim.models.KeyedVectors.most_similar_cosmul` + Please refer to the documentation for + `gensim.models.KeyedVectors.most_similar_cosmul` This is just a forwarding function. 
In the future please use the `gensim.models.KeyedVectors` instance in wv """ @@ -1217,7 +1220,8 @@ def most_similar_cosmul(self, positive=[], negative=[], topn=10): def similar_by_word(self, word, topn=10, restrict_vocab=None): """ - Please refer to the documentation for `gensim.models.KeyedVectors.similar_by_word` + Please refer to the documentation for + `gensim.models.KeyedVectors.similar_by_word` This is just a forwarding function. In the future please use the `gensim.models.KeyedVectors` instance in wv """ @@ -1226,7 +1230,8 @@ def similar_by_word(self, word, topn=10, restrict_vocab=None): def similar_by_vector(self, vector, topn=10, restrict_vocab=None): """ - Please refer to the documentation for `gensim.models.KeyedVectors.similar_by_vector` + Please refer to the documentation for + `gensim.models.KeyedVectors.similar_by_vector` This is just a forwarding function. In the future please use the `gensim.models.KeyedVectors` instance in wv """ @@ -1235,7 +1240,8 @@ def similar_by_vector(self, vector, topn=10, restrict_vocab=None): def doesnt_match(self, words): """ - Please refer to the documentation for `gensim.models.KeyedVectors.doesnt_match` + Please refer to the documentation for + `gensim.models.KeyedVectors.doesnt_match` This is just a forwarding function. In the future please use the `gensim.models.KeyedVectors` instance in wv """ @@ -1244,7 +1250,8 @@ def doesnt_match(self, words): def __getitem__(self, words): """ - Please refer to the documentation for `gensim.models.KeyedVectors.__getitem__` + Please refer to the documentation for + `gensim.models.KeyedVectors.__getitem__` This is just a forwarding function. 
In the future please use the `gensim.models.KeyedVectors` instance in wv """ @@ -1253,7 +1260,8 @@ def __getitem__(self, words): def __contains__(self, word): """ - Please refer to the documentation for `gensim.models.KeyedVectors.__contains__` + Please refer to the documentation for + `gensim.models.KeyedVectors.__contains__` This is just a forwarding function. In the future please use the `gensim.models.KeyedVectors` instance in wv """ @@ -1262,7 +1270,8 @@ def __contains__(self, word): def similarity(self, w1, w2): """ - Please refer to the documentation for `gensim.models.KeyedVectors.similarity` + Please refer to the documentation for + `gensim.models.KeyedVectors.similarity` This is just a forwarding function. In the future please use the `gensim.models.KeyedVectors` instance in wv """ @@ -1271,7 +1280,8 @@ def similarity(self, w1, w2): def n_similarity(self, ws1, ws2): """ - Please refer to the documentation for `gensim.models.KeyedVectors.n_similarity` + Please refer to the documentation for + `gensim.models.KeyedVectors.n_similarity` This is just a forwarding function. In the future please use the `gensim.models.KeyedVectors` instance in wv """ @@ -1339,7 +1349,8 @@ def accuracy(self, questions, restrict_vocab=30000, most_similar=None, case_inse @staticmethod def log_evaluate_word_pairs(pearson, spearman, oov, pairs): """ - Please refer to the documentation for `gensim.models.KeyedVectors.log_evaluate_word_pairs` + Please refer to the documentation for + `gensim.models.KeyedVectors.log_evaluate_word_pairs` This is just a forwarding function. 
In the future please use the `gensim.models.KeyedVectors` instance in wv """ @@ -1348,7 +1359,8 @@ def log_evaluate_word_pairs(pearson, spearman, oov, pairs): def evaluate_word_pairs(self, pairs, delimiter='\t', restrict_vocab=300000, case_insensitive=True, dummy4unknown=False): """ - Please refer to the documentation for `gensim.models.KeyedVectors.evaluate_word_pairs` + Please refer to the documentation for + `gensim.models.KeyedVectors.evaluate_word_pairs` This is just a forwarding function. In the future please use the `gensim.models.KeyedVectors` instance in wv """