Get rid of most warnings in testing #2191

Merged
merged 16 commits on Sep 23, 2018
CHANGELOG.md (4 additions, 4 deletions)
@@ -8,10 +8,10 @@ Changes
New training mode for `*2Vec` models (word2vec, doc2vec, fasttext) that allows model training to scale linearly with the number of cores (full GIL elimination). The result of our Google Summer of Code 2018 project by Dmitry Persiyanov.

**Benchmark**
- - Dataset: full English Wikipedia
- - Cloud: GCE
- - CPU: Intel(R) Xeon(R) CPU @ 2.30GHz 32 cores
- - BLAS: libblas3 (3.7.1-3ubuntu2)
+ - Dataset: `full English Wikipedia`
+ - Cloud: `GCE`
+ - CPU: `Intel(R) Xeon(R) CPU @ 2.30GHz 32 cores`
+ - BLAS: `MKL`


| Model | Queue-based version [sec] | File-based version [sec] | speed up | Accuracy (queue-based) | Accuracy (file-based) |
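The file-based mode benchmarked above is driven by the new `corpus_file` parameter, which also appears in the `fasttext.py` signatures further down this diff. A minimal sketch, assuming gensim 3.6+ and a plain-text corpus in `LineSentence` format (one sentence per line, tokens separated by whitespace); the file name and worker count are illustrative:

>>> from gensim.models import Word2Vec
>>>
>>> # 'enwiki.txt' is a hypothetical LineSentence-format file, not a real path
>>> model = Word2Vec(corpus_file='enwiki.txt', workers=32)  # scales with cores
>>> vector = model.wv['example']  # trained vectors live on model.wv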
gensim/models/deprecated/doc2vec.py (2 additions, 2 deletions)
@@ -91,7 +91,7 @@ def load_old_doc2vec(*args, **kwargs):
'dm_tag_count': old_model.dm_tag_count,
'docvecs_mapfile': old_model.__dict__.get('docvecs_mapfile', None),
'comment': old_model.__dict__.get('comment', None),
- 'size': old_model.vector_size,
+ 'vector_size': old_model.vector_size,
'alpha': old_model.alpha,
'window': old_model.window,
'min_count': old_model.min_count,
@@ -104,7 +104,7 @@ def load_old_doc2vec(*args, **kwargs):
'negative': old_model.negative,
'cbow_mean': old_model.cbow_mean,
'hashfxn': old_model.hashfxn,
- 'iter': old_model.iter,
+ 'epochs': old_model.iter,
'sorted_vocab': old_model.__dict__.get('sorted_vocab', 1),
'batch_words': old_model.__dict__.get('batch_words', MAX_WORDS_IN_BATCH),
'compute_loss': old_model.__dict__.get('compute_loss', None)
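The two renames that `load_old_doc2vec` maps here (`size` to `vector_size`, `iter` to `epochs`) mirror the keyword arguments of the current `Doc2Vec` class. A hedged sketch with a toy corpus and illustrative parameter values:

>>> from gensim.models.doc2vec import Doc2Vec, TaggedDocument
>>>
>>> documents = [TaggedDocument(words=['cat', 'say', 'meow'], tags=[0])]
>>> model = Doc2Vec(documents, vector_size=10, epochs=5, min_count=1)  # not size=/iter=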
gensim/models/fasttext.py (8 additions, 8 deletions)
@@ -363,8 +363,8 @@ def __init__(self, sentences=None, corpus_file=None, sg=0, hs=0, size=100, alpha
>>> sentences = [["cat", "say", "meow"], ["dog", "say", "woof"]]
>>>
>>> model = FastText(sentences, min_count=1)
- >>> say_vector = model['say'] # get vector for word
- >>> of_vector = model['of'] # get vector for out-of-vocab word
+ >>> say_vector = model.wv['say'] # get vector for word
+ >>> of_vector = model.wv['of'] # get vector for out-of-vocab word

"""
self.load = call_on_class_only
@@ -380,7 +380,7 @@ def __init__(self, sentences=None, corpus_file=None, sg=0, hs=0, size=100, alpha
sorted_vocab=bool(sorted_vocab), null_word=null_word, ns_exponent=ns_exponent)
self.trainables = FastTextTrainables(
vector_size=size, seed=seed, bucket=bucket, hashfxn=hashfxn)
- self.wv.bucket = self.bucket
+ self.wv.bucket = self.trainables.bucket

super(FastText, self).__init__(
sentences=sentences, corpus_file=corpus_file, workers=workers, vector_size=size, epochs=iter,
@@ -487,10 +487,10 @@ def build_vocab(self, sentences=None, corpus_file=None, update=False, progress_p
>>>
>>> model = FastText(min_count=1)
>>> model.build_vocab(sentences_1)
- >>> model.train(sentences_1, total_examples=model.corpus_count, epochs=model.iter)
+ >>> model.train(sentences_1, total_examples=model.corpus_count, epochs=model.epochs)
>>>
>>> model.build_vocab(sentences_2, update=True)
- >>> model.train(sentences_2, total_examples=model.corpus_count, epochs=model.iter)
+ >>> model.train(sentences_2, total_examples=model.corpus_count, epochs=model.epochs)

"""
if update:
@@ -519,11 +519,11 @@ def _clear_post_train(self):
def estimate_memory(self, vocab_size=None, report=None):
vocab_size = vocab_size or len(self.wv.vocab)
vec_size = self.vector_size * np.dtype(np.float32).itemsize
- l1_size = self.layer1_size * np.dtype(np.float32).itemsize
+ l1_size = self.trainables.layer1_size * np.dtype(np.float32).itemsize
report = report or {}
report['vocab'] = len(self.wv.vocab) * (700 if self.hs else 500)
report['syn0_vocab'] = len(self.wv.vocab) * vec_size
- num_buckets = self.bucket
+ num_buckets = self.trainables.bucket
if self.hs:
report['syn1'] = len(self.wv.vocab) * l1_size
if self.negative:
@@ -657,7 +657,7 @@ def train(self, sentences=None, corpus_file=None, total_examples=None, total_wor
>>>
>>> model = FastText(min_count=1)
>>> model.build_vocab(sentences)
- >>> model.train(sentences, total_examples=model.corpus_count, epochs=model.iter)
+ >>> model.train(sentences, total_examples=model.corpus_count, epochs=model.epochs)

"""
super(FastText, self).train(
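The recurring change in this file, `model[...]` becoming `model.wv[...]` and `model.iter` becoming `model.epochs`, reflects that word lookups, including out-of-vocabulary ones composed from character n-gram buckets, belong to the `KeyedVectors` object. A small sketch with toy sentences:

>>> from gensim.models import FastText
>>>
>>> sentences = [["cat", "say", "meow"], ["dog", "say", "woof"]]
>>> model = FastText(sentences, min_count=1)
>>> oov_vector = model.wv['woofing']  # unseen word, composed from n-grams
>>> 'woofing' in model.wv.vocab  # yet it is not in the vocabulary itself
False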
gensim/models/translation_matrix.py (2 additions, 2 deletions)
@@ -136,12 +136,12 @@ def build(cls, lang_vec, lexicon=None):
# if the lexicon is not provided, use all of the KeyedVectors' words as the default
for item in lexicon:
words.append(item)
- mat.append(lang_vec.syn0[lang_vec.vocab[item].index])
+ mat.append(lang_vec.vectors[lang_vec.vocab[item].index])

else:
for item in lang_vec.vocab.keys():
words.append(item)
- mat.append(lang_vec.syn0[lang_vec.vocab[item].index])
+ mat.append(lang_vec.vectors[lang_vec.vocab[item].index])

return Space(mat, words)

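Both hunks above are the same attribute rename: the raw embedding matrix on `KeyedVectors` is now `vectors` rather than `syn0`. A hedged sketch of the access pattern used in `build`, assuming `lang_vec` is any loaded `KeyedVectors` instance (the file name is hypothetical):

>>> from gensim.models import KeyedVectors
>>>
>>> lang_vec = KeyedVectors.load('source_lang.kv')  # hypothetical saved model
>>> row = lang_vec.vectors[lang_vec.vocab['word'].index]  # was lang_vec.syn0[...]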
gensim/models/wrappers/dtmmodel.py (3 additions, 3 deletions)
@@ -464,9 +464,9 @@ def show_topics(self, num_topics=10, times=5, num_words=10, log=False, formatted
for time in chosen_times:
for i in chosen_topics:
if formatted:
- topic = self.print_topic(i, time, num_words=num_words)
+ topic = self.print_topic(i, time, topn=num_words)
else:
- topic = self.show_topic(i, time, num_words=num_words)
+ topic = self.show_topic(i, time, topn=num_words)
shown.append(topic)
return shown

@@ -529,7 +529,7 @@ def print_topic(self, topicid, time, topn=10, num_words=None):
warnings.warn("The parameter `num_words` is deprecated, will be removed in 4.0.0, use `topn` instead.")
topn = num_words

- return ' + '.join(['%.3f*%s' % v for v in self.show_topic(topicid, time, topn)])
+ return ' + '.join(['%.3f*%s' % v for v in self.show_topic(topicid, time, topn=topn)])

def dtm_vis(self, corpus, time):
"""Get data specified by pyLDAvis format.
gensim/models/wrappers/varembed.py (4 additions, 4 deletions)
@@ -95,14 +95,14 @@ def load_word_embeddings(self, word_embeddings, word_to_ix):
counts[word] = counts.get(word, 0) + 1
self.vocab_size = len(counts)
self.vector_size = word_embeddings.shape[1]
- self.syn0 = np.zeros((self.vocab_size, self.vector_size))
+ self.vectors = np.zeros((self.vocab_size, self.vector_size))
self.index2word = [None] * self.vocab_size
logger.info("Corpus has %i words", len(self.vocab))
for word_id, word in enumerate(counts):
self.vocab[word] = Vocab(index=word_id, count=counts[word])
- self.syn0[word_id] = word_embeddings[word_to_ix[word]]
+ self.vectors[word_id] = word_embeddings[word_to_ix[word]]
self.index2word[word_id] = word
- assert((len(self.vocab), self.vector_size) == self.syn0.shape)
+ assert((len(self.vocab), self.vector_size) == self.vectors.shape)
logger.info("Loaded matrix of %d size and %d dimensions", self.vocab_size, self.vector_size)

def add_morphemes_to_embeddings(self, morfessor_model, morpho_embeddings, morpho_to_ix):
@@ -125,5 +125,5 @@ def add_morphemes_to_embeddings(self, morfessor_model, morpho_embeddings, morpho
for m in morfessor_model.viterbi_segment(word)[0]
]
).sum(axis=0)
- self.syn0[self.vocab[word].index] += morpheme_embedding
+ self.vectors[self.vocab[word].index] += morpheme_embedding
logger.info("Added morphemes to word vectors")
gensim/similarities/docsim.py (1 addition, 1 deletion)
@@ -1051,7 +1051,7 @@ def get_similarities(self, query):
result = []
for qidx in range(n_queries):
# Compute similarity for each query.
- qresult = [self.w2v_model.wmdistance(document, query[qidx]) for document in self.corpus]
+ qresult = [self.w2v_model.wv.wmdistance(document, query[qidx]) for document in self.corpus]
qresult = numpy.array(qresult)
qresult = 1. / (1. + qresult) # Convert distance to similarity; larger distances give smaller similarities.

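`wmdistance` likewise moved onto the `KeyedVectors` object, hence `self.w2v_model.wv.wmdistance(...)`. A minimal sketch with toy sentences (Word Mover's Distance additionally requires the `pyemd` package at call time):

>>> from gensim.models import Word2Vec
>>>
>>> model = Word2Vec([["cat", "say", "meow"], ["dog", "say", "woof"]], min_count=1)
>>> distance = model.wv.wmdistance(['cat', 'meow'], ['dog', 'woof'])
>>> similarity = 1. / (1. + distance)  # the same transform get_similarities applies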
gensim/similarities/index.py (1 addition, 1 deletion)
@@ -174,7 +174,7 @@ def build_from_keyedvectors(self):
"""Build an Annoy index using word vectors from a KeyedVectors model."""

self.model.init_sims()
- return self._build_from_model(self.model.syn0norm, self.model.index2word, self.model.vector_size)
+ return self._build_from_model(self.model.vectors_norm, self.model.index2word, self.model.vector_size)

def _build_from_model(self, vectors, labels, num_features):
index = AnnoyIndex(num_features)
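`init_sims()` fills the renamed `vectors_norm` attribute (formerly `syn0norm`) with unit-length rows aligned to `index2word`, which is what `build_from_keyedvectors` hands to Annoy. A short self-contained sketch on a toy model:

>>> from gensim.models import Word2Vec
>>>
>>> model = Word2Vec([["cat", "say", "meow"], ["dog", "say", "woof"]], min_count=1)
>>> model.wv.init_sims()  # populates vectors_norm
>>> model.wv.vectors_norm.shape == model.wv.vectors.shape
True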
gensim/sklearn_api/ftmodel.py (1 addition, 1 deletion)
@@ -220,5 +220,5 @@ def transform(self, words):
# The input is an array of arrays
if isinstance(words, six.string_types):
words = [words]
- vectors = [self.gensim_model[word] for word in words]
+ vectors = [self.gensim_model.wv[word] for word in words]
return np.reshape(np.array(vectors), (len(words), self.size))
gensim/sklearn_api/w2vmodel.py (1 addition, 1 deletion)
@@ -173,7 +173,7 @@ def transform(self, words):
# The input is an array of arrays
if isinstance(words, six.string_types):
words = [words]
- vectors = [self.gensim_model[word] for word in words]
+ vectors = [self.gensim_model.wv[word] for word in words]
return np.reshape(np.array(vectors), (len(words), self.size))

def partial_fit(self, X):
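The same `.wv` indirection fixes both sklearn wrappers above. A hedged sketch using `W2VTransformer` with a toy corpus and illustrative parameters; `transform` returns one row per input word:

>>> from gensim.sklearn_api import W2VTransformer
>>>
>>> wrapper = W2VTransformer(size=10, min_count=1)
>>> vectors = wrapper.fit([["cat", "say", "meow"], ["dog", "say", "woof"]]).transform(['cat', 'dog'])
>>> vectors.shape  # vectors are read through gensim_model.wv
(2, 10)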
gensim/test/test_api.py (1 addition, 1 deletion)
@@ -47,7 +47,7 @@ def test_load_model(self):
base_dir, "__testing_word2vec-matrix-synopsis", "__testing_word2vec-matrix-synopsis.gz"
)
model = api.load("__testing_word2vec-matrix-synopsis")
- vector_dead_calc = model["dead"]
+ vector_dead_calc = model.wv["dead"]
self.assertTrue(np.allclose(vector_dead, vector_dead_calc))
shutil.rmtree(base_dir)
self.assertEqual(api.load("__testing_word2vec-matrix-synopsis", return_path=True), dataset_path)