Implement and document topn=None

piskvorky · Apr 23, 2019 · 6d89673 · 6d89673
1 parent da094a9
commit 6d89673
Showing 1 changed file with 28 additions and 19 deletions.
diff --git a/gensim/models/keyedvectors.py b/gensim/models/keyedvectors.py
@@ -497,8 +497,9 @@ def most_similar(self, positive=None, negative=None, topn=10, restrict_vocab=Non
             List of words that contribute positively.
         negative : list of str, optional
             List of words that contribute negatively.
-        topn : int, optional
-            Number of top-N similar words to return.
+        topn : {int, None}, optional
+            Number of top-N similar words to return, when `topn` is int. When `topn` is None,
+            then similarities for all searched words (see `restrict_vocab`) are returned.
         restrict_vocab : int, optional
             Optional integer which limits the range of vectors which
             are searched for most-similar values. For example, restrict_vocab=10000 would
@@ -507,11 +508,12 @@ def most_similar(self, positive=None, negative=None, topn=10, restrict_vocab=Non
 
         Returns
         -------
-        list of (str, float)
-            Sequence of (word, similarity).
+        {list of (str, float), numpy.array}
+            Sequence of (word, similarity) when `topn` is int. When `topn` is None, the array of
+            similarity scores is returned instead, without the words.
 
         """
-        if topn is not None and topn < 1:
+        if isinstance(topn, int) and topn < 1:
             return []
 
         if positive is None:
@@ -553,7 +555,7 @@ def most_similar(self, positive=None, negative=None, topn=10, restrict_vocab=Non
 
         limited = self.vectors_norm if restrict_vocab is None else self.vectors_norm[:restrict_vocab]
         dists = dot(limited, mean)
-        if topn is None:
+        if not topn:
             return dists
         best = matutils.argsort(dists, topn=topn + len(all_words), reverse=True)
         # ignore (don't return) words from the input
@@ -567,8 +569,8 @@ def similar_by_word(self, word, topn=10, restrict_vocab=None):
         ----------
         word : str
             Word
-        topn : {int, False}, optional
-            Number of top-N similar words to return. If topn is False, similar_by_word returns
+        topn : {int, None}, optional
+            Number of top-N similar words to return. If topn is None, similar_by_word returns
             the vector of similarity scores.
         restrict_vocab : int, optional
             Optional integer which limits the range of vectors which
@@ -578,8 +580,9 @@ def similar_by_word(self, word, topn=10, restrict_vocab=None):
 
         Returns
         -------
-        list of (str, float)
-            Sequence of (word, similarity).
+        {list of (str, float), numpy.array}
+            Sequence of (word, similarity) when `topn` is int. When `topn` is None, then
+            similarities for all words are returned.
 
         """
         return self.most_similar(positive=[word], topn=topn, restrict_vocab=restrict_vocab)
@@ -591,9 +594,9 @@ def similar_by_vector(self, vector, topn=10, restrict_vocab=None):
         ----------
         vector : numpy.array
             Vector from which similarities are to be computed.
-        topn : {int, False}, optional
-            Number of top-N similar words to return. If topn is False, similar_by_vector returns
-            the vector of similarity scores.
+        topn : {int, None}, optional
+            Number of top-N similar words to return, when `topn` is int. When `topn` is None,
+            then similarities for all words are returned.
         restrict_vocab : int, optional
             Optional integer which limits the range of vectors which
             are searched for most-similar values. For example, restrict_vocab=10000 would
@@ -602,8 +605,9 @@ def similar_by_vector(self, vector, topn=10, restrict_vocab=None):
 
         Returns
         -------
-        list of (str, float)
-            Sequence of (word, similarity).
+        {list of (str, float), numpy.array}
+            Sequence of (word, similarity) when `topn` is int. When `topn` is None, then
+            similarities for all words are returned.
 
         """
         return self.most_similar(positive=[vector], topn=topn, restrict_vocab=restrict_vocab)
@@ -783,15 +787,20 @@ def most_similar_cosmul(self, positive=None, negative=None, topn=10):
             List of words that contribute positively.
         negative : list of str, optional
             List of words that contribute negatively.
-        topn : int, optional
-            Number of top-N similar words to return.
+        topn : {int, None}, optional
+            Number of top-N similar words to return, when `topn` is int. When `topn` is None,
+            then similarities for all words are returned.
 
         Returns
         -------
-        list of (str, float)
-            Sequence of (word, similarity).
+        {list of (str, float), numpy.array}
+            Sequence of (word, similarity) when `topn` is int. When `topn` is None, then
+            similarities for all words are returned.
 
         """
+        if isinstance(topn, int) and topn < 1:
+            return []
+
         if positive is None:
             positive = []
         if negative is None: