Skip to content

Commit

Permalink
Implement and document topn=None
Browse files Browse the repository at this point in the history
  • Loading branch information
Witiko committed Apr 23, 2019
1 parent da094a9 commit 6d89673
Showing 1 changed file with 28 additions and 19 deletions.
47 changes: 28 additions & 19 deletions gensim/models/keyedvectors.py
Original file line number Diff line number Diff line change
Expand Up @@ -497,8 +497,9 @@ def most_similar(self, positive=None, negative=None, topn=10, restrict_vocab=Non
List of words that contribute positively.
negative : list of str, optional
List of words that contribute negatively.
topn : int, optional
Number of top-N similar words to return.
topn : {int, None}, optional
Number of top-N similar words to return when `topn` is an int. When `topn` is None,
similarities for all words (limited to the first `restrict_vocab` words, if set) are returned.
restrict_vocab : int, optional
Optional integer which limits the range of vectors which
are searched for most-similar values. For example, restrict_vocab=10000 would
Expand All @@ -507,11 +508,12 @@ def most_similar(self, positive=None, negative=None, topn=10, restrict_vocab=Non
Returns
-------
list of (str, float)
Sequence of (word, similarity).
{list of (str, float), numpy.array}
Sequence of (word, similarity) when `topn` is an int. When `topn` is None, an array of
similarities for all words is returned.
"""
if topn is not None and topn < 1:
if isinstance(topn, int) and topn < 1:
return []

if positive is None:
Expand Down Expand Up @@ -553,7 +555,7 @@ def most_similar(self, positive=None, negative=None, topn=10, restrict_vocab=Non

limited = self.vectors_norm if restrict_vocab is None else self.vectors_norm[:restrict_vocab]
dists = dot(limited, mean)
if topn is None:
if not topn:
return dists
best = matutils.argsort(dists, topn=topn + len(all_words), reverse=True)
# ignore (don't return) words from the input
Expand All @@ -567,8 +569,8 @@ def similar_by_word(self, word, topn=10, restrict_vocab=None):
----------
word : str
Word
topn : {int, False}, optional
Number of top-N similar words to return. If topn is False, similar_by_word returns
topn : {int, None}, optional
Number of top-N similar words to return. If topn is None, similar_by_word returns
the vector of similarity scores.
restrict_vocab : int, optional
Optional integer which limits the range of vectors which
Expand All @@ -578,8 +580,9 @@ def similar_by_word(self, word, topn=10, restrict_vocab=None):
Returns
-------
list of (str, float)
Sequence of (word, similarity).
{list of (str, float), numpy.array}
Sequence of (word, similarity) when `topn` is an int. When `topn` is None, an array of
similarities for all words is returned.
"""
return self.most_similar(positive=[word], topn=topn, restrict_vocab=restrict_vocab)
Expand All @@ -591,9 +594,9 @@ def similar_by_vector(self, vector, topn=10, restrict_vocab=None):
----------
vector : numpy.array
Vector from which similarities are to be computed.
topn : {int, False}, optional
Number of top-N similar words to return. If topn is False, similar_by_vector returns
the vector of similarity scores.
topn : {int, None}, optional
Number of top-N similar words to return when `topn` is an int. When `topn` is None,
similarities for all words are returned.
restrict_vocab : int, optional
Optional integer which limits the range of vectors which
are searched for most-similar values. For example, restrict_vocab=10000 would
Expand All @@ -602,8 +605,9 @@ def similar_by_vector(self, vector, topn=10, restrict_vocab=None):
Returns
-------
list of (str, float)
Sequence of (word, similarity).
{list of (str, float), numpy.array}
Sequence of (word, similarity) when `topn` is an int. When `topn` is None, an array of
similarities for all words is returned.
"""
return self.most_similar(positive=[vector], topn=topn, restrict_vocab=restrict_vocab)
Expand Down Expand Up @@ -783,15 +787,20 @@ def most_similar_cosmul(self, positive=None, negative=None, topn=10):
List of words that contribute positively.
negative : list of str, optional
List of words that contribute negatively.
topn : int, optional
Number of top-N similar words to return.
topn : {int, None}, optional
Number of top-N similar words to return when `topn` is an int. When `topn` is None,
similarities for all words are returned.
Returns
-------
list of (str, float)
Sequence of (word, similarity).
{list of (str, float), numpy.array}
Sequence of (word, similarity) when `topn` is an int. When `topn` is None, an array of
similarities for all words is returned.
"""
if isinstance(topn, int) and topn < 1:
return []

if positive is None:
positive = []
if negative is None:
Expand Down

0 comments on commit 6d89673

Please sign in to comment.