Skip to content

Commit

Permalink
gensim models show_topic/print_topic parameter num_words changed to t…
Browse files Browse the repository at this point in the history
…opn to match other topic models. Backwards compatible (#1200)

* Update CHANGELOG.txt

* Update CHANGELOG.txt

* Release version typo fix

* Typo in version

* show_topic parameter num_words changed to topn

show_topic parameter num_words changed to topn in order to make it consistent with LdaModel

show_topic parameter num_words changed to topn

both old and new param with deprecation warning

ldamallet now supports both num_words and topn parameters for show_topic with deprecation warning for the num_words.

hdpmodel show_topic supports old and new param

show_topic in hdpmodel now supports both num_words and topn parameters to make it consistent across all models, with deprecation warning for num_words

dtmmodel topn/num_words with deprecation warning

Inconsistency between api and code removed for topn/num_words by adding support for both params with proper deprecation warning

hdpmodel show_topic supports old and new param

show_topic in hdpmodel now supports both num_words and topn parameters to make it consistent across all models, with deprecation warning for num_words - checks should pass this time

hdpmodel show_topic supports old and new param

dtmmodel topn/num_words with deprecation warning

ldamallet show_topic param fixed

ldamallet now supports both num_words and topn parameters for show_topic with deprecation warning for the num_words.

dtmmodel topn/num_words with deprecation warning

dtmmodel is now compatible with both topn/num_words parameters for show_topic and others with proper deprecation warnings.

hdpmodel num_words changed to topn with deprecation warning

To make the code consistent with the api-  parameters num_words changed to topn (for print_topic/show_topic method), with deprecation warning for num_words

hdpmodel num_words changed to topn with deprecation warning

To make the code consistent with the api-  parameters num_words changed to topn (for print_topic/show_topic method), with deprecation warning for num_words

hdpmodel num_words changed to topn with deprecation warning

To make the code consistent with the api-  parameters num_words changed to topn (for print_topic/show_topic method), with deprecation warning for num_words

dtmmodel num_words changed to topn with deprecation warning

To make the code consistent with the api-  parameters num_words changed to topn (for print_topic/show_topic method), with deprecation warning for num_words

ldamallet num_words changed to topn with deprecation warning

To make the code consistent with the api-  parameters num_words changed to topn (for print_topic/show_topic method), with deprecation warning for num_words

hdpmodel num_words changed to topn with deprecation warning

To make the code consistent with the api-  parameters num_words changed to topn (for print_topic/show_topic method), with deprecation warning for num_words

ldamallet num_words changed to topn with deprecation warning

To make the code consistent with the api-  parameters num_words changed to topn (for print_topic/show_topic method), with deprecation warning for num_words

* hdpmodel topn/num_words conflict resolved

* dtmmodel topn/show_topic conflict resolved

* ldamallet topn/num_words conflict resolved

* whitespace error resolved

* whitespace error resolved

* split multi-line comments in hdpmodel

* splitting multi-line comments in dtmmodel

* splitting multi-line comments for ldamallet
  • Loading branch information
prakhar2b authored and tmylk committed May 23, 2017
1 parent 5242a32 commit 834e130
Show file tree
Hide file tree
Showing 3 changed files with 42 additions and 14 deletions.
29 changes: 21 additions & 8 deletions gensim/models/hdpmodel.py
Original file line number Diff line number Diff line change
Expand Up @@ -47,7 +47,6 @@
meanchangethresh = 0.00001
rhot_bound = 0.0


def expect_log_sticks(sticks):
"""
For stick-breaking hdp, return the E[log(sticks)]
Expand Down Expand Up @@ -436,20 +435,25 @@ def update_expectations(self):
self.m_timestamp[:] = self.m_updatect
self.m_status_up_to_date = True

def show_topic(self, topic_id, topn=20, log=False, formatted=False, num_words=None):
    """
    Return the `topn` most probable words for topic `topic_id`.

    Set `formatted=True` to return the topics as a list of strings, or
    `False` as lists of (weight, word) pairs.

    `log` is forwarded unchanged to `HdpTopicFormatter.show_topic`.

    `num_words` is the deprecated name of `topn`. When it is given (not None)
    it takes precedence over `topn` and a warning is logged.
    """
    if num_words is not None:  # deprecated num_words is used
        logger.warning("The parameter num_words for show_topic() would be deprecated in the updated version.")
        logger.warning("Please use topn instead.")
        topn = num_words

    # Bring the model state up to date first if it is flagged as stale.
    if not self.m_status_up_to_date:
        self.update_expectations()
    betas = self.m_lambda + self.m_eta
    hdp_formatter = HdpTopicFormatter(self.id2word, betas)
    return hdp_formatter.show_topic(topic_id, topn, log, formatted)

def show_topics(self, num_topics=20, num_words=20, log=False, formatted=True):
"""
Print the `num_words` most probable words for `num_topics` number of topics.
Expand Down Expand Up @@ -608,18 +612,27 @@ def show_topics(self, num_topics=10, num_words=10, log=False, formatted=True):

return shown

def print_topic(self, topic_id, topn=None, num_words=None):
    """
    Return topic `topic_id` as produced by `show_topic(..., formatted=True)`.

    `topn` is forwarded to `show_topic` as given, including the default None.
    NOTE(review): None overrides show_topic's own default of 20; how None is
    handled depends on `show_topic_terms` -- confirm this is intended.

    `num_words` is the deprecated name of `topn`. When it is given (not None)
    it takes precedence over `topn` and a warning is logged.
    """
    if num_words is not None:  # deprecated num_words is used
        logger.warning("The parameter num_words for print_topic() would be deprecated in the updated version.")
        logger.warning("Please use topn instead.")
        topn = num_words

    return self.show_topic(topic_id, topn, formatted=True)

def show_topic(self, topic_id, num_words, log=False, formatted=False):
def show_topic(self, topic_id, topn=20, log=False, formatted=False, num_words= None,):
if num_words is not None: # deprecated num_words is used
logger.warning("The parameter num_words for show_topic() would be deprecated in the updated version.")
logger.warning("Please use topn instead.")
topn = num_words

lambdak = list(self.data[topic_id, :])
lambdak = lambdak / sum(lambdak)

temp = zip(lambdak, xrange(len(lambdak)))
temp = sorted(temp, key=lambda x: x[0], reverse=True)

topic_terms = self.show_topic_terms(temp, num_words)
topic_terms = self.show_topic_terms(temp, topn)

if formatted:
topic = self.format_topic(topic_id, topic_terms)
Expand Down
18 changes: 14 additions & 4 deletions gensim/models/wrappers/dtmmodel.py
Original file line number Diff line number Diff line change
Expand Up @@ -283,26 +283,36 @@ def show_topics(self, num_topics=10, times=5, num_words=10, log=False, formatted
# topic))
return shown

def show_topic(self, topicid, time, topn=50, num_words=None):
    """
    Return `topn` most probable words for the given `topicid`, as a list of
    `(word_probability, word)` 2-tuples.

    `time` selects the slice along the last axis of `self.lambda_`.

    `num_words` is the deprecated name of `topn`. When it is given (not None)
    it takes precedence over `topn` and a warning is logged.
    """
    if num_words is not None:  # deprecated num_words is used
        logger.warning("The parameter num_words for show_topic() would be deprecated in the updated version.")
        logger.warning("Please use topn instead.")
        topn = num_words

    topics = self.lambda_[:, :, time]
    topic = topics[topicid]
    # likelihood to probability
    topic = np.exp(topic)
    # normalize to probability dist
    topic = topic / topic.sum()
    # sort according to prob
    bestn = matutils.argsort(topic, topn, reverse=True)
    # `word_id` instead of `id` so the builtin is not shadowed
    beststr = [(topic[word_id], self.id2word[word_id]) for word_id in bestn]
    return beststr

def print_topic(self, topicid, time, topn=10, num_words=None):
    """
    Return the given topic, formatted as a string.

    The string joins the `(probability, word)` pairs returned by
    `show_topic(topicid, time, topn)` as `'%.3f*%s'` terms separated by `' + '`.

    `num_words` is the deprecated name of `topn`. When it is given (not None)
    it takes precedence over `topn` and a warning is logged.
    """
    if num_words is not None:  # deprecated num_words is used
        # Message fixed: the original read "print_topic(()" with a stray parenthesis.
        logger.warning("The parameter num_words for print_topic() would be deprecated in the updated version.")
        logger.warning("Please use topn instead.")
        topn = num_words

    return ' + '.join(['%.3f*%s' % v for v in self.show_topic(topicid, time, topn)])

def dtm_vis(self, corpus, time):
"""
Expand Down
9 changes: 7 additions & 2 deletions gensim/models/wrappers/ldamallet.py
Original file line number Diff line number Diff line change
Expand Up @@ -240,14 +240,19 @@ def show_topics(self, num_topics=10, num_words=10, log=False, formatted=True):
logger.info("topic #%i (%.3f): %s", i, self.alpha[i], topic)
return shown

def show_topic(self, topicid, topn=10, num_words=None):
    """
    Return the `topn` most probable words for topic `topicid`, as a list of
    `(word, word_probability)` 2-tuples.

    `num_words` is the deprecated name of `topn`. When it is given (not None)
    it takes precedence over `topn` and a warning is logged.
    """
    if num_words is not None:  # deprecated num_words is used
        logger.warning("The parameter num_words for show_topic() would be deprecated in the updated version.")
        logger.warning("Please use topn instead.")
        topn = num_words

    if self.word_topics is None:
        # NOTE(review): only a warning is logged here; the indexing below still
        # fails when word_topics is None. Confirm whether an explicit error
        # would be preferable.
        logger.warning(
            "Run train or load_word_topics before showing topics."
        )
    topic = self.word_topics[topicid]
    topic = topic / topic.sum()  # normalize to probability dist
    bestn = matutils.argsort(topic, topn, reverse=True)
    # `word_id` instead of `id` so the builtin is not shadowed
    beststr = [(self.id2word[word_id], topic[word_id]) for word_id in bestn]
    return beststr

Expand Down

0 comments on commit 834e130

Please sign in to comment.