From 834e130e36a4457ebe286ce6a9120e965eed527e Mon Sep 17 00:00:00 2001
From: Prakhar Pratyush <er.prakhar2b@gmail.com>
Date: Wed, 24 May 2017 03:45:38 +0530
Subject: [PATCH] gensim models show_topic/print_topic parameter num_words
 changed to topn to match other topic models. Backwards compatible (#1200)

* Update CHANGELOG.txt

* Update CHANGELOG.txt

* Release version typo fix

* Typo in version

* show_topic parameter num_words changed to topn

show_topic parameter num_words changed to topn in order to make it consistent with LdaModel

show_topic parameter num_words changed to topn

both old and new param with deprecation warning

ldamallet now supports both the num_words and topn parameters for show_topic, with a deprecation warning for num_words.

hdpmodel show_topic supports old and new param

show_topic in hdpmodel now supports both num_words and topn parameters to make it consistent across all models, with deprecation warning for num_words

dtmmodel topn/num_words with deprecation warning

Inconsistency between api and code removed for topn/num_words by adding support for both params with proper deprecation warning

hdpmodel show_topic supports old and new param

show_topic in hdpmodel now supports both num_words and topn parameters to make it consistent across all models, with deprecation warning for num_words - checks should pass this time

hdpmodel show_topic supports old and new param

dtmmodel topn/num_words with deprecation warning

ldamallet show_topic param fixed

ldamallet now supports both the num_words and topn parameters for show_topic, with a deprecation warning for num_words.

dtmmodel topn/num_words with deprecation warning

dtmmodel is now compatible with both topn/num_words parameters for show_topic and others with proper deprecation warnings.

hdpmodel num_words changed to topn with deprecation warning

To make the code consistent with the API, the parameter num_words was changed to topn (for the print_topic/show_topic methods), with a deprecation warning for num_words

hdpmodel num_words changed to topn with deprecation warning

To make the code consistent with the API, the parameter num_words was changed to topn (for the print_topic/show_topic methods), with a deprecation warning for num_words

hdpmodel num_words changed to topn with deprecation warning

To make the code consistent with the API, the parameter num_words was changed to topn (for the print_topic/show_topic methods), with a deprecation warning for num_words

dtmmodel num_words changed to topn with deprecation warning

To make the code consistent with the API, the parameter num_words was changed to topn (for the print_topic/show_topic methods), with a deprecation warning for num_words

ldamallet num_words changed to topn with deprecation warning

To make the code consistent with the API, the parameter num_words was changed to topn (for the print_topic/show_topic methods), with a deprecation warning for num_words

hdpmodel num_words changed to topn with deprecation warning

To make the code consistent with the API, the parameter num_words was changed to topn (for the print_topic/show_topic methods), with a deprecation warning for num_words

ldamallet num_words changed to topn with deprecation warning

To make the code consistent with the API, the parameter num_words was changed to topn (for the print_topic/show_topic methods), with a deprecation warning for num_words

* hdpmodel topn/num_words conflict resolved

* dtmmodel topn/show_topic conflict resolved

* ldamallet topn/num_words conflict resolved

* whitespace error resolved

* whitespace error resolved

* split multi-line comments in hdpmodel

* splitting multi-line comments in dtmmodel

* splitting multi-line comments for ldamallet
---
 gensim/models/hdpmodel.py           | 29 +++++++++++++++++++++--------
 gensim/models/wrappers/dtmmodel.py  | 18 ++++++++++++++----
 gensim/models/wrappers/ldamallet.py |  9 +++++++--
 3 files changed, 42 insertions(+), 14 deletions(-)

diff --git a/gensim/models/hdpmodel.py b/gensim/models/hdpmodel.py
index 2c74d15a15..ee6035a449 100755
--- a/gensim/models/hdpmodel.py
+++ b/gensim/models/hdpmodel.py
@@ -47,7 +47,6 @@
 meanchangethresh = 0.00001
 rhot_bound = 0.0
 
-
 def expect_log_sticks(sticks):
     """
     For stick-breaking hdp, return the E[log(sticks)]
@@ -436,7 +435,7 @@ def update_expectations(self):
         self.m_timestamp[:] = self.m_updatect
         self.m_status_up_to_date = True
 
-    def show_topic(self, topic_id, num_words=20, log=False, formatted=False):
+    def show_topic(self, topic_id, topn=20, log=False, formatted=False, num_words=None):
         """
         Print the `num_words` most probable words for topic `topic_id`.
 
@@ -444,12 +443,17 @@ def show_topic(self, topic_id, num_words=20, log=False, formatted=False):
         `False` as lists of (weight, word) pairs.
 
         """
+        if num_words is not None:  # deprecated num_words is used
+            logger.warning("The parameter num_words for show_topic() would be deprecated in the updated version.")
+            logger.warning("Please use topn instead.")
+            topn = num_words
+
         if not self.m_status_up_to_date:
             self.update_expectations()
         betas = self.m_lambda + self.m_eta
         hdp_formatter = HdpTopicFormatter(self.id2word, betas)
-        return hdp_formatter.show_topic(topic_id, num_words, log, formatted)
-        
+        return hdp_formatter.show_topic(topic_id, topn, log, formatted)
+
     def show_topics(self, num_topics=20, num_words=20, log=False, formatted=True):
         """
         Print the `num_words` most probable words for `num_topics` number of topics.
@@ -608,10 +612,19 @@ def show_topics(self, num_topics=10, num_words=10, log=False, formatted=True):
 
         return shown
 
-    def print_topic(self, topic_id, num_words):
-        return self.show_topic(topic_id, num_words, formatted=True)
+    def print_topic(self, topic_id, topn= None, num_words=None):
+        if num_words is not None:  # deprecated num_words is used
+            logger.warning("The parameter num_words for print_topic() would be deprecated in the updated version.")
+            logger.warning("Please use topn instead.")
+            topn = num_words
+
+        return self.show_topic(topic_id, topn, formatted=True)
 
-    def show_topic(self, topic_id, num_words, log=False, formatted=False):
+    def show_topic(self, topic_id, topn=20, log=False, formatted=False, num_words= None,):
+        if num_words is not None:  # deprecated num_words is used
+            logger.warning("The parameter num_words for show_topic() would be deprecated in the updated version.")
+            logger.warning("Please use topn instead.")
+            topn = num_words
 
         lambdak = list(self.data[topic_id, :])
         lambdak = lambdak / sum(lambdak)
@@ -619,7 +632,7 @@ def show_topic(self, topic_id, num_words, log=False, formatted=False):
         temp = zip(lambdak, xrange(len(lambdak)))
         temp = sorted(temp, key=lambda x: x[0], reverse=True)
 
-        topic_terms = self.show_topic_terms(temp, num_words)
+        topic_terms = self.show_topic_terms(temp, topn)
 
         if formatted:
             topic = self.format_topic(topic_id, topic_terms)
diff --git a/gensim/models/wrappers/dtmmodel.py b/gensim/models/wrappers/dtmmodel.py
index a953ce858a..f9cb19362a 100644
--- a/gensim/models/wrappers/dtmmodel.py
+++ b/gensim/models/wrappers/dtmmodel.py
@@ -283,12 +283,17 @@ def show_topics(self, num_topics=10, times=5, num_words=10, log=False, formatted
                 #     topic))
         return shown
 
-    def show_topic(self, topicid, time, num_words=50):
+    def show_topic(self, topicid, time, topn=50, num_words=None):
         """
         Return `num_words` most probable words for the given `topicid`, as a list of
         `(word_probability, word)` 2-tuples.
 
         """
+        if num_words is not None:  # deprecated num_words is used
+            logger.warning("The parameter num_words for show_topic() would be deprecated in the updated version.")
+            logger.warning("Please use topn instead.")
+            topn = num_words
+
         topics = self.lambda_[:, :, time]
         topic = topics[topicid]
         # liklihood to probability
@@ -296,13 +301,18 @@ def show_topic(self, topicid, time, num_words=50):
         # normalize to probability dist
         topic = topic / topic.sum()
         # sort according to prob
-        bestn = matutils.argsort(topic, num_words, reverse=True)
+        bestn = matutils.argsort(topic, topn, reverse=True)
         beststr = [(topic[id], self.id2word[id]) for id in bestn]
         return beststr
 
-    def print_topic(self, topicid, time, num_words=10):
+    def print_topic(self, topicid, time, topn=10, num_words=None):
         """Return the given topic, formatted as a string."""
-        return ' + '.join(['%.3f*%s' % v for v in self.show_topic(topicid, time, num_words)])
+        if num_words is not None:  # deprecated num_words is used
+            logger.warning("The parameter num_words for print_topic(() would be deprecated in the updated version.")
+            logger.warning("Please use topn instead.")
+            topn = num_words
+
+        return ' + '.join(['%.3f*%s' % v for v in self.show_topic(topicid, time, topn)])
 
     def dtm_vis(self, corpus, time):
         """
diff --git a/gensim/models/wrappers/ldamallet.py b/gensim/models/wrappers/ldamallet.py
index fb9ae1e31d..b52de2e3f0 100644
--- a/gensim/models/wrappers/ldamallet.py
+++ b/gensim/models/wrappers/ldamallet.py
@@ -240,14 +240,19 @@ def show_topics(self, num_topics=10, num_words=10, log=False, formatted=True):
                 logger.info("topic #%i (%.3f): %s", i, self.alpha[i], topic)
         return shown
 
-    def show_topic(self, topicid, num_words=10):
+    def show_topic(self, topicid, topn=10, num_words=None):
+        if num_words is not None:  # deprecated num_words is used
+            logger.warning("The parameter num_words for show_topic() would be deprecated in the updated version.")
+            logger.warning("Please use topn instead.")
+            topn = num_words
+
         if self.word_topics is None:
             logger.warning(
                 "Run train or load_word_topics before showing topics."
             )
         topic = self.word_topics[topicid]
         topic = topic / topic.sum()  # normalize to probability dist
-        bestn = matutils.argsort(topic, num_words, reverse=True)
+        bestn = matutils.argsort(topic, topn, reverse=True)
         beststr = [(self.id2word[id], topic[id]) for id in bestn]
         return beststr