gensim models show_topic/print_topic parameter num_words changed to topn to match other topic models. Backwards compatible (#1200)

* Update CHANGELOG.txt * Update CHANGELOG.txt * Release version typo fix * Typo in version * show_topic parameter num_words changed to topn show_topic parameter num_words changed to topn in order to make it consistent with LdaModel show_topic parameter num_words changed to topn both old and new param with deprecation warning ldamallet now supports both num_words and topn parameters for show_topic with deprecation warning for the num_words. hdpmodel show_topic supports old and new param show_topic in hdpmodel now supports both num_words and topn parameters to make it consistent across all models, with deprecation warning for num_words dtmmodel topn/num_words with deprecation warning Inconsistency between api and code removed for topn/num_words by adding support for both params with proper deprecation warning hdpmodel show_topic supports old and new param show_topic in hdpmodel now supports both num_words and topn parameters to make it consistent across all models, with deprecation warning for num_words - checks should pass this time hdpmodel show_topic supports old and new param dtmmodel topn/num_words with deprecation warning ldamallet show_topic param fixed ldamallet now supports both num_words and topn parameters for show_topic with deprecation warning for the num_words. dtmmodel topn/num_words with deprecation warning dtmmodel is now compatible with both topn/num_words parameters for show_topic and others with proper deprecation warnings.
hdpmodel num_words changed to topn with deprecation warning To make the code consistent with the api- parameters num_words changed to topn (for print_topic/show_topic method), with deprecation warning for num_words hdpmodel num_words changed to topn with deprecation warning To make the code consistent with the api- parameters num_words changed to topn (for print_topic/show_topic method), with deprecation warning for num_words hdpmodel num_words changed to topn with deprecation warning To make the code consistent with the api- parameters num_words changed to topn (for print_topic/show_topic method), with deprecation warning for num_words dtmmodel num_words changed to topn with deprecation warning To make the code consistent with the api- parameters num_words changed to topn (for print_topic/show_topic method), with deprecation warning for num_words ldamallet num_words changed to topn with deprecation warning To make the code consistent with the api- parameters num_words changed to topn (for print_topic/show_topic method), with deprecation warning for num_words hdpmodel num_words changed to topn with deprecation warning To make the code consistent with the api- parameters num_words changed to topn (for print_topic/show_topic method), with deprecation warning for num_words ldamallet num_words changed to topn with deprecation warning To make the code consistent with the api- parameters num_words changed to topn (for print_topic/show_topic method), with deprecation warning for num_words * hdpmodel topn/num_words conflict resolved * dtmmodel topn/show_topic conflict resolved * ldamallet topn/num_words conflict resolved * whitespace error resolved * whitespace error resolved * split multi-line comments in hdpmodel * splitting multi-line comments in dtmmodel * splitting multi-line comments for ldamallet
piskvorky · May 23, 2017 · 834e130 · 834e130
1 parent 5242a32
commit 834e130
Show file tree

Hide file tree

Showing 3 changed files with 42 additions and 14 deletions.
diff --git a/gensim/models/hdpmodel.py b/gensim/models/hdpmodel.py
@@ -47,7 +47,6 @@
 meanchangethresh = 0.00001
 rhot_bound = 0.0
 
-
 def expect_log_sticks(sticks):
     """
     For stick-breaking hdp, return the E[log(sticks)]
@@ -436,20 +435,25 @@ def update_expectations(self):
         self.m_timestamp[:] = self.m_updatect
         self.m_status_up_to_date = True
 
-    def show_topic(self, topic_id, num_words=20, log=False, formatted=False):
+    def show_topic(self, topic_id, topn=20, log=False, formatted=False, num_words=None):
         """
         Print the `num_words` most probable words for topic `topic_id`.
 
         Set `formatted=True` to return the topics as a list of strings, or
         `False` as lists of (weight, word) pairs.
 
         """
+        if num_words is not None:  # deprecated num_words is used
+            logger.warning("The parameter num_words for show_topic() would be deprecated in the updated version.")
+            logger.warning("Please use topn instead.")
+            topn = num_words
+
         if not self.m_status_up_to_date:
             self.update_expectations()
         betas = self.m_lambda + self.m_eta
         hdp_formatter = HdpTopicFormatter(self.id2word, betas)
-        return hdp_formatter.show_topic(topic_id, num_words, log, formatted)
-        
+        return hdp_formatter.show_topic(topic_id, topn, log, formatted)
+
     def show_topics(self, num_topics=20, num_words=20, log=False, formatted=True):
         """
         Print the `num_words` most probable words for `num_topics` number of topics.
@@ -608,18 +612,27 @@ def show_topics(self, num_topics=10, num_words=10, log=False, formatted=True):
 
         return shown
 
-    def print_topic(self, topic_id, num_words):
-        return self.show_topic(topic_id, num_words, formatted=True)
+    def print_topic(self, topic_id, topn= None, num_words=None):
+        if num_words is not None:  # deprecated num_words is used
+            logger.warning("The parameter num_words for print_topic() would be deprecated in the updated version.")
+            logger.warning("Please use topn instead.")
+            topn = num_words
+
+        return self.show_topic(topic_id, topn, formatted=True)
 
-    def show_topic(self, topic_id, num_words, log=False, formatted=False):
+    def show_topic(self, topic_id, topn=20, log=False, formatted=False, num_words= None,):
+        if num_words is not None:  # deprecated num_words is used
+            logger.warning("The parameter num_words for show_topic() would be deprecated in the updated version.")
+            logger.warning("Please use topn instead.")
+            topn = num_words
 
         lambdak = list(self.data[topic_id, :])
         lambdak = lambdak / sum(lambdak)
 
         temp = zip(lambdak, xrange(len(lambdak)))
         temp = sorted(temp, key=lambda x: x[0], reverse=True)
 
-        topic_terms = self.show_topic_terms(temp, num_words)
+        topic_terms = self.show_topic_terms(temp, topn)
 
         if formatted:
             topic = self.format_topic(topic_id, topic_terms)

diff --git a/gensim/models/wrappers/dtmmodel.py b/gensim/models/wrappers/dtmmodel.py
@@ -283,26 +283,36 @@ def show_topics(self, num_topics=10, times=5, num_words=10, log=False, formatted
                 #     topic))
         return shown
 
-    def show_topic(self, topicid, time, num_words=50):
+    def show_topic(self, topicid, time, topn=50, num_words=None):
         """
         Return `num_words` most probable words for the given `topicid`, as a list of
         `(word_probability, word)` 2-tuples.
 
         """
+        if num_words is not None:  # deprecated num_words is used
+            logger.warning("The parameter num_words for show_topic() would be deprecated in the updated version.")
+            logger.warning("Please use topn instead.")
+            topn = num_words
+
         topics = self.lambda_[:, :, time]
         topic = topics[topicid]
         # liklihood to probability
         topic = np.exp(topic)
         # normalize to probability dist
         topic = topic / topic.sum()
         # sort according to prob
-        bestn = matutils.argsort(topic, num_words, reverse=True)
+        bestn = matutils.argsort(topic, topn, reverse=True)
         beststr = [(topic[id], self.id2word[id]) for id in bestn]
         return beststr
 
-    def print_topic(self, topicid, time, num_words=10):
+    def print_topic(self, topicid, time, topn=10, num_words=None):
         """Return the given topic, formatted as a string."""
-        return ' + '.join(['%.3f*%s' % v for v in self.show_topic(topicid, time, num_words)])
+        if num_words is not None:  # deprecated num_words is used
+            logger.warning("The parameter num_words for print_topic(() would be deprecated in the updated version.")
+            logger.warning("Please use topn instead.")
+            topn = num_words
+
+        return ' + '.join(['%.3f*%s' % v for v in self.show_topic(topicid, time, topn)])
 
     def dtm_vis(self, corpus, time):
         """

diff --git a/gensim/models/wrappers/ldamallet.py b/gensim/models/wrappers/ldamallet.py
@@ -240,14 +240,19 @@ def show_topics(self, num_topics=10, num_words=10, log=False, formatted=True):
                 logger.info("topic #%i (%.3f): %s", i, self.alpha[i], topic)
         return shown
 
-    def show_topic(self, topicid, num_words=10):
+    def show_topic(self, topicid, topn=10, num_words=None):
+        if num_words is not None:  # deprecated num_words is used
+            logger.warning("The parameter num_words for show_topic() would be deprecated in the updated version.")
+            logger.warning("Please use topn instead.")
+            topn = num_words
+
         if self.word_topics is None:
             logger.warning(
                 "Run train or load_word_topics before showing topics."
             )
         topic = self.word_topics[topicid]
         topic = topic / topic.sum()  # normalize to probability dist
-        bestn = matutils.argsort(topic, num_words, reverse=True)
+        bestn = matutils.argsort(topic, topn, reverse=True)
         beststr = [(self.id2word[id], topic[id]) for id in bestn]
         return beststr