-
-
Notifications
You must be signed in to change notification settings - Fork 4.4k
New issue
Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.
By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.
Already on GitHub? Sign in to your account
gensim models show_topic/print_topic parameter num_words changed to topn to match other topic models #1200
gensim models show_topic/print_topic parameter num_words changed to topn to match other topic models #1200
Changes from all commits
1c63c9a
280a488
ddeb002
f2ac3a9
cf09e8c
b61287a
3ade404
9e6522e
87c4e9c
9c74b40
7b30025
de79c8e
d4f9cc5
d8e9c0f
7c118fc
432f840
b42e181
3067cb0
e838391
5d47ec4
a18de8d
67b1a17
df13670
78da89a
fb3f303
adc447d
333fd4d
61dc832
5f71f66
b3d210c
c7f9824
42ac76d
113a0af
51683f1
29ab15f
f949ce6
File filter
Filter by extension
Conversations
Jump to
Diff view
Diff view
There are no files selected for viewing
Original file line number | Diff line number | Diff line change |
---|---|---|
|
@@ -47,7 +47,6 @@ | |
meanchangethresh = 0.00001 | ||
rhot_bound = 0.0 | ||
|
||
|
||
def expect_log_sticks(sticks): | ||
""" | ||
For stick-breaking hdp, return the E[log(sticks)] | ||
|
@@ -436,7 +435,7 @@ def update_expectations(self): | |
self.m_timestamp[:] = self.m_updatect | ||
self.m_status_up_to_date = True | ||
|
||
def show_topic(self, topic_id, num_words=20, log=False, formatted=False): | ||
def show_topic(self, topic_id, topn=20, log=False, formatted=False, num_words=None): | ||
""" | ||
Print the `num_words` most probable words for `topics` number of topics. | ||
Set `topics=-1` to print all topics. | ||
|
@@ -445,12 +444,17 @@ def show_topic(self, topic_id, num_words=20, log=False, formatted=False): | |
`False` as lists of (weight, word) pairs. | ||
|
||
""" | ||
if num_words is not None: # deprecated num_words is used | ||
logger.warning("The parameter num_words for show_topic() would be deprecated in the updated version.") | ||
logger.warning("Please use topn instead.") | ||
topn = num_words | ||
|
||
if not self.m_status_up_to_date: | ||
self.update_expectations() | ||
betas = self.m_lambda + self.m_eta | ||
hdp_formatter = HdpTopicFormatter(self.id2word, betas) | ||
return hdp_formatter.show_topic(topic_id, num_words, log, formatted) | ||
return hdp_formatter.show_topic(topic_id, topn, log, formatted) | ||
|
||
def show_topics(self, num_topics=20, num_words=20, log=False, formatted=True): | ||
""" | ||
Print the `num_words` most probable words for `topics` number of topics. | ||
|
@@ -609,18 +613,27 @@ def show_topics(self, num_topics=10, num_words=10, log=False, formatted=True): | |
|
||
return shown | ||
|
||
def print_topic(self, topic_id, num_words): | ||
return self.show_topic(topic_id, num_words, formatted=True) | ||
def print_topic(self, topic_id, topn= None, num_words=None): | ||
if num_words is not None: # deprecated num_words is used | ||
logger.warning("The parameter num_words for print_topic() would be deprecated in the updated version.") | ||
logger.warning("Please use topn instead.") | ||
There was a problem hiding this comment. Choose a reason for hiding this comment. The reason will be displayed to describe this comment to others. Learn more. No need for two messages; one warning is enough (concatenate the messages). |
||
topn = num_words | ||
|
||
return self.show_topic(topic_id, topn, formatted=True) | ||
|
||
def show_topic(self, topic_id, num_words, log=False, formatted=False): | ||
def show_topic(self, topic_id, topn=20, log=False, formatted=False, num_words= None,): | ||
if num_words is not None: # deprecated num_words is used | ||
logger.warning("The parameter num_words for show_topic() would be deprecated in the updated version.") | ||
There was a problem hiding this comment. Choose a reason for hiding this comment. The reason will be displayed to describe this comment to others. Learn more. Ditto. |
||
logger.warning("Please use topn instead.") | ||
topn = num_words | ||
|
||
lambdak = list(self.data[topic_id, :]) | ||
lambdak = lambdak / sum(lambdak) | ||
|
||
temp = zip(lambdak, xrange(len(lambdak))) | ||
temp = sorted(temp, key=lambda x: x[0], reverse=True) | ||
|
||
topic_terms = self.show_topic_terms(temp, num_words) | ||
topic_terms = self.show_topic_terms(temp, topn) | ||
|
||
if formatted: | ||
topic = self.format_topic(topic_id, topic_terms) | ||
|
Original file line number | Diff line number | Diff line change |
---|---|---|
|
@@ -283,26 +283,36 @@ def show_topics(self, num_topics=10, times=5, num_words=10, log=False, formatted | |
# topic)) | ||
return shown | ||
|
||
def show_topic(self, topicid, time, num_words=50): | ||
def show_topic(self, topicid, time, topn=50, num_words=None): | ||
""" | ||
Return `num_words` most probable words for the given `topicid`, as a list of | ||
`(word_probability, word)` 2-tuples. | ||
|
||
""" | ||
if num_words is not None: # deprecated num_words is used | ||
logger.warning("The parameter num_words for show_topic() would be deprecated in the updated version.") | ||
logger.warning("Please use topn instead.") | ||
topn = num_words | ||
|
||
topics = self.lambda_[:, :, time] | ||
topic = topics[topicid] | ||
# liklihood to probability | ||
topic = np.exp(topic) | ||
# normalize to probability dist | ||
topic = topic / topic.sum() | ||
# sort according to prob | ||
bestn = matutils.argsort(topic, num_words, reverse=True) | ||
bestn = matutils.argsort(topic, topn, reverse=True) | ||
beststr = [(topic[id], self.id2word[id]) for id in bestn] | ||
return beststr | ||
|
||
def print_topic(self, topicid, time, num_words=10): | ||
def print_topic(self, topicid, time, topn=10, num_words=None): | ||
"""Return the given topic, formatted as a string.""" | ||
return ' + '.join(['%.3f*%s' % v for v in self.show_topic(topicid, time, num_words)]) | ||
if num_words is not None: # deprecated num_words is used | ||
logger.warning("The parameter num_words for print_topic(() would be deprecated in the updated version.") | ||
There was a problem hiding this comment. Choose a reason for hiding this comment. The reason will be displayed to describe this comment to others. Learn more. Ditto. Also, too many opening brackets. |
||
logger.warning("Please use topn instead.") | ||
topn = num_words | ||
|
||
return ' + '.join(['%.3f*%s' % v for v in self.show_topic(topicid, time, topn)]) | ||
|
||
def dtm_vis(self, corpus, time): | ||
""" | ||
|
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
Should be `warnings.warn`, not a logging message (will spam logs).