Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

gensim models show_topic/print_topic parameter num_words changed to topn to match other topic models #1200

Merged
merged 36 commits into from
May 23, 2017
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
Show all changes
36 commits
Select commit Hold shift + click to select a range
1c63c9a
Merge branch 'release-0.12.3rc1'
tmylk Nov 5, 2015
280a488
Merge branch 'release-0.12.3'
tmylk Nov 6, 2015
ddeb002
Merge branch 'release-0.12.3'
tmylk Nov 6, 2015
f2ac3a9
Update CHANGELOG.txt
tmylk Nov 6, 2015
cf09e8c
Update CHANGELOG.txt
tmylk Nov 6, 2015
b61287a
resolve merge conflict in Changelog
tmylk Jan 29, 2016
3ade404
Merge branch 'release-0.12.4' with #596
tmylk Jan 31, 2016
9e6522e
Merge branch 'release-0.13.0'
tmylk Jun 10, 2016
87c4e9c
Merge branch 'release-0.13.0'
tmylk Jun 10, 2016
9c74b40
Release version typo fix
tmylk Jun 10, 2016
7b30025
Merge branch 'release-0.13.0rc1'
tmylk Jun 10, 2016
de79c8e
Merge branch 'release-0.13.0'
tmylk Jun 22, 2016
d4f9cc5
Merge branch 'release-0.13.1'
tmylk Jun 23, 2016
d8e9c0f
Merge branch 'release-0.13.2'
tmylk Aug 26, 2016
7c118fc
Merge branch 'release-0.13.2'
tmylk Aug 26, 2016
432f840
Merge branch 'release-0.13.3'
tmylk Oct 20, 2016
b42e181
Merge branch 'release-0.13.3'
tmylk Oct 21, 2016
3067cb0
Win and OSX build fix
tmylk Oct 21, 2016
e838391
Merge branch 'release-0.13.4'
tmylk Dec 25, 2016
5d47ec4
Merge branch 'release-0.13.4.1'
tmylk Jan 4, 2017
a18de8d
Merge branch 'release-1.0.0rc1'
tmylk Jan 31, 2017
67b1a17
Typo in version
tmylk Jan 31, 2017
df13670
Fix merge conflict
tmylk Feb 17, 2017
78da89a
Merge branch 'release-1.0.0'
tmylk Feb 24, 2017
fb3f303
Merge branch 'release-1.0.1'
tmylk Mar 3, 2017
adc447d
Merge branch 'release-1.0.1'
tmylk Mar 3, 2017
333fd4d
Merge branch 'release-1.0.1'
tmylk Mar 3, 2017
61dc832
show_topic parameter num_words changed to topn
prakhar2b Mar 9, 2017
5f71f66
hdpmodel topn/num_words conflict resolved
prakhar2b May 2, 2017
b3d210c
dtmmodel topn/show_topic conflict resolved
prakhar2b May 2, 2017
c7f9824
ldamallet topn/num_words conflict resolved
prakhar2b May 2, 2017
42ac76d
whitespace error resolved
prakhar2b May 2, 2017
113a0af
whitespace error resolved
prakhar2b May 2, 2017
51683f1
split multi-line comments in hdpmodel
prakhar2b May 20, 2017
29ab15f
splitting multi-line comments in dtmmodel
prakhar2b May 20, 2017
f949ce6
splitting multi-line comments for ldamallet
prakhar2b May 20, 2017
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
29 changes: 21 additions & 8 deletions gensim/models/hdpmodel.py
Original file line number Diff line number Diff line change
Expand Up @@ -47,7 +47,6 @@
meanchangethresh = 0.00001
rhot_bound = 0.0


def expect_log_sticks(sticks):
"""
For stick-breaking hdp, return the E[log(sticks)]
Expand Down Expand Up @@ -436,7 +435,7 @@ def update_expectations(self):
self.m_timestamp[:] = self.m_updatect
self.m_status_up_to_date = True

def show_topic(self, topic_id, num_words=20, log=False, formatted=False):
def show_topic(self, topic_id, topn=20, log=False, formatted=False, num_words=None):
"""
Print the `num_words` most probable words for `topics` number of topics.
Set `topics=-1` to print all topics.
Expand All @@ -445,12 +444,17 @@ def show_topic(self, topic_id, num_words=20, log=False, formatted=False):
`False` as lists of (weight, word) pairs.

"""
if num_words is not None: # deprecated num_words is used
logger.warning("The parameter num_words for show_topic() would be deprecated in the updated version.")
logger.warning("Please use topn instead.")
topn = num_words

if not self.m_status_up_to_date:
self.update_expectations()
betas = self.m_lambda + self.m_eta
hdp_formatter = HdpTopicFormatter(self.id2word, betas)
return hdp_formatter.show_topic(topic_id, num_words, log, formatted)
return hdp_formatter.show_topic(topic_id, topn, log, formatted)

def show_topics(self, num_topics=20, num_words=20, log=False, formatted=True):
"""
Print the `num_words` most probable words for `topics` number of topics.
Expand Down Expand Up @@ -609,18 +613,27 @@ def show_topics(self, num_topics=10, num_words=10, log=False, formatted=True):

return shown

def print_topic(self, topic_id, num_words):
return self.show_topic(topic_id, num_words, formatted=True)
def print_topic(self, topic_id, topn= None, num_words=None):
if num_words is not None: # deprecated num_words is used
logger.warning("The parameter num_words for print_topic() would be deprecated in the updated version.")
Copy link
Owner

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Should be warnings.warn, not a logging message (will spam logs).

logger.warning("Please use topn instead.")
Copy link
Owner

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

No need for two messages, one warning is enough (concatenate the messages).

topn = num_words

return self.show_topic(topic_id, topn, formatted=True)

def show_topic(self, topic_id, num_words, log=False, formatted=False):
def show_topic(self, topic_id, topn=20, log=False, formatted=False, num_words= None,):
if num_words is not None: # deprecated num_words is used
logger.warning("The parameter num_words for show_topic() would be deprecated in the updated version.")
Copy link
Owner

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Ditto: this should be warnings.warn, not a logging message.

logger.warning("Please use topn instead.")
topn = num_words

lambdak = list(self.data[topic_id, :])
lambdak = lambdak / sum(lambdak)

temp = zip(lambdak, xrange(len(lambdak)))
temp = sorted(temp, key=lambda x: x[0], reverse=True)

topic_terms = self.show_topic_terms(temp, num_words)
topic_terms = self.show_topic_terms(temp, topn)

if formatted:
topic = self.format_topic(topic_id, topic_terms)
Expand Down
18 changes: 14 additions & 4 deletions gensim/models/wrappers/dtmmodel.py
Original file line number Diff line number Diff line change
Expand Up @@ -283,26 +283,36 @@ def show_topics(self, num_topics=10, times=5, num_words=10, log=False, formatted
# topic))
return shown

def show_topic(self, topicid, time, num_words=50):
def show_topic(self, topicid, time, topn=50, num_words=None):
"""
Return `num_words` most probable words for the given `topicid`, as a list of
`(word_probability, word)` 2-tuples.

"""
if num_words is not None: # deprecated num_words is used
logger.warning("The parameter num_words for show_topic() would be deprecated in the updated version.")
logger.warning("Please use topn instead.")
topn = num_words

topics = self.lambda_[:, :, time]
topic = topics[topicid]
# liklihood to probability
topic = np.exp(topic)
# normalize to probability dist
topic = topic / topic.sum()
# sort according to prob
bestn = matutils.argsort(topic, num_words, reverse=True)
bestn = matutils.argsort(topic, topn, reverse=True)
beststr = [(topic[id], self.id2word[id]) for id in bestn]
return beststr

def print_topic(self, topicid, time, num_words=10):
def print_topic(self, topicid, time, topn=10, num_words=None):
"""Return the given topic, formatted as a string."""
return ' + '.join(['%.3f*%s' % v for v in self.show_topic(topicid, time, num_words)])
if num_words is not None: # deprecated num_words is used
logger.warning("The parameter num_words for print_topic(() would be deprecated in the updated version.")
Copy link
Owner

@piskvorky piskvorky May 27, 2017

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Ditto.

Also, there is one opening bracket too many in the message: "print_topic(()" should be "print_topic()".

logger.warning("Please use topn instead.")
topn = num_words

return ' + '.join(['%.3f*%s' % v for v in self.show_topic(topicid, time, topn)])

def dtm_vis(self, corpus, time):
"""
Expand Down
9 changes: 7 additions & 2 deletions gensim/models/wrappers/ldamallet.py
Original file line number Diff line number Diff line change
Expand Up @@ -240,12 +240,17 @@ def show_topics(self, num_topics=10, num_words=10, log=False, formatted=True):
logger.info("topic #%i (%.3f): %s", i, self.alpha[i], topic)
return shown

def show_topic(self, topicid, num_words=10):
def show_topic(self, topicid, topn=10, num_words=None):
if num_words is not None: # deprecated num_words is used
logger.warning("The parameter num_words for show_topic() would be deprecated in the updated version.")
logger.warning("Please use topn instead.")
topn = num_words

if self.word_topics is None:
logger.warn("Run train or load_word_topics before showing topics.")
topic = self.word_topics[topicid]
topic = topic / topic.sum() # normalize to probability dist
bestn = matutils.argsort(topic, num_words, reverse=True)
bestn = matutils.argsort(topic, topn, reverse=True)
beststr = [(self.id2word[id], topic[id]) for id in bestn]
return beststr

Expand Down