Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

LdaMallet Fixes #771

Merged
merged 3 commits into from
Jul 5, 2016
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
2 changes: 2 additions & 0 deletions CHANGELOG.md
Original file line number Diff line number Diff line change
Expand Up @@ -2,6 +2,8 @@ Changes
=======
0.13.2

* `wordtopics` has been renamed to `word_topics` in ldamallet, fixing issue #764. (@bhargavvader, #771)
    - `wordtopics` is assigned the value of `word_topics` to keep backward compatibility, for now
* `topics` and `topn` parameters changed to `num_topics` and `num_words` in show_topics() and print_topics() (@droudy, #747)
- In hdpmodel and dtmmodel
- NOT BACKWARDS COMPATIBLE!
Expand Down
16 changes: 9 additions & 7 deletions gensim/models/wrappers/ldamallet.py
Original file line number Diff line number Diff line change
Expand Up @@ -165,6 +165,9 @@ def train(self, corpus):
logger.info("training MALLET LDA with %s", cmd)
check_output(cmd, shell=True)
self.word_topics = self.load_word_topics()
# NOTE - we are still keeping the wordtopics variable to not break backward compatibility.
# word_topics has replaced wordtopics throughout the code; wordtopics just stores the values of word_topics when train is called.
self.wordtopics = self.word_topics

def __getitem__(self, bow, iterations=100):
is_corpus, corpus = utils.is_corpus(bow)
Expand Down Expand Up @@ -200,7 +203,6 @@ def load_word_topics(self):
continue
tokenid = word2id[token]
word_topics[int(topic), tokenid] += 1.0
logger.info("loaded assigned topics for %i tokens", word_topics.sum())
self.print_topics(15)
return word_topics

Expand Down Expand Up @@ -233,25 +235,25 @@ def show_topics(self, num_topics=10, num_words=10, log=False, formatted=True):
shown = []
for i in chosen_topics:
if formatted:
topic = self.print_topic(i, topn=num_words)
topic = self.print_topic(i, num_words=num_words)
else:
topic = self.show_topic(i, topn=num_words)
topic = self.show_topic(i, num_words=num_words)
shown.append(topic)
if log:
logger.info("topic #%i (%.3f): %s", i, self.alpha[i], topic)
return shown

def show_topic(self, topicid, topn=10):
def show_topic(self, topicid, num_words=10):
if self.word_topics is None:
logger.warn("Run train or load_word_topics before showing topics.")
topic = self.word_topics[topicid]
topic = topic / topic.sum() # normalize to probability dist
bestn = matutils.argsort(topic, topn, reverse=True)
bestn = matutils.argsort(topic, num_words, reverse=True)
beststr = [(topic[id], self.id2word[id]) for id in bestn]
return beststr

def print_topic(self, topicid, topn=10):
return ' + '.join(['%.3f*%s' % v for v in self.show_topic(topicid, topn)])
def print_topic(self, topicid, num_words=10):
return ' + '.join(['%.3f*%s' % v for v in self.show_topic(topicid, num_words)])


def get_version(self, direc_path):
Expand Down