-
-
Notifications
You must be signed in to change notification settings - Fork 4.4k
New issue
Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.
By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.
Already on GitHub? Sign in to your account
Creating unified base class for all topic models, #938 #946
Changes from all commits
05edaf2
48396c1
e673305
de80720
272d7fc
f4f92d3
f9d28a0
0b47408
ac4e93b
098be5f
a215aed
6de2121
File filter
Filter by extension
Conversations
Jump to
Diff view
Diff view
There are no files selected for viewing
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,16 @@ | ||
class BaseTopicModel(): | ||
There was a problem hiding this comment. Choose a reason for hiding this comment. The reason will be displayed to describe this comment to others. Learn more. All Python classes must inherit from object (new-style classes). There was a problem hiding this comment. Choose a reason for hiding this comment. The reason will be displayed to describe this comment to others. Learn more. done. |
||
def print_topic(self, topicno, topn=10): | ||
""" | ||
Return a single topic as a formatted string. See `show_topic()` for parameters. | ||
|
||
>>> lsimodel.print_topic(10, topn=5) | ||
'-0.340*"category" + 0.298*"$M$" + 0.183*"algebra" + -0.174*"functor" + -0.168*"operator"' | ||
|
||
""" | ||
return ' + '.join(['%.3f*"%s"' % (v, k) for k, v in self.show_topic(topicno, topn)]) | ||
|
||
def print_topics(self, num_topics=20, num_words=10): | ||
"""Alias for `show_topics()` that prints the `num_words` most | ||
probable words for `num_topics` number of topics to log. | ||
Set `num_topics=-1` to print all topics.""" | ||
return self.show_topics(num_topics=num_topics, num_words=num_words, log=True) |
Original file line number | Diff line number | Diff line change |
---|---|---|
|
@@ -38,6 +38,7 @@ | |
import scipy.special as sp | ||
|
||
from gensim import interfaces, utils, matutils | ||
from gensim.models import basemodel | ||
from six.moves import xrange | ||
|
||
logger = logging.getLogger(__name__) | ||
|
@@ -125,7 +126,7 @@ def set_zero(self): | |
self.m_var_beta_ss.fill(0.0) | ||
|
||
|
||
class HdpModel(interfaces.TransformationABC): | ||
class HdpModel(interfaces.TransformationABC, basemodel.BaseTopicModel): | ||
""" | ||
The constructor estimates Hierarchical Dirichlet Process model parameters based | ||
on a training corpus: | ||
|
@@ -453,12 +454,6 @@ def update_expectations(self): | |
self.m_timestamp[:] = self.m_updatect | ||
self.m_status_up_to_date = True | ||
|
||
def print_topics(self, num_topics=20, num_words=20): | ||
"""Alias for `show_topics()` that prints the `num_words` most | ||
probable words for `topics` number of topics to log. | ||
Set `topics=-1` to print all topics.""" | ||
return self.show_topics(num_topics=num_topics, num_words=num_words, log=True) | ||
|
||
def show_topics(self, num_topics=20, num_words=20, log=False, formatted=True): | ||
""" | ||
Print the `num_words` most probable words for `topics` number of topics. | ||
|
@@ -612,10 +607,9 @@ def show_topic_terms(self, topic_data, num_words): | |
def format_topic(self, topic_id, topic_terms): | ||
if self.STYLE_GENSIM == self.style: | ||
fmt = ' + '.join(['%.3f*%s' % (weight, word) for (word, weight) in topic_terms]) | ||
fmt = 'topic %i: %s' % (topic_id, fmt) | ||
else: | ||
fmt = '\n'.join([' %20s %.8f' % (word, weight) for (word, weight) in topic_terms]) | ||
fmt = 'topic %i:\n%s' % (topic_id, fmt) | ||
|
||
fmt = (topic_id,fmt) | ||
There was a problem hiding this comment. Choose a reason for hiding this comment. The reason will be displayed to describe this comment to others. Learn more. PEP8: space after comma. There was a problem hiding this comment. Choose a reason for hiding this comment. The reason will be displayed to describe this comment to others. Learn more. added. |
||
return fmt | ||
#endclass HdpTopicFormatter | ||
# endclass HdpTopicFormatter | ||
There was a problem hiding this comment. Choose a reason for hiding this comment. The reason will be displayed to describe this comment to others. Learn more. please revert There was a problem hiding this comment. Choose a reason for hiding this comment. The reason will be displayed to describe this comment to others. Learn more. Please revert all changes to HdpTopicFormatter |
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,42 @@ | ||
#!/usr/bin/env python | ||
# -*- coding: utf-8 -*- | ||
# | ||
# Copyright (C) 2010 Radim Rehurek <[email protected]> | ||
# Licensed under the GNU LGPL v2.1 - http://www.gnu.org/licenses/lgpl.html | ||
|
||
""" | ||
Automated tests for checking transformation algorithms (the models package). | ||
""" | ||
|
||
import six | ||
|
||
class TestBaseTopicModel(): | ||
There was a problem hiding this comment. Choose a reason for hiding this comment. The reason will be displayed to describe this comment to others. Learn more. All tests should inherit from `unittest.TestCase`. There was a problem hiding this comment. Choose a reason for hiding this comment. The reason will be displayed to describe this comment to others. Learn more. I don't want it to be a real test so adding |
||
def testPrintTopic(self): | ||
topics = self.model.show_topics(formatted=True) | ||
for topic_no, topic in topics: | ||
self.assertTrue(isinstance(topic_no, int)) | ||
self.assertTrue(isinstance(topic, str) or isinstance(topic, unicode)) | ||
|
||
def testPrintTopics(self): | ||
topics = self.model.print_topics() | ||
|
||
for topic_no, topic in topics: | ||
self.assertTrue(isinstance(topic_no, int)) | ||
self.assertTrue(isinstance(topic, str) or isinstance(topic, unicode)) | ||
|
||
def testShowTopic(self): | ||
topic = self.model.show_topic(1) | ||
|
||
for k, v in topic: | ||
self.assertTrue(isinstance(k, six.string_types)) | ||
self.assertTrue(isinstance(v, float)) | ||
|
||
def testShowTopics(self): | ||
topics = self.model.show_topics(formatted=False) | ||
|
||
for topic_no, topic in topics: | ||
self.assertTrue(isinstance(topic_no, int)) | ||
self.assertTrue(isinstance(topic, list)) | ||
for k, v in topic: | ||
self.assertTrue(isinstance(k, six.string_types)) | ||
self.assertTrue(isinstance(v, float)) |
Original file line number | Diff line number | Diff line change |
---|---|---|
|
@@ -22,6 +22,7 @@ | |
from gensim.corpora import mmcorpus, Dictionary | ||
from gensim.models import hdpmodel | ||
from gensim import matutils | ||
from gensim.test import test_basemodel | ||
|
||
|
||
module_path = os.path.dirname(__file__) # needed because sample data files are located in the same folder | ||
|
@@ -47,24 +48,15 @@ def testfile(): | |
return os.path.join(tempfile.gettempdir(), 'gensim_models.tst') | ||
|
||
|
||
|
||
class TestHdpModel(unittest.TestCase): | ||
class TestHdpModel(unittest.TestCase, test_basemodel.TestBaseTopicModel): | ||
def setUp(self): | ||
self.corpus = mmcorpus.MmCorpus(datapath('testcorpus.mm')) | ||
self.class_ = hdpmodel.HdpModel | ||
self.model = self.class_(corpus, id2word=dictionary) | ||
|
||
There was a problem hiding this comment. Choose a reason for hiding this comment. The reason will be displayed to describe this comment to others. Learn more. please add the override here There was a problem hiding this comment. Choose a reason for hiding this comment. The reason will be displayed to describe this comment to others. Learn more. added. |
||
def testShowTopics(self): | ||
topics = self.model.show_topics(formatted=False, num_topics=20, num_words=20) | ||
|
||
for topic_no, topic in topics: | ||
self.assertTrue(isinstance(topic_no, int)) | ||
self.assertTrue(isinstance(topic, list)) | ||
for k, v in topic: | ||
self.assertTrue(isinstance(k, six.string_types)) | ||
self.assertTrue(isinstance(v, float)) | ||
|
||
|
||
def testShowTopic(self): | ||
# TODO create show_topic in HdpModel and then test | ||
return | ||
|
||
if __name__ == '__main__': | ||
logging.basicConfig(format='%(asctime)s : %(levelname)s : %(message)s', level=logging.DEBUG) | ||
|
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
why these changes?
Expect to see just 1 line added about the new fix.
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
I was fixing my previous hyperlinks, but alright. I'll add just one line no issues.
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
Fixes to your prev links are ok. You can leave them in.