-
-
Notifications
You must be signed in to change notification settings - Fork 4.4k
New issue
Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.
By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.
Already on GitHub? Sign in to your account
Fix datatype parameter for KeyedVectors.load_word2vec_format. Fix #1682
#1819
Changes from 16 commits
b923043
35a8f8a
aaa7c2a
a8f44c5
37b39f4
8e095d7
310690d
de98f2e
805daf6
466f37f
a76aec6
049fb91
c157d79
164cf63
991bcb6
0904460
96d8aa5
17f6b39
6f53175
File filter
Filter by extension
Conversations
Jump to
Diff view
Diff view
There are no files selected for viewing
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,3 @@ | ||
2 2 | ||
kangaroo.n.01 -0.0007369244245224787 -8.269973595356034e-05 | ||
horse.n.01 -0.0008546282343595379 0.0007694142576316829 |
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,52 @@ | ||
#!/usr/bin/env python | ||
# -*- coding: utf-8 -*- | ||
# | ||
# Licensed under the GNU LGPL v2.1 - http://www.gnu.org/licenses/lgpl.html | ||
|
||
""" | ||
Automated tests for checking the datatype parameter of KeyedVectors.load_word2vec_format. | ||
""" | ||
|
||
import logging | ||
import unittest | ||
|
||
import numpy as np | ||
|
||
from gensim.test.utils import datapath | ||
from gensim.models.keyedvectors import KeyedVectors | ||
|
||
|
||
class TestDataType(unittest.TestCase): | ||
def load_model(self, datatype): | ||
path = datapath('test.kv.txt') | ||
There was a problem hiding this comment. Choose a reason for hiding this comment. The reason will be displayed to describe this comment to others. Learn more. A slightly more descriptive name would be helpful; there's already a lot of test data, and it can easily get confusing. |
||
kv = KeyedVectors.load_word2vec_format(path, binary=False, | ||
There was a problem hiding this comment. Choose a reason for hiding this comment. The reason will be displayed to describe this comment to others. Learn more. Hanging indent please (not vertical). There was a problem hiding this comment. Choose a reason for hiding this comment. The reason will be displayed to describe this comment to others. Learn more. I guess you are talking about lines 22-23. I have merged them. |
||
datatype=datatype) | ||
return kv | ||
|
||
def test_high_precision(self): | ||
kv = self.load_model(np.float64) | ||
self.assertAlmostEqual(kv['horse.n.01'][0], -0.0008546282343595379) | ||
self.assertEqual(kv['horse.n.01'][0].dtype, np.float64) | ||
|
||
def test_medium_precision(self): | ||
kv = self.load_model(np.float32) | ||
self.assertAlmostEqual(kv['horse.n.01'][0], -0.00085462822) | ||
self.assertEqual(kv['horse.n.01'][0].dtype, np.float32) | ||
|
||
def test_low_precision(self): | ||
kv = self.load_model(np.float16) | ||
self.assertAlmostEqual(kv['horse.n.01'][0], -0.00085449) | ||
self.assertEqual(kv['horse.n.01'][0].dtype, np.float16) | ||
|
||
def test_type_conversion(self): | ||
path = datapath('test.kv.txt') | ||
binary_path = datapath('test.kv.bin') | ||
model1 = KeyedVectors.load_word2vec_format(path, datatype=np.float16) | ||
model1.save_word2vec_format(binary_path, binary=True) | ||
model2 = KeyedVectors.load_word2vec_format(binary_path, datatype=np.float64, binary=True) | ||
self.assertAlmostEqual(model1["horse.n.01"][0], np.float16(model2["horse.n.01"][0])) | ||
|
||
There was a problem hiding this comment. Choose a reason for hiding this comment. The reason will be displayed to describe this comment to others. Learn more. Another test to verify that the |
||
|
||
if __name__ == '__main__': | ||
logging.root.setLevel(logging.WARNING) | ||
unittest.main() |
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
Please add a file header, like in the other test files.