Skip to content

Commit

Permalink
Fix Mallet wrapper and tests for HDPTransform (#1555)
Browse files Browse the repository at this point in the history
* fix type in mallet wrapper

* fix tests for sklearn wrapper

* debug commit for test

* fix seeding and precision

* fix pep8 & try to fix unreproducible error

* debug unreproduced error

* fix test

* remove debug output
  • Loading branch information
menshikh-iv authored Sep 1, 2017
1 parent 26b285e commit 9caf055
Show file tree
Hide file tree
Showing 2 changed files with 11 additions and 11 deletions.
2 changes: 1 addition & 1 deletion gensim/models/wrappers/ldamallet.py
Original file line number Diff line number Diff line change
Expand Up @@ -183,7 +183,7 @@ def __getitem__(self, bow, iterations=100):

def load_word_topics(self):
logger.info("loading assigned topics from %s", self.fstate())
word_topics = numpy.zeros((self.num_topics, self.num_terms), dtype=numpy.float32)
word_topics = numpy.zeros((self.num_topics, self.num_terms), dtype=numpy.float64)
if hasattr(self.id2word, 'token2id'):
word2id = self.id2word.token2id
else:
Expand Down
20 changes: 10 additions & 10 deletions gensim/test/test_sklearn_api.py
Original file line number Diff line number Diff line change
Expand Up @@ -846,7 +846,7 @@ def testModelNotFitted(self):
class TestHdpTransformer(unittest.TestCase):
def setUp(self):
numpy.random.seed(0)
self.model = HdpTransformer(id2word=dictionary)
self.model = HdpTransformer(id2word=dictionary, random_state=42)
self.corpus = mmcorpus.MmCorpus(datapath('testcorpus.mm'))
self.model.fit(self.corpus)

Expand All @@ -855,23 +855,23 @@ def testTransform(self):
doc = self.corpus[0]
transformed_doc = self.model.transform(doc)
expected_doc = [[0.81043386270128193, 0.049357139518070477, 0.035840906753517532, 0.026542006926698079, 0.019925705902962578, 0.014776690981729117, 0.011068909979528148]]
self.assertTrue(numpy.allclose(transformed_doc, expected_doc))
self.assertTrue(numpy.allclose(transformed_doc, expected_doc, atol=1e-2))

# transform multiple documents
docs = [self.corpus[0], self.corpus[1]]
transformed_docs = self.model.transform(docs)
expected_docs = [[0.81043386270128193, 0.049357139518070477, 0.035840906753517532, 0.026542006926698079, 0.019925705902962578, 0.014776690981729117, 0.011068909979528148],
[0.0368655605, 0.709055041, 0.194436428, 0.0151706795, 0.0113863652, 1.00000000e-12, 1.00000000e-12]]
self.assertTrue(numpy.allclose(transformed_docs[0], expected_docs[0]))
self.assertTrue(numpy.allclose(transformed_docs[1], expected_docs[1]))
[0.03795908, 0.39542609, 0.50650585, 0.0151082, 0.01132749, 0., 0.]]
self.assertTrue(numpy.allclose(transformed_docs[0], expected_docs[0], atol=1e-2))
self.assertTrue(numpy.allclose(transformed_docs[1], expected_docs[1], atol=1e-2))

def testPartialFit(self):
for i in range(10):
for i in range(5):
self.model.partial_fit(X=self.corpus) # fit against the model again
doc = list(self.corpus)[0] # transform only the first document
transformed = self.model.transform(doc)
expected = numpy.array([0.76777752, 0.01757334, 0.01600339, 0.01374061, 0.01275931, 0.01126313, 0.01058131, 0.01167185])
passed = numpy.allclose(sorted(transformed[0]), sorted(expected), atol=1e-1)

transformed = self.model.transform(list(self.corpus)[0])
expected = numpy.array([0.77901173, 0.0232508, 0.02054655, 0.01769651, 0.01600487, 0.01478038, 0.01237056, 0.01194372, 0.01070444])
passed = numpy.allclose(transformed[0], expected, atol=1e-2)
self.assertTrue(passed)

def testSetGetParams(self):
Expand Down

0 comments on commit 9caf055

Please sign in to comment.