From 169fbfcb5eaacb9b433a3fe7c7aac266da85b1b8 Mon Sep 17 00:00:00 2001 From: Jayant Jain Date: Wed, 15 Nov 2017 15:49:14 +0530 Subject: [PATCH] Adds option to specify dtype for PoincareModel and corresponding unittest --- gensim/models/poincare.py | 7 +++++-- gensim/test/test_poincare.py | 5 +++++ 2 files changed, 10 insertions(+), 2 deletions(-) diff --git a/gensim/models/poincare.py b/gensim/models/poincare.py index b129587714..bdf627684c 100644 --- a/gensim/models/poincare.py +++ b/gensim/models/poincare.py @@ -69,7 +69,7 @@ class PoincareModel(utils.SaveLoad): """ def __init__(self, train_data, size=50, alpha=0.1, negative=10, workers=1, epsilon=1e-5, - burn_in=10, burn_in_alpha=0.01, init_range=(-0.001, 0.001), seed=0): + burn_in=10, burn_in_alpha=0.01, init_range=(-0.001, 0.001), dtype=np.float64, seed=0): """Initialize and train a Poincare embedding model from an iterable of relations. Parameters @@ -95,6 +95,8 @@ def __init__(self, train_data, size=50, alpha=0.1, negative=10, workers=1, epsil Learning rate for burn-in initialization, ignored if `burn_in` is 0. init_range : 2-tuple (float, float) Range within which the vectors are randomly initialized. + dtype : numpy.dtype + The numpy dtype to use for the vectors in the model (numpy.float64, numpy.float32 etc). seed : int, optional Seed for random to ensure reproducibility. 
@@ -127,6 +129,7 @@ def __init__(self, train_data, size=50, alpha=0.1, negative=10, workers=1, epsil self.epsilon = epsilon self.burn_in = burn_in self._burn_in_done = False + self.dtype = dtype self.seed = seed self._np_random = np_random.RandomState(seed) self.init_range = init_range @@ -172,7 +175,7 @@ def _load_relations(self): def _init_embeddings(self): """Randomly initialize vectors for the items in the vocab.""" shape = (len(self.kv.index2word), self.size) - self.kv.syn0 = self._np_random.uniform(self.init_range[0], self.init_range[1], shape) + self.kv.syn0 = self._np_random.uniform(self.init_range[0], self.init_range[1], shape).astype(self.dtype) def _get_candidate_negatives(self): """Returns candidate negatives of size `self.negative` from the negative examples buffer. diff --git a/gensim/test/test_poincare.py b/gensim/test/test_poincare.py index 30df824e60..4e4d215114 100644 --- a/gensim/test/test_poincare.py +++ b/gensim/test/test_poincare.py @@ -107,6 +107,11 @@ def test_vector_shape(self): model = PoincareModel(self.data, size=20) self.assertEqual(model.kv.syn0.shape, (7, 20)) + def test_vector_dtype(self): + """Tests whether vectors are initialized with the correct dtype.""" + model = PoincareModel(self.data, dtype=np.float32) + self.assertEqual(model.kv.syn0.dtype, np.float32) + def test_training(self): """Tests that vectors are different before and after training.""" model = PoincareModel(self.data_large, burn_in=0, negative=3)