piskvorky · menshikh-iv · Apr 16, 2018 · Jan 31, 2018 · Jan 31, 2018 · Mar 21, 2018
diff --git a/gensim/matutils.py b/gensim/matutils.py
@@ -668,7 +668,7 @@ def ret_log_normalize_vec(vec, axis=1):
 
 def unitvec(vec, norm='l2', return_norm=False):
     """Scale a vector to unit length.
-
+    
     Parameters
     ----------
     vec : {numpy.ndarray, scipy.sparse, list of (int, float)}
@@ -677,49 +677,53 @@ def unitvec(vec, norm='l2', return_norm=False):
         Normalization that will be used.
     return_norm : bool, optional
         If True - returns the length of vector `vec`.
-
+        
     Returns
     -------
     numpy.ndarray, scipy.sparse, list of (int, float)}
         Normalized vector in same format as `vec`.
     float
         Length of `vec` before normalization.
-
+        
     Notes
     -----
     Zero-vector will be unchanged.
-
+    
     """
     if norm not in ('l1', 'l2'):
         raise ValueError("'%s' is not a supported norm. Currently supported norms are 'l1' and 'l2'." % norm)
+
     if scipy.sparse.issparse(vec):
         vec = vec.tocsr()
         if norm == 'l1':
             veclen = np.sum(np.abs(vec.data))
         if norm == 'l2':
             veclen = np.sqrt(np.sum(vec.data ** 2))
         if veclen > 0.0:
-            if return_norm:
-                return vec / veclen, veclen
-            else:
+            if np.issubdtype(vec.dtype, np.int):
+                vec = vec.astype(np.float)
                 return vec / veclen
+            else:
+                vec /= veclen
+                return vec.astype(vec.dtype)
         else:
             if return_norm:
                 return vec, 1.
             else:
                 return vec
 
     if isinstance(vec, np.ndarray):
-        vec = np.asarray(vec, dtype=float)
+        vec = np.asarray(vec, dtype=vec.dtype)
         if norm == 'l1':
             veclen = np.sum(np.abs(vec))
         if norm == 'l2':
             veclen = blas_nrm2(vec)
         if veclen > 0.0:
-            if return_norm:
-                return blas_scal(1.0 / veclen, vec), veclen
+            if np.issubdtype(vec.dtype, np.int):
+                vec = vec.astype(np.float)
+                return blas_scal(1.0 / veclen, vec).astype(vec.dtype)
             else:
-                return blas_scal(1.0 / veclen, vec)
+                return blas_scal(1.0 / veclen, vec).astype(vec.dtype)
         else:
             if return_norm:
                 return vec, 1

diff --git a/gensim/test/test_matutils.py b/gensim/test/test_matutils.py
@@ -140,6 +140,46 @@ def testDirichletExpectation(self):
                 msg = "dirichlet_expectation_2d failed for dtype={}".format(dtype)
                 self.assertTrue(np.allclose(known_good, test_values), msg)
 
+
+class UnitvecTestCase(unittest.TestCase):
+    """Tests for `matutils.unitvec`.
+
+    Dense arrays and sparse matrices of several dtypes are checked against a manual L2 normalization.
+    """
+
+    def manual_unitvec(self, vec):
+        """Return a float copy of `vec` divided by its L2 norm; used as the reference result."""
+        vec = vec.astype(float)  # builtin float; the `np.float` alias is deprecated in NumPy
+        if sparse.issparse(vec):
+            return vec.multiply(1. / np.sqrt(vec.multiply(vec).sum()))
+        return vec / np.sqrt(np.sum(vec ** 2))
+
+    def test_inputs(self):
+        """Compare `unitvec` with the manual reference for every dtype, sparse and dense."""
+        from gensim.matutils import unitvec  # function under test
+
+        float_dtypes = (np.float32, np.float64)
+        rows = np.asarray([[1, 0, 0, 0, 0, 3, 0, 0], [0, 0, 4, 3, 0, 0, 0, 0]])
+        for dtype_ in float_dtypes + (np.int32, np.int64, float, int):
+            if dtype_ in float_dtypes:
+                dense = np.random.uniform(size=(5,)).astype(dtype_)
+            else:
+                # Start at 1: an all-zero draw would make the reference divide by zero.
+                dense = np.random.randint(1, 10, size=5).astype(dtype_)
+            for input_vector in (sparse.csr_matrix(rows.astype(dtype_)), dense):
+                msg = "unitvec failed for dtype={}".format(dtype_)
+                # Build the reference first, in case unitvec modifies its argument in place.
+                expected = self.manual_unitvec(input_vector)
+                unit_vector = unitvec(input_vector)
+                if dtype_ in float_dtypes:
+                    # dtype preservation is only checked for float32/float64 input.
+                    self.assertEqual(input_vector.dtype, unit_vector.dtype, msg)
+                if sparse.issparse(input_vector):
+                    close = np.allclose(unit_vector.data, expected.data, atol=1e-3)
+                else:
+                    close = np.allclose(unit_vector, expected)
+                self.assertTrue(close, msg)
+
 
 if __name__ == '__main__':
     logging.basicConfig(format='%(asctime)s : %(levelname)s : %(message)s', level=logging.DEBUG)