From bfc0707ffdb914d523f01809010586074a1a9d83 Mon Sep 17 00:00:00 2001 From: Michael Penkov Date: Tue, 28 Apr 2020 11:35:44 +0900 Subject: [PATCH 1/3] get tests to pass on Py27 --- gensim/corpora/dictionary.py | 5 ++++- gensim/corpora/sharded_corpus.py | 13 +++++++++---- gensim/models/doc2vec.py | 5 ++++- gensim/models/fasttext.py | 5 ++++- gensim/models/hdpmodel.py | 11 ++++++++--- gensim/test/test_corpora_dictionary.py | 5 ++++- setup.py | 22 +++++++++++++++++----- tox.ini | 4 ++-- 8 files changed, 52 insertions(+), 18 deletions(-) diff --git a/gensim/corpora/dictionary.py b/gensim/corpora/dictionary.py index ba3795062d..f1bbf8eac5 100644 --- a/gensim/corpora/dictionary.py +++ b/gensim/corpora/dictionary.py @@ -9,7 +9,10 @@ from __future__ import with_statement from collections import defaultdict -from collections.abc import Mapping +try: + from collections.abc import Mapping +except ImportError: + from collections import Mapping import sys import logging import itertools diff --git a/gensim/corpora/sharded_corpus.py b/gensim/corpora/sharded_corpus.py index 6f90b715cf..2f1de3b68e 100644 --- a/gensim/corpora/sharded_corpus.py +++ b/gensim/corpora/sharded_corpus.py @@ -24,7 +24,6 @@ import math import numpy import scipy.sparse as sparse -import time from six.moves import range @@ -32,6 +31,12 @@ from gensim.corpora import IndexedCorpus from gensim.interfaces import TransformedCorpus +import six +if six.PY2: + from time import time as perf_counter +else: + from time import perf_counter + logger = logging.getLogger(__name__) #: Specifies which dtype should be used for serializing the shards. @@ -280,12 +285,12 @@ def init_shards(self, output_prefix, corpus, shardsize=4096, dtype=_default_dtyp self.dim = proposed_dim self.offsets = [0] - start_time = time.perf_counter() + start_time = perf_counter() logger.info('Running init from corpus.') for n, doc_chunk in enumerate(gensim.utils.grouper(corpus, chunksize=shardsize)): - logger.info('Chunk no. %d at %f s', n, time.perf_counter() - start_time) + logger.info('Chunk no. %d at %f s', n, perf_counter() - start_time) current_shard = numpy.zeros((len(doc_chunk), self.dim), dtype=dtype) logger.debug('Current chunk dimension: %d x %d', len(doc_chunk), self.dim) @@ -300,7 +305,7 @@ def init_shards(self, output_prefix, corpus, shardsize=4096, dtype=_default_dtyp self.save_shard(current_shard) - end_time = time.perf_counter() + end_time = perf_counter() logger.info('Built %d shards in %f s.', self.n_shards, end_time - start_time) def init_by_clone(self): diff --git a/gensim/models/doc2vec.py b/gensim/models/doc2vec.py index c5cb5b40a2..884760666b 100644 --- a/gensim/models/doc2vec.py +++ b/gensim/models/doc2vec.py @@ -71,7 +71,10 @@ from Queue import Queue # noqa:F401 from collections import namedtuple, defaultdict -from collections.abc import Iterable +try: + from collections.abc import Iterable +except ImportError: + from collections import Iterable from timeit import default_timer from numpy import zeros, float32 as REAL, empty, ones, \ diff --git a/gensim/models/fasttext.py b/gensim/models/fasttext.py index d2da493ec9..2fd195a013 100644 --- a/gensim/models/fasttext.py +++ b/gensim/models/fasttext.py @@ -285,7 +285,10 @@ import numpy as np from numpy import ones, vstack, float32 as REAL import six -from collections.abc import Iterable +try: + from collections.abc import Iterable +except ImportError: + from collections import Iterable import gensim.models._fasttext_bin diff --git a/gensim/models/hdpmodel.py b/gensim/models/hdpmodel.py index 3682f94b39..2cba2f2155 100755 --- a/gensim/models/hdpmodel.py +++ b/gensim/models/hdpmodel.py @@ -52,7 +52,6 @@ from __future__ import with_statement import logging -import time import warnings import numpy as np @@ -65,6 +64,12 @@ from gensim.utils import deprecated +import six +if six.PY2: + from time import time as perf_counter +else: + from time import perf_counter + logger = logging.getLogger(__name__) meanchangethresh = 0.00001 @@ -464,7 +469,7 @@ def update(self, corpus): """ save_freq = max(1, int(10000 / self.chunksize)) # save every 10k docs, roughly chunks_processed = 0 - start_time = time.perf_counter() + start_time = perf_counter() while True: for chunk in utils.grouper(corpus, self.chunksize): @@ -513,7 +518,7 @@ def update_finished(self, start_time, chunks_processed, docs_processed): (self.max_chunks and chunks_processed == self.max_chunks) # time limit reached - or (self.max_time and time.perf_counter() - start_time > self.max_time) + or (self.max_time and perf_counter() - start_time > self.max_time) # no limits and whole corpus has been processed once or (not self.max_chunks and not self.max_time and docs_processed >= self.m_D)) diff --git a/gensim/test/test_corpora_dictionary.py b/gensim/test/test_corpora_dictionary.py index 1791e96bb5..4c50b464de 100644 --- a/gensim/test/test_corpora_dictionary.py +++ b/gensim/test/test_corpora_dictionary.py @@ -7,7 +7,10 @@ Unit tests for the `corpora.Dictionary` class. """ -from collections.abc import Mapping +try: + from collections.abc import Mapping +except ImportError: + from collections import Mapping from itertools import chain import logging import unittest diff --git a/setup.py b/setup.py index cc28af193e..6e81748da0 100644 --- a/setup.py +++ b/setup.py @@ -20,8 +20,9 @@ from setuptools import setup, find_packages, Extension from setuptools.command.build_ext import build_ext -if sys.version_info[:2] < (3, 5): - raise Exception('This version of gensim needs 3.5 or later.') +PY2 = sys.version_info[:2] == (2, 7) +if sys.version_info[:2] < (3, 5) and not PY2: + raise Exception('This version of gensim requires Py2.7, or Py3.5 or greater') c_extensions = { 'gensim.models.word2vec_inner': 'gensim/models/word2vec_inner.c', @@ -316,7 +317,18 @@ def run(self): if (3, 0) < sys.version_info < (3, 7): linux_testenv.extend(['nmslib']) -NUMPY_STR = 'numpy >= 1.11.3' +if PY2: + # + # https://www.scipy.org/scipylib/faq.html#python-version-support + # + NUMPY_STR = 'numpy <= 1.16.1' + SCIPY_STR = 'scipy <= 1.2.3' + SO_STR = 'smart_open == 1.10.1' +else: + NUMPY_STR = 'numpy >= 1.11.3' + SCIPY_STR = 'scipy >= 0.18.1' + SO_STR = 'smart_open' + # # We pin the Cython version for reproducibility. We expect our extensions # to build with any sane version of Cython, so we should update this pin @@ -326,9 +338,9 @@ def run(self): install_requires = [ NUMPY_STR, - 'scipy >= 0.18.1', + SCIPY_STR, 'six >= 1.5.0', - 'smart_open >= 1.8.1', + SO_STR, ] setup_requires = [NUMPY_STR] diff --git a/tox.ini b/tox.ini index 12c8aa692b..52b171b459 100644 --- a/tox.ini +++ b/tox.ini @@ -1,6 +1,6 @@ [tox] minversion = 2.0 -envlist = {py36,py37,py38}-{win,linux}, flake8, docs, docs-upload, download-wheels, upload-wheels, test-pypi +envlist = {py27,py36,py37,py38}-{win,linux}, flake8, docs, docs-upload, download-wheels, upload-wheels, test-pypi skipsdist = True platform = linux: linux win: win64 @@ -17,7 +17,7 @@ ignore = F821 ; TODO remove me when all examples in docstrings will be executab exclude=.venv, .git, .tox, dist, doc, build, gensim/models/deprecated [pytest] -addopts = -rfxEXs --durations=20 --showlocals --reruns 3 --reruns-delay 1 +addopts = -rfxEXs --durations=20 --showlocals [testenv] recreate = True From c981d090f99b24f07337cddd5b113a6df2db562c Mon Sep 17 00:00:00 2001 From: Michael Penkov Date: Tue, 28 Apr 2020 13:00:44 +0900 Subject: [PATCH 2/3] enable Py27 builds under Appveyor and TravisCI --- .travis.yml | 3 +++ appveyor.yml | 5 +++++ 2 files changed, 8 insertions(+) diff --git a/.travis.yml b/.travis.yml index 283530fefa..c63f5e0e66 100644 --- a/.travis.yml +++ b/.travis.yml @@ -32,6 +32,9 @@ matrix: - python: '3.6' env: TOXENV="py36-linux" + - python: '2.7' + env: TOXENV="py27-linux" + install: - pip install tox diff --git a/appveyor.yml b/appveyor.yml index 25148a0e56..8ea423b8b8 100644 --- a/appveyor.yml +++ b/appveyor.yml @@ -31,6 +31,11 @@ environment: PYTHON_ARCH: "64" TOXENV: "py36-win" + - PYTHON: "C:\\Python27-x64" + PYTHON_VERSION: "2.7.17" + PYTHON_ARCH: "64" + TOXENV: "py27-win" + init: - "ECHO %PYTHON% %PYTHON_VERSION% %PYTHON_ARCH%" - "ECHO \"%APPVEYOR_SCHEDULED_BUILD%\"" From 605f24a7ed82bab60394d24356caadb3ef2b508d Mon Sep 17 00:00:00 2001 From: Michael Penkov Date: Tue, 28 Apr 2020 13:24:56 +0900 Subject: [PATCH 3/3] fix utils.open invocation --- gensim/test/test_utils_any2vec.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/gensim/test/test_utils_any2vec.py b/gensim/test/test_utils_any2vec.py index f4c5c2c430..ea0faff403 100644 --- a/gensim/test/test_utils_any2vec.py +++ b/gensim/test/test_utils_any2vec.py @@ -24,7 +24,7 @@ def save_dict_to_word2vec_formated_file(fname, word2vec_dict): - with gensim.utils.open(fname, "bw") as f: + with gensim.utils.open(fname, "wb") as f: num_words = len(word2vec_dict) vector_length = len(list(word2vec_dict.values())[0])