diff --git a/.gitignore b/.gitignore index 019e1812f7..8853bd683a 100644 --- a/.gitignore +++ b/.gitignore @@ -42,7 +42,6 @@ Thumbs.db # Other # ######### -.tox/ .cache/ .project .pydevproject diff --git a/.travis.yml b/.travis.yml index 11e06f2f63..b0b952766d 100644 --- a/.travis.yml +++ b/.travis.yml @@ -26,7 +26,7 @@ env: # them here for now. They'll get picked up by the multibuild stuff # running in multibuild/common_utils.sh. # - - TEST_DEPENDS="pytest mock cython nmslib pyemd testfixtures Morfessor==2.0.2a4 python-levenshtein==0.12.0 visdom==0.1.8.9 scikit-learn" + - TEST_DEPENDS="pytest mock cython nmslib pyemd testfixtures python-levenshtein==0.12.0 visdom==0.1.8.9 scikit-learn" matrix: # diff --git a/CONTRIBUTING.md b/CONTRIBUTING.md index 0eeb90591b..09f2f5a870 100644 --- a/CONTRIBUTING.md +++ b/CONTRIBUTING.md @@ -20,11 +20,9 @@ Also, please check the [Gensim FAQ](https://github.com/RaRe-Technologies/gensim/ - For windows: `pip install -e .[test-win]` 5. Implement your changes 6. Check that everything's OK in your branch: - - Check it for PEP8: `tox -e flake8` - - Build its documentation (works only for MacOS/Linux): `tox -e docs` (documentation stored in `docs/src/_build`) - - Run unit tests: `tox -e py{version}-{os}`, for example `tox -e py35-linux` or `tox -e py36-win` where - - `{version}` is one of `35`, `36` - - `{os}` is either `win` or `linux` + - Check it for PEP8: `flake8 --ignore E12,W503 --max-line-length 120 --show-source gensim` + - Build its documentation (works only for MacOS/Linux): `make -C docs/src html` (documentation stored in `docs/src/_build`) + - Run unit tests: `pytest -v gensim/test` 7. Add files, commit and push: `git add ... ; git commit -m "my commit message"; git push origin my-feature` 8. [Create a PR](https://help.github.com/articles/creating-a-pull-request/) on Github. Write a **clear description** for your PR, including all the context and relevant information, such as: - The issue that you fixed, e.g. 
`Fixes #123` diff --git a/gensim/models/callbacks.py b/gensim/models/callbacks.py index 42f250cb91..c5560441af 100644 --- a/gensim/models/callbacks.py +++ b/gensim/models/callbacks.py @@ -234,9 +234,7 @@ def get_value(self, **kwargs): Key word arguments to override the object's internal attributes. One of the following parameters are expected: - * `model` - pre-trained topic model of type :class:`~gensim.models.ldamodel.LdaModel`, or one - of its wrappers, such as :class:`~gensim.models.wrappers.ldamallet.LdaMallet` or - :class:`~gensim.models.wrappers.ldavowpalwabbit.LdaVowpalWabbit`. + * `model` - pre-trained topic model of type :class:`~gensim.models.ldamodel.LdaModel`. * `topics` - list of tokenized topics. Returns @@ -290,10 +288,8 @@ def get_value(self, **kwargs): ---------- **kwargs Key word arguments to override the object's internal attributes. - A trained topic model is expected using the 'model' key. This can be of type - :class:`~gensim.models.ldamodel.LdaModel`, or one of its wrappers, such as - :class:`~gensim.models.wrappers.ldamallet.LdaMallet` or - :class:`~gensim.models.wrapper.ldavowpalwabbit.LdaVowpalWabbit`. + A trained topic model is expected using the 'model' key. + This must be of type :class:`~gensim.models.ldamodel.LdaModel`. Returns ------- @@ -354,8 +350,8 @@ def get_value(self, **kwargs): ---------- **kwargs Key word arguments to override the object's internal attributes. - Two models of type :class:`~gensim.models.ldamodelLdaModel` or its wrappers are expected using the keys - `model` and `other_model`. + Two models of type :class:`~gensim.models.ldamodel.LdaModel` + are expected using the keys `model` and `other_model`. Returns ------- @@ -424,8 +420,8 @@ def get_value(self, **kwargs): ---------- **kwargs Key word arguments to override the object's internal attributes. - Two models of type :class:`~gensim.models.ldamodel.LdaModel` or its wrappers are expected using the keys - `model` and `other_model`. 
+ Two models of type :class:`~gensim.models.ldamodel.LdaModel` + are expected using the keys `model` and `other_model`. Returns ------- diff --git a/gensim/models/coherencemodel.py b/gensim/models/coherencemodel.py index b3c89640a7..d6df976153 100644 --- a/gensim/models/coherencemodel.py +++ b/gensim/models/coherencemodel.py @@ -132,8 +132,7 @@ def __init__(self, model=None, topics=None, texts=None, corpus=None, dictionary= model : :class:`~gensim.models.basemodel.BaseTopicModel`, optional Pre-trained topic model, should be provided if topics is not provided. Currently supports :class:`~gensim.models.ldamodel.LdaModel`, - :class:`~gensim.models.ldamulticore.LdaMulticore`, :class:`~gensim.models.wrappers.ldamallet.LdaMallet` and - :class:`~gensim.models.wrappers.ldavowpalwabbit.LdaVowpalWabbit`. + :class:`~gensim.models.ldamulticore.LdaMulticore`. Use `topics` parameter to plug in an as yet unsupported model. topics : list of list of str, optional List of tokenized topics, if this is preferred over model - dictionary should be provided. diff --git a/gensim/models/keyedvectors.py b/gensim/models/keyedvectors.py index 8f86b807f2..6fb9e329d8 100644 --- a/gensim/models/keyedvectors.py +++ b/gensim/models/keyedvectors.py @@ -9,8 +9,7 @@ and various similarity look-ups. Since trained word vectors are independent from the way they were trained (:class:`~gensim.models.word2vec.Word2Vec`, -:class:`~gensim.models.fasttext.FastText`, -:class:`~gensim.models.wrappers.varembed.VarEmbed` etc), they can be represented by a standalone structure, +:class:`~gensim.models.fasttext.FastText` etc), they can be represented by a standalone structure, as implemented in this module. 
The structure is called "KeyedVectors" and is essentially a mapping between *keys* diff --git a/gensim/test/test_dtm.py b/gensim/test/test_dtm.py deleted file mode 100644 index 0e57d15e7e..0000000000 --- a/gensim/test/test_dtm.py +++ /dev/null @@ -1,70 +0,0 @@ -#!/usr/bin/env python -# -*- coding: utf-8 -*- - -""" -Automated tests for DTM/DIM model -""" - - -import logging -from subprocess import CalledProcessError -import gensim -import os -import unittest -from gensim import corpora -from gensim.test.utils import datapath - - -class TestDtmModel(unittest.TestCase): - - def setUp(self): - self.time_slices = [3, 7] - self.corpus = corpora.mmcorpus.MmCorpus(datapath('dtm_test.mm')) - self.id2word = corpora.Dictionary.load(datapath('dtm_test.dict')) - # first you need to setup the environment variable $DTM_PATH for the dtm executable file - self.dtm_path = os.environ.get('DTM_PATH', None) - if not self.dtm_path: - self.skipTest("$DTM_PATH is not properly set up.") - - def test_dtm(self): - if self.dtm_path is not None: - model = gensim.models.wrappers.DtmModel( - self.dtm_path, self.corpus, self.time_slices, num_topics=2, - id2word=self.id2word, model='dtm', initialize_lda=True, - rng_seed=1 - ) - topics = model.show_topics(num_topics=2, times=2, num_words=10) - self.assertEqual(len(topics), 4) - - one_topic = model.show_topic(topicid=1, time=1, topn=10) - self.assertEqual(len(one_topic), 10) - self.assertEqual(one_topic[0][1], u'idexx') - - def test_dim(self): - if self.dtm_path is not None: - model = gensim.models.wrappers.DtmModel( - self.dtm_path, self.corpus, self.time_slices, num_topics=2, - id2word=self.id2word, model='fixed', initialize_lda=True, - rng_seed=1 - ) - topics = model.show_topics(num_topics=2, times=2, num_words=10) - self.assertEqual(len(topics), 4) - - one_topic = model.show_topic(topicid=1, time=1, topn=10) - self.assertEqual(len(one_topic), 10) - self.assertEqual(one_topic[0][1], u'skills') - - # In stderr expect "Error opening file 
/tmp/a65419_train_out/initial-lda-ss.dat. Failing." - def test_called_process_error(self): - if self.dtm_path is not None: - with self.assertRaises(CalledProcessError): - gensim.models.wrappers.DtmModel( - self.dtm_path, self.corpus, self.time_slices, num_topics=2, - id2word=self.id2word, model='dtm', initialize_lda=False, - rng_seed=1 - ) - - -if __name__ == '__main__': - logging.basicConfig(level=logging.DEBUG) - unittest.main() diff --git a/gensim/utils.py b/gensim/utils.py index 78d64b88e6..0619296888 100644 --- a/gensim/utils.py +++ b/gensim/utils.py @@ -1863,7 +1863,7 @@ def keep_vocab_item(word, count, min_count, trim_rule=None): def check_output(stdout=subprocess.PIPE, *popenargs, **kwargs): r"""Run OS command with the given arguments and return its output as a byte string. - Backported from Python 2.7 with a few minor modifications. Widely used for :mod:`gensim.models.wrappers`. + Backported from Python 2.7 with a few minor modifications. Used in word2vec/glove2word2vec tests. Behaves very similar to https://docs.python.org/2/library/subprocess.html#subprocess.check_output. Examples diff --git a/release/upload_docs.sh b/release/upload_docs.sh index d454eaa157..3bec935a8e 100644 --- a/release/upload_docs.sh +++ b/release/upload_docs.sh @@ -1,3 +1,4 @@ -tox -e compile,docs +python setup.py build_ext --inplace cd docs/src +make html make upload