Fix issues of flake8==3.7.1 #2365

Merged Jan 30, 2019 (2 commits).
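Background: flake8 3.7 pulls in newer checkers (pycodestyle 2.5, pyflakes 2.1), which add warnings such as E117 (over-indented) and catch more F821 (undefined name) cases, so code that passed under older releases now fails the lint step. To reproduce locally, pin the linter (`pip install flake8==3.7.1`) and run it over the package, e.g. `flake8 gensim/`; the exact select/ignore settings come from the project's own flake8 config. The fixes below are correspondingly small: re-indentation, line wrapping, import layout, and targeted `# noqa` suppressions.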
6 changes: 3 additions & 3 deletions gensim/models/deprecated/fasttext.py

(whitespace-only change: the removed and added lines are identical apart from indentation, consistent with an E117 over-indentation fix)

@@ -601,9 +601,9 @@ def train(self, sentences, total_examples=None, total_words=None,
         """
         self.neg_labels = []
         if self.negative > 0:
-                # precompute negative labels optimization for pure-python training
-                self.neg_labels = zeros(self.negative + 1)
-                self.neg_labels[0] = 1.
+            # precompute negative labels optimization for pure-python training
+            self.neg_labels = zeros(self.negative + 1)
+            self.neg_labels[0] = 1.

         Word2Vec.train(
             self, sentences, total_examples=self.corpus_count, epochs=self.iter,
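For context, the re-indented block is the standard negative-sampling shortcut: the binary targets for one positive word plus `negative` sampled noise words never change during training, so the label vector is built once up front. A minimal standalone sketch with plain numpy (mirroring the lines above, not gensim's full training loop):

    import numpy as np

    negative = 5  # noise words sampled per positive example

    # Build the reusable target vector once: slot 0 labels the true (positive)
    # word, the remaining slots label the sampled negatives.
    neg_labels = np.zeros(negative + 1)
    neg_labels[0] = 1.

    print(neg_labels)  # [1. 0. 0. 0. 0. 0.]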
3 changes: 2 additions & 1 deletion gensim/models/ldaseqmodel.py

@@ -1635,6 +1635,7 @@ def df_obs(x, *args):
     if model == "DTM":
         deriv = sslm.compute_obs_deriv(word, word_counts, totals, mean_deriv_mtx, deriv)
     elif model == "DIM":
-        deriv = sslm.compute_obs_deriv_fixed(p.word, p.word_counts, p.totals, p.sslm, p.mean_deriv_mtx, deriv)  # noqa:F821
+        deriv = sslm.compute_obs_deriv_fixed(
+            p.word, p.word_counts, p.totals, p.sslm, p.mean_deriv_mtx, deriv)  # noqa:F821

     return np.negative(deriv)
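Wrapping the call is the usual fix for E501 (line too long). One subtlety: `# noqa` comments suppress warnings on their own physical line only, and flake8 reports F821 at the position of the undefined name (`p` here), so the suppression has to move to the continuation line where `p` now appears, exactly as the new version does.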
82 changes: 41 additions & 41 deletions gensim/models/utils_any2vec.py

@@ -247,48 +247,48 @@ def ft_ngram_hashes(word, minn, maxn, num_buckets, fb_compatible=True):

The entire body of `_save_word2vec_format` is deleted and re-added one indentation level shallower (the 41 removed and 41 added lines are otherwise identical, i.e. a whitespace-only E117 fix). The function after the change:

def _save_word2vec_format(fname, vocab, vectors, fvocab=None, binary=False, total_vec=None):
    """Store the input-hidden weight matrix in the same format used by the original
    C word2vec-tool, for compatibility.

    Parameters
    ----------
    fname : str
        The file path used to save the vectors in.
    vocab : dict
        The vocabulary of words.
    vectors : numpy.array
        The vectors to be stored.
    fvocab : str, optional
        File path used to save the vocabulary.
    binary : bool, optional
        If True, the data will be saved in binary word2vec format, else it will be saved in plain text.
    total_vec : int, optional
        Explicitly specify total number of vectors
        (in case word vectors are appended with document vectors afterwards).

    """
    if not (vocab or vectors):
        raise RuntimeError("no input")
    if total_vec is None:
        total_vec = len(vocab)
    vector_size = vectors.shape[1]
    if fvocab is not None:
        logger.info("storing vocabulary in %s", fvocab)
        with utils.smart_open(fvocab, 'wb') as vout:
            for word, vocab_ in sorted(iteritems(vocab), key=lambda item: -item[1].count):
                vout.write(utils.to_utf8("%s %s\n" % (word, vocab_.count)))
    logger.info("storing %sx%s projection weights into %s", total_vec, vector_size, fname)
    assert (len(vocab), vector_size) == vectors.shape
    with utils.smart_open(fname, 'wb') as fout:
        fout.write(utils.to_utf8("%s %s\n" % (total_vec, vector_size)))
        # store in sorted order: most frequent words at the top
        for word, vocab_ in sorted(iteritems(vocab), key=lambda item: -item[1].count):
            row = vectors[vocab_.index]
            if binary:
                row = row.astype(REAL)
                fout.write(utils.to_utf8(word) + b" " + row.tostring())
            else:
                fout.write(utils.to_utf8("%s %s\n" % (word, ' '.join(repr(val) for val in row))))


def _load_word2vec_format(cls, fname, fvocab=None, binary=False, encoding='utf8', unicode_errors='strict',
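For reference, in the `binary=False` branch the file this function writes is plain text: a header line with the vector count and dimensionality, then one `word v1 v2 ...` line per vocabulary entry, most frequent first. A self-contained sketch with stand-in data (`FakeVocab` below is a hypothetical substitute for gensim's real vocab entries):

    import numpy as np

    class FakeVocab:
        # stand-in for a gensim vocab entry: a frequency count and a row index
        def __init__(self, count, index):
            self.count, self.index = count, index

    vocab = {"the": FakeVocab(10, 0), "cat": FakeVocab(3, 1)}
    vectors = np.array([[0.1, -0.2], [0.3, 0.4]], dtype=np.float32)

    with open("tiny.vec", "wb") as fout:
        fout.write(("%s %s\n" % (len(vocab), vectors.shape[1])).encode("utf8"))
        # most frequent words first, matching _save_word2vec_format
        for word, v in sorted(vocab.items(), key=lambda item: -item[1].count):
            row = vectors[v.index]
            fout.write(("%s %s\n" % (word, " ".join(repr(x) for x in row))).encode("utf8"))

    # tiny.vec now holds (float digits depend on the numpy version):
    # 2 2
    # the 0.1 -0.2
    # cat 0.3 0.4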
13 changes: 11 additions & 2 deletions gensim/similarities/__init__.py

@@ -3,6 +3,15 @@
 """

 # bring classes directly into package namespace, to save some typing
-from .docsim import Similarity, MatrixSimilarity, SparseMatrixSimilarity, SoftCosineSimilarity, WmdSimilarity  # noqa:F401
-from .termsim import TermSimilarityIndex, UniformTermSimilarityIndex, SparseTermSimilarityMatrix  # noqa:F401
+from .docsim import (  # noqa:F401
+    Similarity,
+    MatrixSimilarity,
+    SparseMatrixSimilarity,
+    SoftCosineSimilarity,
+    WmdSimilarity)
+from .termsim import (  # noqa:F401
+    TermSimilarityIndex,
+    UniformTermSimilarityIndex,
+    SparseTermSimilarityMatrix)
 from .levenshtein import LevenshteinSimilarityIndex  # noqa:F401
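The `# noqa:F401` markers survive the reflow because these imports exist purely for re-export, and F401 (imported but unused) would otherwise fire on every name. What the re-exports buy users is the flat namespace:

    # import from the package instead of the implementing submodule:
    from gensim.similarities import MatrixSimilarity            # lives in gensim.similarities.docsim
    from gensim.similarities import SparseTermSimilarityMatrix  # lives in gensim.similarities.termsim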
2 changes: 1 addition & 1 deletion gensim/test/test_ldaseqmodel.py

(whitespace-only change: the comment is de-indented to align with the method it describes)

@@ -13,7 +13,7 @@


 class TestLdaSeq(unittest.TestCase):
-        # we are setting up a DTM model and fitting it, and checking topic-word and doc-topic results.
+    # we are setting up a DTM model and fitting it, and checking topic-word and doc-topic results.
     def setUp(self):
         texts = [
             [u'senior', u'studios', u'studios', u'studios', u'creators', u'award', u'mobile', u'currently',