dask · TomAugspurger · Oct 4, 2018 · Oct 4, 2018 · Oct 4, 2018 · Oct 4, 2018
diff --git a/.travis.yml b/.travis.yml
@@ -23,6 +23,7 @@ install:
   # Replace dep1 dep2 ... with your dependencies
   - conda env create --file=ci/environment-3.6.yml --name=dask-ml
   - source activate dask-ml
+  - pip install pip --upgrade
   - python -m pip install -e .
 
 script:

diff --git a/ci/environment-2.7.yml b/ci/environment-2.7.yml
@@ -29,6 +29,7 @@ dependencies:
   - sphinx_rtd_theme
   - sphinx-gallery
   - tensorflow
+  - testpath<0.4
   - tornado
   - toolz
   - xgboost

diff --git a/ci/environment-3.6.yml b/ci/environment-3.6.yml
@@ -37,6 +37,7 @@ dependencies:
   - sphinx_rtd_theme
   - sphinx-gallery
   - tensorflow
+  - testpath<0.4
   - tornado
   - toolz
   - xgboost

diff --git a/ci/install-circle.sh b/ci/install-circle.sh
@@ -8,5 +8,6 @@ conda config --add channels conda-forge
 conda env create -f ci/environment-${PYTHON}.yml --name=${ENV_NAME} --quiet
 conda env list
 source activate ${ENV_NAME}
+pip install pip --upgrade
 pip install --no-deps --quiet -e .
 conda list -n ${ENV_NAME}
diff --git a/dask_ml/cluster/spectral.py b/dask_ml/cluster/spectral.py
@@ -142,7 +142,7 @@ def __init__(
         eigen_solver=None,
         random_state=None,
         n_init=10,
-        gamma=1.,
+        gamma=1.0,
         affinity="rbf",
         n_neighbors=10,
         eigen_tol=0.0,

diff --git a/dask_ml/decomposition/pca.py b/dask_ml/decomposition/pca.py
@@ -211,7 +211,7 @@ def _fit(self, X):
             # Small problem, just call full PCA
             if max(X.shape) <= 500:
                 solver = "full"
-            elif n_components >= 1 and n_components < .8 * min(X.shape):
+            elif n_components >= 1 and n_components < 0.8 * min(X.shape):
                 solver = "randomized"
             # This is also the case of n_components in (0,1)
             else:
@@ -281,7 +281,7 @@ def _fit(self, X):
             else:
                 noise_variance = explained_variance[n_components:].mean()
         else:
-            noise_variance = 0.
+            noise_variance = 0.0
 
         (
             self.n_samples_,
@@ -427,8 +427,8 @@ def score_samples(self, X):
         Xr = X - self.mean_
         n_features = X.shape[1]
         precision = self.get_precision()  # [n_features, n_features]
-        log_like = -.5 * (Xr * (da.dot(Xr, precision))).sum(axis=1)
-        log_like -= .5 * (n_features * da.log(2. * np.pi) - fast_logdet(precision))
+        log_like = -0.5 * (Xr * (da.dot(Xr, precision))).sum(axis=1)
+        log_like -= 0.5 * (n_features * da.log(2.0 * np.pi) - fast_logdet(precision))
         return log_like
 
     def score(self, X, y=None):

diff --git a/dask_ml/decomposition/truncated_svd.py b/dask_ml/decomposition/truncated_svd.py
@@ -7,7 +7,7 @@
 
 class TruncatedSVD(BaseEstimator, TransformerMixin):
     def __init__(
-        self, n_components=2, algorithm="tsqr", n_iter=5, random_state=None, tol=0.
+        self, n_components=2, algorithm="tsqr", n_iter=5, random_state=None, tol=0.0
     ):
         """Dimensionality reduction using truncated SVD (aka LSA).
 

diff --git a/dask_ml/linear_model/glm.py b/dask_ml/linear_model/glm.py
@@ -228,7 +228,7 @@ def predict(self, X):
         C : array, shape = [n_samples,]
             Predicted class labels for each sample
         """
-        return self.predict_proba(X) > .5  # TODO: verify, multiclass broken
+        return self.predict_proba(X) > 0.5  # TODO: verify, multiclass broken
 
     def predict_proba(self, X):
         """Probability estimates for samples in X.

diff --git a/dask_ml/metrics/regression.py b/dask_ml/metrics/regression.py
@@ -84,7 +84,7 @@ def r2_score(
         output_scores[valid_score] = 1 - (
             numerator[valid_score] / denominator[valid_score]
         )
-        output_scores[nonzero_numerator & ~nonzero_denominator] = 0.
+        output_scores[nonzero_numerator & ~nonzero_denominator] = 0.0
 
     result = output_scores.mean(axis=0)
     if compute:

diff --git a/dask_ml/model_selection/utils_test.py b/dask_ml/model_selection/utils_test.py
@@ -29,9 +29,9 @@ def transform(self, X):
 
     def score(self, X=None, Y=None):
         if self.foo_param > 1:
-            score = 1.
+            score = 1.0
         else:
-            score = 0.
+            score = 0.0
         return score
 
     def get_params(self, deep=False):
@@ -184,7 +184,7 @@ def predict(self, T):
 
     def score(self, X=None, Y=None):
         if self.foo_param > 1:
-            score = 1.
+            score = 1.0
         else:
-            score = 0.
+            score = 0.0
         return score
diff --git a/dask_ml/naive_bayes.py b/dask_ml/naive_bayes.py
@@ -112,7 +112,7 @@ def _joint_log_likelihood(self, X):
         jll = []
         for i in range(np.size(self.classes_)):
             jointi = da.log(self.class_prior_[i])
-            n_ij = -0.5 * da.sum(da.log(2. * np.pi * self.sigma_[i, :]))
+            n_ij = -0.5 * da.sum(da.log(2.0 * np.pi * self.sigma_[i, :]))
             n_ij -= 0.5 * da.sum(
                 ((X - self.theta_[i, :]) ** 2) / (self.sigma_[i, :]), 1
             )

diff --git a/dask_ml/preprocessing/data.py b/dask_ml/preprocessing/data.py
@@ -156,7 +156,7 @@ def fit(self, X, y=None):
                 ]
             )
 
-        quantiles = [da.percentile(col, [q_min, 50., q_max]) for col in X.T]
+        quantiles = [da.percentile(col, [q_min, 50.0, q_max]) for col in X.T]
         quantiles = da.vstack(quantiles).compute()
         self.center_ = quantiles[:, 1]
         self.scale_ = quantiles[:, 2] - quantiles[:, 0]

diff --git a/docs/source/preprocessing.rst b/docs/source/preprocessing.rst
@@ -167,7 +167,7 @@ In this toy example, we use a dataset with two columns. ``'A'`` is numeric and
    pipe = make_pipeline(
       Categorizer(),
       DummyEncoder(),
-      LogisticRegression()
+      LogisticRegression(solver='lbfgs')
    )
    pipe.fit(X, y)
 

diff --git a/setup.cfg b/setup.cfg
@@ -30,5 +30,3 @@ source=dask_ml
 addopts = -rsx -v --durations=10
 minversion = 3.2
 xfail_strict = true
-filterwarnings =
-    error:::sklearn[.*]
diff --git a/tests/model_selection/dask_searchcv/test_model_selection.py b/tests/model_selection/dask_searchcv/test_model_selection.py
@@ -84,9 +84,9 @@ def _start(self, dsk):
 def test_visualize():
     pytest.importorskip("graphviz")
 
-    X, y = make_classification(n_samples=100, n_classes=2, flip_y=.2, random_state=0)
+    X, y = make_classification(n_samples=100, n_classes=2, flip_y=0.2, random_state=0)
     clf = SVC(random_state=0, gamma="auto")
-    grid = {"C": [.1, .5, .9]}
+    grid = {"C": [0.1, 0.5, 0.9]}
     gs = dcv.GridSearchCV(clf, grid).fit(X, y)
 
     assert hasattr(gs, "dask_graph_")

diff --git a/tests/model_selection/dask_searchcv/test_model_selection_sklearn.py b/tests/model_selection/dask_searchcv/test_model_selection_sklearn.py
@@ -123,7 +123,7 @@ def test_grid_search_no_score():
     # Test grid-search on classifier that has no score function.
     clf = LinearSVC(random_state=0)
     X, y = make_blobs(random_state=0, centers=2)
-    Cs = [.1, 1, 10]
+    Cs = [0.1, 1, 10]
     clf_no_score = LinearSVCNoScore(random_state=0)
 
     # XXX: It seems there's some global shared state in LinearSVC - fitting
@@ -152,9 +152,9 @@ def test_grid_search_no_score():
 
 
 def test_grid_search_score_method():
-    X, y = make_classification(n_samples=100, n_classes=2, flip_y=.2, random_state=0)
+    X, y = make_classification(n_samples=100, n_classes=2, flip_y=0.2, random_state=0)
     clf = LinearSVC(random_state=0)
-    grid = {"C": [.1]}
+    grid = {"C": [0.1]}
 
     search_no_scoring = dcv.GridSearchCV(clf, grid, scoring=None).fit(X, y)
     search_accuracy = dcv.GridSearchCV(clf, grid, scoring="accuracy").fit(X, y)
@@ -260,7 +260,7 @@ def test_classes__property():
     # Test that classes_ property matches best_estimator_.classes_
     X = np.arange(100).reshape(10, 10)
     y = np.array([0] * 5 + [1] * 5)
-    Cs = [.1, 1, 10]
+    Cs = [0.1, 1, 10]
 
     grid_search = dcv.GridSearchCV(LinearSVC(random_state=0), {"C": Cs})
     grid_search.fit(X, y)
@@ -418,7 +418,7 @@ def test_grid_search_sparse():
     y_pred2 = cv.predict(X_[180:])
     C2 = cv.best_estimator_.C
 
-    assert np.mean(y_pred == y_pred2) >= .9
+    assert np.mean(y_pred == y_pred2) >= 0.9
     assert C == C2
 
 
@@ -611,14 +611,16 @@ def test_gridsearch_no_predict():
     # test grid-search with an estimator without predict.
     # slight duplication of a test from KDE
     def custom_scoring(estimator, X):
-        return 42 if estimator.bandwidth == .1 else 0
+        return 42 if estimator.bandwidth == 0.1 else 0
 
-    X, _ = make_blobs(cluster_std=.1, random_state=1, centers=[[0, 1], [1, 0], [0, 0]])
+    X, _ = make_blobs(cluster_std=0.1, random_state=1, centers=[[0, 1], [1, 0], [0, 0]])
     search = dcv.GridSearchCV(
-        KernelDensity(), param_grid=dict(bandwidth=[.01, .1, 1]), scoring=custom_scoring
+        KernelDensity(),
+        param_grid=dict(bandwidth=[0.01, 0.1, 1]),
+        scoring=custom_scoring,
     )
     search.fit(X)
-    assert search.best_params_["bandwidth"] == .1
+    assert search.best_params_["bandwidth"] == 0.1
     assert search.best_score_ == 42
 
 
@@ -852,15 +854,15 @@ def test_search_iid_param():
 
         # Test the first candidate
         assert search.cv_results_["param_C"][0] == 1
-        assert_array_almost_equal(test_cv_scores, [1, 1. / 3.])
+        assert_array_almost_equal(test_cv_scores, [1, 1.0 / 3.0])
         assert_array_almost_equal(train_cv_scores, [1, 1])
 
         # for first split, 1/4 of dataset is in test, for second 3/4.
         # take weighted average and weighted std
-        expected_test_mean = 1 * 1. / 4. + 1. / 3. * 3. / 4.
+        expected_test_mean = 1 * 1.0 / 4.0 + 1.0 / 3.0 * 3.0 / 4.0
         expected_test_std = np.sqrt(
-            1. / 4 * (expected_test_mean - 1) ** 2
-            + 3. / 4 * (expected_test_mean - 1. / 3.) ** 2
+            1.0 / 4 * (expected_test_mean - 1) ** 2
+            + 3.0 / 4 * (expected_test_mean - 1.0 / 3.0) ** 2
         )
         assert_almost_equal(test_mean, expected_test_mean)
         assert_almost_equal(test_std, expected_test_std)
@@ -911,7 +913,7 @@ def test_search_iid_param():
 
         assert search.cv_results_["param_C"][0] == 1
         # scores are the same as above
-        assert_array_almost_equal(test_cv_scores, [1, 1. / 3.])
+        assert_array_almost_equal(test_cv_scores, [1, 1.0 / 3.0])
         # Unweighted mean/std is used
         assert_almost_equal(test_mean, np.mean(test_cv_scores))
         assert_almost_equal(test_std, np.std(test_cv_scores))
@@ -984,7 +986,7 @@ def test_grid_search_correct_score_results():
     n_splits = 3
     clf = LinearSVC(random_state=0)
     X, y = make_blobs(random_state=0, centers=2)
-    Cs = [.1, 1, 10]
+    Cs = [0.1, 1, 10]
     for score in ["f1", "roc_auc"]:
         # XXX: It seems there's some global shared state in LinearSVC - fitting
         # multiple `SVC` instances in parallel using threads sometimes results

diff --git a/tests/model_selection/test_incremental.py b/tests/model_selection/test_incremental.py
@@ -131,7 +131,7 @@ def test_partial_fit_doesnt_mutate_inputs():
 def test_explicit(c, s, a, b):
     X, y = make_classification(n_samples=1000, n_features=10, chunks=(200, 10))
     model = SGDClassifier(tol=1e-3, penalty="elasticnet")
-    params = [{"alpha": .1}, {"alpha": .2}]
+    params = [{"alpha": 0.1}, {"alpha": 0.2}]
 
     def additional_calls(scores):
         """ Progress through predefined updates, checking along the way """

diff --git a/tests/preprocessing/test_data.py b/tests/preprocessing/test_data.py
@@ -216,7 +216,7 @@ def test_basic(self):
         X = rs.uniform(size=(100, 3), chunks=50)
         a.fit(X)
         b.fit(X)
-        assert_estimator_equal(a, b, atol=.02)
+        assert_estimator_equal(a, b, atol=0.02)
 
         # set the quantiles, so that from here out, we're exact
         a.quantiles_ = b.quantiles_