Fix CI issues (#382)
* Adds pip upgrade to CI

* Set max version number for testpath

* Format with new release 18.9b0 of black (see the sketch after this list)

* Add LogisticRegression solver to fix docs build

* Removes filterwarnings from setup.cfg
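
Most of the churn below is mechanical: the codebase was reformatted with black 18.9b0, which normalizes numeric literals so that bare floats get an explicit zero on each side of the decimal point. A minimal before/after sketch, using hypothetical names rather than lines from this repo:

    # before black 18.9b0
    gamma = 1.
    threshold = .5

    # after running black 18.9b0
    gamma = 1.0
    threshold = 0.5
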
jrbourbeau authored and TomAugspurger committed Oct 4, 2018
1 parent 3260747 commit 61e8786
Showing 19 changed files with 56 additions and 52 deletions.
1 change: 1 addition & 0 deletions .travis.yml
@@ -23,6 +23,7 @@ install:
# Replace dep1 dep2 ... with your dependencies
- conda env create --file=ci/environment-3.6.yml --name=dask-ml
- source activate dask-ml
+ - pip install pip --upgrade
- python -m pip install -e .

script:
1 change: 1 addition & 0 deletions ci/environment-2.7.yml
@@ -29,6 +29,7 @@ dependencies:
- sphinx_rtd_theme
- sphinx-gallery
- tensorflow
+ - testpath<0.4
- tornado
- toolz
- xgboost
1 change: 1 addition & 0 deletions ci/environment-3.6.yml
@@ -37,6 +37,7 @@ dependencies:
- sphinx_rtd_theme
- sphinx-gallery
- tensorflow
+ - testpath<0.4
- tornado
- toolz
- xgboost
1 change: 1 addition & 0 deletions ci/install-circle.sh
@@ -8,5 +8,6 @@ conda config --add channels conda-forge
conda env create -f ci/environment-${PYTHON}.yml --name=${ENV_NAME} --quiet
conda env list
source activate ${ENV_NAME}
+ pip install pip --upgrade
pip install --no-deps --quiet -e .
conda list -n ${ENV_NAME}
2 changes: 1 addition & 1 deletion dask_ml/cluster/spectral.py
@@ -142,7 +142,7 @@ def __init__(
eigen_solver=None,
random_state=None,
n_init=10,
- gamma=1.,
+ gamma=1.0,
affinity="rbf",
n_neighbors=10,
eigen_tol=0.0,
8 changes: 4 additions & 4 deletions dask_ml/decomposition/pca.py
@@ -211,7 +211,7 @@ def _fit(self, X):
# Small problem, just call full PCA
if max(X.shape) <= 500:
solver = "full"
- elif n_components >= 1 and n_components < .8 * min(X.shape):
+ elif n_components >= 1 and n_components < 0.8 * min(X.shape):
solver = "randomized"
# This is also the case of n_components in (0,1)
else:
@@ -281,7 +281,7 @@ def _fit(self, X):
else:
noise_variance = explained_variance[n_components:].mean()
else:
- noise_variance = 0.
+ noise_variance = 0.0

(
self.n_samples_,
@@ -427,8 +427,8 @@ def score_samples(self, X):
Xr = X - self.mean_
n_features = X.shape[1]
precision = self.get_precision() # [n_features, n_features]
- log_like = -.5 * (Xr * (da.dot(Xr, precision))).sum(axis=1)
- log_like -= .5 * (n_features * da.log(2. * np.pi) - fast_logdet(precision))
+ log_like = -0.5 * (Xr * (da.dot(Xr, precision))).sum(axis=1)
+ log_like -= 0.5 * (n_features * da.log(2.0 * np.pi) - fast_logdet(precision))
return log_like

def score(self, X, y=None):
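
For context on the hunk above: aside from the literal padding, the two log_like lines are the standard Gaussian log-likelihood that score_samples evaluates, with Λ the precision matrix from get_precision(), μ = self.mean_, and d = n_features. In LaTeX:

    \log p(x) = -\tfrac{1}{2}(x - \mu)^\top \Lambda (x - \mu) - \tfrac{1}{2}\left(d \log(2\pi) - \log\det\Lambda\right)
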
2 changes: 1 addition & 1 deletion dask_ml/decomposition/truncated_svd.py
@@ -7,7 +7,7 @@

class TruncatedSVD(BaseEstimator, TransformerMixin):
def __init__(
- self, n_components=2, algorithm="tsqr", n_iter=5, random_state=None, tol=0.
+ self, n_components=2, algorithm="tsqr", n_iter=5, random_state=None, tol=0.0
):
"""Dimensionality reduction using truncated SVD (aka LSA).
2 changes: 1 addition & 1 deletion dask_ml/linear_model/glm.py
@@ -228,7 +228,7 @@ def predict(self, X):
C : array, shape = [n_samples,]
Predicted class labels for each sample
"""
- return self.predict_proba(X) > .5  # TODO: verify, multiclass broken
+ return self.predict_proba(X) > 0.5  # TODO: verify, multiclass broken

def predict_proba(self, X):
"""Probability estimates for samples in X.
2 changes: 1 addition & 1 deletion dask_ml/metrics/regression.py
@@ -84,7 +84,7 @@ def r2_score(
output_scores[valid_score] = 1 - (
numerator[valid_score] / denominator[valid_score]
)
- output_scores[nonzero_numerator & ~nonzero_denominator] = 0.
+ output_scores[nonzero_numerator & ~nonzero_denominator] = 0.0

result = output_scores.mean(axis=0)
if compute:
8 changes: 4 additions & 4 deletions dask_ml/model_selection/utils_test.py
@@ -29,9 +29,9 @@ def transform(self, X):

def score(self, X=None, Y=None):
if self.foo_param > 1:
- score = 1.
+ score = 1.0
else:
- score = 0.
+ score = 0.0
return score

def get_params(self, deep=False):
@@ -184,7 +184,7 @@ def predict(self, T):

def score(self, X=None, Y=None):
if self.foo_param > 1:
- score = 1.
+ score = 1.0
else:
- score = 0.
+ score = 0.0
return score
2 changes: 1 addition & 1 deletion dask_ml/naive_bayes.py
@@ -112,7 +112,7 @@ def _joint_log_likelihood(self, X):
jll = []
for i in range(np.size(self.classes_)):
jointi = da.log(self.class_prior_[i])
- n_ij = -0.5 * da.sum(da.log(2. * np.pi * self.sigma_[i, :]))
+ n_ij = -0.5 * da.sum(da.log(2.0 * np.pi * self.sigma_[i, :]))
n_ij -= 0.5 * da.sum(
((X - self.theta_[i, :]) ** 2) / (self.sigma_[i, :]), 1
)
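
For context, the reformatted n_ij line is the Gaussian normalization term of GaussianNB's joint log-likelihood; the quantity the loop assembles for class i is, in LaTeX:

    \log P(c_i) - \tfrac{1}{2}\sum_j \log(2\pi\,\sigma_{ij}) - \tfrac{1}{2}\sum_j \frac{(x_j - \theta_{ij})^2}{\sigma_{ij}}
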
2 changes: 1 addition & 1 deletion dask_ml/preprocessing/data.py
@@ -156,7 +156,7 @@ def fit(self, X, y=None):
]
)

- quantiles = [da.percentile(col, [q_min, 50., q_max]) for col in X.T]
+ quantiles = [da.percentile(col, [q_min, 50.0, q_max]) for col in X.T]
quantiles = da.vstack(quantiles).compute()
self.center_ = quantiles[:, 1]
self.scale_ = quantiles[:, 2] - quantiles[:, 0]
2 changes: 1 addition & 1 deletion docs/source/preprocessing.rst
@@ -167,7 +167,7 @@ In this toy example, we use a dataset with two columns. ``'A'`` is numeric and
pipe = make_pipeline(
Categorizer(),
DummyEncoder(),
- LogisticRegression()
+ LogisticRegression(solver='lbfgs')
)
pipe.fit(X, y)
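
A note on why this likely fixes the docs build: scikit-learn 0.20 emits a FutureWarning when LogisticRegression is fit without an explicit solver (the default changes to 'lbfgs' in 0.22), which pollutes or breaks a strict docs/CI run. A minimal sketch under that assumption, with hypothetical toy data:

    import warnings

    import numpy as np
    from sklearn.linear_model import LogisticRegression

    X = np.array([[0.0], [1.0], [2.0], [3.0]])
    y = np.array([0, 0, 1, 1])

    with warnings.catch_warnings():
        # Treat FutureWarning as an error, the way a strict docs build might.
        warnings.simplefilter("error", FutureWarning)
        LogisticRegression(solver="lbfgs").fit(X, y)  # passes: solver pinned explicitly
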
2 changes: 0 additions & 2 deletions setup.cfg
@@ -30,5 +30,3 @@ source=dask_ml
addopts = -rsx -v --durations=10
minversion = 3.2
xfail_strict = true
- filterwarnings =
-     error:::sklearn[.*]
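
The removed entry uses pytest's warning-filter syntax (action:message:category:module), escalating any warning raised from a module matching sklearn[.*] into a test failure. A rough in-code equivalent via the stdlib, for reference:

    import warnings

    # Roughly mirrors the removed `error:::sklearn[.*]` pytest filter:
    # warnings whose originating module matches the regex become errors.
    warnings.filterwarnings("error", module=r"sklearn[.*]")
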
4 changes: 2 additions & 2 deletions tests/model_selection/dask_searchcv/test_model_selection.py
@@ -84,9 +84,9 @@ def _start(self, dsk):
def test_visualize():
pytest.importorskip("graphviz")

- X, y = make_classification(n_samples=100, n_classes=2, flip_y=.2, random_state=0)
+ X, y = make_classification(n_samples=100, n_classes=2, flip_y=0.2, random_state=0)
clf = SVC(random_state=0, gamma="auto")
grid = {"C": [.1, .5, .9]}
grid = {"C": [0.1, 0.5, 0.9]}
gs = dcv.GridSearchCV(clf, grid).fit(X, y)

assert hasattr(gs, "dask_graph_")
32 changes: 17 additions & 15 deletions tests/model_selection/dask_searchcv/test_model_selection_sklearn.py
@@ -123,7 +123,7 @@ def test_grid_search_no_score():
# Test grid-search on classifier that has no score function.
clf = LinearSVC(random_state=0)
X, y = make_blobs(random_state=0, centers=2)
- Cs = [.1, 1, 10]
+ Cs = [0.1, 1, 10]
clf_no_score = LinearSVCNoScore(random_state=0)

# XXX: It seems there's some global shared state in LinearSVC - fitting
@@ -152,9 +152,9 @@ def test_grid_search_score_method():


def test_grid_search_score_method():
- X, y = make_classification(n_samples=100, n_classes=2, flip_y=.2, random_state=0)
+ X, y = make_classification(n_samples=100, n_classes=2, flip_y=0.2, random_state=0)
clf = LinearSVC(random_state=0)
grid = {"C": [.1]}
grid = {"C": [0.1]}

search_no_scoring = dcv.GridSearchCV(clf, grid, scoring=None).fit(X, y)
search_accuracy = dcv.GridSearchCV(clf, grid, scoring="accuracy").fit(X, y)
@@ -260,7 +260,7 @@ def test_classes__property():
# Test that classes_ property matches best_estimator_.classes_
X = np.arange(100).reshape(10, 10)
y = np.array([0] * 5 + [1] * 5)
- Cs = [.1, 1, 10]
+ Cs = [0.1, 1, 10]

grid_search = dcv.GridSearchCV(LinearSVC(random_state=0), {"C": Cs})
grid_search.fit(X, y)
@@ -418,7 +418,7 @@ def test_grid_search_sparse():
y_pred2 = cv.predict(X_[180:])
C2 = cv.best_estimator_.C

- assert np.mean(y_pred == y_pred2) >= .9
+ assert np.mean(y_pred == y_pred2) >= 0.9
assert C == C2


@@ -611,14 +611,16 @@ def test_gridsearch_no_predict():
# test grid-search with an estimator without predict.
# slight duplication of a test from KDE
def custom_scoring(estimator, X):
- return 42 if estimator.bandwidth == .1 else 0
+ return 42 if estimator.bandwidth == 0.1 else 0

- X, _ = make_blobs(cluster_std=.1, random_state=1, centers=[[0, 1], [1, 0], [0, 0]])
+ X, _ = make_blobs(cluster_std=0.1, random_state=1, centers=[[0, 1], [1, 0], [0, 0]])
search = dcv.GridSearchCV(
- KernelDensity(), param_grid=dict(bandwidth=[.01, .1, 1]), scoring=custom_scoring
+ KernelDensity(),
+ param_grid=dict(bandwidth=[0.01, 0.1, 1]),
+ scoring=custom_scoring,
)
search.fit(X)
- assert search.best_params_["bandwidth"] == .1
+ assert search.best_params_["bandwidth"] == 0.1
assert search.best_score_ == 42


@@ -852,15 +854,15 @@ def test_search_iid_param():

# Test the first candidate
assert search.cv_results_["param_C"][0] == 1
- assert_array_almost_equal(test_cv_scores, [1, 1. / 3.])
+ assert_array_almost_equal(test_cv_scores, [1, 1.0 / 3.0])
assert_array_almost_equal(train_cv_scores, [1, 1])

# for first split, 1/4 of dataset is in test, for second 3/4.
# take weighted average and weighted std
- expected_test_mean = 1 * 1. / 4. + 1. / 3. * 3. / 4.
+ expected_test_mean = 1 * 1.0 / 4.0 + 1.0 / 3.0 * 3.0 / 4.0
expected_test_std = np.sqrt(
- 1. / 4 * (expected_test_mean - 1) ** 2
- + 3. / 4 * (expected_test_mean - 1. / 3.) ** 2
+ 1.0 / 4 * (expected_test_mean - 1) ** 2
+ + 3.0 / 4 * (expected_test_mean - 1.0 / 3.0) ** 2
)
assert_almost_equal(test_mean, expected_test_mean)
assert_almost_equal(test_std, expected_test_std)
@@ -911,7 +913,7 @@ def test_search_iid_param():

assert search.cv_results_["param_C"][0] == 1
# scores are the same as above
- assert_array_almost_equal(test_cv_scores, [1, 1. / 3.])
+ assert_array_almost_equal(test_cv_scores, [1, 1.0 / 3.0])
# Unweighted mean/std is used
assert_almost_equal(test_mean, np.mean(test_cv_scores))
assert_almost_equal(test_std, np.std(test_cv_scores))
@@ -984,7 +986,7 @@ def test_grid_search_correct_score_results():
n_splits = 3
clf = LinearSVC(random_state=0)
X, y = make_blobs(random_state=0, centers=2)
- Cs = [.1, 1, 10]
+ Cs = [0.1, 1, 10]
for score in ["f1", "roc_auc"]:
# XXX: It seems there's some global shared state in LinearSVC - fitting
# multiple `SVC` instances in parallel using threads sometimes results
2 changes: 1 addition & 1 deletion tests/model_selection/test_incremental.py
@@ -131,7 +131,7 @@ def test_explicit(c, s, a, b):
def test_explicit(c, s, a, b):
X, y = make_classification(n_samples=1000, n_features=10, chunks=(200, 10))
model = SGDClassifier(tol=1e-3, penalty="elasticnet")
params = [{"alpha": .1}, {"alpha": .2}]
params = [{"alpha": 0.1}, {"alpha": 0.2}]

def additional_calls(scores):
""" Progress through predefined updates, checking along the way """
2 changes: 1 addition & 1 deletion tests/preprocessing/test_data.py
@@ -216,7 +216,7 @@ def test_basic(self):
X = rs.uniform(size=(100, 3), chunks=50)
a.fit(X)
b.fit(X)
- assert_estimator_equal(a, b, atol=.02)
+ assert_estimator_equal(a, b, atol=0.02)

# set the quantiles, so that from here out, we're exact
a.quantiles_ = b.quantiles_