Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Fix CI issues #382

Merged
merged 5 commits into from
Oct 4, 2018
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
1 change: 1 addition & 0 deletions .travis.yml
Original file line number Diff line number Diff line change
Expand Up @@ -23,6 +23,7 @@ install:
# Replace dep1 dep2 ... with your dependencies
- conda env create --file=ci/environment-3.6.yml --name=dask-ml
- source activate dask-ml
- pip install pip --upgrade
- python -m pip install -e .

script:
Expand Down
1 change: 1 addition & 0 deletions ci/environment-2.7.yml
Original file line number Diff line number Diff line change
Expand Up @@ -29,6 +29,7 @@ dependencies:
- sphinx_rtd_theme
- sphinx-gallery
- tensorflow
- testpath<0.4
Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

What prompted this?

Copy link
Member Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Copy link
Member Author

@jrbourbeau jrbourbeau Oct 4, 2018

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

This pin is definitely just a quick fix until a new release is out, but it seems to be working for the moment.

- tornado
- toolz
- xgboost
Expand Down
1 change: 1 addition & 0 deletions ci/environment-3.6.yml
Original file line number Diff line number Diff line change
Expand Up @@ -37,6 +37,7 @@ dependencies:
- sphinx_rtd_theme
- sphinx-gallery
- tensorflow
- testpath<0.4
- tornado
- toolz
- xgboost
Expand Down
1 change: 1 addition & 0 deletions ci/install-circle.sh
Original file line number Diff line number Diff line change
Expand Up @@ -8,5 +8,6 @@ conda config --add channels conda-forge
conda env create -f ci/environment-${PYTHON}.yml --name=${ENV_NAME} --quiet
conda env list
source activate ${ENV_NAME}
pip install pip --upgrade
pip install --no-deps --quiet -e .
conda list -n ${ENV_NAME}
2 changes: 1 addition & 1 deletion dask_ml/cluster/spectral.py
Original file line number Diff line number Diff line change
Expand Up @@ -142,7 +142,7 @@ def __init__(
eigen_solver=None,
random_state=None,
n_init=10,
gamma=1.,
gamma=1.0,
affinity="rbf",
n_neighbors=10,
eigen_tol=0.0,
Expand Down
8 changes: 4 additions & 4 deletions dask_ml/decomposition/pca.py
Original file line number Diff line number Diff line change
Expand Up @@ -211,7 +211,7 @@ def _fit(self, X):
# Small problem, just call full PCA
if max(X.shape) <= 500:
solver = "full"
elif n_components >= 1 and n_components < .8 * min(X.shape):
elif n_components >= 1 and n_components < 0.8 * min(X.shape):
solver = "randomized"
# This is also the case of n_components in (0,1)
else:
Expand Down Expand Up @@ -281,7 +281,7 @@ def _fit(self, X):
else:
noise_variance = explained_variance[n_components:].mean()
else:
noise_variance = 0.
noise_variance = 0.0

(
self.n_samples_,
Expand Down Expand Up @@ -427,8 +427,8 @@ def score_samples(self, X):
Xr = X - self.mean_
n_features = X.shape[1]
precision = self.get_precision() # [n_features, n_features]
log_like = -.5 * (Xr * (da.dot(Xr, precision))).sum(axis=1)
log_like -= .5 * (n_features * da.log(2. * np.pi) - fast_logdet(precision))
log_like = -0.5 * (Xr * (da.dot(Xr, precision))).sum(axis=1)
log_like -= 0.5 * (n_features * da.log(2.0 * np.pi) - fast_logdet(precision))
return log_like

def score(self, X, y=None):
Expand Down
2 changes: 1 addition & 1 deletion dask_ml/decomposition/truncated_svd.py
Original file line number Diff line number Diff line change
Expand Up @@ -7,7 +7,7 @@

class TruncatedSVD(BaseEstimator, TransformerMixin):
def __init__(
self, n_components=2, algorithm="tsqr", n_iter=5, random_state=None, tol=0.
self, n_components=2, algorithm="tsqr", n_iter=5, random_state=None, tol=0.0
):
"""Dimensionality reduction using truncated SVD (aka LSA).

Expand Down
2 changes: 1 addition & 1 deletion dask_ml/linear_model/glm.py
Original file line number Diff line number Diff line change
Expand Up @@ -228,7 +228,7 @@ def predict(self, X):
C : array, shape = [n_samples,]
Predicted class labels for each sample
"""
return self.predict_proba(X) > .5 # TODO: verify, multiclass broken
return self.predict_proba(X) > 0.5 # TODO: verify, multiclass broken

def predict_proba(self, X):
"""Probability estimates for samples in X.
Expand Down
2 changes: 1 addition & 1 deletion dask_ml/metrics/regression.py
Original file line number Diff line number Diff line change
Expand Up @@ -84,7 +84,7 @@ def r2_score(
output_scores[valid_score] = 1 - (
numerator[valid_score] / denominator[valid_score]
)
output_scores[nonzero_numerator & ~nonzero_denominator] = 0.
output_scores[nonzero_numerator & ~nonzero_denominator] = 0.0

result = output_scores.mean(axis=0)
if compute:
Expand Down
8 changes: 4 additions & 4 deletions dask_ml/model_selection/utils_test.py
Original file line number Diff line number Diff line change
Expand Up @@ -29,9 +29,9 @@ def transform(self, X):

def score(self, X=None, Y=None):
if self.foo_param > 1:
score = 1.
score = 1.0
else:
score = 0.
score = 0.0
return score

def get_params(self, deep=False):
Expand Down Expand Up @@ -184,7 +184,7 @@ def predict(self, T):

def score(self, X=None, Y=None):
if self.foo_param > 1:
score = 1.
score = 1.0
else:
score = 0.
score = 0.0
return score
2 changes: 1 addition & 1 deletion dask_ml/naive_bayes.py
Original file line number Diff line number Diff line change
Expand Up @@ -112,7 +112,7 @@ def _joint_log_likelihood(self, X):
jll = []
for i in range(np.size(self.classes_)):
jointi = da.log(self.class_prior_[i])
n_ij = -0.5 * da.sum(da.log(2. * np.pi * self.sigma_[i, :]))
n_ij = -0.5 * da.sum(da.log(2.0 * np.pi * self.sigma_[i, :]))
n_ij -= 0.5 * da.sum(
((X - self.theta_[i, :]) ** 2) / (self.sigma_[i, :]), 1
)
Expand Down
2 changes: 1 addition & 1 deletion dask_ml/preprocessing/data.py
Original file line number Diff line number Diff line change
Expand Up @@ -156,7 +156,7 @@ def fit(self, X, y=None):
]
)

quantiles = [da.percentile(col, [q_min, 50., q_max]) for col in X.T]
quantiles = [da.percentile(col, [q_min, 50.0, q_max]) for col in X.T]
quantiles = da.vstack(quantiles).compute()
self.center_ = quantiles[:, 1]
self.scale_ = quantiles[:, 2] - quantiles[:, 0]
Expand Down
2 changes: 1 addition & 1 deletion docs/source/preprocessing.rst
Original file line number Diff line number Diff line change
Expand Up @@ -167,7 +167,7 @@ In this toy example, we use a dataset with two columns. ``'A'`` is numeric and
pipe = make_pipeline(
Categorizer(),
DummyEncoder(),
LogisticRegression()
LogisticRegression(solver='lbfgs')
)
pipe.fit(X, y)

Expand Down
2 changes: 0 additions & 2 deletions setup.cfg
Original file line number Diff line number Diff line change
Expand Up @@ -30,5 +30,3 @@ source=dask_ml
addopts = -rsx -v --durations=10
minversion = 3.2
xfail_strict = true
filterwarnings =
error:::sklearn[.*]
4 changes: 2 additions & 2 deletions tests/model_selection/dask_searchcv/test_model_selection.py
Original file line number Diff line number Diff line change
Expand Up @@ -84,9 +84,9 @@ def _start(self, dsk):
def test_visualize():
pytest.importorskip("graphviz")

X, y = make_classification(n_samples=100, n_classes=2, flip_y=.2, random_state=0)
X, y = make_classification(n_samples=100, n_classes=2, flip_y=0.2, random_state=0)
clf = SVC(random_state=0, gamma="auto")
grid = {"C": [.1, .5, .9]}
grid = {"C": [0.1, 0.5, 0.9]}
gs = dcv.GridSearchCV(clf, grid).fit(X, y)

assert hasattr(gs, "dask_graph_")
Expand Down
32 changes: 17 additions & 15 deletions tests/model_selection/dask_searchcv/test_model_selection_sklearn.py
Original file line number Diff line number Diff line change
Expand Up @@ -123,7 +123,7 @@ def test_grid_search_no_score():
# Test grid-search on classifier that has no score function.
clf = LinearSVC(random_state=0)
X, y = make_blobs(random_state=0, centers=2)
Cs = [.1, 1, 10]
Cs = [0.1, 1, 10]
clf_no_score = LinearSVCNoScore(random_state=0)

# XXX: It seems there's some global shared state in LinearSVC - fitting
Expand Down Expand Up @@ -152,9 +152,9 @@ def test_grid_search_no_score():


def test_grid_search_score_method():
X, y = make_classification(n_samples=100, n_classes=2, flip_y=.2, random_state=0)
X, y = make_classification(n_samples=100, n_classes=2, flip_y=0.2, random_state=0)
clf = LinearSVC(random_state=0)
grid = {"C": [.1]}
grid = {"C": [0.1]}

search_no_scoring = dcv.GridSearchCV(clf, grid, scoring=None).fit(X, y)
search_accuracy = dcv.GridSearchCV(clf, grid, scoring="accuracy").fit(X, y)
Expand Down Expand Up @@ -260,7 +260,7 @@ def test_classes__property():
# Test that classes_ property matches best_estimator_.classes_
X = np.arange(100).reshape(10, 10)
y = np.array([0] * 5 + [1] * 5)
Cs = [.1, 1, 10]
Cs = [0.1, 1, 10]

grid_search = dcv.GridSearchCV(LinearSVC(random_state=0), {"C": Cs})
grid_search.fit(X, y)
Expand Down Expand Up @@ -418,7 +418,7 @@ def test_grid_search_sparse():
y_pred2 = cv.predict(X_[180:])
C2 = cv.best_estimator_.C

assert np.mean(y_pred == y_pred2) >= .9
assert np.mean(y_pred == y_pred2) >= 0.9
assert C == C2


Expand Down Expand Up @@ -611,14 +611,16 @@ def test_gridsearch_no_predict():
# test grid-search with an estimator without predict.
# slight duplication of a test from KDE
def custom_scoring(estimator, X):
return 42 if estimator.bandwidth == .1 else 0
return 42 if estimator.bandwidth == 0.1 else 0

X, _ = make_blobs(cluster_std=.1, random_state=1, centers=[[0, 1], [1, 0], [0, 0]])
X, _ = make_blobs(cluster_std=0.1, random_state=1, centers=[[0, 1], [1, 0], [0, 0]])
search = dcv.GridSearchCV(
KernelDensity(), param_grid=dict(bandwidth=[.01, .1, 1]), scoring=custom_scoring
KernelDensity(),
param_grid=dict(bandwidth=[0.01, 0.1, 1]),
scoring=custom_scoring,
)
search.fit(X)
assert search.best_params_["bandwidth"] == .1
assert search.best_params_["bandwidth"] == 0.1
assert search.best_score_ == 42


Expand Down Expand Up @@ -852,15 +854,15 @@ def test_search_iid_param():

# Test the first candidate
assert search.cv_results_["param_C"][0] == 1
assert_array_almost_equal(test_cv_scores, [1, 1. / 3.])
assert_array_almost_equal(test_cv_scores, [1, 1.0 / 3.0])
assert_array_almost_equal(train_cv_scores, [1, 1])

# for first split, 1/4 of dataset is in test, for second 3/4.
# take weighted average and weighted std
expected_test_mean = 1 * 1. / 4. + 1. / 3. * 3. / 4.
expected_test_mean = 1 * 1.0 / 4.0 + 1.0 / 3.0 * 3.0 / 4.0
expected_test_std = np.sqrt(
1. / 4 * (expected_test_mean - 1) ** 2
+ 3. / 4 * (expected_test_mean - 1. / 3.) ** 2
1.0 / 4 * (expected_test_mean - 1) ** 2
+ 3.0 / 4 * (expected_test_mean - 1.0 / 3.0) ** 2
)
assert_almost_equal(test_mean, expected_test_mean)
assert_almost_equal(test_std, expected_test_std)
Expand Down Expand Up @@ -911,7 +913,7 @@ def test_search_iid_param():

assert search.cv_results_["param_C"][0] == 1
# scores are the same as above
assert_array_almost_equal(test_cv_scores, [1, 1. / 3.])
assert_array_almost_equal(test_cv_scores, [1, 1.0 / 3.0])
# Unweighted mean/std is used
assert_almost_equal(test_mean, np.mean(test_cv_scores))
assert_almost_equal(test_std, np.std(test_cv_scores))
Expand Down Expand Up @@ -984,7 +986,7 @@ def test_grid_search_correct_score_results():
n_splits = 3
clf = LinearSVC(random_state=0)
X, y = make_blobs(random_state=0, centers=2)
Cs = [.1, 1, 10]
Cs = [0.1, 1, 10]
for score in ["f1", "roc_auc"]:
# XXX: It seems there's some global shared state in LinearSVC - fitting
# multiple `SVC` instances in parallel using threads sometimes results
Expand Down
2 changes: 1 addition & 1 deletion tests/model_selection/test_incremental.py
Original file line number Diff line number Diff line change
Expand Up @@ -131,7 +131,7 @@ def test_partial_fit_doesnt_mutate_inputs():
def test_explicit(c, s, a, b):
X, y = make_classification(n_samples=1000, n_features=10, chunks=(200, 10))
model = SGDClassifier(tol=1e-3, penalty="elasticnet")
params = [{"alpha": .1}, {"alpha": .2}]
params = [{"alpha": 0.1}, {"alpha": 0.2}]

def additional_calls(scores):
""" Progress through predefined updates, checking along the way """
Expand Down
2 changes: 1 addition & 1 deletion tests/preprocessing/test_data.py
Original file line number Diff line number Diff line change
Expand Up @@ -216,7 +216,7 @@ def test_basic(self):
X = rs.uniform(size=(100, 3), chunks=50)
a.fit(X)
b.fit(X)
assert_estimator_equal(a, b, atol=.02)
assert_estimator_equal(a, b, atol=0.02)

# set the quantiles, so that from here out, we're exact
a.quantiles_ = b.quantiles_
Expand Down
Loading