Secure your code as it's written. Use Snyk Code to scan source code in minutes - no build needed - and fix issues immediately.
@gen_cluster(client=True)
def _test_sha_max_iter(c, s, a, b):
    # SuccessiveHalvingSearchCV bookkeeping: with n initial parameters each
    # trained for r initial partial_fit calls, the smallest call count seen
    # (ignoring the single-call validation entry) is r, and exactly one
    # candidate survives to be trained the longest.
    # NOTE(review): ``n`` and ``r`` are closure variables from an enclosing
    # (parametrized) test not visible in this chunk — TODO confirm.
    model = SGDClassifier(tol=1e-3)
    params = {"alpha": np.logspace(-3, 0, num=1000)}
    search = SuccessiveHalvingSearchCV(
        model, params, n_initial_parameters=n, n_initial_iter=r
    )
    X, y = make_classification()
    yield search.fit(X, y, classes=np.unique(y))
    calls = set(search.cv_results_["partial_fit_calls"]) - {1}
    assert min(calls) == r
    # One model trained to completion.
    # (The original assertion was truncated mid-statement; completed here so
    # that exactly one candidate reaches the maximum number of calls.)
    assert (
        np.asarray(search.cv_results_["partial_fit_calls"]) == max(calls)
    ).sum() == 1
@gen_cluster(client=True, timeout=5000)
def test_min_max_iter(c, s, a, b):
    # Hyperband must work at the boundary case max_iter=1.
    # (Cases with max_iter < 1 are covered in test_incremental.py.)
    prior = scipy.stats.uniform(0, 1)
    X, y = make_classification(n_samples=10, n_features=4, chunks=10)
    search = HyperbandSearchCV(ConstantFunction(), {"value": prior}, max_iter=1)
    yield search.fit(X, y)
    assert search.best_score_ > 0
@gen_cluster(client=True)
def test_search_patience_infeasible_tol(c, s, a, b):
    # With a large negative tol, every model always "improves", so patience
    # can never stop training early and every surviving model must receive
    # the full max_iter partial_fit calls.
    X, y = make_classification(n_samples=100, n_features=5, chunks=(10, 5))
    rng = check_random_state(42)
    limit = 10
    search = IncrementalSearchCV(
        ConstantFunction(),
        {"value": rng.rand(1000)},
        max_iter=limit,
        patience=3,
        tol=-10,
        decay_rate=0,
    )
    yield search.fit(X, y, classes=[0, 1])
    history = pd.DataFrame(search.history_)
    assert history.partial_fit_calls.max() == limit
@gen_cluster(client=True)
def test_search_patience_infeasible_tol(c, s, a, b):
    # tol=np.nan must not break SuccessiveHalvingSearchCV's patience logic;
    # fitting should simply complete without raising.
    X, y = make_classification(n_samples=100, n_features=5)
    rng = np.random.RandomState(42)
    search = SuccessiveHalvingSearchCV(
        ConstantFunction(),
        {"value": rng.rand(1000)},
        patience=2,
        tol=np.nan,
        n_initial_parameters=20,
        n_initial_iter=4,
        max_iter=1000,
    )
    yield search.fit(X, y, classes=[0, 1])
@gen_cluster(client=True)
def test_min_max_iter(c, s, a, b):
    # max_iter=0 is meaningless: fitting must fail with a clear ValueError.
    X, y = make_classification(n_samples=100, n_features=5, chunks=(10, 5))
    search = IncrementalSearchCV(
        SGDClassifier(), {"alpha": np.logspace(-3, 0)}, max_iter=0
    )
    with pytest.raises(ValueError, match="max_iter < 1 is not supported"):
        yield search.fit(X, y, classes=[0, 1])
@gen_cluster(client=True)
def test_gridsearch(c, s, a, b):
    # n_initial_parameters="grid" must try exactly the full parameter grid —
    # the set of parameter combinations in history_ equals ParameterGrid's.
    X, y = make_classification(n_samples=100, n_features=5, chunks=(10, 5))
    params = {"alpha": np.logspace(-2, 10, 3), "l1_ratio": np.linspace(0.01, 1, 2)}
    search = IncrementalSearchCV(
        SGDClassifier(tol=1e-3), params, n_initial_parameters="grid"
    )
    yield search.fit(X, y, classes=[0, 1])
    tried = {frozenset(entry["params"].items()) for entry in search.history_}
    expected = {frozenset(combo.items()) for combo in ParameterGrid(params)}
    assert tried == expected
@gen_cluster(client=True)
def test_transform(c, s, a, b):
    # After fitting, transform() delegates to best_estimator_: the output has
    # one column per cluster of the winning MiniBatchKMeans model.
    X, y = make_classification(n_samples=100, n_features=5, chunks=(10, 5))
    search = IncrementalSearchCV(
        MiniBatchKMeans(random_state=0),
        {"n_clusters": [3, 4, 5], "n_init": [1, 2]},
        n_initial_parameters="grid",
    )
    yield search.fit(X, y)
    [X_local] = yield c.compute([X])
    transformed = search.transform(X_local)
    assert transformed.shape == (100, search.best_estimator_.n_clusters)
@gen_cluster(client=True, timeout=None)
async def test_async(c, s, a, b):
    """Test asynchronous operations."""
    # Build a dask-array-backed Quantity and persist it on the cluster.
    # ``ureg`` and ``units_`` come from module scope — not visible here.
    da = dask.array.arange(0, 25, chunks=5, dtype=float).reshape((5, 5))
    q = ureg.Quantity(da, units_)
    x = q + ureg.Quantity(5, units_)
    y = x.persist()  # materialize on the cluster; x stays lazy
    assert str(y)  # repr of a persisted quantity must not raise
    assert dask.is_dask_collection(y)
    # Persisting collapses the task graph, so y's graph is smaller than x's.
    assert len(x.__dask_graph__()) > len(y.__dask_graph__())
    # Only the persisted collection holds distributed futures.
    assert not futures_of(x)
    assert futures_of(y)
    # NOTE(review): the computed future is never awaited or checked — this
    # block appears truncated; confirm against the original test.
    future = c.compute(y)
@gen_cluster(client=True, timeout=5000)
def test_same_random_state_same_params(c, s, a, b):
    # This makes sure parameters are sampled correctly when random state is
    # specified.
    # This test makes sure random state is *correctly* passed to successive
    # halvings from Hyperband
    seed = 0
    values = scipy.stats.uniform(0, 1)
    h = HyperbandSearchCV(
        ConstantFunction(), {"value": values}, random_state=seed, max_iter=9
    )
    # Make a class for passive random sampling
    passive = IncrementalSearchCV(
        ConstantFunction(),
        {"value": values},
        # NOTE(review): this call is truncated mid-argument-list — the
        # remaining kwargs (e.g. random_state) and the rest of the test
        # body are not visible in this chunk; restore from the original.
@gen_cluster(client=True, timeout=5000)
def test_correct_params(c, s, a, b):
# Makes sure that Hyperband has the correct parameters.
# Implemented because Hyperband wraps SHA. Again, makes sure that parameters
# are correctly passed to SHA (had a case where max_iter= flag not passed to
# SuccessiveHalvingSearchCV but it should have been)
est = ConstantFunction()
X, y = make_classification(n_samples=10, n_features=4, chunks=10)
params = {"value": np.linspace(0, 1)}
search = HyperbandSearchCV(est, params, max_iter=9)
base = {
"estimator",
"estimator__value",
"estimator__sleep",
"parameters",