Secure your code as it's written. Use Snyk Code to scan source code in minutes - no build needed - and fix issues immediately.
enable_proctitle_on_current()
enable_proctitle_on_children()

if sys.platform.startswith('linux'):
    # Imported here rather than at file top: the module fails importing
    # on Windows.
    import resource

    # Raise the soft open-file limit to at least half of the hard limit,
    # never lowering it below its current value.
    soft, hard = resource.getrlimit(resource.RLIMIT_NOFILE)
    limit = max(soft, hard // 2)
    resource.setrlimit(resource.RLIMIT_NOFILE, (limit, hard))

addr = uri_from_host_port('', None, 0)
loop = IOLoop.current()

# Register the Bokeh service only when its module can be imported;
# ``bokeh`` records whether that succeeded.
bokeh = False
services = {}
with ignoring(ImportError):
    from distributed.bokeh.scheduler import BokehScheduler

    services[('bokeh', 0)] = (BokehScheduler, {})
    bokeh = True

scheduler = Scheduler(loop=loop, services=services)
scheduler.start(addr)
install_signal_handlers(loop)

# Publish the scheduler address (as bytes) under a well-known key.
app_client.kv['dask.scheduler'] = scheduler.address.encode()

if bokeh:
    # Build the dashboard URL from the scheduler's hostname and the port
    # the Bokeh service reports.
    bokeh_port = scheduler.services['bokeh'].port
    bokeh_host = urlparse(scheduler.address).hostname
    bokeh_address = 'http://%s:%d' % (bokeh_host, bokeh_port)
def bisect(target, left, right):
    """Binary-search ``self.buffer`` for the first index at or after *target*.

    For each probed entry the latest ``"stop"`` value among its
    ``"startstops"`` is compared against *target*.  The search narrows the
    half-open window ``[left, right)`` until it is empty and returns that
    position.

    NOTE(review): this presumably relies on ``self.buffer`` being ordered by
    stop time -- confirm against the code that fills the buffer.
    """
    lo, hi = left, right
    while lo != hi:
        middle = (lo + hi) // 2
        latest_stop = max(
            entry["stop"] for entry in self.buffer[middle]["startstops"]
        )
        if latest_stop < target:
            # Everything up to and including ``middle`` ends too early.
            lo = middle + 1
        else:
            hi = middle
    return lo
# A string bound is parsed as a duration and measured back from "now";
# any non-None bound is then converted into an index into ``self.buffer``.
if isinstance(start, str):
    start = time() - parse_timedelta(start)
if start is not None:
    start = bisect(start, 0, len(self.buffer))

if isinstance(stop, str):
    stop = time() - parse_timedelta(stop)
if stop is not None:
    stop = bisect(stop, 0, len(self.buffer))

# With ``count`` given, derive whichever bound is missing from the other;
# when both are missing, take the last ``count`` entries.  If both bounds
# are already set, ``count`` changes nothing.
if count is not None:
    if start is None:
        if stop is None:
            stop = len(self.buffer)
        start = stop - count
    elif stop is None:
        stop = start + count
def set_tcp_timeout(comm):
"""
Set kernel-level TCP timeout on the stream.
"""
# Nothing to configure on a stream that is already closed.
if comm.closed():
return
# Read the configured timeout (parsed with a default unit of seconds) and
# truncate it to an integer.
timeout = dask.config.get("distributed.comm.timeouts.tcp")
timeout = int(parse_timedelta(timeout, default="seconds"))
sock = comm.socket
# Default (unsettable) value on Windows
# https://msdn.microsoft.com/en-us/library/windows/desktop/dd877220(v=vs.85).aspx
nprobes = 10
# The timeout must exceed the probe count so the arithmetic below can give
# every probe an interval of at least 1 and still leave a positive idle time.
assert timeout >= nprobes + 1, "Timeout too low"
# First estimate: idle for about a quarter of the timeout (at least 2), then
# spread the remainder over the probes (at least 1 each).
idle = max(2, timeout // 4)
interval = max(1, (timeout - idle) // nprobes)
# Recompute idle so that idle + interval * nprobes equals timeout exactly,
# absorbing the rounding from the integer division above.
idle = timeout - interval * nprobes
assert idle > 0
try:
if sys.platform.startswith("win"):
logger.debug("Setting TCP keepalive: idle=%d, interval=%d", idle, interval)
def test_dask_cv_single(self):
    """Cross-validate decision trees on a local Dask cluster.

    First submits a single ``cross_val_score`` task and checks its mean
    score is positive.  Then scales the cluster up and submits one task per
    ``min_samples_leaf`` value, checking that every task comes back with a
    non-negative mean score.
    """
    test_cluster = LocalCluster(1)
    test_client = Client(test_cluster)
    try:
        iris = load_iris()
        reg = tree.DecisionTreeClassifier()
        cv_score = test_client.submit(
            cross_val_score, reg, iris.data, iris.target
        )
        self.assertGreater(cv_score.result().mean(), 0)

        test_cluster.scale_up(4)
        # The key must be formatted with ``i``: with the bare 'reg_%i'
        # literal every entry shared one key, so only the last future
        # survived and was checked.  ``min_samples_leaf`` starts at 1
        # because 0 is not a valid value for the estimator.
        _cv_results = {
            'reg_%i' % i: test_client.submit(
                cross_val_score,
                tree.DecisionTreeClassifier(min_samples_leaf=i),
                iris.data,
                iris.target,
            )
            for i in range(1, 6)
        }
        cv_results = test_client.gather(list(_cv_results.values()))
        self.assertEqual(5, len(cv_results))
        for cv_result in cv_results:
            self.assertGreaterEqual(cv_result.mean(), 0)
    finally:
        # Release the client and worker processes even when an assertion
        # fails, so they do not leak into later tests.
        test_client.close()
        test_cluster.close()
def test_grids_list_post(self):
    """Create a grid via POST, fit a search against it, then list grids.

    Expects 201 from the POST, and a subsequent GET of the list endpoint to
    return 200 with exactly one entry.
    """
    iris = load_iris()
    client = DjangoClient()

    response = client.post(
        reverse('grids_list'),
        data={'classifier': 'DecisionTreeClassifier'},
    )
    self.assertEqual(201, response.status_code)
    print(response.data)

    param_grid = {
        'criterion': ['gini', 'entropy'],
        'max_depth': range(1, 6),
        'max_features': ['auto', 'log2'],
    }
    # The search is tied to the grid just created through its uuid and
    # pointed at the live test server.
    gs = ATGridSearchCV(
        tree.DecisionTreeClassifier,
        param_grid,
        client_kwargs={'address': LocalCluster()},
        uuid=response.data.get('uuid', ''),
        webserver_url=self.live_server_url,
    )
    gs.fit(iris.data, iris.target)

    response = client.get(reverse('grids_list'))
    self.assertEqual(200, response.status_code)
    self.assertEqual(1, len(response.data))
@gen_cluster(client=True)
def _test_sha_max_iter(c, s, a, b):
# NOTE(review): `n` and `r` are not defined in this function; presumably they
# are supplied by an enclosing scope -- confirm.
model = SGDClassifier(tol=1e-3)
params = {"alpha": np.logspace(-3, 0, num=1000)}
search = SuccessiveHalvingSearchCV(
model, params, n_initial_parameters=n, n_initial_iter=r
)
X, y = make_classification()
yield search.fit(X, y, classes=np.unique(y))
# Distinct partial_fit call counts, with the value 1 discarded.
calls = set(search.cv_results_["partial_fit_calls"]) - {1}
# The smallest remaining call count must equal `r`, the value passed as
# n_initial_iter.
assert min(calls) == r
# One model trained to completion
assert (
search.cv_results_["partial_fit_calls"] == max(calls)
@gen_cluster(client=True, timeout=5000)
def test_min_max_iter(c, s, a, b):
    """Make sure Hyperband works with ``max_iter=1``.

    Tests for ``max_iter < 1`` are in test_incremental.py.
    """
    param_space = {"value": scipy.stats.uniform(0, 1)}
    X, y = make_classification(n_samples=10, n_features=4, chunks=10)

    h = HyperbandSearchCV(ConstantFunction(), param_space, max_iter=1)
    yield h.fit(X, y)

    assert h.best_score_ > 0
@gen_cluster(client=True)
def test_search_patience_infeasible_tol(c, s, a, b):
    """Fit IncrementalSearchCV with a negative ``tol`` and ``patience=3``.

    After fitting, the largest ``partial_fit_calls`` value recorded in the
    search history must equal ``max_iter``.
    """
    X, y = make_classification(n_samples=100, n_features=5, chunks=(10, 5))
    rng = check_random_state(42)
    max_iter = 10
    score_increase = -10

    search = IncrementalSearchCV(
        ConstantFunction(),
        {"value": rng.rand(1000)},
        max_iter=max_iter,
        patience=3,
        tol=score_increase,
        decay_rate=0,
    )
    yield search.fit(X, y, classes=[0, 1])

    history = pd.DataFrame(search.history_)
    most_calls = history.partial_fit_calls.max()
    assert most_calls == max_iter
@gen_cluster(client=True)
def test_search_patience_infeasible_tol(c, s, a, b):
    """Smoke test: fit SuccessiveHalvingSearchCV with ``tol=np.nan``.

    Nothing is asserted after the fit; the test passes as long as fitting
    with ``patience=2`` and a NaN tolerance completes without raising.
    """
    X, y = make_classification(n_samples=100, n_features=5)
    candidates = {"value": np.random.RandomState(42).rand(1000)}
    estimator = ConstantFunction()

    halving_options = dict(
        patience=2,
        tol=np.nan,
        n_initial_parameters=20,
        n_initial_iter=4,
        max_iter=1000,
    )
    search = SuccessiveHalvingSearchCV(estimator, candidates, **halving_options)

    yield search.fit(X, y, classes=[0, 1])
@gen_cluster(client=True)
def test_min_max_iter(c, s, a, b):
    """Fitting IncrementalSearchCV with ``max_iter=0`` must raise ValueError."""
    X, y = make_classification(n_samples=100, n_features=5, chunks=(10, 5))
    estimator = SGDClassifier()
    alphas = {"alpha": np.logspace(-3, 0)}
    search = IncrementalSearchCV(estimator, alphas, max_iter=0)

    expected_error = pytest.raises(
        ValueError, match="max_iter < 1 is not supported"
    )
    with expected_error:
        yield search.fit(X, y, classes=[0, 1])