# although the flow exists, we can be sure there are no
# setups (yet) as it hasn't been run
setup_id = openml.setups.setup_exists(flow)
self.assertFalse(setup_id)
# now run the flow on an easy task:
task = openml.tasks.get_task(115) # diabetes
run = openml.runs.run_flow_on_task(flow, task)
# spoof flow id, otherwise the sentinel is ignored
run.flow_id = flow.flow_id
run.publish()
TestBase._mark_entity_for_removal('run', run.run_id)
TestBase.logger.info("collected from {}: {}".format(__file__.split('/')[-1], run.run_id))
# download the run, as it contains the right setup id
run = openml.runs.get_run(run.run_id)
# execute the function we are interested in
setup_id = openml.setups.setup_exists(flow)
self.assertEqual(setup_id, run.setup_id)
def test_study_attach_illegal(self):
run_list = openml.runs.list_runs(size=10)
self.assertEqual(len(run_list), 10)
run_list_more = openml.runs.list_runs(size=20)
self.assertEqual(len(run_list_more), 20)
study = openml.study.create_study(
alias=None,
benchmark_suite=None,
name='study with illegal runs',
description='none',
run_ids=list(run_list.keys())
)
study.publish()
TestBase._mark_entity_for_removal('study', study.id)
TestBase.logger.info("collected from {}: {}".format(__file__.split('/')[-1], study.id))
study_original = openml.study.get_study(study.id)
with self.assertRaisesRegex(openml.exceptions.OpenMLServerException,
                            'Problem attaching entities.'):
    # attaching run ids that overlap with the already attached runs
    # should be rejected by the server
    openml.study.attach_to_study(study.id, list(run_list_more.keys()))
study_downloaded = openml.study.get_study(study.id)
self.assertListEqual(study_original.runs, study_downloaded.runs)
def test_openml_param_name_to_sklearn(self):
scaler = sklearn.preprocessing.StandardScaler(with_mean=False)
boosting = sklearn.ensemble.AdaBoostClassifier(
base_estimator=sklearn.tree.DecisionTreeClassifier())
model = sklearn.pipeline.Pipeline(steps=[
('scaler', scaler), ('boosting', boosting)])
flow = self.extension.model_to_flow(model)
task = openml.tasks.get_task(115)
run = openml.runs.run_flow_on_task(flow, task)
run = run.publish()
TestBase._mark_entity_for_removal('run', run.run_id)
TestBase.logger.info("collected from {}: {}".format(__file__.split('/')[-1], run.run_id))
run = openml.runs.get_run(run.run_id)
setup = openml.setups.get_setup(run.setup_id)
# make sure to test enough parameters
self.assertGreater(len(setup.parameters), 15)
for parameter in setup.parameters.values():
sklearn_name = self.extension._openml_param_name_to_sklearn(parameter, flow)
# test the inverse. Currently, OpenML stores the hyperparameter
# fullName as flow.name + flow.version + parameter.name on the
# server (but this behaviour is not documented and might or might
# not change in the future). Hence, we won't offer this
# transformation as part of the public API.
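# For context, a minimal sketch of the sklearn side of that mapping:
# pipeline parameter names follow the step__param convention, which is
# what _openml_param_name_to_sklearn recovers for the pipeline above.
pipeline_params = sorted(model.get_params().keys())
print(pipeline_params[:5])  # e.g. 'boosting__base_estimator__max_depth'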
def test_tagging(self):
runs = openml.runs.list_runs(size=1)
run_id = list(runs.keys())[0]
run = openml.runs.get_run(run_id)
tag = "testing_tag_{}_{}".format(self.id(), time())
run_list = openml.runs.list_runs(tag=tag)
self.assertEqual(len(run_list), 0)
run.push_tag(tag)
run_list = openml.runs.list_runs(tag=tag)
self.assertEqual(len(run_list), 1)
self.assertIn(run_id, run_list)
run.remove_tag(tag)
run_list = openml.runs.list_runs(tag=tag)
self.assertEqual(len(run_list), 0)
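# The same push_tag / remove_tag pattern applies to other entity types
# as well; a sketch for datasets (dataset id 61 is just an example):
dataset = openml.datasets.get_dataset(61)
dataset.push_tag(tag)
dataset.remove_tag(tag)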
def _remove_random_state(flow):
    if 'random_state' in flow.parameters:
        del flow.parameters['random_state']
    for component in flow.components.values():
        _remove_random_state(component)
flow = self.extension.model_to_flow(clf)
flow, _ = self._add_sentinel_to_flow_name(flow, sentinel)
if not openml.flows.flow_exists(flow.name, flow.external_version):
flow.publish()
TestBase._mark_entity_for_removal('flow', (flow.flow_id, flow.name))
TestBase.logger.info("collected from test_run_functions: {}".format(flow.flow_id))
task = openml.tasks.get_task(task_id)
X, y = task.get_X_and_y()
self.assertEqual(np.count_nonzero(np.isnan(X)), n_missing_vals)
run = openml.runs.run_flow_on_task(
flow=flow,
task=task,
seed=seed,
avoid_duplicate_runs=openml.config.avoid_duplicate_runs,
)
run_ = run.publish()
TestBase._mark_entity_for_removal('run', run.run_id)
TestBase.logger.info("collected from test_run_functions: {}".format(run.run_id))
self.assertEqual(run_, run)
self.assertIsInstance(run.dataset_id, int)
# This is only a smoke check right now
# TODO add a few asserts here
run._to_xml()
if run.trace is not None:
    # This is only a smoke check right now
    # TODO add a few asserts here
    run.trace.trace_to_arff()
for data_id in df["data_id"].values:
link = "<a href="\"https://www.openml.org/d/"">"
tick_text.append(link)
hover_text = []
if parameter == 'None':
color = [1] * 1000
hover_text = df["value"]
marker = dict(opacity=0.8, symbol='diamond',
color=color, # set color equal to a variable
colorscale='Jet')
print('None')
else:
color = []
for run_id in df.run_id[:1000]:
p = pd.DataFrame(runs.get_runs([run_id])[0].parameter_settings)
row = p[p['oml:name'] == parameter]
if row.empty:
color.append('0')
else:
color.append(row['oml:value'].values[0])
hover_text.append(row['oml:value'].values[0])
if color[0].isdigit():
print(color)
color = list(map(int, color))
else:
color = pd.DataFrame(color)[0].astype('category').cat.codes
marker = dict(opacity=0.8, symbol='diamond',
color=color, # set color equal to a variable
colorscale='Jet', colorbar=dict(title='Colorbar'))
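# A minimal sketch of feeding the pieces above into a plotly scatter.
# Plotting the parameter value per run id is an assumption about the
# intended chart, not taken from the original:
import plotly.graph_objs as go
trace = go.Scatter(
    x=list(df.run_id[:1000]),
    y=list(df["value"][:1000]),
    mode='markers',
    marker=marker,      # the marker dict built above
    text=hover_text,    # the hover labels built above
)
fig = go.Figure(data=[trace])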
############################################################################
# Runs: Easily explore models
# ^^^^^^^^^^^^^^^^^^^^^^^^^^^
# We can run (many) scikit-learn algorithms on (many) OpenML tasks.
# Get a task
task = openml.tasks.get_task(403)
# Build any classifier or pipeline
clf = tree.ExtraTreeClassifier()
# Create a flow
flow = openml.flows.sklearn_to_flow(clf)
# Run the flow
run = openml.runs.run_flow_on_task(flow, task)
# pprint(vars(run), depth=2)
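# The local run object already holds the predictions; a quick sanity
# check (sketch) is to score them locally before publishing:
import sklearn.metrics
scores = run.get_metric_fn(sklearn.metrics.accuracy_score)
print('Accuracy per repeat/fold: %s' % scores)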
############################################################################
# Share the run on the OpenML server
#
# So far, the run is only available locally. By calling the publish
# function, it is sent to the OpenML server:
myrun = run.publish()
# For this tutorial, our configuration publishes to the test server
# so as not to pollute the main server.
print("Uploaded to http://test.openml.org/r/" + str(myrun.run_id))
############################################################################
# We can now also inspect the flow object which was automatically created:
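# (minimal sketch) the flow built earlier can be inspected directly:
print(flow.name)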
model_original.set_params(**hyperparameters_original)
# solve the task and upload the result (this implicitly creates the flow)
run = openml.runs.run_model_on_task(
model_original,
task,
avoid_duplicate_runs=False)
run_original = run.publish() # this implicitly uploads the flow
###############################################################################
# 2) Download the flow and solve the same task again.
###############################################################################
# obtain setup id (note that the setup id is assigned by the OpenML server -
# therefore it was not yet available in our local copy of the run)
run_downloaded = openml.runs.get_run(run_original.run_id)
setup_id = run_downloaded.setup_id
# after this, we can easily reinstantiate the model
model_duplicate = openml.setups.initialize_model(setup_id)
# it will automatically have all the hyperparameters set
# and run the task again
run_duplicate = openml.runs.run_model_on_task(
model_duplicate, task, avoid_duplicate_runs=False)
###############################################################################
# 3) We will verify that the obtained results are exactly the same.
###############################################################################
# the run has stored all predictions in the field data content
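# A typical equality check (sketch, assuming numpy is imported as np):
np.testing.assert_array_equal(run_original.data_content,
                              run_duplicate.data_content)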
"""
if task_ids is not None:
for task_id in task_ids:
tasks.functions.get_task(task_id)
if dataset_ids is not None:
for dataset_id in dataset_ids:
datasets.functions.get_dataset(dataset_id)
if flow_ids is not None:
for flow_id in flow_ids:
flows.functions.get_flow(flow_id)
if run_ids is not None:
for run_id in run_ids:
runs.functions.get_run(run_id)
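# Example usage (sketch; the ids below are arbitrary examples): warm the
# cache up front so later calls can run offline or in parallel.
populate_cache(task_ids=[115], dataset_ids=[61])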
# * Add the line **cachedir = 'MYDIR'** to the config file, replacing 'MYDIR' with the path to the cache directory. By default, OpenML will use **~/.openml/cache** as the cache directory.
# * Run the code below, replacing 'YOURDIR' with the path to the cache directory.
import os
# Uncomment and set your OpenML cache directory
# openml.config.cache_directory = os.path.expanduser('YOURDIR')
############################################################################
# Simple Example
# ^^^^^^^^^^^^^^
# Download the OpenML task for the eeg-eye-state dataset.
task = openml.tasks.get_task(403)
data = openml.datasets.get_dataset(task.dataset_id)
clf = neighbors.KNeighborsClassifier(n_neighbors=5)
flow = openml.flows.sklearn_to_flow(clf)
run = openml.runs.run_flow_on_task(flow, task, avoid_duplicate_runs=False)
# Publish the experiment on OpenML (optional, requires an API key).
# For this tutorial, our configuration publishes to the test server
# so as not to pollute the main server.
myrun = run.publish()
print("kNN on %s: http://test.openml.org/r/%d" % (data.name, myrun.run_id))